k-Nearest Neighbors sometimes gets a bad reputation for being too simple. So I implemented it in as few lines as possible.
Download, convert names to integer class labels, and randomly split data into training and test sets.
wget https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data
sed -i 's/Iris-setosa/1/g' iris.data
sed -i 's/Iris-versicolor/2/g' iris.data
sed -i 's/Iris-virginica/3/g' iris.data
sort -R iris.data > iris_random.data
head -n 100 iris_random.data > iris_train.data
tail -n 51 iris_random.data > iris_test.data
from statistics import mode
import numpy as np
from heapq import heappush, heappop
class Data:
def __init__(self, file_name):
Xy = np.loadtxt(file_name, delimiter=',', dtype=float)
self.X = np.delete(Xy, -1, axis=1)
self.y = Xy[:, -1]
self.x_mean, self.x_std = np.mean(self.X, axis=0), np.std(self.X, axis=0)
def normalize(self, mean, std):
self.X = np.divide(np.subtract(self.X, mean), std)
class KNN:
def __init__(self, TrainData, TestData, k):
self.train, self.test, self.k = TrainData, TestData, k
predicted = [self.classify(self.test.X[j], []) for j in range(self.test.X.shape[0])]
self.accuracy = (predicted == self.test.y).mean()
def classify(self, vector, heap):
[heappush(heap, (self.EuclideanDistance(self.train.X[i], vector), self.train.y[i])) for i in range(self.train.X.shape[0])]
return mode([heappop(heap)[1] for _ in range(self.k)])
def EuclideanDistance(self, Q, P):
return np.sqrt(np.sum(np.square(np.subtract(Q, P))))
train = Data('iris_train.data')
train.normalize(train.x_mean, train.x_std)
validation = Data('iris_test.data')
validation.normalize(train.x_mean, train.x_std)
for k in [1, 3, 5, 7, 9, 11, 13, 15]:
neighbors = KNN(train, validation, k)
print('{0}-NN Accuracy: {1}'.format(k, neighbors.accuracy))
Sample run:
1-NN Accuracy: 0.94
3-NN Accuracy: 0.98
5-NN Accuracy: 0.96
7-NN Accuracy: 0.96
9-NN Accuracy: 0.98
11-NN Accuracy: 0.98
13-NN Accuracy: 0.98
15-NN Accuracy: 0.96
Attribution:
The voronoi diagram in the teaser image was generated using Mathias Westerdahl’s voronoi implementation.