1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
| import numpy as np from collections import Counter
from sklearn import datasets from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score from sklearn.neighbors import KNeighborsClassifier
class KNNClassifier(object):
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    KNN has no real training phase: ``fit`` only stores the training data,
    and ``predict`` compares every query sample against every stored sample.
    """

    def __init__(self, k=3):
        """Create a classifier.

        k: number of neighbours that take part in the vote; an integer >= 1
           (default 3). An odd value is preferable because it makes voting
           ties less likely.

        Raises AssertionError if k is not an integer or is smaller than 1.
        """
        self._require(
            isinstance(k, (int, np.integer)) and k >= 1,
            "k must be integer and larger than 0",
        )
        self.k = k
        self.X = None
        self.y = None

    @staticmethod
    def _require(condition, message):
        """Raise AssertionError(message) unless condition is true.

        The error is raised explicitly instead of through an ``assert``
        statement so that validation still runs under ``python -O``, while
        callers that already catch AssertionError keep working.
        """
        if not condition:
            raise AssertionError(message)

    def fit(self, X, y):
        """Store the training data.

        X: training samples, array-like with one sample per row.
        y: training labels, array-like with one label per sample.

        Raises AssertionError if X and y hold a different number of samples
        or if k exceeds the number of training samples.
        """
        # Accept plain lists as well as arrays; an ndarray is passed through
        # without copying.
        X = np.asarray(X)
        y = np.asarray(y)
        self._require(
            X.ndim >= 1 and y.ndim >= 1 and X.shape[0] == y.shape[0],
            "the shape of X and y must be match",
        )
        self._require(
            self.k <= X.shape[0],
            "k must be smaller than shape of X",
        )
        self.X = X
        self.y = y

    def _calc_euclidean_distance(self, array1, array2):
        """Return the Euclidean distance between two samples.

        array1: first sample, array-like.
        array2: second sample, array-like of the same shape.
        """
        return np.linalg.norm(np.asarray(array1) - np.asarray(array2))

    def _vote(self, topk_y):
        """Return the majority label among the k selected labels.

        topk_y: labels of the k nearest neighbours, ordered nearest first.

        When several labels share the highest count, the one that appears
        first in topk_y wins.
        """
        return Counter(topk_y).most_common(1)[0][0]

    def predict(self, x):
        """Predict a label for every sample in x.

        For each query sample the distance to all training samples is
        computed, the k closest ones are selected and their labels vote.

        x: query samples, array-like with one sample per row; each sample
           must have the same shape as a training sample.

        Returns an array with one predicted label per query sample.

        Raises AssertionError if called before ``fit`` or if the sample
        shape does not match the training data.
        """
        self._require(
            self.X is not None and self.y is not None,
            "must training before predict",
        )
        x = np.asarray(x)
        self._require(
            x.ndim >= 2 and x.shape[1:] == self.X.shape[1:],
            "x must have the same feature shape as the training samples",
        )

        n_train = self.X.shape[0]
        y_pred = []
        for sample in x:
            # One vectorised pass over the whole training set. Flattening
            # each difference keeps the distance a plain Euclidean norm even
            # when a sample has more than one axis.
            diffs = (self.X - sample).reshape(n_train, -1)
            distances = np.linalg.norm(diffs, axis=1)
            # A stable sort makes equidistant neighbours resolve to the
            # lowest training index, so predictions are deterministic.
            topk_index = np.argsort(distances, kind="stable")[:self.k]
            y_pred.append(self._vote(self.y[topk_index]))
        return np.array(y_pred)
# Demo: compare the hand-written KNN with scikit-learn's implementation on
# the iris data set.

# Load the data and hold out a test split; the fixed random_state keeps the
# split reproducible between runs.
iris = datasets.load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=888)

# Hand-written classifier with 3 neighbours.
knn_me = KNNClassifier(k=3)
knn_me.fit(X_train, y_train)
y_predict_me = knn_me.predict(X_test)
accuracy_me = accuracy_score(y_test, y_predict_me)
print("The accuracy of my knn is {}.".format(accuracy_me))

# Reference classifier from scikit-learn with the same number of neighbours.
knn_std = KNeighborsClassifier(n_neighbors=3)
knn_std.fit(X_train, y_train)
y_predict_std = knn_std.predict(X_test)
accuracy_std = accuracy_score(y_test, y_predict_std)
print("The accuracy of sklearn knn is {}.".format(accuracy_std))
|