First, without using sklearn:
import numpy as np
from collections import Counter

class KNN():
    def __init__(self, X_train, Y_train, n_neighbors=3, p=2):
        self.n = n_neighbors  # number of neighbors, default 3
        self.p = p            # order of the norm; p=2 is Euclidean distance
        self.X_train = X_train
        self.Y_train = Y_train

    def prediction(self, X):
        # Seed knn_list with the first n training points:
        # (distance from the query point X to the training point, its label)
        knn_list = []
        for i in range(self.n):
            # linalg = linear + algebra; norm computes the vector norm
            dist = np.linalg.norm(X - self.X_train[i], ord=self.p)
            knn_list.append((dist, self.Y_train[i]))
        # Scan the remaining training points, replacing the current farthest
        # entry whenever a closer point is found, so knn_list always holds
        # the n nearest points seen so far
        for i in range(self.n, len(self.X_train)):
            max_index = knn_list.index(max(knn_list, key=lambda x: x[0]))
            dist = np.linalg.norm(X - self.X_train[i], ord=self.p)
            if knn_list[max_index][0] > dist:
                knn_list[max_index] = (dist, self.Y_train[i])
        # Majority vote: the most common label among the n neighbors wins
        knn = [k[-1] for k in knn_list]
        count1 = Counter(knn)
        max_count = sorted(count1.items(), key=lambda x: x[1])[-1][0]
        return max_count

    def score(self, X_test, Y_test):
        right_counts = 0
        for X, Y in zip(X_test, Y_test):
            y_pre = self.prediction(X)  # predicted class
            if y_pre == Y:
                right_counts += 1
        return right_counts / len(X_test)
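As an aside, ord=p in np.linalg.norm selects the Minkowski L_p norm, so p=2 gives Euclidean distance and p=1 gives Manhattan distance. A minimal sketch with two made-up points:

import numpy as np

a = np.array([1.0, 1.0])
b = np.array([4.0, 5.0])

print(np.linalg.norm(a - b, ord=2))  # Euclidean: sqrt(3**2 + 4**2) = 5.0
print(np.linalg.norm(a - b, ord=1))  # Manhattan: |3| + |4| = 7.0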
sklearn is still simpler:
from sklearn.neighbors import KNeighborsClassifier

clf_sk = KNeighborsClassifier()
clf_sk.fit(X_train, Y_train)
XD
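One caveat when comparing the two: KNeighborsClassifier defaults to n_neighbors=5, while the hand-rolled KNN above defaults to 3. A sketch of a like-for-like comparison (assuming X_train, Y_train, X_test, Y_test are prepared as in the full code below):

from sklearn.neighbors import KNeighborsClassifier

# n_neighbors=3 and p=2 mirror the hand-rolled model's defaults
clf_sk = KNeighborsClassifier(n_neighbors=3, p=2)
clf_sk.fit(X_train, Y_train)
print(clf_sk.score(X_test, Y_test))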
Full code:
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
X_train, X_test, Y_train, Y_test = train_test_split(iris.data, iris.target, test_size=0.2, shuffle=True)
#print(X_train[:50][:,0])
"""plt.scatter(X_train[:50][:,0], X_train[:50][:,1], label=0)
plt.scatter(X_train[50:100][:,0], X_train[50:100][:,1], label=1)
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend()
plt.show()
"""
# Keep only the first two features (sepal length and sepal width)
X_train = X_train[:][:, 0:2]
Y_train = Y_train[:]
X_test = X_test[:20][:, 0:2]
Y_test = Y_test[:20]
#print(np.shape(X_train))
#print(np.shape(Y_train))

class KNN():
    def __init__(self, X_train, Y_train, n_neighbors=3, p=2):
        self.n = n_neighbors  # number of neighbors, default 3
        self.p = p            # order of the norm; p=2 is Euclidean distance
        self.X_train = X_train
        self.Y_train = Y_train

    def prediction(self, X):
        # Seed knn_list with the first n training points:
        # (distance from the query point X to the training point, its label)
        knn_list = []
        for i in range(self.n):
            # linalg = linear + algebra; norm computes the vector norm
            dist = np.linalg.norm(X - self.X_train[i], ord=self.p)
            knn_list.append((dist, self.Y_train[i]))
        # Scan the remaining training points, replacing the current farthest
        # entry whenever a closer point is found, so knn_list always holds
        # the n nearest points seen so far
        for i in range(self.n, len(self.X_train)):
            max_index = knn_list.index(max(knn_list, key=lambda x: x[0]))
            dist = np.linalg.norm(X - self.X_train[i], ord=self.p)
            if knn_list[max_index][0] > dist:
                knn_list[max_index] = (dist, self.Y_train[i])
        # Majority vote: the most common label among the n neighbors wins
        knn = [k[-1] for k in knn_list]
        count1 = Counter(knn)
        max_count = sorted(count1.items(), key=lambda x: x[1])[-1][0]
        return max_count

    def score(self, X_test, Y_test):
        right_counts = 0
        for X, Y in zip(X_test, Y_test):
            y_pre = self.prediction(X)  # predicted class
            if y_pre == Y:
                right_counts += 1
        return right_counts / len(X_test)

Model = KNN(X_train, Y_train)
print(Model.score(X_test, Y_test))

# ------------------------------------------------------------------
clf_sk = KNeighborsClassifier()
clf_sk.fit(X_train, Y_train)
print(clf_sk.score(X_test, Y_test))
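Incidentally, the "replace the current farthest neighbor" loop and the sorted-Counter vote can be collapsed with the standard library. The prediction_heap method below is a hypothetical drop-in replacement for KNN.prediction, not part of the original code:

import heapq
import numpy as np
from collections import Counter

def prediction_heap(self, X):
    # Heap-based selection of the n smallest (distance, label) pairs
    neighbors = heapq.nsmallest(
        self.n,
        ((np.linalg.norm(X - x, ord=self.p), y)
         for x, y in zip(self.X_train, self.Y_train)),
        key=lambda t: t[0],
    )
    # Majority vote over the neighbor labels
    return Counter(y for _, y in neighbors).most_common(1)[0][0]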
Code adapted from: https://github.com/fengdu78/lihang-code