knn算法实现对鸢尾花数据集的分类
import pandas as pd import numpy as np # 求每一个训练集数据对比测试集的距离,返回一个list def distance(h): list_a = [] for i in range(len(test__value)): A = np.array(train__value[i, 0:3]) B = np.array(test__value[h, 0:3]) a = np.sqrt(sum(np.power((A - B), 2))) list_a.append(a) return list_a # 将标签添加到列表 def creat_biaoqian(list=[]): list__a = [] group = list lable = train__value[:, 4] for i in range(len(group)): t = (group[i], lable[i]) list__a.append(t) # rint(lable[i]) return list__a # 判断距离哪一个lable更近,确定分类 def max__lable(a=[]): lables = {"1": 0, "2": 0, "3": 0} for i in range(len(a)): if a[i][1] == 1: lables["1"] = lables["1"] + 1 if a[i][1] == 2: lables["2"] = lables["2"] + 1 if a[i][1] == 3: lables["3"] = lables["3"] + 1 lable = sorted(lables.items(), key=lambda d: d[1], reverse=True) # print(lable) return lable[0][0] test__data = pd.read_csv("iristest.csv", header=None) train__data = pd.read_csv("iristrain.csv", header=None) # print(test__data) # print(train__data) # 读取需要的数据 test__value = test__data.values train__value = test__data.values lable_test = test__value[:, 4] la = lable_test.tolist() count__True = 0 for i in range(len(test__value)): List_test = [] List_test = distance(i) # 把生成的列表进行小到大的排序 在默认情况下sort和sorted函数接收的参数是元组时,它将会先按元组的第一个元素进行排序再按第二个元素进行排序,再按第三个、第四个…依次排序。 a = creat_biaoqian(List_test) # List_test.sort() a.sort() # print(a) # print(len(List_test)) # 3)选取距离最小的K个点 K = 7 List_a = a[:K] # # print(a) max = float(max__lable(List_a)) # print("出现次数最多的标签是 %f"%max) # print(la[i]) if max == la[i]: # print("第{}个数据预测结果是{},与测试集结果相同".format(i+1 , max)) count__True += 1 else: print("第{}数据测出来是{},测试集的标签是{}".format(i + 1,max,la[i])) Ture__sb = count__True / len(test__value) # print(count__True) print("当k值取{}时精度是: {:.2%}".format(K, Ture__sb))
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)