对比代码来自GitHub
"""Perceptron trained with stochastic gradient descent, demonstrated on iris.

Sepal length and sepal width are used as the two features to separate iris
class 0 from class 1 (the first 100 rows of the dataset).  Three
implementations are available for comparison:

* ``MyPerception`` -- a hand-written perceptron,
* ``GitHubCode``   -- a reference implementation taken from GitHub,
* ``sklearn.linear_model.Perceptron`` -- used by ``main()`` for the final plot.
"""
from typing import Optional

import numpy as np
import pandas as pd

# NOTE: scikit-learn and matplotlib are imported inside the functions that use
# them so that the two perceptron classes can be imported (and tested) in an
# environment that only has NumPy/pandas installed.


def load_binary_iris():
    """Load the two-class, two-feature iris subset used by this script.

    Returns:
        A tuple ``(data, X, y)`` where ``data`` is a ``(100, 3)`` array holding
        sepal length, sepal width and the original 0/1 label, ``X`` is the
        ``(100, 2)`` feature matrix and ``y`` holds the perceptron labels
        (+1 for dataset label 1, -1 for dataset label 0).
    """
    from sklearn.datasets import load_iris

    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)
    df['label'] = iris.target
    # The original column names carry a ' (cm)' suffix; use short names.
    df.columns = ['sepal length', 'sepal width',
                  'petal length', 'petal width', 'label']

    # Rows 0-49 are class 0 and rows 50-99 are class 1.
    data = np.array(df.iloc[:100, [0, 1, -1]])
    X, y = data[:, :-1], data[:, -1]
    # The perceptron expects labels in {+1, -1}: the positive class is +1 and
    # the negative class (dataset label 0) is -1.
    y = np.where(y == 1, 1, -1)
    return data, X, y


class MyPerception:
    """Hand-written perceptron optimised with stochastic gradient descent."""

    def __init__(self, feature_dim: int, data_size: int):
        """Create an untrained model.

        Args:
            feature_dim: Number of features per sample.
            data_size: Number of training samples that ``train`` iterates over.
        """
        self.feature_dim = feature_dim
        self.data_size = data_size
        self.w = np.zeros(feature_dim)
        self.b = 0
        # Per-sample flag: 0 = misclassified, 1 = correctly classified.
        # Every sample starts out marked as misclassified.
        self.True_List = np.zeros(data_size)
        self.alpha = 1  # step size (learning rate)

    def train(self, X, y, max_epochs: Optional[int] = None) -> None:
        """Fit ``w`` and ``b`` and print them when training stops.

        The loss is the sum of the distances from the misclassified points to
        the hyperplane.  The norm of ``w`` in the denominator is dropped
        because the optimum drives the numerator to zero anyway:

            L(w, b) = -sum(y_i * (w @ x_i + b))  over y_i * (w @ x_i + b) < 0

        ``w`` and ``b`` are updated for every misclassified sample until a full
        pass over the data finds no misclassified sample.

        Args:
            X: Feature matrix; only the first ``data_size`` rows are used.
            y: Labels in {+1, -1}, aligned with ``X``.
            max_epochs: Optional cap on the number of passes over the data.
                ``None`` (the default) trains until convergence, which never
                terminates if the data are not linearly separable.
        """
        # Start from "everything misclassified" so that calling train() again
        # re-checks the data instead of returning immediately.
        self.True_List[:] = 0
        epochs = 0
        while self.True_List.sum() != self.data_size:
            if max_epochs is not None and epochs >= max_epochs:
                break
            epochs += 1
            for index in range(self.data_size):
                # A score of exactly zero counts as misclassified: the sample
                # lies on the hyperplane, which is not an acceptable fit.
                if y[index] * (self.w @ X[index] + self.b) <= 0:
                    self.True_List[index] = 0
                    self.w = self.w + self.alpha * y[index] * X[index]
                    self.b = self.b + self.alpha * y[index]
                else:
                    self.True_List[index] = 1
        print("w={},b={}".format(self.w, self.b))

    def predict(self, data) -> int:
        """Classify a single sample.

        Args:
            data: Feature vector of length ``feature_dim``.

        Returns:
            ``1`` for the positive class, ``-1`` for the negative class
            (dataset label 0).  A sample lying exactly on the hyperplane is
            assigned to the negative class instead of returning ``None``.
        """
        score = self.w @ data + self.b
        return 1 if score > 0 else -1


class GitHubCode:
    """Reference perceptron implementation taken from GitHub."""

    def __init__(self, feature_dim: int = 2):
        """Create an untrained model.

        Args:
            feature_dim: Number of features per sample.  Defaults to 2, the
                feature count of the iris subset this script works with (the
                original code derived it from a module-level ``data`` array).
        """
        self.w = np.ones(feature_dim, dtype=np.float32)
        self.b = 0
        self.l_rate = 0.1

    def sign(self, x, w, b):
        """Return the raw score ``x @ w + b``.

        Despite its name this does not apply the sign function; callers
        compare the product of the label and this score against zero.
        """
        return np.dot(x, w) + b

    def fit(self, X_train, y_train, max_epochs: Optional[int] = None) -> str:
        """Train with stochastic gradient descent.

        Args:
            X_train: Feature matrix, one sample per row.
            y_train: Labels in {+1, -1}, aligned with ``X_train``.
            max_epochs: Optional cap on the number of passes over the data.
                ``None`` (the default) trains until a pass makes no update,
                which never terminates on non-separable data.

        Returns:
            The constant string ``'Perceptron Model!'``.
        """
        epochs = 0
        converged = False
        while not converged:
            if max_epochs is not None and epochs >= max_epochs:
                break
            epochs += 1
            wrong_count = 0
            for sample, label in zip(X_train, y_train):
                if label * self.sign(sample, self.w, self.b) <= 0:
                    self.w = self.w + self.l_rate * label * sample
                    self.b = self.b + self.l_rate * label
                    wrong_count += 1
            converged = wrong_count == 0
        return 'Perceptron Model!'

    def score(self):
        """Placeholder kept from the reference code; not implemented."""
        pass


def plot_raw_data(data) -> None:
    """Scatter-plot the two classes (optional experiment, not run by main).

    Args:
        data: Array as returned by ``load_binary_iris``; rows 0-49 are class 0
            and rows 50-99 are class 1.
    """
    import matplotlib.pyplot as plt

    plt.scatter(data[:50, 0], data[:50, 1], label='0')
    plt.scatter(data[50:100, 0], data[50:100, 1], label='1')
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.legend()
    plt.show()


def evaluate_my_perception(X, y, test_size: float = 0.4) -> float:
    """Train ``MyPerception`` on a random split and print its test accuracy.

    Optional experiment, not run by ``main``.

    Args:
        X: Feature matrix.
        y: Labels in {+1, -1}.
        test_size: Fraction of the samples held out for testing.

    Returns:
        The fraction of held-out samples that were classified correctly.
    """
    from sklearn.model_selection import train_test_split

    train_X, test_X, train_y, test_y = train_test_split(
        X, y, test_size=test_size)
    print(train_X.shape, train_y.shape)

    model = MyPerception(train_X.shape[1], len(train_X))
    model.train(train_X, train_y)

    correct = sum(
        1 for sample, label in zip(test_X, test_y)
        if label == model.predict(sample)
    )
    accuracy = correct / len(test_X)
    print("Myperception准确率={}".format(accuracy))
    return accuracy


def plot_my_perception_boundary(data, X, y) -> None:
    """Train ``MyPerception`` on all samples and plot its decision boundary.

    Optional experiment, not run by ``main``.

    Args:
        data: Array as returned by ``load_binary_iris``.
        X: Feature matrix.
        y: Labels in {+1, -1}.
    """
    import matplotlib.pyplot as plt

    perceptron = MyPerception(X.shape[1], len(X))
    perceptron.train(X, y)

    x_points = np.linspace(4, 7, 10)
    # Boundary: w0 * x + w1 * y + b = 0, solved for y.
    y_ = -(perceptron.w[0] * x_points + perceptron.b) / perceptron.w[1]
    plt.plot(x_points, y_)
    plt.plot(data[:50, 0], data[:50, 1], 'o', color='blue', label='0')
    plt.plot(data[50:100, 0], data[50:100, 1], 'o', color='orange', label='1')
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.legend()
    plt.show()


def main() -> None:
    """Fit scikit-learn's Perceptron on the iris subset and plot the result."""
    import matplotlib.pyplot as plt
    from sklearn.linear_model import Perceptron

    data, X, y = load_binary_iris()

    # tol=None disables early stopping; with a tolerance set, training may
    # stop while some points are still misclassified.
    clf = Perceptron(fit_intercept=True,
                     max_iter=1000,
                     tol=None,
                     shuffle=True)
    clf.fit(X, y)

    # Canvas size.
    plt.figure(figsize=(10, 10))

    # Font settings so the Chinese title and minus signs render correctly.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.title('鸢尾花线性数据示例')

    plt.scatter(data[:50, 0], data[:50, 1], c='b', label='Iris-setosa')
    plt.scatter(data[50:100, 0], data[50:100, 1],
                c='orange', label='Iris-versicolor')

    # Decision boundary of the fitted perceptron, solved for the y coordinate.
    x_points = np.arange(4, 8)
    y_ = -(clf.coef_[0][0] * x_points + clf.intercept_) / clf.coef_[0][1]
    plt.plot(x_points, y_)

    plt.grid(False)  # no grid
    plt.xlabel('sepal length')
    plt.ylabel('sepal width')
    plt.legend()  # show the legend
    plt.show()


if __name__ == "__main__":
    main()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)