Machine Learning: Supervised Learning


Linear Regression

Single-variable implementation from scratch (supply your own dataset)

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Model (hypothesis) function
def model(x, theta):
    h = np.dot(x, theta)
    return h

# Cost function
def cost(h, y):
    m = len(y)
    J = 1 / (2 * m) * ((h - y).T.dot(h - y))
    return J

# Gradient descent
def gradient_descent(x, y, alpha, nums):
    m, n = x.shape
    theta = np.zeros(n)
    J = np.zeros(nums)
    for i in range(nums):
        h = model(x, theta)
        J[i] = cost(h, y)
        deltatheta = (1 / m) * x.T.dot(h - y)
        theta = theta - alpha * deltatheta

    return theta, h, J

if __name__ == '__main__':
    data = np.loadtxt(r'ex1data1.txt', delimiter=',')
    # print(data)
    x = data[:, :-1]
    y = data[:, -1]
    xx = np.c_[np.ones(len(x)), x]  # prepend a bias column of ones
    theta, h, J = gradient_descent(xx, y, 0.001, 15000)
    plt.subplot(1, 2, 1)
    # plt.xlim(4.5, 23)
    plt.scatter(x, y)   # training samples
    plt.plot(x, h)      # fitted line
    plt.subplot(1, 2, 2)
    plt.plot(J)         # cost per iteration
    plt.show()

    print('Best theta:', theta)
    print('Final cost:', J[-1])
    print('Model predictions:', h)


Multi-variable implementation from scratch

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Model (hypothesis) function
def model(x, theta):
    h = np.dot(x, theta)
    return h

# Cost function
def cost(h, y):
    m = len(y)
    J = 1 / (2 * m) * ((h - y).T.dot(h - y))
    return J

# Normal equation: theta = (X^T X)^(-1) X^T y
def normal_equation(x, y):
    theta = np.dot(np.linalg.inv(x.T.dot(x)), x.T.dot(y))

    return theta

# Min-max feature scaling
def min_max_scale(x):
    minX = np.min(x, axis=0)
    maxX = np.max(x, axis=0)
    return (x - minX) / (maxX - minX)

if __name__ == '__main__':
    data = np.loadtxt(r'ex1data2.txt', delimiter=',')

    x = data[:, :-1]
    y = data[:, -1:]
    print('x =\n', x)
    print('y =\n', y)

    # Feature scaling
    # Min-max normalization
    X = min_max_scale(x)

    # # Standardization (z-score)
    # meanX = np.mean(x, axis=0)
    # sigmaX = np.std(x, axis=0, ddof=1)
    # x = (x - meanX) / sigmaX

    # Prepend a bias column of ones
    xx = np.c_[np.ones(len(x)), X]
    # Train/test split
    # m = len(x)
    # trainNum = int(m * 0.7)
    # trainX = X[:trainNum, :]
    # testX = X[trainNum:, :]
    # trainY = y[:trainNum, :]
    # testY = y[trainNum:, :]

    theta = normal_equation(xx, y)
    H = model(xx, theta)

    plt.plot(y, y, c='b')      # reference line: prediction == truth
    plt.scatter(y, H, c='r')   # predicted vs. actual values
    plt.show()

Quick implementation with scikit-learn

from sklearn.datasets import load_boston  # deprecated; removed in scikit-learn 1.2 (see the note below)
from sklearn.linear_model import LinearRegression
import numpy as np
'''
1. Feature scaling
2. Assemble the dataset
3. Split the data
4. Call the library
   4.1 Train the model
5. Output accuracy, weights, intercept, predictions
'''
def min_max_scale(x):
    minX = np.min(x, axis=0)
    maxX = np.max(x, axis=0)
    return (x - minX) / (maxX - minX)

if __name__ == '__main__':
    data = load_boston()
    # print(data)

    x = data.data
    y = data.target

    print(x)
    # print(y)
    m, n = x.shape
    XX = np.c_[np.ones(m), min_max_scale(x)]  # scaled features plus a bias column
    # (XX is unused below: LinearRegression fits its own intercept on the raw features)
    # train = int(len(x) * 0.7)
    # trainX = x[:train, :]
    # trainY = y[:train]
    # testX = x[train:, :]
    # testY = y[train:]

    model = LinearRegression()
    model.fit(x, y)
    print('score =', model.score(x, y))
    print('Weights:', model.coef_)
    print('Intercept:', model.intercept_)
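
Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2. On a recent scikit-learn, a drop-in substitute with the same .data/.target interface is the California housing dataset (a minimal sketch, assuming internet access for the first download):

from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression

data = fetch_california_housing()  # downloads and caches the data on first use
model = LinearRegression()
model.fit(data.data, data.target)
print('score =', model.score(data.data, data.target))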

Polynomial Regression

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

x=[1.5,0.8,2.6,1.0,0.6,2.8,1.2,0.9,0.4,1.3,1.2,2.0,1.6,1.8,2.2]
y=[3.1,1.9,4.2,2.3,1.6,4.9,2.8,2.1,1.4,2.4,2.4,3.8,3.0,3.4,4.0]
x1=np.c_[x]
xx=np.c_[np.ones(len(x)),x]

'''Using the library'''
line = linear_model.LinearRegression()
line.fit(x1, y)

theta0 = line.intercept_  # intercept (theta0)
print('theta0 =', theta0)
theta1 = line.coef_       # slope (theta1)
print('theta1 =', theta1)

plt.scatter(x, y, c='r', label='samples')
plt.plot(x, line.predict(x1))  # fitted line
score = line.score(x1, y)
print('score =', score)
plt.show()

'''Normal equation'''
theta3 = np.linalg.inv(xx.T.dot(xx)).dot(xx.T).dot(y)
print('Normal-equation theta =', theta3)

'''Polynomial regression'''
nihe = np.polyfit(x, y, deg=2)  # least-squares polynomial fit; returns the coefficients
print('nihe =', nihe)
p = np.poly1d(nihe)  # build p(x) = theta0*x**2 + theta1*x + theta2 from the three coefficients
print('p(x) =\n', p(x))  # evaluate the polynomial at each x to get the predictions

plt.figure('Polynomial regression')
plt.title('prediction vs. sample')
plt.scatter(x, y, c='g', label='samples')
plt.scatter(x, p(x), c='r', label='predictions')
plt.legend(loc='best')
plt.show()
Logistic Regression from Scratch

import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')

data = np.loadtxt('ex2data1.txt', delimiter=',')  # load the data

# Split the data
x = data[:, 0:2]  # first two columns: features
y = data[:, -1]   # last column: labels

# Feature scaling (standardization)
meanx = np.mean(x, axis=0)  # per-column mean
sigma = np.std(x, axis=0)   # per-column standard deviation
xx = (x - meanx) / sigma

# Prepend a bias column of ones
xxx = np.c_[np.ones(len(x)), xx]

# Prediction (sigmoid) function
def sigmoid(x, theta):
    z = x.dot(theta)
    h = 1 / (1 + np.exp(-z))
    return h

# Cost function (cross-entropy)
def cost(h, y):
    J = -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))
    return J

# Gradient descent
def grad(x, y, nums, alpha):
    m, n = x.shape
    theta = np.zeros(n)
    J = np.zeros(nums)
    for i in range(nums):
        h = sigmoid(x, theta)
        J[i] = cost(h, y)
        deltatheta = (1 / m) * x.T.dot(h - y)
        theta = theta - alpha * deltatheta
    return theta, J, h

# Accuracy
def score(x, y, theta):
    h = sigmoid(x, theta)
    predict_y = np.where(h > 0.5, 1, 0)  # threshold the probabilities at 0.5
    count = np.sum(predict_y == y)
    accuracy = count / len(y)
    return count, accuracy


if __name__ == '__main__':
    theta, J, h = grad(xxx, y, 10000, 0.1)
    print(theta)

    plt.plot(J)
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.show()

    plt.scatter(xx[y == 0, 0], xx[y == 0, 1], c='b')
    plt.scatter(xx[y == 1, 0], xx[y == 1, 1], c='r')
    xx1min = xx[:, 0].min()  # minimum of the first feature
    xx1max = xx[:, 0].max()  # maximum of the first feature
    # On the decision boundary theta0 + theta1*x1 + theta2*x2 = 0, so x2 = -(theta0 + theta1*x1) / theta2
    xx2min = -(theta[0] + theta[1] * xx1min) / theta[2]
    xx2max = -(theta[0] + theta[1] * xx1max) / theta[2]

    plt.plot([xx1min, xx1max], [xx2min, xx2max], c='k')
    plt.show()

    print('(correct, accuracy) =', score(xxx, y, theta))

    # Predicting a new sample (note: theta was trained on standardized features,
    # so a raw sample must be scaled with meanx / sigma first):
    # x_new = (np.array([30, 43]) - meanx) / sigma
    # if 1 / (1 + np.exp(-(theta[0] + x_new[0] * theta[1] + x_new[1] * theta[2]))) > 0.5:
    #     print(1)
    # else:
    #     print(0)

Regularization

Introduce a regularization parameter λ that penalizes large weights to curb overfitting.
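
For linear regression, the L2 (ridge) penalty augments the cost as follows; conventionally the bias θ0 is left out of the penalty, though for simplicity the code below penalizes every component:

$$J(\theta)=\frac{1}{2m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)})-y^{(i)}\bigr)^2+\frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

$$\frac{\partial J}{\partial\theta_j}=\frac{1}{m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)})-y^{(i)}\bigr)x_j^{(i)}+\frac{\lambda}{m}\theta_j$$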

Single-variable regression with regularization

import numpy as np
import matplotlib.pyplot as plt

# Load the data
data = np.loadtxt('ex1data1.txt', delimiter=',')
# print('data.shape =', data.shape)

# Extract features and labels
x = data[:, 0]
y = data[:, -1]

# Feature scaling (standardization)
mu = np.mean(x)
sigma = np.std(x, ddof=1)
X = (x - mu) / sigma   # keep the original x intact for plotting

# Assemble the design matrix
m = X.shape[0]          # number of samples
X = np.c_[np.ones(m), X]

def cost(x, y, theta, lambd=0.1):
    m, f = x.shape
    h = np.dot(x, theta)
    R = lambd / (2 * m) * np.dot(theta.T, theta)  # L2 penalty (includes theta[0] for simplicity)
    J = 1 / (2 * m) * ((h - y).T.dot(h - y)) + R

    return J

def grad(x, y, alpha, nums, lambd=0.1):
    m, n = x.shape
    theta = np.zeros(n)
    J = np.zeros(nums)
    for i in range(nums):
        h = np.dot(x, theta)
        J[i] = cost(x, y, theta, lambd)
        deltatheta = 1 / m * x.T.dot(h - y) + lambd / m * theta
        theta -= alpha * deltatheta

    return theta, h, J

theta, h, J = grad(X, y, 0.01, 15000)
plt.plot(J)
plt.show()
plt.scatter(x, y)  # original samples
plt.plot(x, h)     # fitted line (h computed on the scaled features)
plt.show()
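
To see the penalty at work, rerun the training with a few values of λ and watch the fitted theta shrink (an illustrative sweep, not in the original):

for lam in (0, 1, 10, 100):
    theta, h, J = grad(X, y, 0.01, 15000, lambd=lam)
    print(f'lambda={lam}: theta={theta}, final cost={J[-1]:.4f}')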

Multi-variable regression with regularization

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')



# Cost function
def cost(x, y, theta, lambd=0.1):
    m = x.shape[0]
    h = np.dot(x, theta)
    R = lambd / (2 * m) * (theta.T.dot(theta))  # L2 penalty
    J = 1 / (2 * m) * ((h - y).T.dot(h - y)) + R
    return J

# Gradient descent
def grad(x, y, alpha, nums, lambd=0.1):
    m, n = x.shape  # m samples, n features
    theta = np.zeros(n)
    J = np.zeros(nums)  # cost of each iteration

    for i in range(nums):
        H = np.dot(x, theta)
        J[i] = cost(x, y, theta, lambd)  # record the cost of this iteration
        deltatheta = 1.0 / m * x.T.dot(H - y) + lambd / m * theta  # note: the penalty gradient is added, not subtracted
        theta -= alpha * deltatheta  # update theta

    return J, theta, H


if __name__ == '__main__':
    data = np.loadtxt(r'ex1data2.txt', delimiter=',')

    x = data[:, :-1]
    y = data[:, -1]

    # Feature scaling
    # Min-max normalization
    minX = np.min(x, axis=0)
    maxX = np.max(x, axis=0)
    X = (x - minX) / (maxX - minX)

    # # Standardization (z-score)
    # meanX = np.mean(x, axis=0)
    # sigmaX = np.std(x, axis=0, ddof=1)
    # x = (x - meanX) / sigmaX

    # Prepend a bias column of ones
    xx = np.c_[np.ones(len(x)), X]

    # Train/test split
    # m = len(x)
    # trainNum = int(m * 0.7)
    # trainX = X[:trainNum, :]
    # testX = X[trainNum:, :]
    # trainY = y[:trainNum]
    # testY = y[trainNum:]

    J, theta, H = grad(xx, y, alpha=0.01, nums=15000)

    plt.plot(y, y, c='b')      # reference line: prediction == truth
    plt.scatter(y, H, c='r')   # predicted vs. actual values
    plt.show()

Logistic regression with regularization

# 1. Imports
import numpy as np
import matplotlib.pyplot as plt
# 2. Load the dataset
data = np.loadtxt(r'ex2data2.txt', delimiter=',')
# 2.1 Preprocessing: shuffle, scale features (z-score), prepend bias, optional train/test split
x = data[:, :2]
y = data[:, 2]

xmean = np.mean(x, axis=0)
xstd = np.std(x, axis=0)
x = (x - xmean) / xstd

np.random.seed(4)
m1 = np.random.permutation(len(x))  # shuffle
x = x[m1]
y = y[m1]

x1 = np.c_[np.ones(len(x)), x]
# print(x1)
# trainx = x1[:int(len(x) * 0.7)]
# trainy = y[:int(len(x) * 0.7)]
# testx = x1[int(len(x) * 0.7):]
# testy = y[int(len(x) * 0.7):]
# 3. Model function
def sigmoid(x, theta):
    z = x.dot(theta)
    h = 1 / (1 + np.exp(-z))

    return h

# 4. Cost function
def cost(x, y, theta, lamda=0.01):
    m = x.shape[0]
    h = sigmoid(x, theta)
    R = lamda / (2 * m) * np.dot(theta.T, theta)  # L2 penalty
    J = -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h)) + R
    return J

# 5. Gradient descent
def grad(x, y, alpha, nums, lamda=0.01):
    m, n = x.shape
    theta = np.zeros(n)
    J = np.zeros(nums)
    for i in range(nums):
        h = sigmoid(x, theta)
        J[i] = cost(x, y, theta, lamda)
        deltatheta = 1 / m * x.T.dot(h - y) + lamda / m * theta
        theta = theta - alpha * deltatheta

    return theta, J, h

# 6. Accuracy
def score(x, y, theta):
    h = sigmoid(x, theta)
    count = np.sum(np.where(h > 0.5, 1, 0) == y)
    acc = count / len(y)

    return f'Accuracy: {acc * 100}%'
# 7. Plot the cost curve and the decision boundary
if __name__ == '__main__':
    theta, J, h = grad(x1, y, 0.01, 100000)
    plt.subplot(121)
    plt.plot(J)
    plt.subplot(122)
    plt.scatter(x[y == 0, 0], x[y == 0, 1])
    plt.scatter(x[y == 1, 0], x[y == 1, 1])
    x1max = x[:, 0].max()
    x1min = x[:, 0].min()
    x2max = x[:, 1].max()
    x2min = x[:, 1].min()
    # Sketch of a circular boundary (ex2data2's classes are separated by a closed curve):
    # xc = (x1max + x1min) / 2
    # yc = (x2max + x2min) / 2
    # r = np.min([xc - x1min, x1max - xc, x2max - yc, yc - x2min])
    # circle = plt.Circle((xc, yc), r - 0.5, color='y', fill=False)
    # plt.gca().add_artist(circle)
    # Linear decision boundary learned by the model: theta0 + theta1*x1 + theta2*x2 = 0
    # (these data are not linearly separable, so the line fits poorly; see the note below)
    b2min = -(theta[0] + theta[1] * x1min) / theta[2]
    b2max = -(theta[0] + theta[1] * x1max) / theta[2]
    plt.plot([x1min, x1max], [b2min, b2max])
    plt.show()
    # print(x1.shape)
    # print(y.shape)
    print(score(x1, y, theta))
    print(theta)
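
Because ex2data2's two classes are separated by a closed curve, the linear boundary above underfits regardless of λ. A common remedy, not part of the original code, is to expand the two features into polynomial terms and run the same regularized gradient descent on the expanded matrix; a minimal sketch, where map_feature and the degree are illustrative choices:

def map_feature(x1f, x2f, degree=6):
    # All monomials x1^i * x2^j with i + j <= degree, led by a bias column of ones
    cols = [np.ones(len(x1f))]
    for total in range(1, degree + 1):
        for j in range(total + 1):
            cols.append(x1f ** (total - j) * x2f ** j)
    return np.column_stack(cols)

# Usage with the arrays defined above:
# xp = map_feature(x[:, 0], x[:, 1])
# theta, J, h = grad(xp, y, 0.01, 100000)  # same grad as above; lamda now matters much more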



Neural Networks

No mathematical derivations here; the number of units in each hidden layer can be chosen freely (a generalized sketch follows the multi-hidden-layer example below).

Single hidden layer

import numpy as np
import matplotlib.pyplot as plt

# Watermelon dataset
X1 = [0.697,0.774,0.634,0.608,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719]
X2 = [0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103]
Y =  [1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0]


# Data prep (1 split, 2 concatenate, 3 feature scaling, 4 shuffle)
xx = np.c_[np.ones(len(X1)), X1, X2]
yy = np.c_[Y]
# Shuffle
order = np.random.permutation(len(X1))
xx1 = xx[order]
yy1 = yy[order]

'''
1. Load the data
2. Preprocess (split, concatenate, feature scaling, shuffle)
3. Write the activation / model function
4. Forward propagation
5. Cost function
6. Backpropagation (BP)
7. Gradient descent (iterate)
8. Report the results
'''

# Activation function; grad=True returns the derivative, given a = sigmoid(z)
def sigmoid(z, grad=False):
    if grad:
        return z * (1 - z)

    return 1 / (1 + np.exp(-z))

# Forward propagation
def FP(a1, theta1, theta2):
    # Hidden layer
    z2 = a1.dot(theta1)
    a2 = sigmoid(z2)

    # Output layer
    z3 = a2.dot(theta2)
    a3 = sigmoid(z3)

    return a2, a3

# Cost function (cross-entropy)
def cost(a3,y):
    J=-np.mean(y*np.log(a3)+(1-y)*np.log(1-a3))

    return J

# Backpropagation
def BP(a3, a2, a1, y, theta2, theta1, alpha):
    sigma3 = (a3 - y) / len(a1)                         # output-layer error (scaled by 1/m)
    sigma2 = sigma3.dot(theta2.T) * sigmoid(a2, True)   # hidden-layer error

    deltatheta2 = a2.T.dot(sigma3)
    deltatheta1 = a1.T.dot(sigma2)

    theta2 = theta2 - alpha * deltatheta2
    theta1 = theta1 - alpha * deltatheta1

    return theta1, theta2

# Gradient descent
def grad_descent(a1, y, alpha, nums, k):
    m, n = a1.shape
    J = np.zeros(nums)
    # Random (not zero) initialization in [-1, 1) breaks the symmetry between units
    np.random.seed(0)
    theta1 = 2 * np.random.rand(n, k) - 1
    theta2 = 2 * np.random.rand(k, 1) - 1
    for i in range(nums):
        a2, a3 = FP(a1, theta1, theta2)
        J[i] = cost(a3, y)
        theta1, theta2 = BP(a3, a2, a1, y, theta2, theta1, alpha)

    return theta1, theta2, J, a3

def score(a3, y):
    m, n = a3.shape
    pred = np.where(a3 > 0.5, 1, 0)  # threshold the output probabilities
    return np.sum(pred == y) / m

theta1, theta2, J, a3 = grad_descent(xx1, yy1, 0.03, 50000, 60)
plt.plot(J)
plt.show()
print(score(a3, yy1))

Multiple hidden layers

import numpy as np
import matplotlib.pyplot as plt

# Watermelon dataset
X1 = [0.697,0.774,0.634,0.608,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719]
X2 = [0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103]
Y =  [1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0]


# Data prep (1 split, 2 concatenate, 3 feature scaling, 4 shuffle)
xx = np.c_[np.ones(len(X1)), X1, X2]
yy = np.c_[Y]
# Shuffle
order = np.random.permutation(len(X1))
xx1 = xx[order]
yy1 = yy[order]

'''
1. Load the data
2. Preprocess (split, concatenate, feature scaling, shuffle)
3. Write the activation / model function
4. Forward propagation
5. Cost function
6. Backpropagation (BP)
7. Gradient descent (iterate)
8. Report the results
'''

# Activation function; grad=True returns the derivative, given a = sigmoid(z)
def sigmoid(z, grad=False):
    if grad:
        return z * (1 - z)

    return 1 / (1 + np.exp(-z))

# Forward propagation
def FP(a1, theta1, theta2, theta3, theta4):
    # Layer 1 -> 2
    z2 = a1.dot(theta1)
    a2 = sigmoid(z2)

    # Layer 2 -> 3
    z3 = a2.dot(theta2)
    a3 = sigmoid(z3)

    # Layer 3 -> 4
    z4 = a3.dot(theta3)
    a4 = sigmoid(z4)

    # Layer 4 -> 5 (output)
    z5 = a4.dot(theta4)
    a5 = sigmoid(z5)

    return a2, a3, a4, a5

# Cost function (cross-entropy)
def cost(a5,y):
    J=-np.mean(y*np.log(a5)+(1-y)*np.log(1-a5))

    return J

# Backpropagation
def BP(a5, a4, a3, a2, a1, y, theta4, theta3, theta2, theta1, alpha):
    sigma5 = (a5 - y) / len(a1)  # output-layer error (scaled by 1/m)
    sigma4 = sigma5.dot(theta4.T) * sigmoid(a4, True)
    sigma3 = sigma4.dot(theta3.T) * sigmoid(a3, True)
    sigma2 = sigma3.dot(theta2.T) * sigmoid(a2, True)

    deltatheta4 = a4.T.dot(sigma5)
    deltatheta3 = a3.T.dot(sigma4)
    deltatheta2 = a2.T.dot(sigma3)
    deltatheta1 = a1.T.dot(sigma2)

    theta4 = theta4 - alpha * deltatheta4
    theta3 = theta3 - alpha * deltatheta3
    theta2 = theta2 - alpha * deltatheta2
    theta1 = theta1 - alpha * deltatheta1

    return theta1, theta2, theta3, theta4

# Gradient descent
def grad_descent(a1, y, alpha, nums):
    m, n = a1.shape
    J = np.zeros(nums)
    # Random initialization breaks the symmetry between units
    # (with all-zero thetas, every unit in a layer would learn identical weights)
    np.random.seed(0)
    theta1 = 2 * np.random.rand(n, 4) - 1
    theta2 = 2 * np.random.rand(4, 4) - 1
    theta3 = 2 * np.random.rand(4, 3) - 1
    theta4 = 2 * np.random.rand(3, 1) - 1
    for i in range(nums):
        a2, a3, a4, a5 = FP(a1, theta1, theta2, theta3, theta4)
        J[i] = cost(a5, y)
        theta1, theta2, theta3, theta4 = BP(a5, a4, a3, a2, a1, y, theta4, theta3, theta2, theta1, alpha)

    return theta1, theta2, theta3, theta4, J, a5

def score(x, y, a5):
    pred = np.where(a5 > 0.5, 1, 0)  # threshold the output probabilities
    acc = np.sum(pred == y) / len(x)
    return f'Accuracy: {acc * 100}%'

theta1, theta2, theta3, theta4, J, a5 = grad_descent(xx1, yy1, 0.01, 100000)
plt.plot(J)
plt.show()
print(score(xx1, yy1, a5))
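
The theta shapes above hardcode hidden layers of 4, 4, and 3 units. As noted at the top of this section, the widths can be chosen freely; a minimal generalization (train_network is an illustrative name, reusing the sigmoid and cost defined above) keeps the weight matrices in a list so the architecture becomes a parameter:

def train_network(a1, y, alpha, nums, hidden=(4, 4, 3)):
    # One weight matrix per consecutive pair of layer sizes: input -> hidden layers -> 1 output
    sizes = [a1.shape[1], *hidden, 1]
    np.random.seed(0)
    thetas = [2 * np.random.rand(sizes[i], sizes[i + 1]) - 1
              for i in range(len(sizes) - 1)]
    J = np.zeros(nums)
    for it in range(nums):
        # Forward pass, keeping every activation for backpropagation
        acts = [a1]
        for th in thetas:
            acts.append(sigmoid(acts[-1].dot(th)))
        J[it] = cost(acts[-1], y)
        # Backward pass: sigma for the next layer down is computed from the
        # pre-update thetas[i], matching the hand-unrolled BP above
        sigma = (acts[-1] - y) / len(a1)
        for i in range(len(thetas) - 1, -1, -1):
            delta = acts[i].T.dot(sigma)
            if i > 0:
                sigma = sigma.dot(thetas[i].T) * sigmoid(acts[i], True)
            thetas[i] = thetas[i] - alpha * delta
    return thetas, J, acts[-1]

# train_network(xx1, yy1, 0.01, 100000) reproduces the four-matrix setup above;
# hidden=(8,) would give a single 8-unit hidden layer instead.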

Quick implementation with scikit-learn

import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LinearRegression,LogisticRegression

# Watermelon dataset
X1 = [0.697,0.774,0.634,0.608,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719]
X2 = [0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103]
Y =  [1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0]


# Data prep (1 split, 2 concatenate, 3 feature scaling, 4 shuffle)
xx = np.c_[np.ones(len(X1)), X1, X2]
yy = np.c_[Y]
# Shuffle
order = np.random.permutation(len(X1))
xx1 = xx[order]
yy1 = yy[order]

CLF = MLPClassifier()
CLF.fit(xx1, yy1.ravel())  # ravel() gives the 1-D label vector sklearn expects
CLF.predict(xx1)
print('Accuracy acc =', CLF.score(xx1, yy1.ravel()))
print('Predictions h =', CLF.predict(xx1))
print('Actual Y =', np.hstack(yy1))
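
MLPClassifier() above runs entirely on defaults (a single hidden layer of 100 ReLU units, the adam optimizer). To mirror the hand-written networks, the architecture can be passed explicitly; a sketch with assumed settings rather than tuned ones:

CLF = MLPClassifier(hidden_layer_sizes=(4, 4, 3),  # three hidden layers, as in the from-scratch net
                    activation='logistic',         # sigmoid activations
                    max_iter=10000)
CLF.fit(xx1, yy1.ravel())
print('Accuracy acc =', CLF.score(xx1, yy1.ravel()))
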
Support Vector Machines

import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets as dts
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Load the iris dataset
data=dts.load_iris()
# print(data)
X=data['data']
Y=data['target']
# print(X.shape)
# print(Y.shape)
x1=X[Y!=0,1:3]
y1=Y[Y!=0]
# Feature scaling (standardization)
xmean=np.mean(x1,axis=0)
xsg=np.std(x1,axis=0)
x2=(x1-xmean)/xsg

# Train/test split with shuffling
trainx,testx,trainy,testy=train_test_split(x2,y1,train_size=0.7,shuffle=True)
# print(trainx.shape)
# print(testx.shape)

pG = 0.5                    # RBF kernel coefficient gamma
model = SVC(C=3, gamma=pG)  # create the classifier
model.fit(trainx, trainy)   # train the model
print('Accuracy =', model.score(testx, testy))
testh = model.predict(testx)
print('testh =\n', testh)
print('testy =\n', testy)
print('==================================================')
print('Number of support vectors =\n', model.n_support_)
print('Indices of the support vectors =\n', model.support_)

# Plotting
# c=y1 colors the points by class
# model.support_vectors_ returns the support vectors themselves
s = model.support_vectors_
# edgecolors sets the point outline color; zorder=4 draws the points above the filled regions
plt.scatter(x2[:, 0], x2[:, 1], c=y1, edgecolors='y', s=30, zorder=4)
plt.scatter(s[:, 0], s[:, 1], c='b', edgecolors='y', s=50, zorder=4)  # highlight the support vectors

# Grid extent
minx1, maxx1, minx2, maxx2 = min(x2[:, 0]), max(x2[:, 0]), min(x2[:, 1]), max(x2[:, 1])
# Build a 200x200 grid of points (the imaginary step 200j means "200 points")
gri_XX, gri_YY = np.mgrid[minx1:maxx1:200j, minx2:maxx2:200j]

# Signed distance from every grid point to the separating hyperplane
z = model.decision_function(np.c_[gri_XX.ravel(), gri_YY.ravel()])  # 40,000 rows
z = z.reshape(gri_XX.shape)  # reshape 1-D back to the 2-D grid
print(z.shape)
# Fill the two decision regions; gri_XX/gri_YY are the grid coordinates, z > 0 picks the class
plt.contourf(gri_XX, gri_YY, z > 0, cmap='Accent')
# Draw the decision boundary and the two margins as contour lines of the decision function
plt.contour(gri_XX, gri_YY, z, levels=[-1, 0, 1], linestyles=['--', '-', '--'], colors=['r', 'k', 'b'])
# levels selects which contours of z to draw
# plt.title('C=%.1f, gamma=%.1f, variance=%.2f, accuracy=%.2f' % (3, pG, np.var(testh), model.score(testx, testy)))
plt.show()
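
The decision surface depends strongly on C and gamma; a quick illustrative sweep (not in the original) shows their effect on test accuracy:

for C in (0.1, 1, 3, 10):
    for gamma in (0.1, 0.5, 1):
        clf = SVC(C=C, gamma=gamma).fit(trainx, trainy)
        print(f'C={C}, gamma={gamma}: test accuracy = {clf.score(testx, testy):.3f}')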
