编了一份一元数据,反映成绩和学习时间之间的正相关关系
用Python进行拟合并绘制图像
from collections import OrderedDict

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Toy data set: hours studied vs. exam score (positively correlated).
lib = {
    '学习时间': [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50,
             2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50],
    '分数': [10, 22, 13, 43, 20, 22, 33, 50, 62, 48,
           55, 75, 62, 73, 81, 76, 64, 82, 90, 93],
}
order_lib = OrderedDict(lib)
# Fixed: the class is `pd.DataFrame`; `pd.Dataframe` raises AttributeError.
df_lib = pd.DataFrame(order_lib)

x = df_lib.loc[:, '学习时间']
y = df_lib.loc[:, '分数']

# Plot every sample first; the training samples are re-drawn in orange below.
plt.scatter(x, y)
plt.xlabel('Hours')
plt.ylabel('Score')

# Keep 80% of the samples for training. No random_state is passed, so the
# split (and therefore the fitted line) differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8)
# scikit-learn estimators expect a 2-D feature matrix (n_samples, n_features).
x_train = x_train.values.reshape(-1, 1)
x_test = x_test.values.reshape(-1, 1)

model = LinearRegression()
model.fit(x_train, y_train)
a = model.intercept_  # intercept of the fitted line
b = model.coef_       # array holding the single slope coefficient

plt.scatter(x_train, y_train, color='orange')
y_train_predict = model.predict(x_train)
plt.plot(x_train, y_train_predict, color='black')
# Fixed: intercept goes first and slope second, so the title really reads
# "y = intercept + slope * x" (the original passed them in swapped order).
plt.title('y = {:.4f}+{:.4f}x'.format(a, b[0]))
plt.grid()
plt.show()

# <2> 多项式非线性回归 (Polynomial nonlinear regression)
用一元二次多项式对带有正态分布(高斯)噪声的数据 y = x² 进行拟合
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from sklearn.linear_model import LinearRegression, SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

x = np.arange(0, 1, 0.002)
# Gaussian noise (mean 0, standard deviation 0.1), one draw per x value.
# Sized from x instead of a hard-coded 500 so the two arrays cannot drift apart.
y = norm.rvs(0, size=x.size, scale=0.1)
# Underlying signal is the parabola y = x**2; the noise is added on top.
y = y + x**2
plt.scatter(x, y, s=5, color='orange')

# Fixed: the data is quadratic, so fit a degree-2 polynomial. The original
# degree=100 produced an ill-conditioned design matrix and fitted the noise.
# PolynomialFeatures emits a constant (bias) column by default, so the linear
# step is run with fit_intercept=False to avoid modelling the intercept twice.
clf = Pipeline([
    ('poly', PolynomialFeatures(degree=2)),
    ('linear', LinearRegression(fit_intercept=False)),
])
# Estimators need a 2-D feature matrix, hence the added axis.
clf.fit(x[:, np.newaxis], y)
y_test = clf.predict(x[:, np.newaxis])
plt.plot(x, y_test, linewidth=2)
plt.grid()  # show grid lines
plt.show()
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)