机器学习——线性回归

it2022-05-05  141

机器学习——线性回归

一元线性回归

加载数据;数据切分;模型训练;预测;结果分析;

# -*- coding: utf-8 -*-
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn import linear_model,metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Read the data file and assign the two column names.
data = pd.read_csv('ex1data1.txt', names=['population', 'profit'])

# Optional scatter plot of the raw data (left disabled):
# sns.lmplot('population', 'profit', data, height=6, fit_reg=False)
# plt.show()

# The first column is the single feature (kept 2-D for scikit-learn);
# the second column is the regression target.
features = data[['population']].values
targets = data['profit'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    features, targets, test_size=0.2, random_state=0
)

model = linear_model.LinearRegression()
model.fit(X_train, Y_train)

# Predict on the held-out test set.
predictions = model.predict(X_test)

print("截距", model.intercept_)
print("系数:", model.coef_)

figure = plt.figure(12)

# Upper panel: every raw sample together with the fitted straight line.
ax_fit = figure.add_subplot(2, 1, 1)
fitted_line = data.population * model.coef_ + model.intercept_
ax_fit.scatter(data.population, data.profit, label="RAW")
ax_fit.plot(data.population, fitted_line, 'r', label="FITTING")
ax_fit.set_title(u'原始数据和拟合直线', fontproperties='SimHei')
ax_fit.legend(loc=2)

# Lower panel: true vs. predicted targets, indexed by position in the test set.
ax_compare = figure.add_subplot(2, 1, 2)
sample_index = np.arange(Y_test.size)
ax_compare.plot(sample_index, Y_test, label='REAL')
ax_compare.plot(sample_index, predictions, label='PREDICTION')
ax_compare.set_title(u'实际值和预测值', fontproperties='SimHei')
ax_compare.legend(loc=2)

plt.show()

# Mean squared error (MSE) on the test set.
print("MSE: ", metrics.mean_squared_error(Y_test, predictions))

运行结果:

多元线性回归

加载数据;数据切分;特征缩放(StandardScaler);模型训练;预测;结果分析;

# -*- coding: utf-8 -*-
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn import linear_model,metrics,preprocessing
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Read the data file and assign the three column names.
df = pd.read_csv('ex1data2.txt', names=['square', 'bedrooms', 'price'])
print(df.head())

# Every column except the last is a feature; the third column (price) is the target.
X = df.iloc[:, :-1].values
Y = df.iloc[:, 2].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Feature scaling. The scaler is fitted on the training split only, and the
# SAME fitted mean/std is then applied to the test split. Re-fitting on the
# test split (fit_transform) would scale it with different statistics than
# the model was trained on and would leak test information into preprocessing.
standard_scaler = preprocessing.StandardScaler()
X_train = standard_scaler.fit_transform(X_train)
X_test = standard_scaler.transform(X_test)

linreg = linear_model.LinearRegression()
linreg.fit(X_train, Y_train)

# Predict on the held-out test set.
y_pred = linreg.predict(X_test)

print("截距", linreg.intercept_)
print("系数:", linreg.coef_)

# Mean squared error (MSE) on the test set.
print("MSE: ", metrics.mean_squared_error(Y_test, y_pred))

# Plot true vs. predicted targets, indexed by position in the test set.
X_index = np.arange(0, Y_test.size, 1)
plt.plot(X_index, Y_test, label='REAL')
plt.plot(X_index, y_pred, label='PREDICTION')
plt.title(u'实际值和预测值', fontproperties='SimHei')
plt.legend(loc=2)
plt.show()

运行结果:


最新回复(0)