机器学习逻辑回归实战
解决分类的一种模型

逻辑回归预测考试通过

基于examdata.csv数据,建立逻辑回归模型,预测Exam1=75,Exam2=60时,该同学在Exam3时passed or failed

```python
import pandas as pd
import numpy as np

data = pd.read_csv('examdata.csv')
data.head()

# 可视化
%matplotlib inline
from matplotlib import pyplot as plt

fig1 = plt.figure()
plt.scatter(data.loc[:, 'Exam1'], data.loc[:, 'Exam2'])
plt.title('Exam1-Exam2')
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.show()

mask = data.loc[:, 'Pass'] == 1
print(mask)
# print(~mask) 取反

fig2 = plt.figure()
passed = plt.scatter(data.loc[:, 'Exam1'][mask], data.loc[:, 'Exam2'][mask])
failed = plt.scatter(data.loc[:, 'Exam1'][~mask], data.loc[:, 'Exam2'][~mask])
plt.title('Exam1-Exam2')
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.legend((passed, failed), ('passed', 'failed'))
plt.show()

# 定义X,y
X = data.drop(['Pass'], axis=1)
y = data.loc[:, 'Pass']
X1 = data.loc[:, 'Exam1']
X2 = data.loc[:, 'Exam2']

# 逻辑回归训练模型
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
LR.fit(X, y)

# 预测结果和评估模型表现
y_predict = LR.predict(X)
print(y_predict)

from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y, y_predict)
print(accuracy)

# 预测结果 exam1=70 exam2=65
y_test = LR.predict([[70, 65]])
print('passed' if y_test == 1 else 'failed')
```

获取边界函数

```python
# 获取模型参数
LR.coef_
LR.intercept_

theta0 = LR.intercept_
theta1, theta2 = LR.coef_[0][0], LR.coef_[0][1]
print(theta0, theta1, theta2)

X2_new = -(theta0 + theta1 * X1) / theta2

fig3 = plt.figure()
passed = plt.scatter(data.loc[:, 'Exam1'][mask], data.loc[:, 'Exam2'][mask])
failed = plt.scatter(data.loc[:, 'Exam1'][~mask], data.loc[:, 'Exam2'][~mask])
plt.plot(X1, X2_new)
# 根据边界线可以得出准确率并不高
plt.title('Exam1-Exam2')
plt.xlabel('Exam1')
plt.ylabel('Exam2')
plt.legend((passed, failed), ('passed', 'failed'))
plt.show()
```

建立二阶边界,提高模型准确度

```python
X1_2 = X1 * X1  # 平方
X2_2 = X2 * X2
X1_X2 = X1 * X2
print(X1, X1_2)

X_new = {'X1': X1, 'X2': X2, 'X1_2': X1_2, 'X2_2': X2_2, 'X1_X2': X1_X2}
X_new = pd.DataFrame(X_new)
print(X_new)

# 模型训练
LR2 = LogisticRegression()
LR2.fit(X_new, y)
y2_predict = LR2.predict(X_new)
accuracy2 = accuracy_score(y, y2_predict)
print(accuracy2)
# 1.0 预测结果最优

# 先排序
X1_new = X1.sort_values()
print(X1, X1_new)

theta0 = LR2.intercept_
theta1, theta2, theta3, theta4, theta5 = LR2.coef_[0][0], LR2.coef_[0][1], LR2.coef_[0][2], LR2.coef_[0][3], LR2.coef_[0][4]
a = theta4
b = theta5 * X1_new + theta2
c = theta0 + theta1 * X1_new + theta3 * X1_new * X1_new
X2_new_boundary = (-b + np.sqrt(b * b - 4 * a * c)) / (2 * a)

fig4 = plt.figure()
plt.plot(X1_new, X2_new_boundary)
plt.show()
```

芯片检测

```python
# 加载数据
import pandas as pd
import numpy as np

data = pd.read_csv('chip_test.csv')
data.head()

# 清洗数据去掉pass列
mask = data.loc[:, 'pass'] == 1
print(~mask)

# 可视化
%matplotlib inline
from matplotlib import pyplot as plt

fig1 = plt.figure()
passed = plt.scatter(data.loc[:, 'test1'][mask], data.loc[:, 'test2'][mask])
failed = plt.scatter(data.loc[:, 'test1'][~mask], data.loc[:, 'test2'][~mask])
plt.title('test1-test2')
plt.xlabel('test1')
plt.ylabel('test2')
plt.legend((passed, failed), ('passed', 'failed'))
plt.show()

# 生成新数据
X = data.drop(['pass'], axis=1)
y = data.loc[:, 'pass']
X1 = data.loc[:, 'test1']
X2 = data.loc[:, 'test2']
X1.head()

X1_2 = X1 * X1
X2_2 = X2 * X2
X1_X2 = X1 * X2
X_new = {'X1': X1, 'X2': X2, 'X1_2': X1_2, 'X2_2': X2_2, 'X1_X2': X1_X2}
X_new = pd.DataFrame(X_new)
print(X_new)

# 训练模型
from sklearn.linear_model import LogisticRegression

LR2 = LogisticRegression()
LR2.fit(X_new, y)

# 预测
from sklearn.metrics import accuracy_score

y2_predict = LR2.predict(X_new)
accuracy2 = accuracy_score(y, y2_predict)
print(accuracy2)

# 定义函数
def f(x):
    a = theta4
    b = theta5 * x + theta2
    c = theta0 + theta1 * x + theta3 * x * x
    X2_new_boundary1 = (-b + np.sqrt(b * b - 4 * a * c)) / (2 * a)
    X2_new_boundary2 = (-b - np.sqrt(b * b - 4 * a * c)) / (2 * a)
    return X2_new_boundary1, X2_new_boundary2

X2_new_boundary1 = []
X2_new_boundary2 = []
for x in X1_new:
    X2_new_boundary1.append(f(x)[0])
    X2_new_boundary2.append(f(x)[1])
print(X2_new_boundary1, X2_new_boundary2)
```

癌症分类预测

```python
def dm_logisticRegression():
    # 1.获取数据
    data = pd.read_csv('./data/breast-cancer-wisconsin.csv')
    data.info()

    # 2.基本数据处理
    data = data.replace(to_replace='?', value=np.NaN)
    data = data.dropna(axis=0)  # axis=0,表示行,删除包含缺失值的行

    # 3.特征工程
    x = data.iloc[:, 1:-1]
    print('x.head()-->\n', x.head())
    y = data['Class']
    print('y.head()-->\n', y.head())
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=22)

    # 标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4.机器学习(逻辑回归)
    estimator = LogisticRegression()
    estimator.fit(x_train, y_train)

    # 5.模型评估
    y_predict = estimator.predict(x_test)
    print('y_predict-->', y_predict)
    accuracy = estimator.score(x_test, y_test)
    print('accuracy-->', accuracy)
```
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/2415171.html
如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!