Logistic regression, also called logistic regression analysis, is a generalized linear regression model. Take gastric-cancer analysis as an example: we select two groups of people, a gastric-cancer group and a non-cancer group, and the two groups will necessarily differ in physical signs, lifestyle, and so on. The dependent variable is therefore "gastric cancer or not", taking the value "yes" or "no"; the independent variables can include many factors, such as age, sex, dietary habits, and Helicobacter pylori infection.

Logistic regression maps the output of an ordinary linear regression into the interval (0, 1) and reads it as a probability; a probability greater than 0.5 is classified as 1. The sigmoid function is commonly used to produce values in (0, 1):

$$\sigma(t) = \frac{1}{1 + e^{-t}}$$

## Plotting the sigmoid curve

```python
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(t):
    return 1 / (1 + np.exp(-t))

x = np.linspace(-10, 10, 100)
y = sigmoid(x)
plt.plot(x, y)
plt.show()
```

## The loss function of logistic regression

With $\hat{y} = \sigma(X_b\,\theta)$, the loss is the cross-entropy

$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log\hat{y}^{(i)} + \left(1-y^{(i)}\right)\log\left(1-\hat{y}^{(i)}\right)\right]$$

This loss function has no closed-form solution, but gradient descent can be used to find the optimum. Differentiating the loss gives

$$\nabla J(\theta) = \frac{1}{m}\,X_b^{T}\left(\sigma(X_b\,\theta) - y\right)$$

## Wrapping a Logistic model

Using this Logistic model on the first two columns of the iris dataset, I obtained a prediction accuracy of 1.0.

```python
# The code closely parallels linear regression; only the derived formulas differ.
# _*_ encoding: utf-8 _*_
import numpy as np
from sklearn.metrics import accuracy_score


class LogisticRegression:

    def __init__(self):
        self.coef_ = None
        self.interception_ = None
        self._theta = None

    def _sigmoid(self, t):
        return 1. / (1. + np.exp(-t))

    def fit(self, X_train, y_train, eta=0.01, n_iters=1e6):

        def J(theta, X_b, y):
            # cross-entropy loss; fall back to inf if log() overflows
            y_hat = self._sigmoid(X_b.dot(theta))
            try:
                return -np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)) / len(y)
            except:
                return float('inf')

        def dJ(theta, X_b, y):
            # vectorized gradient: X_b^T (sigmoid(X_b theta) - y) / m
            return X_b.T.dot(self._sigmoid(X_b.dot(theta)) - y) / len(X_b)

        def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e6, epsilon=1e-8):
            theta = initial_theta
            cur_iter = 0
            while cur_iter < n_iters:
                gradient = dJ(theta, X_b, y)
                last_theta = theta
                theta = theta - eta * gradient
                if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                    break
                cur_iter += 1
            return theta

        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        initial_theta = np.zeros(X_b.shape[1])
        self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def predict_proba(self, X_predict):
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return self._sigmoid(X_b.dot(self._theta))

    def predict(self, X_predict):
        proba = self.predict_proba(X_predict)
        return np.array(proba >= 0.5, dtype='int')

    def score(self, X_test, y_test):
        return accuracy_score(y_test, self.predict(X_test))

    def __repr__(self):
        return "LogisticRegression()"
```

## Decision boundaries

In logistic regression the decision boundary is easily seen to be the line $\theta^{T} \cdot x_b = 0$; with only two features it can be drawn directly from this formula. (figure: the blue line is the decision boundary obtained by logistic regression on the first two iris features)

Plotting irregular decision boundaries: one approach is to predict on a dense grid and colour each cell by its predicted class.

```python
def plot_decision_boundary(model, axis):
    # build a dense grid over the axis box, predict every grid point,
    # and draw filled contours of the predicted class
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)

    from matplotlib.colors import ListedColormap
    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    plt.contourf(x0, x1, zz, cmap=custom_cmap)


# log_reg: the model fitted above on the first two iris features (two classes)
plot_decision_boundary(log_reg, axis=[4, 7.5, 1.5, 4.5])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()
```

## Decision boundary of the kNN algorithm

```python
from sklearn.neighbors import KNeighborsClassifier

knn_clf = KNeighborsClassifier()
knn_clf.fit(iris.data[:, :2], iris.target)
plot_decision_boundary(knn_clf, axis=[4, 7.5, 1.5, 4.5])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()
```

(figure: the boundary with the default n_neighbors=5)
(figure: the boundary with n_neighbors=50)

## Polynomial features applied to logistic regression

```python
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LogisticRegression

# prepare data: points inside the circle x0^2 + x1^2 = 1.5 are labelled 1
X = np.random.normal(0, 1, size=(200, 2))
y = np.array(X[:, 0] ** 2 + X[:, 1] ** 2 < 1.5, dtype='int')


def PolynomialLogisticRegression(degree):
    return Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('logistic', LogisticRegression())
    ])


log_reg = PolynomialLogisticRegression(2)
log_reg.fit(X, y)
```

## Model regularization for logistic regression

How scikit-learn regularizes logistic regression:

```python
# prepare data: a parabolic boundary, then flip some labels as noise
import numpy as np
import matplotlib.pyplot as plt

X = np.random.normal(0, 1, size=(200, 2))
y = np.array(X[:, 0] ** 2 + X[:, 1] < 1.5, dtype='int')
for _ in range(20):
    y[np.random.randint(200)] = 1  # noise

plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()
```

(figure: the noisy dataset)
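Before handing regularization to scikit-learn, it may help to see what a high-degree fit does on this noisy data. Here is a minimal sketch reusing the `PolynomialLogisticRegression` pipeline and `plot_decision_boundary` helper defined above; the `poly20` name, the degree of 20 (mirroring the regularized example below), and the axis range are my own choices, not code from the original post.

```python
# Sketch: a degree-20 polynomial pipeline with default LogisticRegression
# settings on the noisy data; the boundary it draws is the kind of overfit
# shape that the regularization below is meant to smooth out.
poly20 = PolynomialLogisticRegression(degree=20)
poly20.fit(X, y)

plot_decision_boundary(poly20, axis=[-4, 4, -4, 4])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()
```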
Fitting scikit-learn's LogisticRegression directly:

```python
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

log_reg = LogisticRegression()
log_reg.fit(X, y)
```

The fitted estimator prints as:

```
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
```

In the regularization formula $C \cdot J(\theta) + L_2$, the parameter `C` defaults to 1 and `penalty` defaults to `'l2'`; since `C` multiplies the loss term, a smaller `C` means stronger regularization.

```python
from sklearn.preprocessing import StandardScaler


def PolynomialLogisticRegression(degree, C=1.0, penalty='l2'):
    return Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('logistic', LogisticRegression(C=C, penalty=penalty))
    ])


poly_log_reg = PolynomialLogisticRegression(degree=20, C=0.1, penalty='l1')
poly_log_reg.fit(X, y)
```

The resulting decision boundary is relatively smooth.

## Using OvR and OvO to handle multiclass problems

OvR (One vs Rest) trains one classifier per class; it takes less time but its classification accuracy is slightly lower. OvO (One vs One) trains one classifier per pair of classes; it takes more time but classifies more accurately.

```python
# for easy visualization we use only the first two columns of the iris dataset
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X = iris['data'][:, :2]
y = iris['target']
# the scores below are on a held-out set, so split the data first
X_train, X_test, y_train, y_test = train_test_split(X, y)

# the multi_class parameter selects the multiclass scheme; the default is 'ovr'
log_reg = LogisticRegression(multi_class='ovr')
log_reg.fit(X_train, y_train)
log_reg.score(X_test, y_test)
# 0.578  -- accuracy is low because only the first two features are used

log_reg = LogisticRegression(multi_class='ovr', solver='newton-cg')
log_reg.fit(X_train, y_train)
log_reg.score(X_test, y_test)
# 0.7894736842105263
```

(figure: the OvR decision boundary)
(figure: the OvO decision boundary; a sketch reproducing both plots follows at the end of the post)

Using the OvO and OvR classes in scikit-learn for multiclass classification:

```python
from sklearn.multiclass import OneVsOneClassifier
from sklearn.multiclass import OneVsRestClassifier

ovr = OneVsRestClassifier(log_reg)
ovr.fit(X_train, y_train)
print(ovr.score(X_test, y_test))
# 0.7894736842105263

ovo = OneVsOneClassifier(log_reg)
ovo.fit(X_train, y_train)
print(ovo.score(X_test, y_test))
# 0.8157894736842105
```

Author: 冰源_63ad
Source: https://www.jianshu.com/p/c5ba12a1b2c8
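The OvR and OvO boundary figures referenced above can be reproduced with the `plot_decision_boundary` helper from earlier. This is a minimal sketch, assuming the iris variables and the helper are still in scope; the loop, the titles, and the axis range are my own arrangement rather than code from the original post.

```python
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier, OneVsOneClassifier

# fit each wrapper on the training split, then draw its boundary together
# with the three iris classes (first two features only)
for name, clf in [('OvR', OneVsRestClassifier(LogisticRegression())),
                  ('OvO', OneVsOneClassifier(LogisticRegression()))]:
    clf.fit(X_train, y_train)
    plot_decision_boundary(clf, axis=[4, 8.5, 1.5, 4.5])
    for k in range(3):
        plt.scatter(X[y == k, 0], X[y == k, 1])
    plt.title(name)
    plt.show()
```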