# coding: utf-8
"""Text sentiment classification with scikit-learn.

Simple text sentiment classification is a binary classification problem.
This script shows how to do it with scikit-learn in two steps:

1. Training: learn the rules of a classification model from a training set.
2. Classification: first evaluate the model (accuracy etc.) on a labelled
   test set; if the results are acceptable, the model can then be used to
   predict unlabelled samples.

The sample set consists of hotel reviews that have already been
word-segmented. In each line the first column (first character) is the label
and the rest of the line is the review text. According to the original
article, the first half of the file holds positive reviews and the second
half negative ones.

Several classifiers are implemented: SVM, naive Bayes, logistic regression,
decision tree, random forest and KNN.
"""
from matplotlib import pyplot
import scipy as sp
import numpy as np
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy module, which was
    # removed in 0.20.
    from sklearn.cross_validation import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
# NOTE(review): this star import is kept from the original script; nothing
# below relies on it and it can shadow builtins such as ``sum`` or ``any``.
from numpy import *


# SVM
def SvmClass(x_train, y_train):
    """Fit and return a linear-kernel SVM on the training data.

    ``probability=True`` is required because ``Precision`` calls
    ``predict_proba`` on the returned classifier.
    """
    from sklearn.svm import SVC
    # The default kernel would be 'rbf'; a linear kernel is selected here.
    clf = SVC(kernel='linear', probability=True)
    # Supervised estimators are trained with fit(X, y); unsupervised ones
    # with fit(X).
    clf.fit(x_train, y_train)
    return clf


# Naive Bayes
def NbClass(x_train, y_train):
    """Fit and return a multinomial naive Bayes classifier (alpha=0.01)."""
    from sklearn.naive_bayes import MultinomialNB
    clf = MultinomialNB(alpha=0.01).fit(x_train, y_train)
    return clf


# Logistic Regression
def LogisticClass(x_train, y_train):
    """Fit and return an L2-penalised logistic regression classifier."""
    from sklearn.linear_model import LogisticRegression
    clf = LogisticRegression(penalty='l2')
    clf.fit(x_train, y_train)
    return clf


# KNN
def KnnClass(x_train, y_train):
    """Fit and return a k-nearest-neighbours classifier (default settings)."""
    from sklearn.neighbors import KNeighborsClassifier
    clf = KNeighborsClassifier()
    clf.fit(x_train, y_train)
    return clf


# Decision Tree
def DccisionClass(x_train, y_train):
    """Fit and return a decision tree classifier (default settings)."""
    from sklearn import tree
    clf = tree.DecisionTreeClassifier()
    clf.fit(x_train, y_train)
    return clf


# Random Forest Classifier
def random_forest_class(x_train, y_train):
    """Fit and return a random forest classifier with 8 trees."""
    from sklearn.ensemble import RandomForestClassifier
    # n_estimators sets the number of weak classifiers (trees).
    clf = RandomForestClassifier(n_estimators=8)
    clf.fit(x_train, y_train)
    return clf


def _module_global(name):
    """Return the module-level variable *name*, raising NameError if unset."""
    try:
        return globals()[name]
    except KeyError:
        raise NameError("name %r is not defined" % name)


# Precision / recall report
def Precision(clf, x_test=None, y_test=None):
    """Print evaluation metrics for a fitted classifier on the test split.

    Prints, in order: the fraction of predictions equal to the true labels,
    a ``classification_report`` built from ``predict_proba`` thresholded at
    0.5, a separator line, and the accuracy score.

    Args:
        clf: Fitted classifier providing ``predict`` and ``predict_proba``.
        x_test: Vectorised test samples. When omitted, the module-level
            ``x_test`` is used, as the original script did.
        y_test: True labels of the test samples. When omitted, the
            module-level ``y_test`` is used.

    Raises:
        NameError: If an argument is omitted and no module-level variable of
            that name exists.
    """
    from sklearn.metrics import accuracy_score

    if x_test is None:
        x_test = _module_global("x_test")
    if y_test is None:
        y_test = _module_global("y_test")

    doc_class_predicted = clf.predict(x_test)
    # Predicted labels versus true labels.
    print(np.mean(doc_class_predicted == np.asarray(y_test)))

    # Precision and recall: treat a positive-class probability above 0.5 as
    # a positive prediction. Cast to int so the predictions have the same
    # type as the integer labels.
    answer = clf.predict_proba(x_test)[:, 1]
    report = (answer > 0.5).astype(int)
    print(classification_report(y_test, report, target_names=['neg', 'pos']))
    print("--------------------")
    print('准确率: %.2f' % accuracy_score(y_test, doc_class_predicted))


def _load_samples(path):
    """Read *path* once and return ``(texts, labels)``.

    The first character of each line is the integer label; the remainder of
    the line is the review text. Blank lines are skipped.
    """
    texts = []
    labels = []
    # NOTE(review): the corpus is assumed to be UTF-8 encoded -- confirm
    # against the actual train2.txt.
    with open(path, "r", encoding="utf-8") as corpus:
        for line in corpus:
            if not line.strip():
                continue
            labels.append(int(line[0:1]))
            texts.append(line[1:])
    return texts, labels


def main():
    """Train every classifier on train2.txt and print its evaluation."""
    data, movie_target = _load_samples("train2.txt")
    x = np.array(data)

    # Convert the text into a vector space model.
    count_vec = TfidfVectorizer(binary=False)
    # Split the data set: 80% training, 20% testing.
    x_train, x_test, y_train, y_test = train_test_split(
        x, movie_target, test_size=0.2)
    x_train = count_vec.fit_transform(x_train)
    x_test = count_vec.transform(x_test)

    experiments = (
        ('**************支持向量机************ ', SvmClass),
        ('**************朴素贝叶斯************ ', NbClass),
        ('**************最近邻************ ', KnnClass),
        ('**************逻辑回归************ ', LogisticClass),
        ('**************决策树************ ', DccisionClass),
        ('**************逻辑森林************ ', random_forest_class),
    )
    for banner, train in experiments:
        print(banner)
        Precision(train(x_train, y_train), x_test, y_test)


if __name__ == '__main__':
    main()

# The original article showed the printed results at this point and referred
# readers to a download of the complete code and corpus (the link was not
# preserved in this copy).