분류 알고리즘(3) - LR, NB, SVM, KNN 호출 예제

# -*- encoding=utf-8 -*-

from sklearn import svm
from sklearn import  neighbors, linear_model
from sklearn import metrics
from sklearn.feature_extraction.text import  TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import jieba
import pyltp
from classification_practice.practice_one.search_count_auto_words import AutoWordsCounter
from statistic.default_string_search import DefaultStringSearch as ACSearcher
import pandas as pd
from sklearn import metrics
import jieba
from sklearn.ensemble import GradientBoostingRegressor,GradientBoostingClassifier
from sklearn.externals import joblib
# segmentor = pyltp.Segmentor()
# segmentor.load("D:\\Gridsum\\LTPModel\\cws.model")


def read_files(filename, tokenizer=None):
    """Read a tab-separated training file into (sentences, labels).

    Each line is expected to look like: ``<id>\t<label>\t<text>`` where
    ``<label>`` is an integer class id.

    Args:
        filename: Path to a UTF-8 TSV file.
        tokenizer: Optional callable mapping a raw sentence to an iterable
            of tokens. Defaults to ``jieba.cut`` (Chinese word segmentation).

    Returns:
        (x_train, y_train): list of space-joined token strings and list of
        integer labels, in file order.
    """
    if tokenizer is None:
        tokenizer = jieba.cut
    x_train = []
    y_train = []
    with open(filename, 'r', encoding="utf-8") as lines:
        for line in lines:
            # strip() already removes the trailing newline; the original
            # also did line.replace("\n", "") which is redundant here.
            item = line.strip().split("\t")
            sentence = " ".join(tokenizer(item[2]))
            # Alternative segmenter kept for reference:
            # sentence = " ".join(list(segmentor.segment(item[1])))
            x_train.append(sentence)
            y_train.append(int(item[1]))
    return x_train, y_train


def read_files1(filename, tokenizer=None):
    """Read an Excel file with 'sentiment' and 'sentence' columns.

    Duplicated rows are dropped; sentiment labels are mapped to
    {-1, 0, 1} and sentences are word-segmented.

    Args:
        filename: Path to an .xlsx/.xls file readable by pandas.
        tokenizer: Optional callable mapping a sentence to tokens;
            defaults to ``jieba.cut``.

    Returns:
        (x_train, y_train): segmented sentences and integer labels.
    """
    if tokenizer is None:
        tokenizer = jieba.cut
    x_train = []
    y_train = []
    df = pd.read_excel(filename).drop_duplicates()
    # NOTE(review): the original label strings were lost in transcription —
    # all three dict keys rendered as " ", so only the last mapping survives.
    # Restore the real sentiment words (negative/neutral/positive) before use.
    labels = df["sentiment"].map({" ": -1, " ": 0, " ": 1})
    for sentiment, sentence in zip(labels, df['sentence']):
        cut_sentence = " ".join(tokenizer(sentence))
        x_train.append(cut_sentence)
        y_train.append(sentiment)
    return x_train, y_train


def main():
    """Train and compare NB / KNN / LR / SVM text classifiers.

    Fits a TF-IDF vectorizer on the training corpus, then trains each
    classifier on the same features and prints a classification report
    on the held-out test file.
    """
    # Fit the vocabulary on the training corpus only (labels unused here).
    vect = TfidfVectorizer(min_df=2, max_df=0.8)
    fit_corpus, _ = read_files("train1.5.1.txt")
    vect.fit(fit_corpus)

    X_train, y_train = read_files("train1.5.1.txt")
    X_train = vect.transform(X_train)
    X_test, y_test = read_files("test1.2.0.txt")
    X_test = vect.transform(X_test)

    # --- Multinomial Naive Bayes -----------------------------------
    nb = MultinomialNB()
    nb.fit(X_train, y_train)
    X_predict = nb.predict(X_test)
    print("NaiveBayes Result:")
    print(metrics.classification_report(y_test, X_predict))

    # --- K-Nearest Neighbors ---------------------------------------
    knn = neighbors.KNeighborsClassifier()
    knn.fit(X_train, y_train)
    t = knn.predict(X_test)
    print('KNN score: %f' % knn.score(X_test, y_test))
    print(metrics.classification_report(y_test, t))

    # --- Logistic Regression ---------------------------------------
    print("LR ")
    logistic = linear_model.LogisticRegression(solver='newton-cg')
    logistic.fit(X_train, y_train)
    X_predict = logistic.predict(X_test)
    print(metrics.classification_report(y_test, X_predict))

    # --- SVM variants ----------------------------------------------
    C = 1.0  # SVM regularization parameter

    print("SVM-linear ")
    X_predict = svm.SVC(kernel='linear', C=C).fit(X_train, y_train).predict(X_test)
    print(metrics.classification_report(y_test, X_predict))

    print("SVM-Rbf ")
    X_predict = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X_train, y_train).predict(X_test)
    print(metrics.classification_report(y_test, X_predict))

    print("SVM-poly ")
    X_predict = svm.SVC(kernel='poly', degree=3, C=C).fit(X_train, y_train).predict(X_test)
    print(metrics.classification_report(y_test, X_predict))

    print("SVM-svc ")
    X_predict = svm.LinearSVC(C=C).fit(X_train, y_train).predict(X_test)
    print(metrics.classification_report(y_test, X_predict))

    print("finish! ")


if __name__ == "__main__":
    main()

관련 내용에 대한 자세한 소개는 후속 글에서 보충할 예정입니다.

좋은 웹페이지 즐겨찾기