분류 알고리즘(3) - LR NB SVM KNN 호출 예
# -*- encoding=utf-8 -*-
from sklearn import svm
from sklearn import neighbors, linear_model
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import jieba
import pyltp
from classification_practice.practice_one.search_count_auto_words import AutoWordsCounter
from statistic.default_string_search import DefaultStringSearch as ACSearcher
import pandas as pd
from sklearn import metrics
import jieba
from sklearn.ensemble import GradientBoostingRegressor,GradientBoostingClassifier
from sklearn.externals import joblib
# segmentor = pyltp.Segmentor()
# segmentor.load("D:\\Gridsum\\LTPModel\\cws.model")
def read_files(filename):
    """Load a tab-separated corpus file and return (texts, labels).

    Each line must contain at least three tab-separated fields: field [2]
    is the raw sentence, field [1] is the integer class label. Sentences
    are tokenised with jieba and re-joined with single spaces so that
    TfidfVectorizer can later split them on whitespace.

    :param filename: path to a UTF-8 encoded train/test file
    :return: tuple (x_train, y_train) — list of token strings, list of ints
    """
    x_train = []
    y_train = []
    with open(filename, 'r', encoding="utf-8") as lines:
        for line in lines:
            # Drop the trailing newline, then split the record on tabs.
            item = line.replace("\n", "").strip().split("\t")
            # Space-joined jieba tokens -> whitespace-tokenisable text.
            sentence = " ".join(jieba.cut(item[2]))
            x_train.append(sentence)
            y_train.append(int(item[1]))
    return x_train, y_train
def read_files1(filename):
    """Load a labelled Excel corpus and return (texts, labels).

    Expects columns "sentiment" and "sentence"; duplicate rows are
    dropped before mapping sentiment labels onto {-1, 0, 1}.

    NOTE(review): the mapping keys below were garbled in the original
    source — they all appear as a single space, so later dict keys
    overwrite earlier ones and every label maps to 1 or NaN. Restore the
    original class-name strings (negative/neutral/positive) used in the
    spreadsheet before relying on this loader.

    :param filename: path to the Excel file
    :return: tuple (x_train, y_train) — list of token strings, list of labels
    """
    x_train = []
    y_train = []
    df = pd.read_excel(filename).drop_duplicates()
    labels = df["sentiment"].map({" ": -1, " ": 0, " ": 1})
    for sentiment, sentence in zip(labels, df['sentence']):
        # Space-joined jieba tokens, same convention as read_files().
        x_train.append(" ".join(jieba.cut(sentence)))
        y_train.append(sentiment)
    return x_train, y_train
# Build the TF-IDF vocabulary on the training texts and vectorize both splits.
# Fixes vs. original: the training file was read twice (once to fit the
# vectorizer, once for the actual training set) — read it once and reuse;
# the knn/logistic estimators constructed here were dead code, immediately
# shadowed by fresh constructions further down before any use.
vect = TfidfVectorizer(min_df=2, max_df=0.8)
X_train_raw, y_train = read_files("train1.5.1.txt")
vect.fit(X_train_raw)
X_train = vect.transform(X_train_raw)
X_test_raw, y_test = read_files("test1.2.0.txt")
X_test = vect.transform(X_test_raw)
# --- Multinomial Naive Bayes ---
nb_clf = MultinomialNB()
nb_clf.fit(X_train, y_train)
nb_pred = nb_clf.predict(X_test)
print ("NaiveBayes Result:")
print(metrics.classification_report(y_test, nb_pred))
###################################################
# --- K-Nearest Neighbours ---
knn = neighbors.KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
print('KNN score: %f' % knn.score(X_test, y_test))
print(metrics.classification_report(y_test, knn_pred))
#################################################
# --- Logistic Regression ---
print("LR ")
logistic = linear_model.LogisticRegression(solver='newton-cg')
logistic.fit(X_train, y_train)
lr_pred = logistic.predict(X_test)
print(metrics.classification_report(y_test, lr_pred))
#################################################
# --- SVM with a linear kernel ---
print("SVM-linear ")
C = 1.0  # SVM regularization parameter, shared by every SVM variant below
lin_svm = svm.SVC(kernel='linear', C=C)
lin_svm.fit(X_train, y_train)
print(metrics.classification_report(y_test, lin_svm.predict(X_test)))
#################################################
# --- SVM with an RBF kernel ---
print("SVM-Rbf ")
rbf_svm = svm.SVC(kernel='rbf', gamma=0.7, C=C)
rbf_svm.fit(X_train, y_train)
print(metrics.classification_report(y_test, rbf_svm.predict(X_test)))
#################################################
# --- SVM with a degree-3 polynomial kernel ---
print("SVM-poly ")
poly_svm = svm.SVC(kernel='poly', degree=3, C=C)
poly_svm.fit(X_train, y_train)
print(metrics.classification_report(y_test, poly_svm.predict(X_test)))
#################################################
# --- LinearSVC (liblinear-based linear SVM) ---
print("SVM-svc ")
lsvc = svm.LinearSVC(C=C)
lsvc.fit(X_train, y_train)
print(metrics.classification_report(y_test, lsvc.predict(X_test)))
#################################################
print("finish! ")
관련 내용에 대한 자세한 소개는 후속 글에서 보충할 예정입니다.
이 내용에 흥미가 있습니까?
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
문장 분류 어노테이션을 ipywidgets로 UI를 작성하여 효율화문서 분류 어노테이션을 조금이라도 편하게 하기 위해, 노트북상에서 움직이는 어노테이션용 UI를 ipywidgets로 구현했습니다. 아래는 실제 화면입니다 (livedoor 뉴스를 어노테이션하는 형태를 가정). 버튼을...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다.하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.