기말

8196 단어

import jieba

# Raw-string path literal (the double quotes are part of the value).
# It is not referenced anywhere else in the visible script.
path=r'"E:\ \147\"'

# Load the stop-word list: the UTF-8 file is split on newlines, giving one
# entry per line. The '\n' escape is restored here; a literal line break
# inside the quotes is a syntax error.
with open(r'E:\ \stopsCN.txt', encoding='utf-8') as f:
    stopword = f.read().split('\n')


# Parallel accumulators filled by read_text(): List01 receives one label per
# document and List02 the matching space-joined token string.
List01 = []
List02 = []

def read_text(name, start, end):
    """Read a numbered range of text files and append them to the corpus.

    For every integer ``i`` in ``range(start, end)`` the file
    ``E:\\ \\147\\<name>\\<i>.txt`` is read as UTF-8, reduced to its
    alphabetic characters, segmented with jieba in full mode
    (``cut_all=True``), and filtered so that only tokens of at least two
    characters that are not in the module-level ``stopword`` list remain.

    Results are appended to the module-level lists: ``name`` goes to
    ``List01`` (the label) and the space-joined tokens go to ``List02``.

    Args:
        name: Sub-directory name; also used as the label of every file read.
        start: First file number (inclusive).
        end: Last file number (exclusive).

    Raises:
        OSError: If one of the files cannot be opened.
    """
    # Build the lookup set once per call: `stopword` is a list, so testing
    # each token against it directly is a linear scan for every token.
    stop_set = set(stopword)

    for index in range(start, end):
        # Keep the loop counter and the path in separate names instead of
        # rebinding the loop variable.
        file_path = 'E:\\ \\147\\' + name + '\\' + str(index) + ".txt"
        with open(file_path, 'r', encoding='utf-8') as f:
            raw = f.read()

        # Keep only characters for which str.isalpha() is true.
        letters = "".join(ch for ch in raw if ch.isalpha())

        tokens = [
            tok
            for tok in jieba.cut(letters, cut_all=True)
            if len(tok) >= 2 and tok not in stop_set
        ]

        List01.append(name)
        List02.append(" ".join(tokens))
      
# Load four runs of consecutively numbered documents; each pair is the
# (inclusive start, exclusive end) file number passed to read_text().
for _first_id, _stop_id in (
    (798977, 798997),
    (256822, 256842),
    (264410, 264430),
    (644579, 644599),
):
    read_text(" ", _first_id, _stop_id)


# Bare expressions kept from the original: they have no effect when run as a
# script (an interactive session would echo the values).
List01
List02




from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Hold out 20% of the documents (List02) and their labels (List01) for testing.
x_train, x_test, y_train, y_test = train_test_split(
    List02,
    List01,
    test_size=0.2,
)

# Fit the TF-IDF vectorizer on the training documents only, then apply the
# fitted vectorizer to the test documents.
vec = TfidfVectorizer()
X_train = vec.fit_transform(x_train)
X_test = vec.transform(x_test)




from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report

# Fit a multinomial naive Bayes classifier on the TF-IDF training matrix and
# predict labels for the held-out documents.
mnb = MultinomialNB()
module = mnb.fit(X_train, y_train)
y_predict = module.predict(X_test)

# Mean score over 5 cross-validation folds.
# NOTE(review): the folds are drawn from the test split (X_test, y_test), not
# the training split -- confirm this is intended.
scores = cross_val_score(mnb, X_test, y_test, cv=5)
print(" :", scores.mean())

# classification_report takes (y_true, y_pred); passing them the other way
# round swaps the precision and recall columns.
print(" :\n", classification_report(y_test, y_predict))




import collections

# Count how often each label occurs among the true and the predicted labels.
testCount = collections.Counter(y_test)
predCount = collections.Counter(y_predict)
print(' ：', testCount, '\n', ' ', predCount)

# Label names, true counts and predicted counts as index-aligned lists.
nameList = list(testCount.keys())
testList = list(testCount.values())
# Look each label up by name: predCount has its own key order (and may lack
# some labels), so predCount.values() would not line up with nameList.
# A Counter returns 0 for a missing key. Labels that occur only in the
# predictions are not part of nameList and are therefore not listed.
predictList = [predCount[label] for label in nameList]
x = list(range(len(nameList)))
print(" ：", nameList, '\n', " ：", testList, '\n', " ：", predictList)

이 내용에 흥미가 있습니까?

현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:

다양한 언어의 JSON

JSON은 Javascript 표기법을 사용하여 데이터 구조를 레이아웃하는 데이터 형식입니다. 그러나 Javascript가 코드에서 이러한 구조를 나타낼 수 있는 유일한 언어는 아닙니다. 저는 일반적으로 '객체'{}...

텍스트를 자유롭게 공유하거나 복사할 수 있습니다. 하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.

CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.

좋은 웹페이지 즐겨찾기

개발자 우수 사이트 수집

개발자가 알아야 할 필수 사이트 100선 추천: 우리는 당신을 위해 자주 사용하는 개발자 학습 사이트 100개를 정리했습니다.