기계 학습 기본 구조

16008 단어
기초가 튼튼하지 않으면 산이 흔들린다. 아래 코드를 따라가면 데이터 마이닝의 기본 절차를 간단하게 이해할 수 있지만, 현재 나는 이를 유연하게 운용하지 못하며, 자료 구조와 확률론 지식을 다시 복습한 후에야 기계 학습을 깊이 이해할 수 있을 것이다. 댓글: 기계 학습·데이터 마이닝 방향에 대한 간단한 구조
# Load libraries
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('ignore')
# Load dataset
# Column labels are passed explicitly, so pandas treats every line of the
# file as data instead of looking for a header row.
names = [
    'sepal-length',
    'sepal-width',
    'petal-length',
    'petal-width',
    'class',
]
dataset = pd.read_csv("iris.data", names=names)

# Quick look at the data, printed in this order:
#   shape, first 20 rows, summary statistics, number of rows per class label.
for summary in (
    dataset.shape,
    dataset.head(20),
    dataset.describe(),
    dataset.groupby('class').size(),
):
    print(summary)
# Box-and-whisker plots: separate subplots on a 2x2 grid, with no shared axes.
dataset.plot(
    kind='box',
    subplots=True,
    layout=(2, 2),
    sharex=False,
    sharey=False,
)
plt.show()

# Histograms
dataset.hist()
plt.show()

# Scatter-plot matrix
scatter_matrix(dataset)
plt.show()
# Split out a validation dataset
array = dataset.values
print(array)

# Feature matrix: the first three columns only.
# NOTE(review): the original inline note mentions `X = array[:, 0:4]`, which
# would also include the fourth column ('petal-width'). Confirm that leaving
# it out of the features is intentional.
X = array[:, :3]
print(X)

# Target vector: column index 4.
Y = array[:, 4]
print(Y)

validation_size = 0.20  # passed as test_size: fraction held out for validation
seed = 7  # fixed random_state for the split
# Training partition:   X_train, Y_train
# Validation partition: X_validation, Y_validation
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)
# Test options and evaluation metric
seed = 7
scoring = 'accuracy'

# Spot-check algorithms: (short label, unfitted estimator) pairs, all built
# with their default hyper-parameters.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]
# Evaluate each model in turn with 10-fold cross-validation on the training
# split, collecting the per-fold scores and the matching model labels.
results = []
names = []
# random_state only has an effect when shuffle=True. Recent scikit-learn
# releases raise ValueError if random_state is set while shuffle is left False,
# and older releases silently ignored the seed. The splitter does not depend
# on the model, so it is built once; with an integer random_state every
# model is scored on the same folds.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
for name, model in models:
    cv_results = model_selection.cross_val_score(
        model, X_train, Y_train, cv=kfold, scoring=scoring
    )
    results.append(cv_results)
    names.append(name)
    # Report "<label>: <mean score> (<standard deviation>)" across the folds.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
# Compare algorithms: one box per entry in `results`, with the tick labels
# taken from `names`.
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
# Draw on the axes that were just created (the figure's current axes).
ax.boxplot(results)
ax.set_xticklabels(names)
plt.show()

# Make predictions on the validation dataset with a k-nearest-neighbours
# classifier fitted on the training split.
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
predictions = knn.predict(X_validation)

# Print, in order: accuracy, confusion matrix, per-class report.
for report in (accuracy_score, confusion_matrix, classification_report):
    print(report(Y_validation, predictions))

좋은 웹페이지 즐겨찾기