sklearn의 PCA 사용

1.데이터 준비
# Import the required libraries
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import time

# Load the training and test sets from CSV.
train = pd.read_csv('./data/MNIST_train.csv')
test = pd.read_csv('./data/MNIST_test.csv')

# Separate the "label" column from the remaining feature columns and divide
# every feature by 255 (the original note describes this as mapping
# [0, 255] onto [0, 1]).
y_train = train["label"].values
X_train = train.drop(columns="label").values / 255.0
X_test = test.values / 255.0

# Report the full array shapes before any subsampling.
print('the shape of train_image: {}'.format(X_train.shape))
print('the shape of test_image: {}'.format(X_test.shape))

# Keep at most the first 10000 rows of each array for the rest of the script.
X_train, y_train = X_train[:10000], y_train[:10000]
X_test = X_test[:10000]

2.조작
# Hold out 20% of the (subsampled) training data as a validation set that is
# used to compare PCA settings; random_state fixes the shuffle.
X_train_part, X_val, y_train_part, y_val = train_test_split(
    X_train,
    y_train,
    train_size=0.8,
    random_state=0,
)

# Show the shapes of the two feature partitions.
print(X_train_part.shape)
print(X_val.shape)

# Given a PCA setting n, fit PCA + SVC on the training part and report the validation accuracy
def n_component_analysis(n, X_train, y_train, X_val, y_val):
    """Score an SVC trained on PCA-reduced data for one PCA setting.

    PCA is fitted on ``X_train`` only; the same fitted projection is then
    applied to both ``X_train`` and ``X_val``. A default ``svm.SVC`` is
    trained on the projected training data and scored on the projected
    validation data. Progress, the score and the elapsed whole seconds are
    printed.

    Args:
        n: Value forwarded unchanged to ``PCA(n_components=...)``.
        X_train: Training features used to fit PCA and the classifier.
        y_train: Training labels.
        X_val: Validation features.
        y_val: Validation labels.

    Returns:
        The value returned by ``SVC.score`` on the validation data.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()

    pca = PCA(n_components=n)
    print("PCA begin with n_components: {}".format(n))
    pca.fit(X_train)

    # Project both partitions with the projection learned from X_train.
    X_train_pca = pca.transform(X_train)
    X_val_pca = pca.transform(X_val)

    # Train the classifier on the reduced training data.
    print('SVC begin')
    clf1 = svm.SVC()
    clf1.fit(X_train_pca, y_train)

    # Score on the reduced validation data.
    accuracy = clf1.score(X_val_pca, y_val)

    end = time.perf_counter()
    print("accuracy: {}, time elaps:{}".format(accuracy, int(end - start)))
    return accuracy



# Evaluate 15 evenly spaced PCA n_components values between 0.70 and 0.85,
# collecting the validation accuracy obtained for each one.
n_s = np.linspace(0.70, 0.85, num=15)
accuracy = [
    n_component_analysis(ratio, X_train_part, y_train_part, X_val, y_val)
    for ratio in n_s
]


# Plot validation accuracy (y axis) against the PCA n_components values
# (x axis) as a solid blue line.
import matplotlib.pyplot as plt
# IPython/Jupyter magic, not plain Python: this line only works when the code
# is run inside a notebook/IPython session.
%matplotlib inline
plt.plot(n_s, np.array(accuracy), 'b-')

3.최적 매개변수 훈련
# Final model: PCA with n_components=0.75 (presumably picked from the accuracy
# sweep above -- confirm against the plot), fitted on the whole subsampled
# training set rather than only the training part of the split.
pca = PCA(n_components=0.75)
pca.fit(X_train)

# Print the number of components PCA actually kept. A bare expression would
# only be displayed when it is the last line of a notebook cell.
print(pca.n_components_)

# Project the training and test features with the same fitted PCA.
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

# Show the reduced shapes.
print(X_train_pca.shape)
print(X_test_pca.shape)

# Train the SVM classifier on the reduced training data.
clf = svm.SVC()
clf.fit(X_train_pca, y_train)

# Predict labels for the reduced test data.
# NOTE(review): X_test was cut to its first 10000 rows earlier in the script,
# so predictions (and the file written below) cover only those rows.
y_predict = clf.predict(X_test_pca)

# Write the predictions to CSV: one 'Label' column, indexed from 1 under the
# index name 'Imageid'.
df = pd.DataFrame(
    {'Label': y_predict},
    index=pd.RangeIndex(1, len(y_predict) + 1, name='Imageid'),
)
df.to_csv('SVC_Minist_submission.csv', header=True)

좋은 웹페이지 즐겨찾기