machine learning/앙상블 학습과 랜덤포레스트

투표기반 분류기

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

# Synthetic toy dataset of 500 noisy samples; the fixed seed keeps it reproducible.
X, y = make_moons(
    n_samples=500,
    noise=0.30,
    random_state=42,
)

X.shape

(500, 2)

y.shape

(500,)

import matplotlib.pyplot as plt
def plot_dataset(X, y):
    """Draw a two-feature, two-class dataset with pyplot.

    Rows labelled 0 are plotted with the "bs" format (blue squares) and rows
    labelled 1 with "g^" (green triangles). A grid and the axis labels
    "X1" / "X2" are added. The function only draws; it never calls
    ``plt.show()``, so the caller decides when the figure is displayed.

    Args:
        X: Array indexed as ``X[:, 0]`` and ``X[:, 1]`` (the two plotted features).
        y: Label array aligned with the rows of ``X``; compared against 0 and 1.
    """
    first_feature, second_feature = X[:, 0], X[:, 1]
    for label, fmt in ((0, "bs"), (1, "g^")):
        members = y == label
        plt.plot(first_feature[members], second_feature[members], fmt)
    plt.grid(True, which='both')
    plt.xlabel("X1", fontsize=20)
    # rotation=0 keeps the y-axis label horizontal.
    plt.ylabel("X2", fontsize=20, rotation=0)
# Draw the whole dataset (before it is split) and display the figure.
plot_dataset(X, y)
plt.show()

# No test_size is passed, so scikit-learn's default train/test proportion applies;
# the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Three different model families, each seeded so the run is reproducible.
log_clf = LogisticRegression(random_state=42)
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC(random_state=42)

# Hard voting: the ensemble predicts the class picked by the majority of members.
voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf', rnd_clf),
        ('sf', svm_clf),
    ],
    voting='hard',
)

# Fit each individual model and then the ensemble, printing test accuracy for each.
for clf in [log_clf, rnd_clf, svm_clf, voting_clf]:
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.912

배깅

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Same three base models as the hard-voting cell, rebuilt for soft voting.
# Soft voting combines the members' predicted class probabilities, so the SVC is
# created with probability=True to make those probabilities available.
log_clf = LogisticRegression(random_state=42)
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC(probability=True, random_state=42)

voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_clf),
        ('rf', rnd_clf),
        ('sf', svm_clf),
    ],
    voting='soft',
)

# Fit each individual model and then the ensemble, printing test accuracy for each.
for clf in [log_clf, rnd_clf, svm_clf, voting_clf]:
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.92

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging: 500 decision trees, each fitted on 100 training samples drawn with
# replacement (bootstrap=True). The base tree is passed positionally, so the call
# does not depend on the keyword name of that parameter.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=1,
    random_state=42,
)
bag_clf.fit(X_train, y_train)

y_pred = bag_clf.predict(X_test)

# Bare expression: the value is only displayed when run as a notebook cell.
accuracy_score(y_test, y_pred)

0.904

# Baseline: one decision tree on the same split, to compare against the ensembles.
tree_clf = DecisionTreeClassifier(random_state=42)
y_pred = tree_clf.fit(X_train, y_train).predict(X_test)
# Bare expression: the value is only displayed when run as a notebook cell.
accuracy_score(y_test, y_pred)

0.856

# Same bagging setup, now with oob_score=True so that an out-of-bag score is
# computed during fit and exposed afterwards as bag_clf.oob_score_.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42),
    n_estimators=500,
    max_samples=100,
    oob_score=True,
    bootstrap=True,
    n_jobs=1,
    random_state=42,
)

# Left as a bare expression so a notebook echoes the fitted estimator's repr.
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(random_state=42),
                  max_samples=100, n_estimators=500, n_jobs=1, oob_score=True,
                  random_state=42)

# Out-of-bag score of the fitted ensemble; available because oob_score=True was set.
bag_clf.oob_score_

0.9253333333333333

랜덤포레스트

# Bagging ensemble whose trees are capped at 16 leaf nodes; the RandomForestClassifier
# later in this file uses the same n_estimators and max_leaf_nodes values.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(max_leaf_nodes=16, random_state=42),
    n_estimators=500,
    max_samples=100,
    oob_score=True,
    bootstrap=True,
    n_jobs=1,
    random_state=42,
)

# Left as a bare expression so a notebook echoes the fitted estimator's repr.
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(max_leaf_nodes=16,
                                                        random_state=42),
                  max_samples=100, n_estimators=500, n_jobs=1, oob_score=True,
                  random_state=42)

# Test-set accuracy of the bagging ensemble whose trees are limited to 16 leaf nodes.
y_pred = bag_clf.predict(X_test)

accuracy_score(y_test, y_pred)

0.904

from sklearn.ensemble import RandomForestClassifier

# Random forest with the same tree count and leaf-node cap as the bagging ensemble.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=1,
    random_state=42,
)
# Left as a bare expression so a notebook echoes the fitted estimator's repr.
rnd_clf.fit(X_train, y_train)

RandomForestClassifier(max_leaf_nodes=16, n_estimators=500, n_jobs=1,
                       random_state=42)

# Test-set accuracy of the random forest, for comparison with the bagging ensemble above.
y_predict = rnd_clf.predict(X_test)

accuracy_score(y_test, y_predict)

0.912


                
                    
        
    
    
    
    
    
                
                

                
                
                    
                        
                            

                            
                            Author And Source
                            


                            이 글(machine learning/앙상블 학습과 랜덤포레스트)에 대한 더 많은 자료는 아래 링크에서 확인할 수 있습니다.
                                
                                https://velog.io/@bbkyoo/machine-learning앙상블-학습과-랜덤포레스트
                            

                            
                            
                                저자 귀속: 원작자 정보가 원작자 URL에 포함되어 있으며 저작권은 원작자 소유입니다.
                            
                            
                                
                                
                                

                                
                                

                                우수한 개발자 콘텐츠 발견에 전념
                                (Collection and Share based on the CC Protocol.)

machine learning/앙상블 학습과 랜덤포레스트

투표기반 분류기

배깅

랜덤포레스트

Author And Source

좋은 웹페이지 즐겨찾기