《비즈니스 및 경제 통계》 파이썬 구현 노트(4)

2479 단어 데이터 분석
단순 선형 회귀
import pandas as pd
import numpy as np
import statsmodels.api as sm
# import statsmodels.formula.api as smf   


# Sample data: ten paired observations of the predictor x and the response y.
x = np.array([2, 6, 8, 8, 12, 16, 20, 20, 22, 26])
y = np.array([58, 105, 88, 118, 117, 137, 157, 169, 149, 202])

# Formula-API alternative (needs `import statsmodels.formula.api as smf`):
#     s = pd.DataFrame({"x": x, "y": y})
#     model2 = smf.ols(formula="y ~ x", data=s).fit()
#     model2.summary()

# sm.OLS does not add an intercept by itself, so prepend a constant column
# to the design matrix before fitting.
X = sm.add_constant(x)
ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()

# Regression summary table. It is shown as the cell output in a notebook;
# wrap it in print() when running this as a plain script.
model.summary()


범주형 변수
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

# Regression of `time` on a numeric predictor (`re`) and a categorical
# predictor (`typ`).
# NOTE: `re` and `time` share their names with standard-library modules; the
# names are kept because the model formula below refers to them.
re = np.array([2, 6, 8, 3, 2, 7, 9, 8, 4, 6])
# The category is already encoded as a single 0/1 indicator. A categorical
# variable with k levels needs k-1 dummy columns; pd.get_dummies() can build
# them from string labels (the original label array is not recoverable from
# this copy of the notes).
typ = np.array([1, 0, 1, 0, 1, 1, 0, 0, 1, 1])
time = np.array([2.9, 3.0, 4.8, 1.8, 2.9, 4.9, 4.2, 4.8, 4.4, 4.5])

# Name the columns explicitly so the formula terms resolve against `data`
# itself rather than against whatever variables happen to be in scope.
data = pd.DataFrame({"re": re, "typ": typ, "time": time})

# The formula API is imported as `smf`; a bare `ols(...)` is undefined here
# and raised NameError.
model = smf.ols("time ~ re + typ", data=data).fit()
model.summary()


잔차 분석
# Influence and outlier diagnostics for the fitted regression `model`.
outliers = model.get_influence()
# outliers.summary_table() returns all of the diagnostics below as one table.

# Leverage of each observation: the diagonal of the hat matrix.
leverage = outliers.hat_matrix_diag

# Externally studentized residuals.
resid_stu = outliers.resid_studentized_external

# DFFITS is returned as a (values, threshold) pair; keep the per-observation
# values.
dffits, _dffits_threshold = outliers.dffits

# Cook's distance is returned as a (distance, p-value) pair; keep the
# distances.
cook, _cook_pvalues = outliers.cooks_distance


그래프 그리기
#         
import matplotlib.pyplot as plt

# Diagnostic plots for a fitted regression.
# NOTE(review): this section reads x, y and X from the simple-regression
# section, so `model` and `resid_stu` are presumably the fit of y on x and
# its studentized residuals -- confirm before running it after the
# categorical-variable section, which rebinds `model`.

# 1) Scatter plot of the data with the fitted regression line.
plt.figure()
plt.scatter(x, y)
Y = model.predict(X)
plt.plot(x, Y, "r-")
plt.show()

# 2) Residuals against x, with a zero reference line.
plt.figure()
plt.scatter(x, y - Y)
plt.axhline(0)
plt.show()

# 3) Kernel density estimate of the residuals.
# When the model is fitted on plain NumPy arrays, model.resid is an ndarray
# and has no `.plot` accessor; wrapping it in a Series works for both
# array-based and DataFrame-based fits.
plt.figure()
pd.Series(model.resid).plot.density()
plt.show()

# 4) Studentized residuals against x.
# Reference lines at 0 and +/-2: points falling outside (-2, 2) deserve a
# closer look as possible outliers.
plt.figure()
plt.scatter(x, resid_stu)
for level in (0, 2, -2):
    plt.axhline(level)
plt.show()



로지스틱 회귀
import pandas as pd
import numpy as np
import statsmodels.api as sma 

# Alternative input file used in an earlier run:
#     data = pd.read_excel(r"C:\Users\liuhao\Desktop\a.xls")
#     x = <two feature columns>.values ; y = data.iloc[:, -1].values

# NOTE(review): the column labels below, and one directory name in the path,
# consist only of spaces in this copy of the notes -- presumably non-ASCII
# names that were lost. Restore the real spreadsheet headers and path before
# running. TODO confirm against bankloan.xls.
bankloan_path = r"C:\Users\liuhao\Desktop\python_work\Python         \chapter5\demo\data\bankloan.xls"
feature_cols = ["  ","  ","   ","     "]

data = pd.read_excel(bankloan_path)
x = data[feature_cols].values
# The response is taken from the column at positional index 8.
y = data.iloc[:, 8].values

# Prepend a constant column so the logit model includes an intercept.
X = sma.add_constant(x)

logit = sma.Logit(endog=y, exog=X)
result = logit.fit()
result.summary2()

from sklearn.linear_model import LogisticRegression

# Fit the same features/response with scikit-learn; fit() returns the
# estimator itself, so construction and fitting can be chained.
modelLR = LogisticRegression().fit(x, y)

# a: fitted intercept, b: fitted coefficients.
a, b = modelLR.intercept_, modelLR.coef_
print(a, b)

# Mean accuracy on the training data.
modelLR.score(x, y)


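두 가지 방법으로 계산한 매개변수에는 작은 차이가 있을 수 있다. statsmodels의 Logit은 정규화 없이 최대우도추정을 수행하는 반면, scikit-learn의 LogisticRegression은 기본적으로 L2 정규화(C=1.0)를 적용하기 때문이다.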

좋은 웹페이지 즐겨찾기