Pandas 합병 merge

키 통합
import pandas as pd
left = pd.DataFrame({'key':['K0','K1','K2','K3'],
                     'A':['A0','A1','A2','A3'],
                     'B':['B0','B1','B2','B3']})
right = pd.DataFrame({'key':['K0','K1','K2','K3'],
                      'C':['C0','C1','C2','C3'],
                      'D':['D0','D1','D2','D3']})
print(left)
print(right)

    A   B key
0  A0  B0  K0
1  A1  B1  K1
2  A2  B2  K2
3  A3  B3  K3
    C   D key
0  C0  D0  K0
1  C1  D1  K1
2  C2  D2  K2
3  C3  D3  K3

#   key  
res = pd.merge(left,right,on = 'key')
print(res)

    A   B key   C   D
0  A0  B0  K0  C0  D0
1  A1  B1  K1  C1  D1
2  A2  B2  K2  C2  D2
3  A3  B3  K3  C3  D3

여러 keys 를 통 해 통합
left = pd.DataFrame({'key1':['K0','K0','K1','K2'],
                     'key2':['K0','K1','K0','K1'],
                     'A':['A0','A1','A2','A3'],
                     'B':['B0','B1','B2','B3']})
right = pd.DataFrame({'key1':['K0','K1','K1','K2'],
                      'key2':['K0','K0','K0','K0'],
                      'C':['C0','C1','C2','C3'],
                      'D':['D0','D1','D2','D3']})
print(left)
print(right)

   A   B key1 key2
0  A0  B0   K0   K0
1  A1  B1   K0   K1
2  A2  B2   K1   K0
3  A3  B3   K2   K1
    C   D key1 key2
0  C0  D0   K0   K0
1  C1  D1   K1   K0
2  C2  D2   K1   K0
3  C3  D3   K2   K0

#     keys  
res = pd.merge(left,right,on = ['key1','key2']) # merge  how='inner'
print(res) #right K1、KO        , left       

    A   B key1 key2   C   D
0  A0  B0   K0   K0  C0  D0
1  A2  B2   K1   K0  C1  D1
2  A2  B2   K1   K0  C2  D2

how = ['left','right','outer','inner']
# how = ’left‘
res = pd.merge(left,right,on = ['key1','key2'],how = 'left')
print(res)

    A   B key1 key2    C    D
0  A0  B0   K0   K0   C0   D0
1  A1  B1   K0   K1  NaN  NaN
2  A2  B2   K1   K0   C1   D1
3  A2  B2   K1   K0   C2   D2
4  A3  B3   K2   K1  NaN  NaN

# how = ’right‘
res = pd.merge(left,right,on = ['key1','key2'],how = 'right')
print(res)

    A    B key1 key2   C   D
0   A0   B0   K0   K0  C0  D0
1   A2   B2   K1   K0  C1  D1
2   A2   B2   K1   K0  C2  D2
3  NaN  NaN   K2   K0  C3  D3

# how = ’outer‘
res = pd.merge(left,right,on = ['key1','key2'],how = 'outer')
print(res)

     A    B key1 key2    C    D
0   A0   B0   K0   K0   C0   D0
1   A1   B1   K0   K1  NaN  NaN
2   A2   B2   K1   K0   C1   D1
3   A2   B2   K1   K0   C2   D2
4   A3   B3   K2   K1  NaN  NaN
5  NaN  NaN   K2   K0   C3   D3

indicator: merge 를 표시 하 는 방식
df1 = pd.DataFrame({'col1':[0,1],'col_left':['a','b']})
df2 = pd.DataFrame({'col1':[1,2,2],'col_right':[2,2,2]})
print(df1)
print(df2)

   col1 col_left
0     0        a
1     1        b
   col1  col_right
0     1          2
1     2          2
2     2          2

# indicator=True
res = pd.merge(df1,df2,on='col1',how='outer',indicator=True)
print(res)

   col1 col_left  col_right      _merge
0     0        a        NaN   left_only
1     1        b        2.0        both
2     2      NaN        2.0  right_only
3     2      NaN        2.0  right_only

#    indicator   give the indicator a custom name
res = pd.merge(df1,df2,on='col1',how='outer',indicator='indicator_column')
print(res)

   col1 col_left  col_right indicator_column
0     0        a        NaN        left_only
1     1        b        2.0             both
2     2      NaN        2.0       right_only
3     2      NaN        2.0       right_only

index 를 통 해 합병
left = pd.DataFrame({'A':['A0','A1','A2'],
                     'B':['B0','B1','B2']},
                      index=['K0','K1','K2'])
right = pd.DataFrame({'C':['C0','C2','C3'],
                      'D':['D0','D2','D3']},
                      index=['K0','K2','K3'])
print(left)
print(right)

     A   B
K0  A0  B0
K1  A1  B1
K2  A2  B2
     C   D
K0  C0  D0
K2  C2  D2
K3  C3  D3

# left_index & right_index
res = pd.merge(left,right,left_index=True,right_index=True,how='outer')
print(res)

      A    B    C    D
K0   A0   B0   C0   D0
K1   A1   B1  NaN  NaN
K2   A2   B2   C2   D2
K3  NaN  NaN   C3   D3

처리 중복 데이터
boys = pd.DataFrame({'K':['K0','K1','K2'],
                     'age':[1,2,3]})
girls = pd.DataFrame({'K':['K0','K0','K3'],
                     'age':[4,5,6]})
print(boys)
print(girls)

   K  age
0  K0    1
1  K1    2
2  K2    3
    K  age
0  K0    4
1  K0    5
2  K3    6

# how='inner'
res = pd.merge(boys,girls,on='K',suffixes=['_boys','_girls'],how='inner')
print(res)

    K  age_boys  age_girls
0  K0         1          4
1  K0         1          5

#how='outer'
res = pd.merge(boys,girls,on='K',suffixes=['_boys','_girls'],how='outer')
print(res)

    K  age_boys  age_girls
0  K0       1.0        4.0
1  K0       1.0        5.0
2  K1       2.0        NaN
3  K2       3.0        NaN
4  K3       NaN        6.0

Pandas 학습 강좌 출처 여기 찍 어 주세요.

좋은 웹페이지 즐겨찾기