Python 기초 라이브러리의 Pandas
22254 단어 {python 기본 라이브러리 Numpy Pandas}
환경: win10 64, py 2.7, pycharm 2018.1.1
#!/usr/bin/python
# -*- coding:utf-8 -*-
from pandas import Series,DataFrame
import pandas as pd
import numpy as np
# pandas
# pandas is built around two workhorse data structures: Series and DataFrame.
# A Series is a one-dimensional, array-like object holding an array of data
# (of any NumPy dtype) together with an associated array of labels, its index.
# The simplest Series is built from nothing but a list of values:
obj = Series([4, 7, -5, 3])
print(obj)
# The printed form shows the index on the left and the values on the right.
# No index was given, so a default integer index 0..N-1 (N = length) is made.
# The underlying array and the index object are exposed as attributes.
# NOTE: single-argument print(...) calls behave the same on Python 2 and 3.
print(obj.values)
print(obj.index)
# Usually you want a Series whose index labels identify each data point:
obj2 = Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
print(obj2['a'])
# Values can be assigned through their label.
obj2['a'] = 10
print(obj2)
# NumPy-style operations (boolean filtering, scalar arithmetic, math
# functions) keep the link between index labels and values:
print(obj2[obj2 > 0])
print(np.exp(obj2))
# A Series also behaves like a fixed-length ordered dict mapping labels to
# values, so dict-style membership tests work on the index:
print('b' in obj2)
# Data that already lives in a Python dict can be turned into a Series.
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = Series(sdata)
print(obj3)
# With only a dict, the index is taken from the dict keys (their order
# depends on the pandas version). Passing an explicit index selects and orders
# the labels: 'California' has no value and becomes NaN, 'Utah' is left out.
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(sdata, index=states)
print(obj4)
# pandas' isnull and notnull functions detect missing data:
print(pd.isnull(obj4))
print(pd.notnull(obj4))
# Arithmetic between Series aligns the operands by index label automatically.
print(obj3 + obj4)
# Both the Series and its index carry a name attribute that other pandas
# functionality makes use of:
obj4.name = 'population'
obj4.index.name = 'state'
print(obj4)
# DataFrame
# A DataFrame is a tabular data structure with an ordered collection of
# columns, each of which may hold a different value type (numeric, string,
# boolean, ...). It has both a row index and a column index and can be thought
# of as a dict of Series that all share the same index (much like R's
# data.frame).
#
# The most common way to build one is from a dict of equal-length lists or
# NumPy arrays:
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
frame = DataFrame(data)
print(frame)
# If a sequence of columns is given, the columns are arranged in that order:
frame2 = DataFrame(data, columns=['year', 'state', 'pop'])
print(frame2)
# As with Series, a requested column that is not in the data ('debt') shows up
# filled with NA values:
frame3 = DataFrame(
    data,
    columns=['year', 'state', 'pop', 'debt'],
    index=['one', 'two', 'three', 'four', 'five'],
)
print(frame3)
print(frame3.columns)
# A column can be retrieved as a Series by dict-like notation or by attribute:
print(frame3['state'])
print(frame3.year)
# The returned Series shares the DataFrame's index and has its name set.
# Rows are retrieved by label with .loc (the old .ix indexer was removed in
# pandas 1.0). The original statement discarded the result; print it.
print(frame3.loc['three'])
# Columns can be modified by assignment, e.g. give the empty "debt" column a
# scalar value.
frame3['debt'] = 16.5
print(frame3)
# A list or array assigned to a column must match the DataFrame's length. A
# Series is instead aligned on the DataFrame's index, with missing values
# inserted for labels it does not contain.
val = Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame3['debt'] = val
print(frame3)
# Assigning to a column that does not exist creates it; del removes a column:
frame3['eastern'] = frame3.state == 'Ohio'
print(frame3)
# Remove the helper column again.
del frame3['eastern']
print(frame3.columns)
# Another common input is a nested dict of dicts: the outer keys become the
# columns and the inner keys become the row index.
pop = {'Nevada': {2001: 2.4, 2002: 2.9}, 'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame4 = DataFrame(pop)
print(frame4)
# The result can be transposed:
print(frame4.T)
# The inner keys are combined to form the index unless an explicit index is
# given, in which case exactly those labels are used:
frame5 = DataFrame(pop, index=[2001, 2002, 2003])
print(frame5)
# A dict of Series is treated in much the same way. The slices are positional,
# so .iloc is used to make that explicit on the integer-labelled index.
pdata = {'Ohio': frame4['Ohio'].iloc[:-1], 'Nevada': frame4['Nevada'].iloc[:2]}
print(DataFrame(pdata))
# If the index and columns of a DataFrame have names, they are displayed too.
frame4.index.name = 'year'
frame4.columns.name = 'state'
print(frame4)
print(frame4.values)
# Index objects
# pandas Index objects hold the axis labels (and axis names). Any array or
# sequence of labels used to build a Series or DataFrame becomes an Index:
obj = Series(range(3), index=['a', 'b', 'c'])
print(obj)
index = obj.index
print(index)
# Positions 0, 1, 2 (everything).
print(index[0:])
# Positions 0 and 1 (all but the last).
print(index[:-1])
# Only the last element.
print(index[-1:])
# Index objects are immutable, so item assignment such as index[1] = 'd'
# raises a TypeError.
# Immutability is what allows an Index to be shared between data structures:
index = pd.Index(np.arange(3))
obj2 = Series([1.5, -2.5, 0], index=index)
print(obj2.index is index)
# Besides being array-like, an Index also supports set-like membership tests:
print(frame4)
print('Ohio' in frame4.columns)
# Fixed: the original tested frame3.index, but this section is about frame4
# (frame3 is labelled 'one'..'five', not with years).
print(2003 in frame4.index)
# Reindexing
# reindex is a key method on Series and DataFrame: it creates a new object
# with the data conformed to a new index.
obj = Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
# Calling reindex rearranges the data according to the new index; labels that
# were not present before ('e') get missing values:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
print(obj2)
# fill_value replaces the missing values. The original discarded this result,
# so print it to actually show the effect.
print(obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0))
# For ordered data such as time series you may want to interpolate while
# reindexing. The method option does this:
#   ffill / pad       carry the previous value forward
#   bfill / backfill  carry the next value backward
obj3 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
# Forward-filled view, for comparison with the plain reindex below.
print(obj3.reindex(range(6), method='ffill'))
obj3 = obj3.reindex(range(6))
print(obj3)
# On a DataFrame, reindex can alter the (row) index, the columns, or both.
# Given just a sequence, the rows are reindexed:
frame = DataFrame(
    np.arange(9).reshape((3, 3)),
    index=['a', 'c', 'd'],
    columns=['Ohio', 'Texas', 'California'],
)
print(frame)
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
print(frame2)
# The columns keyword reindexes the columns.
states = ['Texas', 'Utah', 'California']
frame = frame.reindex(columns=states)
print(frame)
# Rows and columns can be reindexed in one call; the fill method is what
# populates the new row 'b' here:
frame = frame.reindex(index=['a', 'b', 'c', 'd'], method='ffill', columns=states)
print(frame)
# Label-based selection does the same job more tersely. The .ix indexer was
# removed in pandas 1.0; .loc is the label-based replacement.
frame = frame.loc[['a', 'b', 'c', 'd'], states]
print(frame)
# Dropping entries from an axis
# Dropping one or more entries is easy given an index array or list.
# drop returns a new object with the given labels removed from the axis:
obj = Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
new_obj = obj.drop('c', axis=0)
print(new_obj)
# On a DataFrame, labels can be dropped from either axis. Without an axis
# argument the row labels are used:
data = DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data1 = data.drop(['Colorado', 'Ohio'])
print(data1)
# Drop column 'two': axis 0 addresses the rows, axis 1 the columns.
data2 = data.drop('two', axis=1)
print(data2)
# Several columns can be dropped at once.
data3 = data.drop(['two', 'four'], axis=1)
print(data3)
# Indexing, selection and filtering
# Series indexing (obj[...]) works like NumPy array indexing, except that the
# index labels can be used instead of only integers.
obj = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
print(obj)
print(obj['b'])
# Positional access goes through .iloc: plain integer keys on a label-indexed
# Series (obj[1], obj[[1, 3]]) are deprecated in recent pandas releases.
print(obj.iloc[1])
print(obj.iloc[2:4])
print(obj[['b', 'a', 'd']])
print(obj.iloc[[1, 3]])
print(obj[obj < 2])
# Unlike normal Python slicing, slicing with labels includes the endpoint.
print(obj['b':'c'])
obj['b':'c'] = 5
print(obj)
# Indexing into a DataFrame retrieves columns; slicing or a boolean array
# selects rows instead.
data = DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
print(data[:2])
print(data[data['three'] > 5])
# A boolean DataFrame can be used for indexing as well:
print(data < 5)
data[data < 5] = 0
print(data)
# Label-based selection of rows and columns. The old .ix indexer (removed in
# pandas 1.0) mixed labels and positions; .loc and .iloc keep them apart.
print(data.loc['Colorado', ['two', 'three']])
# Positions [3, 0, 1] are translated into column labels first.
print(data.loc[['Colorado', 'Utah'], data.columns[[3, 0, 1]]])
# Third row by position.
print(data.iloc[2])
# Rows where 'three' > 5, restricted to the first three columns.
print(data.loc[data.three > 5, data.columns[:3]])
# Arithmetic and data alignment
# Adding two objects aligns them on their index labels; a label that appears
# in only one operand ('d', 'f', 'g' here) yields NaN in the result.
s1 = Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])
s2 = Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])
print(s1 + s2)