[파이썬 크롤러] 2 - 네트워크 요청.

8730 단어 파충
# I. urllib: the standard-library package for making network requests.
# 1) urlopen: fetch a URL and get back a response object.
from urllib import request

homepage = "http://www.baidu.com"
response = request.urlopen(homepage)
# read() returns the raw response body as bytes.
body = response.read()
print(body)
# NOTE: the original comment here mentioned the Ctrl+B shortcut
# (presumably the IDE's "go to definition" key) - confirm if it matters.

# 2) urlretrieve: download a URL straight into a local file.
from urllib import request

# First argument is the URL to fetch, second is the local filename to write.
request.urlretrieve(url="http://www.baidu.com", filename="baidu.html")

# 3) urlencode / quote_plus: percent-encode text so it can be placed in a URL.
#    urlencode takes a mapping and produces "a=b" pairs; to encode a single
#    string (as below) use quote / quote_plus instead.
from urllib import parse

url1 = "https://baike.baidu.com/item/"
url2 = "/129156?fr=aladdin"
# The urlencode variant would start from a dict such as {"item": "..."};
# here only one bare string has to be encoded, so quote_plus is used.
params = "   "
qs = parse.quote_plus(params)
# Assemble the final URL: prefix + encoded text + suffix.
url = "".join((url1, qs, url2))
# `request` is the urllib.request module imported earlier in this script.
response = request.urlopen(url)
print(response.read())

# 4) parse_qs: decode a query string back into a dict.
from urllib import parse

params = {"item":"   "}
# Encode the dict into a query string first, then decode it again.
qs = parse.urlencode(params)
# Store the result under its own name: rebinding `parse` here would replace
# the imported urllib.parse module with a dict and break every later
# `parse.*` call in the script.
parsed = parse.parse_qs(qs)
# parse_qs maps each key to a *list* of values.
print(parsed)

# 5) urlparse / urlsplit: break a URL into its components.
#    The two differ in that the urlsplit result has no `params` attribute
#    (a rarely used component), while the urlparse result does.
from urllib import parse

url = (
    "https://www.baidu.com/s"
    "?ie=utf-8&wd=%E4%B8%BA%E4%BB%80%E4%B9%88%E8%A6%81%E5%AD%A6python"
)
# The result exposes scheme, netloc, path, query and fragment.
result = parse.urlsplit(url)
print(result)

# 6) request.Request: build a request object when headers, a body or an
#    explicit HTTP method have to be sent along with the URL.
from urllib import request
from urllib import parse

user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"

url = "https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput="

# Step 1: plain urlopen with no custom headers.
response1 = request.urlopen(url)
# NOTE: the original remark suggests the content returned here is not the
# real page (presumably an anti-crawler response) - confirm against the site.
print(response1.read())

# Step 2: send a browser User-Agent (copied from the browser's
# "Request Headers -> User-Agent").
header = {"User-Agent": user_agent}
req = request.Request(url, headers=header)
response2 = request.urlopen(req)
print(response2.read())
# The page fetched above still does not carry the job listings: per the
# original note they come from a separate URL (positionAjax.json?...) that
# returns JSON and is requested with POST.

# Step 3: Request with the `data` and `method` arguments.
url = "https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"
header = {
    "User-Agent": user_agent,
    "Referer": "https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput=",
}
# Form fields sent in the POST body.
# NOTE(review): the original comment labelled first / pn / kd individually
# (presumably first-page flag, page number, keyword) - confirm.
data = {"first":"true","pn":1,"kd":"python"}
method = "POST"
# urlencode turns the dict into a str; encode() turns that str into the
# bytes urllib requires for a request body.
payload = parse.urlencode(data).encode("utf-8")
req = request.Request(url, headers=header, data=payload, method=method)
response = request.urlopen(req)
print(response.read())
# Pitfalls noted by the original author:
# 1. Passing the dict without urlencode fails with
#    "TypeError: can't concat str to bytes".
# 2. Passing the urlencoded str without .encode() fails with
#    "TypeError: POST data should be bytes, an iterable of bytes, or a file
#    object. It cannot be of type str." - Python 3 strings are unicode and
#    must be converted to bytes.
# 3. The request also needs a Referer header, which is why one is added above.

# 7) ProxyHandler: send requests through a proxy server.
#    http://httpbin.org/ip echoes the IP address the request came from, so it
#    is a convenient way to check whether the proxy is actually being used.
#    Steps:
#      i.   build a handler with urllib.request.ProxyHandler
#      ii.  build an opener from that handler with urllib.request.build_opener
#      iii. send the request with opener.open
from urllib import request

url = "http://httpbin.org/ip"

# 1. Without a proxy.
response1 = request.urlopen(url)
print(response1.read())

# 2. With a proxy.
# 2-1. ProxyHandler takes a dict: key = protocol, value = "ip:port".
proxy_map = {"http":"39.135.11.97:80"}
handler = request.ProxyHandler(proxy_map)
# 2-2. Create an opener that routes through the handler.
opener = request.build_opener(handler)
# 2-3. Send the request with the opener instead of urlopen.
response2 = opener.open(url)
print(response2.read())

# 8) Cookies (cookielib / HTTPCookieProcessor section)
# 1. A cookie is data a site stores on the client so it can recognise the
#    user across requests (session tracking).
# 2. Cookie format:
#    Set-Cookie: NAME=VALUE;Expires/Max-age=DATE;Path=PATH;Domain=DOMAIN_NAME;SECURE
#    (e.g. H_PS_PSSID=26523_1430_21080_18560_20930; path=/; domain=.baidu.com)
#      NAME:    cookie name
#      VALUE:   cookie value
#      Expires: cookie expiry time
#      Path:    path the cookie applies to
#      Domain:  domain the cookie applies to
#      SECURE:  cookie is only sent over https
# 3. Using cookies to access a page that requires login.
#    The original notes list a login URL (".../PLogin.do") and the profile URL
#    used below.
# 3-1. Simplest approach: copy the Cookie header from a logged-in browser.
from urllib import request

dapeng_url = "http://www.renren.com/880151247/profile"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    "Cookie": "anonymid=jmfo3rtd-qqw614; depovince=GW; _r01_=1; [email protected]; ln_hurl=http://hdn.xnimg.cn/photos/hdn421/20130724/1005/h_main_4Uln_61b40000018e111a.jpg; __utma=151146938.1018799493.1537755899.1537755899.1537755899.1; __utmz=151146938.1537755899.1.1.utmcsr=renren.com|utmccn=(referral)|utmcmd=referral|utmcct=/361722792/profile; wp_fold=0; jebecookies=5b198591-fc1c-40d2-98d7-a20fb74dbab1|||||; ick_login=ec09b039-4fa0-4d3b-80dc-919c1856500f; _de=B4E32D1A836EA635E2A69D838E8B6F33696BF75400CE19CC; p=b6ed7fa041c525ce392291f7fdcf32572; first_login_flag=1; t=935fd259a10790130472041798ba20372; societyguester=935fd259a10790130472041798ba20372; id=361722792; xnsid=f63825e0; ver=7.0; loginfrom=null; JSESSIONID=abcb6zA3mitavdu89Xjyw; XNESSESSIONID=f1898062548e; WebOnLineNotice_361722792=1",
}
req = request.Request(url=dapeng_url, headers=headers)
response = request.urlopen(req)
with open("test.html", "w", encoding="utf-8") as out_file:
    # write() on a text-mode file needs str, while response.read() returns
    # bytes, so the body is decoded first.
    # Rule of thumb: bytes -> decode -> str, and str -> encode -> bytes.
    page_text = response.read().decode("utf-8")
    out_file.write(page_text)
# requests library - GET request.
# Query parameters for a GET go in `params` (form data for a POST goes in
# `data`).
# Import here: this section uses `requests` before the script's first
# `import requests`, which would otherwise raise NameError.
import requests

kw = {"wd":"  "}
headers = {"User-Agent":"Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Mobile Safari/537.36"}
response = requests.get(url="http://www.baidu.com/s",params=kw,headers=headers)
with open("test.html","w",encoding="utf-8") as f:
    f.write(response.content.decode("utf-8"))
# 1-2. Reading the body:
#   .text    -> str, decoded by requests using the encoding it guessed
#               (the guess can be wrong and produce garbled text);
#   .content -> the raw bytes, which can be decoded explicitly.
print(response.text)
print(response.content.decode("utf-8"))
# 1-3. The full URL that was requested (including the encoded params).
print(response.url)
# 1-4. The encoding requests used for .text.
print(response.encoding)
# 1-5. The HTTP status code.
print(response.status_code)

# 2) Sending a POST request with requests.
import requests

url = "https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
    "Referer": "https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput=",
}
# Form fields for the POST body; requests encodes the dict itself, so no
# manual urlencode / encode step is needed.
data = {"first":"true","pn":1,"kd":"python"}
response = requests.post(url=url, data=data, headers=headers)
print(response.text)
# When the body is JSON, response.json() returns it already parsed into
# Python objects:
# print(response.json())

# 3) Using a proxy with requests:
#    pass a `proxies` dict to get / post.
import requests

url = "http://httpbin.org/ip"

# 1. Without a proxy.
response1 = requests.get(url=url)
print(response1.text)

# 2. With a proxy: the dict maps protocol -> "ip:port".
proxy = {"http":"39.135.11.97:80"}
response2 = requests.get(url=url, proxies=proxy)
print(response2.text)

# 4) Cookies with requests.
import requests

url = "http://www.baidu.com/"
# 1. Reading cookies from a response.
response = requests.get(url=url)
print(response.cookies.get_dict())  # get_dict() returns the cookies as a plain dict

# 2. Sharing cookies across requests with a Session (simulated login).
# "Session" here is not the server-side session from web development: it is a
# client object that keeps cookies between requests, similar to a urllib
# opener with cookie handling.
# Both URLs are defined here so this section does not rely on variables from
# earlier sections; the login URL previously had a doubled "http://" scheme
# and was assigned to `url` while the code below read `login_url`.
login_url = "http://www.renren.com/PLogin.do"
dapeng_url = "http://www.renren.com/880151247/profile"
data = {"email":"[email protected]","password":"xxxxxx"}
headers= {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}
# 2-1. Log in; the session stores the cookies set by the login response.
session = requests.Session()
session.post(url=login_url,headers=headers,data=data)
# 2-2. Request the profile page with the same session (cookies are sent
#      automatically).
response = session.get(url=dapeng_url)
# 2-3. Save the page; print(response.text) would also work for a quick look.
with open("renren.html","w",encoding="utf-8") as f:
    f.write(response.text)

# 5) Handling untrusted SSL certificates.
# For sites whose certificate is trusted, requests works as usual; when the
# certificate cannot be verified, passing verify=False skips certificate
# verification so the request still goes through.
# "--a url--" is a placeholder for the real https URL.
response = requests.get(url="--a url--", verify=False)
raw_body = response.content
print(raw_body.decode("utf-8"))

좋은 웹페이지 즐겨찾기