[Python Web Crawler] 2 - Network Requests
# I. urllib: the HTTP toolkit in Python's standard library. The pieces used most
# often for crawling are urlopen, urlretrieve, urlencode/quote_plus, parse_qs,
# urlparse/urlsplit, request.Request, ProxyHandler, and cookie handling:
# 1) urlopen: open a URL and get back a file-like response object:
from urllib import request
response = request.urlopen("http://www.baidu.com")
print(response.read())
# (in PyCharm, Ctrl+B on urlopen jumps to its definition so you can inspect the signature)
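# (Illustrative aside, not from the original: besides read(), the response object
# also exposes getcode() and geturl().)
print(response.getcode())  # HTTP status code, e.g. 200
print(response.geturl())   # final URL after any redirects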
# 2) urlretrieve: download a URL straight into a local file:
from urllib import request
# fetch the page at the URL and save it as baidu.html
request.urlretrieve("http://www.baidu.com","baidu.html")
# 3) urlencode, quote_plus: URL-encode request data:
# difference: urlencode takes a dict and produces "a=b" pairs joined by "&";
# quote/quote_plus percent-encode a single bare string
from urllib import parse
url1 = "https://baike.baidu.com/item/"
url2 = "/129156?fr=aladdin"
# with urlencode it would look like this (note the "item=..." key=value form):
# params = {"item": "刘德华"}
# qs = parse.urlencode(params)
params = "刘德华"  # the original item name was lost; "刘德华" is only an illustrative value
qs = parse.quote_plus(params)  # percent-encode the bare string
url = url1 + qs + url2  # assemble the full URL
response = request.urlopen(url)
print(response.read())
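# (Illustrative sketch: quote and quote_plus differ mainly in how they treat
# spaces -- quote uses %20, quote_plus uses "+".)
print(parse.quote("hello world"))       # hello%20world
print(parse.quote_plus("hello world"))  # hello+world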
# 4) parse_qs: decode an encoded query string back into a dict:
from urllib import parse
params = {"item":" "}
# urlparse parse_qs
qs = parse.urlencode(params)
parse = parse.parse_qs(qs)
print(parse)
# 5) urlparse, urlsplit: split a URL into its components:
# urlsplit is nearly identical to urlparse except that it has no params attribute
# (which is rarely used anyway); see the sketch after this example
from urllib import parse
url = "https://www.baidu.com/s?ie=utf-8&wd=%E4%B8%BA%E4%BB%80%E4%B9%88%E8%A6%81%E5%AD%A6python"
result = parse.urlsplit(url)
print(result)
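# (Illustrative sketch of the difference: urlparse has the extra params field for
# the rarely used ";key=value" path parameters, while urlsplit leaves them in
# path. example.com is a stand-in URL.)
r1 = parse.urlparse("http://www.example.com/path;type=a?x=1")
r2 = parse.urlsplit("http://www.example.com/path;type=a?x=1")
print(r1.params)  # 'type=a'
print(r2.path)    # '/path;type=a'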
# 6) request.Request: use this class when the request needs extra headers, a body, or a specific method:
from urllib import request
from urllib import parse
url = "https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput="
# 1. plain urlopen, without headers
response1 = request.urlopen(url)
print(response1.read())  # not the real page: the site spots the crawler and serves a stub
# 2. add request headers (copy the value from the browser dev tools: Request Headers -> User-Agent)
header ={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}
re = request.Request(url,headers=header)
response2 = request.urlopen(re)
print(response2.read())
# Even with headers the response still lacks the job data: the listings are loaded asynchronously.
# In the dev tools Network panel you can find the real URL (positionAjax.json?needAddtionalResult=false);
# it returns JSON and must be requested with POST.
# 3. pass data and method to Request
url = "https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"
header ={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
"Referer": "https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput="
}
# form data -- first: whether this is the first page, pn: page number, kd: search keyword
data = {"first":"true","pn":1,"kd":"python"}
method = "POST"
re = request.Request(url,headers=header,data=parse.urlencode(data).encode("utf-8"),method=method)
response = request.urlopen(re)
print(response.read())
# Pitfalls:
# 1. if data is not prepared properly you get TypeError: can't concat str to bytes
# 2. TypeError: POST data should be bytes, an iterable of bytes, or a file object. It cannot be of type str.
#    -- Python 3 strings are unicode, so the urlencoded string must still be .encode()-d to bytes
# 3. without the Referer header the site rejects the request as a crawler
# 7) ProxyHandler (requesting through a proxy):
# 1. why: a site that sees too many requests from one IP will block it, so crawlers switch proxy IPs to keep working
# 2. how it works: the request goes to the proxy server, which forwards it to the target site and relays the response back
# 3. free proxies are easy to find but unstable; paid proxies are more reliable
# 4. to check which IP the target sees: http://httpbin.org
# 5. steps in code:
# i. build a handler with urllib.request.ProxyHandler
# ii. build an opener from the handler with urllib.request.build_opener
# iii. request the page with the opener's open method
from urllib import request
url = "http://httpbin.org/ip"
# 1. without a proxy
response1 = request.urlopen(url)
print(response1.read())
# 2. with a proxy
# 2-1. build a handler with ProxyHandler; the argument is a dict {scheme: "ip:port"}
handler = request.ProxyHandler({"http":"39.135.11.97:80"})
# 2-2. build an opener from the handler
opener = request.build_opener(handler)
# 2-3. fetch the page with the opener
response2 = opener.open(url)
print(response2.read())
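# (Optional extra, for illustration: install_opener makes this opener the global
# default, so plain request.urlopen() also goes through the proxy afterwards.)
request.install_opener(opener)
print(request.urlopen(url).read())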
# 8) cookies: http.cookiejar (cookielib in Python 2) and HTTPCookieProcessor
# 1. what a cookie is: HTTP is stateless, so the server hands the browser a small piece of data (the cookie)
#    that is sent back on every later request; together with the server-side session it identifies the user
# 2. cookie format (a quick way to inspect these headers is sketched after this list):
# Set-Cookie: NAME=VALUE;Expires/Max-age=DATE;Path=PATH;Domain=DOMAIN_NAME;SECURE (e.g. H_PS_PSSID=26523_1430_21080_18560_20930; path=/; domain=.baidu.com)
# NAME: the cookie's name
# VALUE: the cookie's value
# Expires: when the cookie expires
# Path: the path the cookie applies to
# Domain: the domain the cookie applies to
# SECURE: send the cookie only over https
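# (Illustrative sketch: the Set-Cookie headers described above can be inspected
# on any urlopen response.)
from urllib import request
resp = request.urlopen("http://www.baidu.com")
print(resp.getheaders())             # all response headers as (name, value) pairs
print(resp.getheader("Set-Cookie"))  # the Set-Cookie header(s), if any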
# 3. use a cookie to fetch a page that requires login (renren.com example)
# login_url = "http://www.renren.com/PLogin.do"
# dapeng_url = "http://www.renren.com/880151247/profile"
# 3-1. copy the cookie from a logged-in browser session by hand
from urllib import request
dapeng_url = "http://www.renren.com/880151247/profile"
headers={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
"Cookie":"anonymid=jmfo3rtd-qqw614; depovince=GW; _r01_=1; [email protected]; ln_hurl=http://hdn.xnimg.cn/photos/hdn421/20130724/1005/h_main_4Uln_61b40000018e111a.jpg; __utma=151146938.1018799493.1537755899.1537755899.1537755899.1; __utmz=151146938.1537755899.1.1.utmcsr=renren.com|utmccn=(referral)|utmcmd=referral|utmcct=/361722792/profile; wp_fold=0; jebecookies=5b198591-fc1c-40d2-98d7-a20fb74dbab1|||||; ick_login=ec09b039-4fa0-4d3b-80dc-919c1856500f; _de=B4E32D1A836EA635E2A69D838E8B6F33696BF75400CE19CC; p=b6ed7fa041c525ce392291f7fdcf32572; first_login_flag=1; t=935fd259a10790130472041798ba20372; societyguester=935fd259a10790130472041798ba20372; id=361722792; xnsid=f63825e0; ver=7.0; loginfrom=null; JSESSIONID=abcb6zA3mitavdu89Xjyw; XNESSESSIONID=f1898062548e; WebOnLineNotice_361722792=1"}
re = request.Request(url=dapeng_url,headers=headers)
response = request.urlopen(re)
with open ("test.html","w",encoding="utf-8") as f:
f.write(response.read().decode("utf-8"))
# Notes:
# i. f.write expects a str
# ii. response.read() returns bytes
# iii. so convert bytes -> decode("utf-8") -> str before writing (and the reverse, str -> encode -> bytes, for POST data)
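# 3-2. use http.cookiejar and HTTPCookieProcessor to log in automatically
# (a sketch of the approach the section header above refers to -- the original
# code for this step is missing; credentials are placeholders):
from urllib import request, parse
from http.cookiejar import CookieJar
cookiejar = CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cookiejar))
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}
# log in once; the jar captures the session cookie
login_url = "http://www.renren.com/PLogin.do"
data = parse.urlencode({"email": "[email protected]", "password": "xxxxxx"}).encode("utf-8")
opener.open(request.Request(login_url, headers=headers, data=data))
# later requests through the same opener send the cookie automatically
dapeng_url = "http://www.renren.com/880151247/profile"
response = opener.open(request.Request(dapeng_url, headers=headers))
with open("renren_cookiejar.html", "w", encoding="utf-8") as f:
    f.write(response.read().decode("utf-8"))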
kw = {"wd":" "}
headers = {"User-Agent":"Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Mobile Safari/537.36"}
response = requests.get(url="http://www.baidu.com/s",params=kw,headers=headers)
with open("test.html","w",encoding="utf-8") as f:
f.write(response.content.decode("utf-8"))
# 1-2. response content:
# .text vs .content: .text returns a unicode str (requests guesses the encoding, which can come out garbled);
# .content returns the raw bytes
# recommendation: take the bytes from .content and decode them yourself
print(response.text)
print(response.content.decode("utf-8"))
# 1-3. the URL of the response
print(response.url)
# 1-4. the encoding requests guessed for .text
print(response.encoding)
# 1-5. the HTTP status code
print(response.status_code)
# 2) post request: pass a plain dict as data; requests encodes it for you, no urlencode/encode dance:
import requests
url = "https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"
headers ={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
"Referer": "https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput="
}
data = {"first":"true","pn":1,"kd":"python"}
response = requests.post(url=url,headers=headers,data=data)
print(response.text)
# if the response body is JSON it can be parsed directly:
# print(response.json())
# 3) using a proxy with requests:
# much simpler than urllib: just pass a proxies dict to get or post
import requests
url = "http://httpbin.org/ip"
# 1. without a proxy
response1 = requests.get(url=url)
print(response1.text)
# 2. with a proxy: pass a dict {scheme: "ip:port"}
proxy = {"http":"39.135.11.97:80"}
response2 = requests.get(url=url,proxies=proxy)
print(response2.text)
# 4) handling cookies with requests:
import requests
url = "http://www.baidu.com/"
# 1. read the cookies the server returned
response = requests.get(url=url)
print(response.cookies.get_dict())  # .get_dict() turns the cookie jar into a plain dict
# 2. share cookies across requests with a session
# session: requests.Session persists cookies between requests (not the web-development "session" concept);
# it plays the same role as urllib's opener with a cookie processor
login_url = "http://www.renren.com/PLogin.do"
dapeng_url = "http://www.renren.com/880151247/profile"
data = {"email":"[email protected]","password":"xxxxxx"}
headers= {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}
# 2-1. log in once; the session stores the returned cookie
session = requests.Session()
session.post(url=login_url,headers=headers,data=data)  # the login cookie is now held by the session
# 2-2. later requests from the same session carry the cookie automatically
response = session.get(url=dapeng_url)
# 2-3. save the page
# (or just inspect it: print(response.text))
with open("renren.html","w",encoding="utf-8") as f:
f.write(response.text)
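# (Illustrative aside: the cookies the login stored in the session can be
# inspected just like in step 1.)
print(session.cookies.get_dict())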
# 5) handling untrusted SSL certificates
# requests verifies certificates by default (fine for a site like https://www.baidu.com);
# for a site whose certificate is invalid or self-signed, pass verify=False to skip verification
response = requests.get("--a url--",verify=False)
print(response.content.decode("utf-8"))
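# (Optional extra, for illustration: with verify=False, urllib3 emits an
# InsecureRequestWarning on every request; it can be silenced like this.)
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)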