파이썬 크롤러로 두이탕(堆糖, Duitang)에서 지정한 키워드의 그림 내려받기
2045 단어 · Python 크롤러 학습 노트
import urllib.parse
import threading
import requests
import os
# Caps the number of concurrent download threads at 10: main() acquires a
# slot before starting each thread and download_pics() releases it.
thread_lock = threading.BoundedSemaphore(10)
#
def get_page(url, timeout=30):
    """Fetch ``url`` with HTTP GET and return the body decoded as UTF-8.

    Args:
        url: Address to request.
        timeout: Timeout in seconds handed to ``requests.get`` so that a
            server which stops responding cannot block the caller forever.
            Pass ``None`` to wait without limit, as the code did before this
            parameter existed.

    Returns:
        The response body as ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    response = requests.get(url, timeout=timeout)
    # ``content`` is the raw bytes of the body; decode it explicitly.
    return response.content.decode('utf-8')
#
def pages_from_duitang(label, max_items=1800, page_size=24):
    """Fetch the Duitang search-result pages for a keyword.

    Requests the ``by_search`` listing endpoint repeatedly, advancing the
    ``start`` offset by ``page_size`` on every request, and collects what
    ``get_page`` returns for each URL.

    Args:
        label: Search keyword; it is percent-encoded before being placed in
            the URL.
        max_items: Exclusive upper bound for the ``start`` offset. Defaults
            to 1800, the value that used to be hard-coded here.
        page_size: Offset increment between consecutive requests. Defaults
            to 24, the value that used to be hard-coded here.

    Returns:
        A list with one fetched page per request, in request order. Empty
        when ``max_items`` is not positive.
    """
    url = 'https://www.duitang.com/napi/blog/list/by_search/?kw={}&start={}'
    quoted = urllib.parse.quote(label)
    pages = []
    for index in range(0, max_items, page_size):
        u = url.format(quoted, index)
        # Progress output: show each URL as it is requested.
        print(u)
        pages.append(get_page(u))
    return pages
def findall_in_page(page, startpart, endpart):
    """Return every substring of ``page`` enclosed by the two delimiters.

    The text is scanned left to right. After each match the scan resumes at
    the position of the closing delimiter (not after it), so a closing
    delimiter may also serve as the start of the next opening delimiter.

    Args:
        page: Text to scan.
        startpart: Opening delimiter; must not be empty.
        endpart: Closing delimiter.

    Returns:
        The substrings found between ``startpart`` and the next ``endpart``,
        in order of appearance. An opening delimiter that has no closing
        delimiter after it ends the scan and contributes nothing.

    Raises:
        ValueError: If ``startpart`` is empty.
    """
    if not startpart:
        # An empty opening delimiter matches at every position without
        # advancing the scan, which would never terminate.
        raise ValueError('startpart must not be empty')
    all_strings = []
    end = 0
    while True:
        found = page.find(startpart, end)
        if found == -1:
            break
        start = found + len(startpart)
        end = page.find(endpart, start)
        if end == -1:
            # Unterminated match. Slicing with -1 would silently drop the
            # last character and restarting the search at -1 can loop
            # forever, so stop here instead.
            break
        all_strings.append(page[start:end])
    return all_strings
#
def pic_urls_from_pages(pages):
    """Collect the image URLs found in every fetched page.

    Each page is scanned with ``findall_in_page`` for the text between
    ``path":"`` and the following double quote.

    Args:
        pages: Iterable of page texts.

    Returns:
        A flat list of the extracted strings, in page order and then in
        order of appearance within each page.
    """
    return [
        pic_url
        for page in pages
        for pic_url in findall_in_page(page, 'path":"', '"')
    ]
#
def download_pics(url, n, dir_name, timeout=30):
    """Download one image into ``dir_name`` and release a concurrency slot.

    Meant to run as a thread target: ``main`` acquires ``thread_lock``
    before starting the thread and this function releases it on the way out.

    Args:
        url: Image URL; the text after its last ``/`` is used as file name.
        n: Sequence number of the image. Not used here; kept because the
            caller passes it.
        dir_name: Directory to write the file into.
        timeout: Timeout in seconds handed to ``requests.get`` so a stalled
            download cannot hold its slot forever. Pass ``None`` to wait
            without limit.
    """
    try:
        img = requests.get(url, timeout=timeout)
        file_name = url.split('/')[-1]
        path = os.path.join(dir_name, file_name)
        with open(path, 'wb') as fp:
            fp.write(img.content)
    finally:
        # Always give the slot back, even when the request or the write
        # fails; otherwise ten failures would leave main() blocked in
        # acquire() forever.
        thread_lock.release()
def main(label):
    """Download every image that the Duitang search for ``label`` returns.

    Creates a directory named after ``label`` (if it does not exist yet),
    fetches the search-result pages, extracts the image URLs and downloads
    each one in its own thread. ``thread_lock`` limits how many downloads
    run at the same time. The function returns once all download threads
    have finished.

    Args:
        label: Search keyword; also used as the output directory name.

    Raises:
        FileExistsError: If ``label`` exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the check-then-create sequence, which
    # could fail if the directory appeared between the check and the mkdir.
    os.makedirs(label, exist_ok=True)
    dir_name = label
    pages = pages_from_duitang(label)
    pic_urls = pic_urls_from_pages(pages)
    threads = []
    for n, url in enumerate(pic_urls, start=1):
        print(' {} ...'.format(n))
        # Blocks until a slot is free; download_pics releases it.
        thread_lock.acquire()
        t = threading.Thread(target=download_pics, args=(url, n, dir_name))
        t.start()
        threads.append(t)
    # Wait for the remaining downloads so callers can rely on the files
    # being written when main() returns.
    for t in threads:
        t.join()
# Script entry point: download the images for the given search keyword.
# This call sits at module level without a __main__ guard, so it also runs
# when the file is imported.
# NOTE(review): the keyword is a single space - presumably a placeholder left
# after the original text was stripped; replace it with a real keyword.
main(' ')
main의 키워드를 변경하면 해당 그림을 다운로드할 수 있습니다.