심.소.남(PythonCrawling, React, Node_Js)_4) save scraped data in CSV using "Python" > read data from CSV using "Javascript"
Save data to CSV using Python and read it using JS
1) save to csv
You have to pass a list to writer.writerow(...), as the data = [...] row in the script below does.
# -*- encoding: utf-8 -*-
# how to use chrome driver : https://emessell.tistory.com/148
# how to implicitly wait for certain element to be loaded : https://aonee.tistory.com/40
import sys
import csv
import io
import requests
import threading
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# https://www.aladin.co.kr/m/msearch.aspx?SearchTarget=Book&KeyWord=%EC%8B%A4%EC%A1%B4%EC%A3%BC%EC%9D%98&KeyRecentPublish=0&OutStock=0&ViewType=Detail&CustReviewCount=0&CustReviewRank=0&KeyFullWord=%EC%8B%A4%EC%A1%B4%EC%A3%BC%EC%9D%98&KeyLastWord=%EC%8B%A4%EC%A1%B4%EC%A3%BC%EC%9D%98&CategorySearch=&MViewType=&PriceFilterMax=
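# re-wrap stdout/stderr as UTF-8 so the Korean search words print without encoding errors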
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding = 'utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding = 'utf-8')
# save to CSV: open the output file and create the writer
filename = "aladin.csv"
f = open(filename, "w", encoding = "utf8", newline = "")
writer = csv.writer(f)
# search words (URL-encoded): existentialism, cognitive therapy, positive psychology, psychoanalysis
searchwords = ['%EC%8B%A4%EC%A1%B4%EC%A3%BC%EC%9D%98',
               '%EC%9D%B8%EC%A7%80%EC%B9%98%EB%A3%8C',
               '%EA%B8%8D%EC%A0%95%EC%8B%AC%EB%A6%AC',
               '%EC%A0%95%EC%8B%A0%EB%B6%84%EC%84%9D']
searchwords_korean = ['실존주의',
                      '인지치료',
                      '긍정심리',
                      '정신분석']
books = {}
total_pages = []
total_page = 0
# per search-results page: scrape every book on the page and write one CSV row each
def get_content_total(searchword_korean, searchword, page):
    print("searchword in total page", searchword)
    url_page = "https://www.aladin.co.kr/m/msearch.aspx?SearchTarget=Book&KeyWord={}&page={}".format(searchword, page)
    response_page = requests.get(url_page)
    dom_page = BeautifulSoup(response_page.content, "html.parser")
    elements = dom_page.select(".browse_list_box")
    for idx, element in enumerate(elements):
        title = element.select_one('table > tr > td:nth-child(2) > ul > li:first-child > span')
        author = element.select_one('table > tr > td:nth-child(2) > ul > li:nth-child(2) > a.nm_book_title_a')
        # the author element can be missing, so only take .text when it exists
        if hasattr(author, "text"):
            author = author.text
        image = element.select_one('table > tr > td:first-child > div > div > a > img')['src']
        link = element.select_one('table > tr > td:first-child > div > div > a').get("href")
        link_description = get_content_bookEach(link)
        # writer.writerow() takes a list: one row per book
        data = [searchword_korean, title.text, author, image, link_description]
        writer.writerow(data)
        # books[searchword].append({
        #     "title": title.text,
        #     "author": author,
        #     "img": image,
        #     "description": link_description
        # })
# per individual book: fetch the detail page and return its description, if any
def get_content_bookEach(link):
    url_book = link
    response_book = requests.get(url_book)
    dom_book = BeautifulSoup(response_book.content, "html.parser")
    element_description = dom_book.select_one(".pdp_fs2.pdp_fwn")
    if element_description:
        return element_description.text
    else:
        return ""
# main crawling loop: for each search word, count the result pages, then scrape each one
for idx, searchword in enumerate(searchwords):
    books[searchword] = []
    total_page = 0
    url_book_whole_search = "https://www.aladin.co.kr/m/msearch.aspx?SearchWord={}&SearchTarget=Book".format(searchword)
    response_book_whole_search = requests.get(url_book_whole_search)
    soup = BeautifulSoup(response_book_whole_search.content, "html.parser")
    elements = soup.select('.search_list_newnum > ul > li')
    # count the total number of result pages from the pagination list
    print(searchword)
    for element in elements:
        if element.text != "":
            total_page += 1
    for i in range(1, total_page + 1):
        print("page", i)
        get_content_total(searchwords_korean[idx], searchword, i)
    print("books", books[searchword])
print(books)
f.close()  # close the file so the CSV is flushed to disk
2) read from csv using js
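The JavaScript code for this step is not in this extract, so here is a minimal Node.js sketch. It assumes the csv-parse package (npm install csv-parse) and that the aladin.csv file written above sits next to the script; the original post may have read the file differently.

// A sketch only: read the CSV written by the Python script above.
// Assumes "npm install csv-parse".
const fs = require('fs');
const { parse } = require('csv-parse/sync');

// read with the same utf-8 encoding the Python script used when writing
const raw = fs.readFileSync('aladin.csv', 'utf8');

// each row mirrors the writer.writerow(data) call:
// [searchword, title, author, image, description]
const rows = parse(raw);

const books = rows.map(([searchword, title, author, image, description]) => ({
    searchword,
    title,
    author,
    img: image,
    description,
}));

console.log(books.length, 'books loaded');
console.log(books[0]);

A real CSV parser matters here: csv.writer quotes fields that contain commas or newlines (book descriptions often do), which a plain split(',') would break on.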
Author and source: the original post, with more material, is at https://velog.io/@dhsys112/심.소.남PythonCrawling-React-NodeJs3-save-scrapped-data-in-csv-using-Python-reading-data-from-csv-using-Javascript (copyright belongs to the original author).