심.소.남(PythonCrawling, React, Node_Js)_4) save scraped data in CSV using "Python" > reading data from CSV using "JavaScript"

Save data to CSV using Python and read it using JS

1) save to CSV

You have to pass a list to writer.writerow(...). If you hand it a bare string, the csv module iterates over the string character by character, so writer.writerow("abc") writes three fields (a, b, c), while writer.writerow(["abc"]) writes a single abc field.

# -*- encoding: utf-8 -*-
import sys
import csv
import io
import requests
from bs4 import BeautifulSoup


# https://www.aladin.co.kr/m/msearch.aspx?SearchTarget=Book&KeyWord=%EC%8B%A4%EC%A1%B4%EC%A3%BC%EC%9D%98&KeyRecentPublish=0&OutStock=0&ViewType=Detail&CustReviewCount=0&CustReviewRank=0&KeyFullWord=%EC%8B%A4%EC%A1%B4%EC%A3%BC%EC%9D%98&KeyLastWord=%EC%8B%A4%EC%A1%B4%EC%A3%BC%EC%9D%98&CategorySearch=&MViewType=&PriceFilterMax=
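# force UTF-8 on stdout/stderr (Windows consoles often default to cp949)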
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding = 'utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.detach(), encoding = 'utf-8')

# save to CSV
filename = "aladin.csv"
f = open(filename, "w", encoding="utf8", newline="")
writer = csv.writer(f)
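# optionally, write a header row so the file is self-describing:
# writer.writerow(["keyword", "title", "author", "image", "description"])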

# URL-encoded search keywords: 실존주의 (existentialism), 인지치료 (cognitive
# therapy), 긍정심리 (positive psychology), 정신분석 (psychoanalysis)
searchwords = ['%EC%8B%A4%EC%A1%B4%EC%A3%BC%EC%9D%98',
               '%EC%9D%B8%EC%A7%80%EC%B9%98%EB%A3%8C',
               '%EA%B8%8D%EC%A0%95%EC%8B%AC%EB%A6%AC',
               '%EC%A0%95%EC%8B%A0%EB%B6%84%EC%84%9D'
               ]

searchwords_korean = ['실존주의',
                      '인지치료',
                      '긍정심리',
                      '정신분석'
                      ]
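# note: the encoded keywords above could also be generated at runtime
# rather than hard-coded, e.g.:
#   from urllib.parse import quote
#   searchwords = [quote(word) for word in searchwords_korean]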

books = {}

# scrape one page of search results for one keyword
def get_content_total(searchword_korean, searchword, page):
    print("searchword in total page", searchword)
    url_page = "https://www.aladin.co.kr/m/msearch.aspx?SearchTarget=Book&KeyWord={}&page={}".format(searchword, page)
    response_page = requests.get(url_page)
    dom_page = BeautifulSoup(response_page.content, "html.parser")
    elements = dom_page.select(".browse_list_box")

    for element in elements:
        title  = element.select_one('table > tr > td:nth-child(2) > ul > li:first-child > span')
        author = element.select_one('table > tr > td:nth-child(2) > ul > li:nth-child(2) > a.nm_book_title_a')
        # some entries have no author link, so fall back to an empty string
        author = author.text if author else ""
        image  = element.select_one('table > tr > td:first-child > div > div > a > img')['src']
        link   = element.select_one('table > tr > td:first-child > div > div > a').get("href")
        link_description = get_content_bookEach(link)
        
        data = [searchword_korean, title.text, author, image, link_description]
        writer.writerow(data)
        
        # also keep an in-memory copy, printed at the end of the script
        books[searchword].append({
            "title": title.text,
            "author": author,
            "img": image,
            "description": link_description
        })

# scrape the description from a single book's detail page
def get_content_bookEach(link):
    response_book = requests.get(link)
    dom_book = BeautifulSoup(response_book.content, "html.parser")
    element_description = dom_book.select_one(".pdp_fs2.pdp_fwn")
    if element_description:
        return element_description.text
    else:
        return ""


# main loop: for each keyword, find the page count, then crawl every page
for idx, searchword in enumerate(searchwords):
    books[searchword] = []

    total_page = 0

    url_book_whole_search = "https://www.aladin.co.kr/m/msearch.aspx?SearchWord={}&SearchTarget=Book".format(searchword)
    response_book_whole_search = requests.get(url_book_whole_search)
    soup = BeautifulSoup(response_book_whole_search.content, "html.parser")
    elements = soup.select('.search_list_newnum > ul > li')

    # count the result pages from the pagination list
    print(searchword)
    for element in elements:
        if element.text != "":
            total_page += 1

    for i in range(1, total_page + 1):
        print("page", i)
        get_content_total(searchwords_korean[idx], searchword, i)
    
    print("books", books[searchword])


print(books)

# close the CSV file so all buffered rows are flushed to disk
f.close()
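Running this script produces aladin.csv with one row per book, in the column order keyword, title, author, image, description.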

2) read from CSV using JS
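The CSV written above can be loaded in Node.js. Below is a minimal sketch; it assumes the csv-parse package (npm install csv-parse) and the column order the Python script writes (keyword, title, author, image, description). A real CSV parser is used instead of a naive split(",") because fields such as the description can contain commas and newlines. The file name read_csv.js is just an example.

// read_csv.js : minimal sketch for reading aladin.csv
const fs = require("fs");
const { parse } = require("csv-parse/sync");

const raw = fs.readFileSync("aladin.csv", "utf8");

// parse() respects quoted fields, so descriptions containing
// commas or newlines survive intact
const rows = parse(raw, { skip_empty_lines: true });

// map each row onto an object, following the column order
// written by the Python script
const books = rows.map(([keyword, title, author, image, description]) => ({
    keyword,
    title,
    author,
    image,
    description
}));

console.log(books.length, "books loaded");
console.log(books[0]);

Run it with node read_csv.js in the folder where the Python script wrote aladin.csv.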
