R 언어 크롤러 실전(상): 중고 주택 데이터 수집
1914 단어 R 언어 학습 노트
# Scrape one listing page and pull out one vector per listing field.
#
# NOTE(review): in the extracted text every "<- ... %>" span had been consumed
# as if it were an HTML tag, so none of the statements parsed. The assignments
# below are reconstructed. Two things could not be recovered and must be
# confirmed against the original article:
#   * the URL literal assigned to `gurl` (inferred here from the prefix used
#     in the itemURL rewrite below);
#   * the name of the intermediate node-set variable (`listing_nodes` is new).
gurl <- "http://cs.ganji.com/fang5/"

# Fetch the page and select the per-listing container nodes.
listing_nodes <- gurl %>%
  html_session() %>%
  read_html(encoding = "utf-8") %>%
  html_nodes("div.f-main-list>div>div")

# Value of the `puid` attribute on each listing container.
puid <- listing_nodes %>%
  html_attr("puid")

# `href` attribute of each container, with the "/fang5" path prefix expanded
# to the full site URL.
itemURL <- listing_nodes %>%
  html_attr("href") %>%
  gsub(pattern = "/fang5", replacement = "http://cs.ganji.com/fang5")

# `src` of the image nested under dl>dt>div>a.
smallImg <- listing_nodes %>%
  html_nodes("dl>dt>div>a>img") %>%
  html_attr("src")

# `title` attribute of the link under dl>dd.
iTitle <- listing_nodes %>%
  html_nodes("dl>dd>a") %>%
  html_attr("title")

# The next four fields read the odd-numbered <span> children of
# dd.dd-item.size. Full selector of the first one, for reference:
# :f_mew_list > div.f-main.f-clear.f-w1190 > div.f-main-left.f-fl.f-w980 >
#   div.f-main-list > div > div:nth-child(1) > dl > dd.dd-item.size >
#   span:nth-child(1)
# Presumably the layout / room-count text -- TODO confirm.
iLayout <- listing_nodes %>%
  html_nodes("dl > dd.dd-item.size > span:nth-child(1)") %>%
  html_text()

# Same path as above, ending in span:nth-child(3).
# Presumably the floor-area text -- TODO confirm.
iArea <- listing_nodes %>%
  html_nodes("dl > dd.dd-item.size > span:nth-child(3)") %>%
  html_text()

# Presumably the orientation text (span:nth-child(5)) -- TODO confirm.
iOrientation <- listing_nodes %>%
  html_nodes("dl > dd.dd-item.size > span:nth-child(5)") %>%
  html_text()

# Presumably the floor text (span:nth-child(7)) -- TODO confirm.
iFloor <- listing_nodes %>%
  html_nodes("dl > dd.dd-item.size > span:nth-child(7)") %>%
  html_text()

# Text of span.area with all whitespace removed: newlines are first turned
# into spaces, then every space is dropped.
iAddr <- listing_nodes %>%
  html_nodes("dl>dd>span.area") %>%
  html_text() %>%
  gsub(pattern = "\n", replacement = " ") %>%
  gsub(pattern = " ", replacement = "")

# Text of the first <span> inside div.price (left as character).
iPrice <- listing_nodes %>%
  html_nodes("dl>dd>div.price>span:first-child") %>%
  html_text()

# Text of div.time reduced to its digits and converted to numeric.
# Every non-digit is stripped, so a decimal point would be lost as well.
iunitPrice <- listing_nodes %>%
  html_nodes("dl>dd>div.time") %>%
  html_text() %>%
  gsub(pattern = "[^0-9]", replacement = "") %>%
  as.numeric()
#
iData