스크래핑 maps.race
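웹 스크래핑 강좌를 수강하고 있어서, https://mapas.race.es 가 반환하는 독특한 XML/JSON 혼합 형식의 응답에서 데이터를 추출하여 JSON 파일 하나 또는 여러 개의 CSV 파일을 생성하는 작은 스크립트를 작성하기로 했습니다. 결과는 다음 범주로 구분됩니다: 사고(incidents), 카메라(cameras), 레이더(radars), 주유소(oilStations), 사고 다발 지점(blackPoints), 주차장(parkings).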
import requests
import json
import pandas as pd
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
formatData = "json"  # output format: "json" or "csv"
nameFile = "race"  # base name of the generated file(s)

# Per-category switches. Each value is sent verbatim as a query parameter, and
# the rest of the script only processes a category whose switch equals 1.
incidents = 1
cameras = 1
radars = 1
oilStations = 1
blackPoints = 1
parkings = 1

# ---------------------------------------------------------------------------
# Request
# ---------------------------------------------------------------------------
url = (
    "https://mapas.race.es/WebServices/srvRace.asmx/ObtenerDatos"
    "?pstrIncidencias={}&pstrCamaras={}&pstrRadares={}"
    "&pstrGasolineras={}&pstrPuntosNegros={}&pstrParking={}"
).format(incidents, cameras, radars, oilStations, blackPoints, parkings)

# NOTE(review): "authority" names a different host than "host"/"referer";
# it looks like a leftover from another script -- confirm before changing.
headers = {
    "authority": "infocar.dgt.es",
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
    "host": "mapas.race.es",
    "referer": "https://mapas.race.es/",
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, timeout=30)
# Fail here on HTTP errors instead of later while slicing an error page.
response.raise_for_status()

# ---------------------------------------------------------------------------
# Payload extraction
# ---------------------------------------------------------------------------
# The JSON document is embedded as the text of a <string> XML element, so it
# is cut out of the response body before being decoded.
# NOTE(review): the text is taken verbatim; XML entities (e.g. &amp;) are not
# unescaped -- confirm whether the service ever emits any.
initialText = response.text
splitText = initialText.split('<string xmlns="http://tempuri.org/">')
if len(splitText) < 2:
    raise ValueError(
        "Unexpected response: the <string> wrapper element was not found"
    )
jsonToLoad = splitText[1].split("</string>")[0]
jsonRequest = json.loads(jsonToLoad)

# ---------------------------------------------------------------------------
# Result containers (one list per category, gathered under `items`)
# ---------------------------------------------------------------------------
items = {}
incidentsItems = []
camerasItems = []
radarsItems = []
oilStationsItems = []
blackPointsItems = []
parkingsItems = []
def get_object(type, item, id=None, image=None):
    """Build the normalized output record for one raw entry.

    Args:
        type: Category name. Recognized values are "incidents", "cameras",
            "radars", "oilStations", "blackPoints" and "parkings".
        item: Mapping holding the raw entry. "Latitud" and "Longitud" are
            always read; "incidents" additionally reads "Tipo", "Fecha",
            "Causa", "Nivel", "Provincia", "Poblacion" and "Carretera",
            while every other category reads "Id".
        id: Identifier stored in "incidents" records (their "id" is taken
            from this argument, not from ``item``). Ignored otherwise.
        image: Value stored under "image" in "cameras" records. Ignored
            otherwise.

    Returns:
        A dict for a recognized category, or ``None`` when ``type`` is not
        one of the recognized names.

    Raises:
        KeyError: If ``item`` lacks a field the category reads.
    """
    # `type` and `id` shadow builtins, but they are part of the signature
    # callers may use by keyword, so the names are kept.
    if type == "incidents":
        return {
            "id": str(id),
            "lat": str(item["Latitud"]),
            "lng": str(item["Longitud"]),
            "type": str(item["Tipo"]),
            "date": item["Fecha"],
            "reason": item["Causa"],
            "level": item["Nivel"],
            "province": item["Provincia"],
            "poblation": item["Poblacion"],
            "street": item["Carretera"],
        }

    if type not in ("cameras", "radars", "oilStations", "blackPoints", "parkings"):
        # Unknown category: return None explicitly rather than by fall-through.
        return None

    # All remaining categories share the same id/lat/lng core.
    record = {
        "id": str(item["Id"]),
        "lat": str(item["Latitud"]),
        "lng": str(item["Longitud"]),
    }
    if type == "cameras":
        record["image"] = image
    return record
# Convert the decoded payload into normalized records, one category at a
# time. A category is processed, and gets a key in `items`, only when its
# switch equals 1.

if incidents == 1:
    # Incident records are numbered sequentially from 1; the number is passed
    # to get_object as the record id.
    incidentsItems.extend(
        get_object("incidents", incident, str(number))
        for number, incident in enumerate(jsonRequest["Incidencias"], start=1)
    )
    items["incidents"] = incidentsItems

if cameras == 1:
    # Each camera record carries an image URL derived from the camera's Id.
    camerasItems.extend(
        get_object(
            "cameras",
            camera,
            "",
            "http://infocar.dgt.es/etraffic/data/camaras/" + str(camera["Id"]) + ".jpg",
        )
        for camera in jsonRequest["Camaras"]
    )
    items["cameras"] = camerasItems

# The remaining categories are handled identically, so they are driven from a
# table of (switch, output key, payload key, accumulator list). The table
# order fixes the key order in `items`.
for enabled, category, payloadKey, bucket in (
    (radars, "radars", "Radares", radarsItems),
    (oilStations, "oilStations", "Gasolineras", oilStationsItems),
    (blackPoints, "blackPoints", "PuntosNegros", blackPointsItems),
    (parkings, "parkings", "Parking", parkingsItems),
):
    if enabled == 1:
        bucket.extend(get_object(category, entry) for entry in jsonRequest[payloadKey])
        items[category] = bucket
# Write the collected records to disk in the configured format.
if formatData == "json":
    # One file holding every category; the context manager guarantees the
    # handle is flushed and closed.
    with open(nameFile + "." + formatData, "w", encoding="utf-8") as f:
        f.write(json.dumps(items, indent=2))
elif formatData == "csv":
    # One file per collected category, named <nameFile>_<category>.csv.
    # Iterating over `items` skips categories that were switched off (they
    # have no key), and every file is written without the DataFrame index.
    for category, records in items.items():
        pd.DataFrame(records).to_csv(
            nameFile + "_" + category + "." + formatData, index=False
        )
print('✅ '+ formatData +' file/s generated')
이 글을 읽는 대다수의 사람들에게는 스페인의 데이터이기 때문에 아무 가치가 없을 것입니다.
너무 가혹하게 평가하지는 말아 주세요. 저는 아직 파이썬을 배우는 중입니다...
Reference
이 문제에 관하여(스크래핑 maps.race), 우리는 이곳에서 더 많은 자료를 발견하고 링크를 클릭하여 보았다: https://dev.to/josexs/scraping-race-7o3 텍스트를 자유롭게 공유하거나 복사할 수 있습니다. 하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
우수한 개발자 콘텐츠 발견에 전념 (Collection and Share based on the CC Protocol.)