스크래핑 maps.race

22603 단어 scrapingpython
안녕하세요!

웹 스크래핑 강좌를 수강하고 있어서, https://mapas.race.es 가 반환하는 독특한 XML/JSON 혼합 형식의 응답에서 데이터를 추출하여 하나의 JSON 파일 또는 여러 개의 CSV 파일을 생성하는 작은 스크립트를 작성해 보았습니다. 데이터는 다음 범주로 구분됩니다:
  • 인시던트
  • 카메라
  • 레이더
  • 블랙포인트
  • 주유소
  • 주차

  • import requests
    import json
    import pandas as pd
    
    # Output settings: "json" writes a single file, "csv" writes one file per
    # category (see the writing code at the end of the script).
    formatData = "json"  # json / csv
    nameFile = "race"

    # Category switches. Each value is sent verbatim as a query parameter and a
    # value of 1 also enables the matching collection step further down.
    incidents = 1
    cameras = 1
    radars = 1
    oilStations = 1
    blackPoints = 1
    parkings = 1

    url = "https://mapas.race.es/WebServices/srvRace.asmx/ObtenerDatos?pstrIncidencias=" + \
        str(incidents)+"&pstrCamaras="+str(cameras)+"&pstrRadares="+str(radars) + \
        "&pstrGasolineras="+str(oilStations)+"&pstrPuntosNegros=" + \
        str(blackPoints)+"&pstrParking="+str(parkings)

    # NOTE(review): "authority" names a different host than "host"/"referer" --
    # confirm this is intended.
    headers = {
        "authority": "infocar.dgt.es",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36",
        "host": "mapas.race.es",
        "referer": "https://mapas.race.es/",
    }

    # requests waits indefinitely unless a timeout is given, so set one to keep
    # the script from hanging on a stalled connection. raise_for_status() stops
    # on an HTTP error response instead of trying to slice and parse its body.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    initialText = response.text

    # The JSON document is the text between the <string xmlns=...> tags.
    # NOTE(review): the payload is taken verbatim from the XML text, so any
    # XML-escaped characters would stay escaped -- confirm against a real response.
    splitText = initialText.split('<string xmlns="http://tempuri.org/">')
    jsonToLoad = splitText[1].split("</string>")[0]

    jsonRequest = json.loads(jsonToLoad)

    # items maps a category name to its list of normalised records; the
    # per-category lists below are filled by the collection steps.
    items = {}
    incidentsItems = []
    camerasItems = []
    radarsItems = []
    oilStationsItems = []
    blackPointsItems = []
    parkingsItems = []
    
    
    def get_object(type, item, id=None, image=None):
        """Build the normalised record for one raw entry of the given category.

        Args:
            type: Category name: "incidents", "cameras", "radars",
                "oilStations", "blackPoints" or "parkings".
            item: Mapping holding the raw fields ("Latitud", "Longitud", plus
                "Id" or the incident-specific keys).
            id: Identifier used for incidents only; other categories take their
                identifier from item["Id"].
            image: Value stored under "image" for cameras only.

        Returns:
            A dict with "id", "lat" and "lng" as strings, extended with the
            incident fields or the camera image where applicable, or None when
            the category name is not recognised.

        Raises:
            KeyError: if item lacks a field required by the category.
        """
        # The parameter names `type` and `id` shadow builtins; they are kept
        # because callers may pass them by keyword.
        plain_kinds = ("radars", "oilStations", "blackPoints", "parkings")
        is_incident = type == "incidents"
        is_camera = type == "cameras"
        if not (is_incident or is_camera or type in plain_kinds):
            return None

        # Incidents carry no "Id" of their own, so the caller supplies one.
        identifier = id if is_incident else item["Id"]
        record = {
            "id": str(identifier),
            "lat": str(item["Latitud"]),
            "lng": str(item["Longitud"]),
        }

        if is_incident:
            record["type"] = str(item["Tipo"])
            # Remaining incident fields are copied through without conversion.
            passthrough = (
                ("date", "Fecha"),
                ("reason", "Causa"),
                ("level", "Nivel"),
                ("province", "Provincia"),
                ("poblation", "Poblacion"),
                ("street", "Carretera"),
            )
            for target_key, source_key in passthrough:
                record[target_key] = item[source_key]
        elif is_camera:
            record["image"] = image

        return record
    
    
    # Incidents have no identifier in the payload, so they are numbered from 1
    # in the order the service returned them.
    if incidents == 1:
        incidentsItems.extend(
            get_object('incidents', entry, str(position))
            for position, entry in enumerate(jsonRequest["Incidencias"], start=1)
        )
        items["incidents"] = incidentsItems

    # Each camera record also gets the URL of its snapshot image, derived from
    # the camera Id.
    if cameras == 1:
        for camera in jsonRequest["Camaras"]:
            snapshot = "http://infocar.dgt.es/etraffic/data/camaras/" + \
                str(camera['Id'])+".jpg"
            camerasItems.append(get_object('cameras', camera, "", snapshot))
        items["cameras"] = camerasItems

    # The remaining categories share the same id/lat/lng shape, so they are
    # handled by one loop: (switch, payload key, category name, target list).
    for enabled, payload_key, category, bucket in (
        (radars, "Radares", 'radars', radarsItems),
        (oilStations, "Gasolineras", 'oilStations', oilStationsItems),
        (blackPoints, "PuntosNegros", 'blackPoints', blackPointsItems),
        (parkings, "Parking", 'parkings', parkingsItems),
    ):
        if enabled != 1:
            continue
        bucket.extend(get_object(category, entry)
                      for entry in jsonRequest[payload_key])
        items[category] = bucket
    
    # Persist the collected items in the format selected by formatData.
    if formatData == "json":
        itemsDumps = json.dumps(items, indent=2)
        # The with-block closes the file handle, so the JSON is flushed to disk
        # as soon as the write finishes.
        with open(nameFile + '.' + formatData, "w") as f:
            f.write(itemsDumps)
    elif formatData == "csv":
        # One CSV per collected category, named <nameFile>_<category>.csv.
        # Iterating over items (rather than indexing six fixed keys) skips
        # categories whose switch was off instead of failing with KeyError.
        # index=False is applied to every file so none of them carries a
        # row-index column.
        for category, rows in items.items():
            pd.DataFrame(rows).to_csv(
                nameFile + "_" + category + "." + formatData, index=False)

    # Printed unconditionally, including when formatData matched neither branch.
    print('✅ '+ formatData +' file/s generated')
    
    


    이 글을 읽는 대다수의 사람들에게는 스페인의 데이터이기 때문에 아무 가치가 없을 것입니다.

    너무 나쁘게 보지 말아 주세요. 저는 아직 파이썬을 배우는 중입니다...

    좋은 웹페이지 즐겨찾기