Node.js에서 puppeteer로 이벤트를 이용해 HAR 데이터를 캡처합니다.

먼저 CSV 파일에서 분석할 URL 데이터를 읽은 다음, puppeteer와 puppeteer-har를 사용하여 브라우저의 HAR 데이터를 가져옵니다. 디버깅 과정에서 for 루프 안에서는 어떻게 작성해도 모두 비동기적으로 실행된다는 것을 발견했고, 마지막에 해결 방안을 찾아 이 기록에도 포함했습니다.
1. CSV 파일을 분석하는 코드 만들기 (ultra-harlog/module/cvsresovle.js)
const fs = require("fs");
const path = require("path");
const csv =require('csv');
const parse = require('csv-parse/lib/sync')
const iconv = require('iconv-lite');   

/*
npm install iconv-lite 
*/
/**
 * Load a CSV file from disk and parse it into an array of row objects.
 *
 * The first CSV line is used as the header (`columns: true`), so every
 * returned record is keyed by column name; for example `record.SITE_LINK`
 * holds a value such as 'www.banggo.com'. Empty lines are skipped.
 *
 * @param {string} csvpath - Path of the CSV file; it is read as UTF-8.
 * @returns {Object[]} One object per data row.
 */
function readUrlRecord(csvpath){
    console.log('      :' + csvpath) ;

    // Synchronous read: the whole file is in memory before parsing starts.
    const csvText = fs.readFileSync(csvpath, { encoding: 'utf8' }) ;

    const parseOptions = {
      columns: true,
      skip_empty_lines: true,
      delimiter: ',',
    } ;

    return parse(csvText, parseOptions) ;
}

//readUrlRecord('../top300.csv') ;

// Public API of this module: expose the CSV reader to callers.
exports.readUrlRecord = readUrlRecord;

2. 캡처 메인 코드 만들기 (ultra-harlog/module/puppeteerhar-event.js)
const fs = require('fs');
const { promisify } = require('util');
const path = require("path");

const puppeteer = require('puppeteer');
const { harFromMessages } = require('chrome-har');

const logger=require("./log");

// Module-wide logger obtained from ./log; saveHarlog uses it to report navigation failures.
const log = logger.getPuppeteerHarEventRecordLogger() ;

//https://michaljanaszek.com/blog/generate-har-with-puppeteer
//https://www.npmjs.com/package/chrome-har

// Names of the Chrome DevTools Protocol events that saveHarlog subscribes to.
// Every received event is recorded as { method, params } and the collected
// list is later passed to chrome-har's harFromMessages to build the HAR.
const observe = [
  'Page.loadEventFired',
  'Page.domContentEventFired',
  'Page.frameStartedLoading',
  'Page.frameAttached',
  'Network.requestWillBeSent',
  'Network.requestServedFromCache',
  'Network.dataReceived',
  'Network.responseReceived',
  'Network.resourceChangedPriority',
  'Network.loadingFinished',
  'Network.loadingFailed',
];

/**
 * Launch a headless Chromium instance through puppeteer.
 *
 * The caller owns the returned browser and is responsible for closing it.
 *
 * @returns {Promise<Object>} Resolves with the browser returned by `puppeteer.launch`.
 */
async function launchBrowser(){
  // Chromium command-line switches; see
  // https://peter.sh/experiments/chromium-command-line-switches/
  const chromiumArgs = [
    "--disk-cache-size=0",
    "--disable-cache",
    '--disable-infobars',
    '--window-size=800,600',
    '--ignore-certificate-errors',
    // NOTE(review): looks like a typo of '--enable-features'; kept unchanged — confirm intent.
    '--enable-feaures',
  ];

  const launchOptions = {
    // No `executablePath` is given, so puppeteer picks the Chromium binary itself.
    // Ignore HTTPS errors during navigation.
    ignoreHTTPSErrors: true,
    // Run without a visible browser window.
    headless: true,
    args: chromiumArgs,
    // Do not auto-open the DevTools panel.
    devtools: false,
    // Launch timeout: defaults to 30000 (30 seconds); 0 disables the timeout.
    timeout: 0,
    // `slowMo` is intentionally not set.
  };

  return puppeteer.launch(launchOptions);
}

/**
 * Load `url` in a freshly launched headless browser, record the DevTools
 * events listed in `observe`, and write them as a HAR file to
 * `dirPath/filename`.
 *
 * A navigation failure is logged and does not reject: whatever events were
 * captured up to that point are still converted and written. The browser is
 * closed on every path, including failures while the page or the DevTools
 * session is being set up.
 *
 * @param {string} url - Page to capture; "http://" is prepended when the
 *   value does not already start with "http://" or "https://".
 * @param {string} dirPath - Directory that receives the HAR file (it is not
 *   created here).
 * @param {string} filename - Name of the HAR file inside `dirPath`.
 * @returns {Promise<void>} Resolves once the HAR file has been written.
 */
async function saveHarlog(url,dirPath,filename){
    const harFilePath = path.join(dirPath, filename) ;

    // Normalise the URL without reassigning the caller-visible parameter.
    const hasScheme = url.startsWith('http://') || url.startsWith('https://') ;
    const targetUrl = hasScheme ? url : "http://" + url ;

    // Events collected from the DevTools session; input for the HAR conversion.
    const events = [];

    const browser = await launchBrowser() ;
    try{
        // Reuse the tab the browser opens at startup instead of creating a new one.
        const page = (await browser.pages())[0];

        // Open a raw DevTools session and turn on the domains we listen to.
        const client = await page.target().createCDPSession();
        await client.send('Page.enable');
        await client.send('Network.enable');

        observe.forEach(method => {
            client.on(method, params => {
                events.push({ method, params });
            });
        });

        try{
            // timeout: 0 disables the navigation timeout.
            await page.goto(targetUrl, { timeout: 0 });
        }catch(error){
            // Best effort: keep the partial capture and only log the failure.
            log.info('resovle error :' + targetUrl + ";  error message:" + error) ;
        }
    }finally{
        // Always release the Chromium process, even if setup above threw.
        await browser.close();
    }

    const har = harFromMessages(events);
    await promisify(fs.writeFile)(harFilePath, JSON.stringify(har));
}

// Public API of this module.
exports.launchBrowser = launchBrowser;
exports.saveHarlog = saveHarlog;

3. 시작 파일 만들기 (ultra-harlog/puppeteerhar-event-app.js)
const fs = require("fs");
const path = require("path");
const moment = require("moment");
const schedule = require('node-schedule');

const cvsresovler=require("./module/cvsresovle");
const mhar=require("./module/puppeteerhar-event");

/*
cnpm install --save moment
cnpm install --save csv
cnpm install --save node-schedule
cnpm install --save puppeteer
cnpm install --save puppeteer-har
cnpm install --save iconv-lite
cnpm install --save chrome-har

cnpm install --save grpc
*/  
/**
 * Capture a HAR file for every CSV row, strictly one page at a time.
 *
 * `await` inside a `for...of` loop of an async function runs the captures
 * sequentially, so no recursive helper is needed. Rows without a usable
 * SITE_LINK are skipped, and a failure on one URL is logged without stopping
 * the remaining URLs.
 *
 * @param {Object[]} dataArr - Parsed CSV rows; each row's `SITE_LINK` is the URL to capture.
 * @param {string} dirPath - Directory that receives the HAR files.
 * @returns {Promise<void>} Resolves after the last row has been processed.
 */
async function captureAllSequentially(dataArr, dirPath){
    for(const [i, data] of dataArr.entries()){
        // Guard against rows where the SITE_LINK column is missing or not text.
        const rawUrl = data ? data['SITE_LINK'] : undefined ;
        const url = typeof rawUrl === 'string' ? rawUrl.trim() : '' ;
        if(!url){
            continue ;
        }

        // Slashes and backslashes are not valid in a file name; replace them.
        const filename = url.replace(/\//g,'-').replace(/\\/g,'-') + '.har' ;
        console.log((i+1) + "-starting to resovle url :" + url ) ;
        try{
            await mhar.saveHarlog(url,dirPath,"N" + "-" + filename) ;
        }catch(error){
            console.log(error) ;
        }
    }
}

/**
 * Register the scheduled capture job.
 *
 * On every run the job creates a timestamped directory under `harlogs`,
 * reads the URL list from `top300.csv` next to this file, and captures one
 * HAR file per URL.
 */
function init(){
    console.log('      ') ;
    // Six-field cron spec (second minute hour day month weekday): 08:55:00 every day.
    schedule.scheduleJob('0 55 8 * * *',()=>{
        const ftime = moment().format('YYYYMMDDHHmm');
        console.log('       :' + ftime) ;
        const dirPath = path.join(__dirname,'harlogs',ftime) ;
        console.log("    :" + dirPath) ;

        const isExist = fs.existsSync(dirPath) && fs.lstatSync(dirPath).isDirectory() ;
        if(!isExist){
            console.log("     " + ftime) ;
            // recursive: also creates the parent `harlogs` directory on first run.
            fs.mkdirSync(dirPath, { recursive: true });
        }

        // Read the list of URLs to capture.
        const dataArr = cvsresovler.readUrlRecord(path.join(__dirname,'top300.csv')) ;
        console.log("   URL  " + dataArr.length + " ") ;

        // Never leave the promise floating: report anything the helper did not handle itself.
        captureAllSequentially(dataArr, dirPath).catch((error) => {
            console.log(error) ;
        });
    });
    console.log('        ') ;
}

// Entry point: register the scheduled capture job when this file is loaded.
init();

/**
 * Manual one-off capture for debugging a single URL.
 *
 * The URL is taken from the first command-line argument and defaults to
 * "www.baidu.com". The HAR file is written to a timestamped directory under
 * `harlogs` with an "NT-" prefix and a ".har" extension (the same extension
 * the scheduled job in init() uses).
 *
 * @returns {Promise<void>} Resolves when the capture attempt has finished;
 *   capture errors are logged, not rethrown.
 */
async function test(){
    const ftime = moment().format('YYYYMMDDHHmm');
    console.log('       :' + ftime) ;
    const dirPath = path.join(__dirname,'harlogs',ftime) ;
    console.log("    :" + dirPath) ;

    const isExist = fs.existsSync(dirPath) && fs.lstatSync(dirPath).isDirectory() ;
    if(!isExist){
        console.log("     " + ftime) ;
        // recursive: also creates the parent `harlogs` directory when it is missing.
        fs.mkdirSync(dirPath, { recursive: true });
    }

    // slice (not splice) leaves process.argv untouched; the local is not named
    // `arguments`, which is reserved inside functions in strict mode.
    const cliArgs = process.argv.slice(2);
    const requestedUrl = cliArgs.length > 0 ? cliArgs[0] : "www.baidu.com" ;
    let url = requestedUrl.trim() ;
    if(!url){
        return ;
    }

    // The file name is derived from the URL as given, before a scheme is added.
    const filename = url.replace(/\//g,'-').replace(/\\/g,'-') + '.har' ;
    if(!(url.startsWith('http://') || url.startsWith('https://'))){
        url = "http://" + url ;
    }
    console.log("starting to resovle  test url :" + url ) ;

    try{
        await mhar.saveHarlog(url,dirPath,"NT" + "-" + filename) ;
    }catch(error){
        console.log(error) ;
    }
}
//    
//test() ;

좋은 웹페이지 즐겨찾기