React + Electron으로 스크래핑하기

소개



일본어 자료밖에 조사하지 않았지만, 외부 사이트를 스크래핑하는 데 필요한 HTTP 요청 코드를 찾을 수 없었기 때문에 직접 작성해 보았습니다. JS 초보자이기도 해서 소스 코드가 지저분합니다. 원하는 대로 수정해서 사용하십시오.

프로젝트의 구성은 다음과 같습니다.
project/ 
        src/
           main.js
           util.js
        public/
           index.html


src 디렉토리에 util.js를 배치하십시오.

util.js 소스 코드

모듈 내용


src/main.js
const electron = require("electron");
const app = electron.app;
const BrowserWindow = electron.BrowserWindow;
const path = require('path')
const url = require('url');

// The application's single window. Assigned by createWindow() and reset to
// null by the window's "closed" handler.
let mainWindow;

/**
 * Creates the main window, loads http://localhost:3000 into it and opens
 * DevTools for that window.
 */
function createWindow() {
  const windowOptions = {
    width: 1366,
    height: 720,
    minWidth: 1194,
    minHeight: 720,
    webPreferences: {
      // The inline script in public/index.html calls require() from the page.
      nodeIntegration: true,
    },
  };

  mainWindow = new BrowserWindow(windowOptions);
  mainWindow.loadURL('http://localhost:3000');
  mainWindow.webContents.openDevTools();

  // Drop the reference once the window is gone; the "activate" handler
  // compares mainWindow against null to decide whether to recreate it.
  mainWindow.on("closed", () => {
    mainWindow = null;
  });
}
// Create the window once Electron reports it is ready.
app.on("ready", createWindow);

// When the last window closes, quit the app on every platform except
// "darwin" (macOS).
app.on("window-all-closed", () => {
  if (process.platform === "darwin") {
    return;
  }
  app.quit();
});

// On "activate", recreate the window if the previous one was closed
// (its "closed" handler resets mainWindow to null).
app.on("activate", () => {
  if (mainWindow !== null) {
    return;
  }
  createWindow();
});

// Expose util on the main process's global object; the inline script in
// public/index.html reads it back with remote.getGlobal('util').
global.util = require('./util'); // load the util module

public/index.html
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <!--
      manifest.json provides metadata used when your web app is installed on a
      user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
    -->
    <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
    <!--
      Notice the use of %PUBLIC_URL% in the tags above.
      It will be replaced with the URL of the `public` folder during the build.
      Only files inside the `public` folder can be referenced from the HTML.

      Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
      work correctly both with client-side routing and a non-root public URL.
      Learn how to configure a non-root public URL by running `npm run build`.
    -->
    <title>React App</title>
    <script>
      // Runs in the Electron renderer; require() is available here because
      // src/main.js creates the window with nodeIntegration enabled.
      // The IIFE keeps the helper binding out of the global lexical scope, so
      // only window.electron and window.util are exposed to the React app.
      // NOTE(review): require('electron').remote is disabled by default from
      // Electron 10 and removed in Electron 14 — confirm the Electron version
      // in use before upgrading.
      (() => {
        const remote = require('electron').remote;
        window.electron = remote;
        // "util" is the object assigned to global.util in src/main.js.
        window.util = remote.getGlobal('util');
      })();
    </script>
  </head>
  <body>
    <noscript>You need to enable JavaScript to run this app.</noscript>
    <div id="root"></div>
    <!--
      This HTML file is a template.
      If you open it directly in the browser, you will see an empty page.

      You can add webfonts, meta tags, or analytics to this file.
      The build step will place the bundled scripts into the <body> tag.

      To begin the development, run `npm start` or `yarn start`.
      To create a production bundle, use `npm run build` or `yarn build`.
    -->
  </body>
</html>

 

사용 예



usage.js

const util = window.util;

// Usage example: issue one GET request through the util helper exposed on
// window, update the cookie store from the response, and log the result.
// NOTE(review): util.js is not shown here; the request/cookie-store call
// shapes below are taken as-is from the original example — confirm against
// util.js.
(async () => {
  const url = 'https://qiita.com';
  let cookie = '';
  let ck = util.createCookieStore();
  const headers = {
    // Must stay on one line: a quoted string literal cannot contain a raw
    // line break.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
    'Cookie': cookie,
  };
  const res = await util.request({
    method: 'GET',
    url,
    headers,
    data: '',
  }, 'utf8');
  ck = util.updateCookieStore(ck, cookie, res);
  cookie = ck.getAll();
  console.log(res.headers, res.body, cookie);
})().catch((err) => {
  // Report failures instead of leaving an unhandled promise rejection.
  console.error(err);
});

좋은 웹페이지 즐겨찾기