You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
git/scr/模糊专家系统/scrape/scrape.js

37 lines
1.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

let request = require('request').defaults({ 'proxy': "http://127.0.0.1:1080" });
let Queue = require('promise-queue');
let fs = require('fs');
let path = require('path');
let queue = new Queue(10); //最多同时10线程采集
request('https://www.dxomark.com/daksensor/ajax/jsontested', //获得相机列表
(error, response, body) => {
let cameraList = JSON.parse(body).data;
let finishedCount = 0;
cameraList.forEach(cameraMeta => {
let camera = Object.assign({}, cameraMeta);
let link = `https://www.dxomark.com${camera.link}---Specifications`; //拼合对应的规格网址
queue.add(() => new Promise(res => { //把request放入队列以保证同时http请求不超过10个
let doRequest = () => {
request(link, (error, response, body) => {
if (error) { //如果失败则重试
console.log("retrying.." + camera.name);
doRequest();
return;
}
let specMatcherRegexp = /descriptifgauche.+?>([\s\S]+?)<\/td>[\s\S]+?descriptif_data.+?>([\s\S]+?)<\/td>/img;
let match = specMatcherRegexp.exec(body);
while (match) { //用正则表达式匹配table里的每一个项目并添加至采集结果中
camera[match[1]] = match[2];
match = specMatcherRegexp.exec(body);
}
fs.writeFileSync(path.join('./scraped', camera.name + '.txt'), JSON.stringify(camera, null, 4), { encoding: "UTF8" });
finishedCount++;
console.log(`Finished ${finishedCount}/${cameraList.length}: ${camera.name}`);
res();
})
};
doRequest();
}));
});
});