You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
let request = require('request').defaults({ 'proxy': "http://127.0.0.1:1080" });
|
|
|
|
|
let Queue = require('promise-queue');
|
|
|
|
|
let fs = require('fs');
|
|
|
|
|
let path = require('path');
|
|
|
|
|
// Work queue that caps the number of spec-page HTTP requests in flight at 10.
const queue = new Queue(10);

// Directory (relative to the working directory) that receives one file per camera.
const OUTPUT_DIR = './scraped';

// Number of times a failed spec-page request is retried before the camera is skipped.
const MAX_RETRIES = 5;

/**
 * Converts a camera name into a string usable as a single path segment.
 *
 * Only path separators are replaced, so every name that already produced a
 * valid file name keeps exactly the same file name.
 *
 * @param {*} name - Camera name as delivered by the camera list.
 * @returns {string} The name with path separators replaced by underscores.
 */
function toSafeFileName(name) {
    // '/' is treated as a separator on every platform; path.sep adds '\' on Windows.
    return String(name).split('/').join('_').split(path.sep).join('_');
}

/**
 * Collects every label/value pair of the specification table found in a page.
 *
 * @param {string} html - Body of a camera specification page.
 * @returns {Object} Map from the captured label cell content to the captured data cell content.
 */
function extractSpecs(html) {
    // A fresh regex per call: the `g` flag makes exec() stateful via lastIndex.
    const specMatcherRegexp = /descriptifgauche.+?>([\s\S]+?)<\/td>[\s\S]+?descriptif_data.+?>([\s\S]+?)<\/td>/img;
    const specs = {};
    let match = specMatcherRegexp.exec(html);
    while (match) {
        specs[match[1]] = match[2];
        match = specMatcherRegexp.exec(html);
    }
    return specs;
}

// Fetch the camera list, then scrape the specification page of every camera in it.
request('https://www.dxomark.com/daksensor/ajax/jsontested', (listError, listResponse, listBody) => {
    if (listError) {
        console.error('Failed to fetch the camera list:', listError);
        process.exitCode = 1;
        return;
    }

    let cameraList;
    try {
        cameraList = JSON.parse(listBody).data;
    } catch (parseError) {
        console.error('Camera list response is not valid JSON:', parseError);
        process.exitCode = 1;
        return;
    }
    if (!Array.isArray(cameraList)) {
        console.error('Camera list response has no "data" array.');
        process.exitCode = 1;
        return;
    }

    // Create the output directory up front so the first write does not fail with ENOENT.
    try {
        if (!fs.existsSync(OUTPUT_DIR)) {
            fs.mkdirSync(OUTPUT_DIR);
        }
    } catch (mkdirError) {
        console.error(`Cannot create output directory ${OUTPUT_DIR}:`, mkdirError);
        process.exitCode = 1;
        return;
    }

    let finishedCount = 0;

    cameraList.forEach((cameraMeta) => {
        // Work on a copy so the scraped fields do not leak into the list entry.
        const camera = Object.assign({}, cameraMeta);
        // Build the URL of the camera's specification page.
        const link = `https://www.dxomark.com${camera.link}---Specifications`;

        // Running the request through the queue keeps at most 10 requests in flight.
        queue.add(() => new Promise((resolve) => {
            const doRequest = (retriesLeft) => {
                request(link, (error, response, body) => {
                    if (error) {
                        if (retriesLeft > 0) {
                            console.log(`retrying..${camera.name}`);
                            doRequest(retriesLeft - 1);
                            return;
                        }
                        // Give up on this camera, but release the queue slot for the others.
                        console.error(`Giving up on ${camera.name} after ${MAX_RETRIES} retries:`, error);
                        resolve();
                        return;
                    }

                    // Add every row of the specification table to the scraped record.
                    Object.assign(camera, extractSpecs(body));

                    try {
                        fs.writeFileSync(
                            path.join(OUTPUT_DIR, toSafeFileName(camera.name) + '.txt'),
                            JSON.stringify(camera, null, 4),
                            { encoding: "UTF8" }
                        );
                        finishedCount++;
                        console.log(`Finished ${finishedCount}/${cameraList.length}: ${camera.name}`);
                    } catch (writeError) {
                        console.error(`Could not write the file for ${camera.name}:`, writeError);
                    }
                    // Always resolve; otherwise a failed write would block a queue slot forever.
                    resolve();
                });
            };
            doRequest(MAX_RETRIES);
        }));
    });
});
|