main
tamguo 7 years ago
parent c3ee15dbe6
commit 79bc03d597

@ -30,14 +30,14 @@ public class PaperCrawler {
// NOTE(review): AREA_ID, YEAR, PAPER_TYPE and START_URL are each declared twice below. This looks
// like the before/after lines of a diff shown together — confirm which value is current.
private final String SUBJECT_ID = "gaokao";
// Course
private final String COURSE_ID = "likeshuxue";
// 110000 = Beijing
private final String AREA_ID = "110000";
// 110000 = Beijing | 310000 = Shanghai
private final String AREA_ID = "310000";
// Year
private final String YEAR = "2013";
private final String YEAR = "2016";
// Paper type (1: past exam papers, 2: mock papers, 3: predicted-question papers, 4: selected papers from well-known schools)
private final String PAPER_TYPE = "2";
private final String PAPER_TYPE = "4";
// URL at which crawling starts
private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-16-4-2013-37-1-download";
private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-16-7-2016-93-1-download";
private RunData runData;

@ -65,7 +65,6 @@ public class PaperQuestionCrawler {
// NOTE(review): presumably a zero-padded number pattern for generated file names — usage is not
// visible in this excerpt; confirm.
private static final String FILES_NO_FORMAT = "000000000";
// NOTE(review): holds the same value as COURSE_ID; its usage is not visible in this excerpt.
private static final String FILES_PREFIX = "likeshuxue";
// Course identifier; used as a path segment ("/files/paper/" + COURSE_ID + ...) when image URLs
// in a question's answer, analysis and content are rewritten.
private static final String COURSE_ID = "likeshuxue";
// Site origin that some of the replace(...) calls prepend to the rewritten image path.
// NOTE(review): other replace(...) calls emit the path without this prefix — confirm whether
// this constant is still in use.
private static final String DOMAIN = "http://www.tamguo.com";
private RunData runData;
@ -152,7 +151,7 @@ public class PaperQuestionCrawler {
System.out.println("down images " + (ret?"success":"fail") + "" + img);
// 替换URL
question.setAnswer(question.getAnswer().replace(img, DOMAIN + "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName));
question.setAnswer(question.getAnswer().replace(img, "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName));
}
question.setAnswer(question.getAnswer());
}
@ -173,7 +172,7 @@ public class PaperQuestionCrawler {
System.out.println("down images " + (ret?"success":"fail") + "" + img);
// 替换URL
question.setAnalysis(question.getAnalysis().replace(img, DOMAIN + "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName));
question.setAnalysis(question.getAnalysis().replace(img, "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName));
}
question.setAnalysis(question.getAnalysis());
}
@ -194,7 +193,7 @@ public class PaperQuestionCrawler {
System.out.println("down images " + (ret?"success":"fail") + "" + img);
// 替换URL
question.setContent(question.getContent().replace(img, DOMAIN + "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName));
question.setContent(question.getContent().replace(img, "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName));
}
question.setContent(question.getContent());
}

Loading…
Cancel
Save