From 79bc03d59733e2242c8759341285491c8fe34a31 Mon Sep 17 00:00:00 2001 From: tamguo Date: Tue, 14 Aug 2018 15:11:07 +0800 Subject: [PATCH] bug fix --- .../src/test/java/com/tamguo/PaperCrawler.java | 10 +++++----- .../src/test/java/com/tamguo/PaperQuestionCrawler.java | 7 +++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java index 5f73dcc..4e89122 100644 --- a/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java +++ b/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java @@ -30,14 +30,14 @@ public class PaperCrawler { private final String SUBJECT_ID = "gaokao"; // 科目 private final String COURSE_ID = "likeshuxue"; - // 110000 北京 - private final String AREA_ID = "110000"; + // 110000 北京 | 310000 上海 + private final String AREA_ID = "310000"; // 年份 - private final String YEAR = "2013"; + private final String YEAR = "2016"; // 真题试卷 类型(1:真题试卷,2:模拟试卷,3:押题预测,4:名校精品) - private final String PAPER_TYPE = "2"; + private final String PAPER_TYPE = "4"; // 开始采集的URL - private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-16-4-2013-37-1-download"; + private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-16-7-2016-93-1-download"; private RunData runData; diff --git a/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java index cd648ca..3778863 100644 --- a/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java +++ b/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java @@ -65,7 +65,6 @@ public class PaperQuestionCrawler { private static final String FILES_NO_FORMAT = "000000000"; private static final String FILES_PREFIX = "likeshuxue"; private static final String COURSE_ID = "likeshuxue"; - private static final String DOMAIN = "http://www.tamguo.com"; private RunData runData; @@ -152,7 +151,7 @@ public class PaperQuestionCrawler { System.out.println("down images " + (ret?"success":"fail") + ":" + img); // 替换URL - question.setAnswer(question.getAnswer().replace(img, DOMAIN + "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName)); + question.setAnswer(question.getAnswer().replace(img, "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName)); } question.setAnswer(question.getAnswer()); } @@ -173,7 +172,7 @@ public class PaperQuestionCrawler { System.out.println("down images " + (ret?"success":"fail") + ":" + img); // 替换URL - question.setAnalysis(question.getAnalysis().replace(img, DOMAIN + "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName)); + question.setAnalysis(question.getAnalysis().replace(img, "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName)); } question.setAnalysis(question.getAnalysis()); } @@ -194,7 +193,7 @@ public class PaperQuestionCrawler { System.out.println("down images " + (ret?"success":"fail") + ":" + img); // 替换URL - question.setContent(question.getContent().replace(img, DOMAIN + "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName)); + question.setContent(question.getContent().replace(img, "/files/paper/" + COURSE_ID + '/' + fileDatePath + "/" + fileName)); } question.setContent(question.getContent()); }