diff --git a/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java index 4fe79d1..5f73dcc 100644 --- a/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java +++ b/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java @@ -21,6 +21,7 @@ import com.xuxueli.crawler.parser.PageParser; import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader; import com.xuxueli.crawler.rundata.RunData; +// 北京模拟试卷,真题试卷已经爬取完毕 @RunWith(SpringRunner.class) @SpringBootTest public class PaperCrawler { @@ -32,11 +33,11 @@ public class PaperCrawler { // 110000 北京 private final String AREA_ID = "110000"; // 年份 - private final String YEAR = "2017"; + private final String YEAR = "2013"; // 真题试卷 类型(1:真题试卷,2:模拟试卷,3:押题预测,4:名校精品) - private final String PAPER_TYPE = "1"; + private final String PAPER_TYPE = "2"; // 开始采集的URL - private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-16-1-2017-37-1-download"; + private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-16-4-2013-37-1-download"; private RunData runData; diff --git a/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java index 23bd4cf..cd648ca 100644 --- a/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java +++ b/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java @@ -215,13 +215,13 @@ public class PaperQuestionCrawler { } private String getFileDatePath() { - SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmm"); + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHH"); String format = sdf.format(new Date()); return format; } private String getFileNo() { - SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmm"); + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHH"); String format = sdf.format(new Date()); DecimalFormat df = new DecimalFormat(FILES_NO_FORMAT); String key = FILES_PREFIX + format; @@ -236,7 +236,7 @@ public class PaperQuestionCrawler { int pageSize = 1000; while(true) { Page questionPage = new Page(page , pageSize); - List questionList = crawlerPaperMapper.selectPage(questionPage, Condition.create().orderAsc(Arrays.asList("queindex"))); + List questionList = crawlerPaperMapper.selectPage(questionPage, Condition.create().orderAsc(Arrays.asList("paper_id" , "queindex"))); for(int i=0 ;i