main
tamguo 7 years ago
parent ee252950a8
commit 8d4d11bc9a

@ -12,6 +12,8 @@ public class CrawlerPaperEntity extends SuperEntity<CrawlerChapterEntity>{
// Id of the paper this crawled question link belongs to.
private String paperId;
// Index value stored alongside the question link for this row.
// NOTE(review): presumably the question's position within the paper -- confirm against the crawler that fills it.
private Integer queindex;
/**
 * Returns the question page URL held by this entity.
 *
 * @return the stored question URL, or {@code null} if none has been set
 */
public String getQuestionUrl() {
    return this.questionUrl;
}
@ -31,4 +33,12 @@ public class CrawlerPaperEntity extends SuperEntity<CrawlerChapterEntity>{
/**
 * Sets the id of the paper this record is linked to.
 *
 * @param paperId id of the owning paper
 */
public void setPaperId(String paperId) {
this.paperId = paperId;
}
/**
 * Returns the index value stored for this question link.
 *
 * @return the stored index, or {@code null} if none has been set
 */
public Integer getQueindex() {
    return this.queindex;
}
/**
 * Sets the index value stored for this question link.
 *
 * @param queindex index to store; may be {@code null} because the field is a boxed {@code Integer}
 */
public void setQueindex(Integer queindex) {
this.queindex = queindex;
}
}

@ -24,6 +24,9 @@ public class PaperVo {
// Filled from the "abs:href" attribute of every element matching ".view-analyse .view-link".
@PageFieldSelect(cssQuery = ".view-analyse .view-link", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:href")
private List<String> questionUrls;
// Filled from every element matching ".question-box-inner .queindex-wrap .queindex".
// No selectType is given, so the annotation's default extraction applies.
// NOTE(review): this list comes from a different selector than questionUrls, so equal length is not guaranteed -- confirm.
@PageFieldSelect(cssQuery = ".question-box-inner .queindex-wrap .queindex")
private List<Integer> queindexs;
public String getPaperName() {
return paperName;
@ -64,5 +67,13 @@ public class PaperVo {
/**
 * Replaces the list of question page URLs.
 *
 * @param questionUrls the new list; stored by reference, not copied
 */
public void setQuestionUrls(List<String> questionUrls) {
this.questionUrls = questionUrls;
}
/**
 * Returns the list of question index values held by this object.
 *
 * @return the stored list itself (not a copy), or {@code null} if none has been set
 */
public List<Integer> getQueindexs() {
    return this.queindexs;
}
/**
 * Replaces the list of question index values.
 *
 * @param queindexs the new list; stored by reference, not copied
 */
public void setQueindexs(List<Integer> queindexs) {
this.queindexs = queindexs;
}
}

@ -18,6 +18,7 @@ import com.tamguo.model.enums.QuestionType;
import com.tamguo.model.vo.PaperVo;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.rundata.RunData;
@RunWith(SpringRunner.class)
@ -31,11 +32,11 @@ public class PaperCrawler {
// 110000 = Beijing
private final String AREA_ID = "110000";
// Year
private final String YEAR = "2018";
private final String YEAR = "2017";
// Paper type (1: real past-exam papers, 2: mock papers, 3: predicted-question papers, 4: selected papers from top schools)
private final String PAPER_TYPE = "1";
// URL where crawling starts
private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-16-0-2018-37-1-download";
private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-16-1-2017-37-1-download";
private RunData runData;
@ -51,6 +52,7 @@ public class PaperCrawler {
.setAllowSpread(false)
.setFailRetryCount(5)
.setThreadCount(1)
.setPageLoader(new HtmlUnitPageLoader())
.setPageParser(new PageParser<PaperVo>() {
@Override
@ -89,11 +91,13 @@ public class PaperCrawler {
paper.setQuestionInfo(entitys.toJSONString());
paperMapper.insert(paper);
// 插入图片
// 插入
for(int i=0 ; i<paperVo.getQuestionUrls().size() ; i++) {
CrawlerPaperEntity cp = new CrawlerPaperEntity();
cp.setPaperId(paper.getId());
cp.setQuestionUrl(paperVo.getQuestionUrls().get(i));
cp.setQueindex(paperVo.getQueindexs().get(i));
crawlerPaperMapper.insert(cp);
}
}

@ -75,7 +75,7 @@ public class PaperQuestionCrawler {
XxlCrawler crawler = new XxlCrawler.Builder()
.setAllowSpread(false)
.setThreadCount(20)
.setThreadCount(1)
.setFailRetryCount(5)
.setPageLoader(new HtmlUnitPageLoader())
.setPageParser(new PageParser<QuestionVo>() {
@ -215,13 +215,13 @@ public class PaperQuestionCrawler {
}
/**
 * Formats the current time as a compact, digits-only timestamp string.
 *
 * @return the current date-time rendered with the pattern declared below
 */
private String getFileDatePath() {
// NOTE(review): the two declarations below appear to be the before/after sides of this diff,
// presumably removed line first -- only one of them exists in the real file.
// Day, hour and minute only.
SimpleDateFormat sdf = new SimpleDateFormat("ddHHmm");
// Year and month added in front of day, hour and minute.
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmm");
String format = sdf.format(new Date());
return format;
}
private String getFileNo() {
SimpleDateFormat sdf = new SimpleDateFormat("ddHHmm");
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmm");
String format = sdf.format(new Date());
DecimalFormat df = new DecimalFormat(FILES_NO_FORMAT);
String key = FILES_PREFIX + format;
@ -236,7 +236,7 @@ public class PaperQuestionCrawler {
int pageSize = 1000;
while(true) {
Page<CrawlerPaperEntity> questionPage = new Page<CrawlerPaperEntity>(page , pageSize);
List<CrawlerPaperEntity> questionList = crawlerPaperMapper.selectPage(questionPage, Condition.create().orderDesc(Arrays.asList("id")));
List<CrawlerPaperEntity> questionList = crawlerPaperMapper.selectPage(questionPage, Condition.create().orderAsc(Arrays.asList("queindex")));
for(int i=0 ;i<questionList.size() ; i++) {
runData.addUrl(questionList.get(i).getQuestionUrl());
}

@ -27,6 +27,7 @@ public class ThymeleafConfig implements EnvironmentAware{
vars.put("PAPER_TYPE_MONI", SystemConstant.MONI_PAPER_ID);
vars.put("PAPER_TYPE_YATI", SystemConstant.YATI_PAPER_ID);
vars.put("PAPER_TYPE_MINGXIAO", SystemConstant.MINGXIAO_PAPER_ID);
vars.put("BEIJING_AREA_ID", SystemConstant.BEIJING_AREA_ID);
viewResolver.setStaticVariables(vars);
}
}

@ -118,12 +118,13 @@
<div class="kpoint-1-content clearfix">
<div class="detail-kpoint-2" th:each="cc,ccStat:${cc.childChapterList}">
<h5 th:text="${cc.name}">1.1 集合的含义</h5>
<p>
<p th:if="${cc.questionNum > 0}">
<span class="">
<span class="count" th:text="${cc.questionNum}">531</span> 道题
</span>
</p>
<div class="mask" style="display: none;">
<p><span class=" no-que " th:if="${cc.questionNum <= 0}">本知识点暂无试题, 敬请期待!</span></p>
<div class="mask" style="display: none;" th:if="${cc.questionNum > 0}">
<a target="_blank" th:href="${domainName + 'questionlist/' + cc.id + '-1-5.html'}" class="do">
马上做题
</a>

@ -147,7 +147,7 @@
</div>
<div class="homepage-paper-box shiti-box">
<div class="hotpaper-list-wrap">
<h3 class="hotpaper-title">热门试卷<a class="list-more-link" th:href="${domainName+'paperlist/gaokao-0-0-0-1-1.html'}">更多热门试卷 &gt; </a>
<h3 class="hotpaper-title">热门试卷<a class="list-more-link" th:href="${domainName+'paperlist/gaokao-0-0-0-0-1.html'}">更多热门试卷 &gt; </a>
</h3>
<ul class="hotpaper-list">
<li class="hotpaper-list-item" th:each="p,pStat:${hotPaperList}">

@ -3,8 +3,8 @@
<head>
<meta charset="UTF-8">
<title th:text="${'探果题库_' + paper.name}">探果题库_聪明的学生都在这里</title>
<meta name="keywords" content="探果题库为考生提供高效的智能备考服务,涵括领域有高考、财会类、建筑工程、职业资格、医卫类、计算机类和学历类等热门考试题库。拥有高校名师丰富的经验,优质的学习资料和备考全阶段的高效服务!"/>
<meta name="description" content="探果题库,高考试题,高考试卷,高校试题,名校,名校试题,名校试卷,高校名师,名师专访,名师教案,名师课堂试题库,试卷库,智能题库,历年真题,模拟试题,押题,预测试题,高考,会计证,会计从业,会计师,经济师,施工员,建造师,建筑师,造价师,职业资格,证券资格,考研,计算机考试,建筑考试,财会类,医卫类,护士资格,公务员,知识点,试题,试卷"/>
<meta name="keywords" th:content="${#strings.isEmpty(subject.seoKeywords) ? '探果题库为考生提供高效的智能备考服务,涵括领域有高考、财会类、建筑工程、职业资格、医卫类、计算机类和学历类等热门考试题库。拥有高校名师丰富的经验,优质的学习资料和备考全阶段的高效服务!' : subject.seoKeywords}"/>
<meta name="description" th:content="${#strings.isEmpty(subject.seoDescription) ? '探果题库,高考试题,高考试卷,高校试题,名校,名校试题,名校试卷,高校名师,名师专访,名师教案,名师课堂试题库,试卷库,智能题库,历年真题,模拟试题,押题,预测试题,高考,会计证,会计从业,会计师,经济师,施工员,建造师,建筑师,造价师,职业资格,证券资格,考研,计算机考试,建筑考试,财会类,医卫类,护士资格,公务员,知识点,试题,试卷' : subject.seoDescription}"/>
<meta name="author" content="Tamguo Team" />
<meta name="copyright" content="Tamguo" />
<meta name="keywords" th:content="${paper.seoKeywords}" />

@ -2,9 +2,9 @@
<html lang="Zh-hans" xmlns:th="http://www.thymeleaf.org">
<head>
<meta charset="UTF-8">
<title>探果题库_聪明的学生都在这里</title>
<meta name="keywords" content="探果题库为考生提供高效的智能备考服务,涵括领域有高考、财会类、建筑工程、职业资格、医卫类、计算机类和学历类等热门考试题库。拥有高校名师丰富的经验,优质的学习资料和备考全阶段的高效服务!"/>
<meta name="description" content="探果题库,高考试题,高考试卷,高校试题,名校,名校试题,名校试卷,高校名师,名师专访,名师教案,名师课堂试题库,试卷库,智能题库,历年真题,模拟试题,押题,预测试题,高考,会计证,会计从业,会计师,经济师,施工员,建造师,建筑师,造价师,职业资格,证券资格,考研,计算机考试,建筑考试,财会类,医卫类,护士资格,公务员,知识点,试题,试卷"/>
<title th:text="${#strings.isEmpty(subject.seoTitle) ? '探果题库_聪明的学生都在这里' : subject.seoTitle}"></title>
<meta name="keywords" th:content="${#strings.isEmpty(subject.seoKeywords) ? '探果题库为考生提供高效的智能备考服务,涵括领域有高考、财会类、建筑工程、职业资格、医卫类、计算机类和学历类等热门考试题库。拥有高校名师丰富的经验,优质的学习资料和备考全阶段的高效服务!' : subject.seoKeywords}"/>
<meta name="description" th:content="${#strings.isEmpty(subject.seoDescription) ? '探果题库,高考试题,高考试卷,高校试题,名校,名校试题,名校试卷,高校名师,名师专访,名师教案,名师课堂试题库,试卷库,智能题库,历年真题,模拟试题,押题,预测试题,高考,会计证,会计从业,会计师,经济师,施工员,建造师,建筑师,造价师,职业资格,证券资格,考研,计算机考试,建筑考试,财会类,医卫类,护士资格,公务员,知识点,试题,试卷' : subject.seoDescription}"/>
<link rel="stylesheet" th:href="${domainName + 'css/main.css'}"></link>
<link rel="stylesheet" th:href="${domainName + 'css/reset.css'}" />
<link rel="stylesheet" th:href="${domainName + 'css/iconfont.css'}" />

@ -2,7 +2,7 @@
<html lang="Zh-hans" xmlns:th="http://www.thymeleaf.org">
<head>
<meta charset="UTF-8">
<title>探果题库_聪明的学生都在这里</title>
<title th:text="${'探果题库_' + subject.name +'_' + course.name +'_'+ chapter.name}">探果题库_聪明的学生都在这里</title>
<meta name="keywords" content="探果题库为考生提供高效的智能备考服务,涵括领域有高考、财会类、建筑工程、职业资格、医卫类、计算机类和学历类等热门考试题库。拥有高校名师丰富的经验,优质的学习资料和备考全阶段的高效服务!"/>
<meta name="description" content="探果题库,高考试题,高考试卷,高校试题,名校,名校试题,名校试卷,高校名师,名师专访,名师教案,名师课堂试题库,试卷库,智能题库,历年真题,模拟试题,押题,预测试题,高考,会计证,会计从业,会计师,经济师,施工员,建造师,建筑师,造价师,职业资格,证券资格,考研,计算机考试,建筑考试,财会类,医卫类,护士资格,公务员,知识点,试题,试卷"/>
<meta name="author" content="Tamguo Team" />

Loading…
Cancel
Save