重新构建数据

main
tamguo 7 years ago
parent da866c5dcc
commit c8fd2f945e

@ -142,11 +142,6 @@
<artifactId>commons-email</artifactId>
<version>1.5</version>
</dependency>
<dependency>
<groupId>com.xuxueli</groupId>
<artifactId>xxl-crawler</artifactId>
<version>1.2.1</version>
</dependency>
</dependencies>
<dependencyManagement>

@ -1,245 +0,0 @@
package com.tamguo.admin.crawler;
import com.tamguo.admin.model.QuestionEntity;
import com.tamguo.admin.service.IQuestionService;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.annotation.PageFieldSelect;
import com.xuxueli.crawler.annotation.PageSelect;
import com.xuxueli.crawler.parser.PageParser;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.math.BigInteger;
/**
 * Crawler task that scrapes single exam papers from tiku.baidu.com with xxl-crawler and stores
 * every matched question block as a {@link QuestionEntity} through {@link IQuestionService}.
 *
 * <p>Each crawl method starts from one paper-detail URL and white-lists only that same URL.
 * Of the three crawl methods only {@link #crawlerWZData()} carries an active
 * {@code @Scheduled} annotation; the other two have theirs commented out.
 *
 * @author xuxueli 2017-10-09 19:48:48
 */
@Component
@EnableScheduling
public class XxlCrawlerTask {

    private static final Logger logger = LoggerFactory.getLogger(XxlCrawlerTask.class);

    /** Stored as the question content when the page yields none, and always stored as the answer. */
    private static final String NONE = "无";

    @Autowired
    private IQuestionService iQuestionService;

    /** Page model bound to elements matching {@code .que-multi}. */
    @PageSelect(cssQuery = ".que-multi")
    public static class PageVo {

        @PageFieldSelect(cssQuery = ".que-multi")
        private String question;

        public String getQuestion() {
            return question;
        }

        public void setQuestion(String question) {
            this.question = question;
        }

        @Override
        public String toString() {
            return "PageVo{" +
                    "question='" + question + '\'' +
                    '}';
        }
    }

    /** Page model bound to elements matching {@code .question-box-inner}. */
    @PageSelect(cssQuery = ".question-box-inner")
    public static class PageLiZongVo {

        @PageFieldSelect(cssQuery = ".question-box-inner")
        private String question;

        public String getQuestion() {
            return question;
        }

        public void setQuestion(String question) {
            this.question = question;
        }

        @Override
        public String toString() {
            return "PageLiZongVo{" +
                    "question='" + question + '\'' +
                    '}';
        }
    }

    /** Page model bound to elements matching {@code .question-box-inner}. */
    @PageSelect(cssQuery = ".question-box-inner")
    public static class PageWenZongVo {

        @PageFieldSelect(cssQuery = ".question-box-inner")
        private String question;

        public String getQuestion() {
            return question;
        }

        public void setQuestion(String question) {
            this.question = question;
        }

        @Override
        public String toString() {
            return "PageWenZongVo{" +
                    "question='" + question + '\'' +
                    '}';
        }
    }

    /**
     * Crawls paper {@code 4baa90f5f61fb7360b4c656b} and saves each question with review point
     * "语文" and year 2018. Not scheduled at the moment.
     *
     * @author sh00859 (2018/6/29)
     */
    // @Scheduled(cron = "0 0/1 * * * ?")
    public void crawlerYWData() {
        // The parser stays an anonymous subclass with a concrete type argument.
        // NOTE(review): xxl-crawler presumably resolves the VO class from that argument - confirm
        // before replacing it with a shared generic parser.
        XxlCrawler crawler = new XxlCrawler.Builder()
                .setUrls("https://tiku.baidu.com/tikupc/paperdetail/4baa90f5f61fb7360b4c656b")
                .setWhiteUrlRegexs("https://tiku\\.baidu\\.com/tikupc/paperdetail/4baa90f5f61fb7360b4c656b")
                .setPageParser(new PageParser<PageVo>() {
                    @Override
                    public void parse(Document html, Element pageVoElement, PageVo pageVo) {
                        String pageUrl = html.baseUri();
                        logger.info("{}{}", pageUrl, pageVo);
                        saveQuestion(pageUrl, pageVo.getQuestion(),
                                "1012550050327625730", "1012550408013676545", "语文");
                    }
                })
                .build();
        runCrawler(crawler);
    }

    /**
     * Crawls paper {@code de62bec66137ee06eff91868} and saves each question with review point
     * "理综" and year 2018. Not scheduled at the moment.
     *
     * @author sh00859 (2018/6/29)
     */
    // @Scheduled(cron = "0 0 19 * * ?")
    public void crawlerSXData() {
        XxlCrawler crawler = new XxlCrawler.Builder()
                .setUrls("https://tiku.baidu.com/tikupc/paperdetail/de62bec66137ee06eff91868")
                .setWhiteUrlRegexs("https://tiku\\.baidu\\.com/tikupc/paperdetail/de62bec66137ee06eff91868")
                .setPageParser(new PageParser<PageLiZongVo>() {
                    @Override
                    public void parse(Document html, Element pageVoElement, PageLiZongVo pageVo) {
                        String pageUrl = html.baseUri();
                        logger.info("{}{}", pageUrl, pageVo);
                        saveQuestion(pageUrl, pageVo.getQuestion(),
                                "1012652550204428289", "1012652716789600257", "理综");
                    }
                })
                .build();
        runCrawler(crawler);
    }

    /**
     * Crawls paper {@code acf80b22bcd126fff7050b72} and saves each question with year 2018.
     * Triggered by the cron expression {@code 0 27 19 * * ?}.
     *
     * @author sh00859 (2018/6/29)
     */
    @Scheduled(cron = "0 27 19 * * ?")
    public void crawlerWZData() {
        XxlCrawler crawler = new XxlCrawler.Builder()
                .setUrls("https://tiku.baidu.com/tikupc/paperdetail/acf80b22bcd126fff7050b72")
                .setWhiteUrlRegexs("https://tiku\\.baidu\\.com/tikupc/paperdetail/acf80b22bcd126fff7050b72")
                .setPageParser(new PageParser<PageWenZongVo>() {
                    @Override
                    public void parse(Document html, Element pageVoElement, PageWenZongVo pageVo) {
                        String pageUrl = html.baseUri();
                        logger.info("{}{}", pageUrl, pageVo);
                        // NOTE(review): this method uses the WenZong VO but stores the same review
                        // point as crawlerSXData ("理综"); looks like a copy-paste slip - confirm
                        // whether "文综" was intended before changing persisted data.
                        saveQuestion(pageUrl, pageVo.getQuestion(),
                                "1012658027151851521", "1012658169615581186", "理综");
                    }
                })
                .build();
        runCrawler(crawler);
    }

    /**
     * Starts the given crawler with {@code start(true)} and logs a marker before and after.
     *
     * @param crawler fully configured crawler to run
     */
    private void runCrawler(XxlCrawler crawler) {
        logger.info("start");
        // NOTE(review): the boolean is presumably a "run synchronously" flag - confirm.
        crawler.start(true);
        logger.info("end");
    }

    /**
     * Builds a {@link QuestionEntity} from one scraped question and persists it.
     *
     * <p>All crawl methods share the same fixed values (chapter 1, question type "5",
     * subject "13", score 10, year "2018"); only the ids and the review point differ.
     * The page models capture no answer, so the answer is always the placeholder.
     * A failing save is logged and does not abort the crawl.
     *
     * @param pageUrl     URL of the page the question came from; used only for error logging
     * @param content     scraped question text, or {@code null} when nothing was extracted
     * @param courseId    course id to store on the question
     * @param paperId     paper id to store on the question, as a decimal string
     * @param reviewPoint review point label to store on the question
     */
    private void saveQuestion(String pageUrl, String content, String courseId,
                              String paperId, String reviewPoint) {
        QuestionEntity question = new QuestionEntity();
        question.setChapterId(new BigInteger("1"));
        question.setCourseId(courseId);
        question.setPaperId(new BigInteger(paperId));
        question.setContent(content == null ? NONE : content);
        question.setAnswer(NONE);
        question.setAnalysis("暂无解释");
        question.setQuestionType("5");
        question.setReviewPoint(reviewPoint);
        question.setSubjectId("13");
        question.setScore(10);
        question.setYear("2018");
        try {
            iQuestionService.save(question);
        } catch (Exception e) {
            // The exception must be the LAST argument so SLF4J logs its stack trace; the
            // placeholder is filled with the page URL instead of being left as a literal "{}".
            logger.error("错误信息[{}]", pageUrl, e);
        }
    }
}

@ -25,8 +25,6 @@ public class CourseEntity extends SuperEntity<CourseEntity> implements Serializa
private BigInteger questionNum;
private String icon;
private Integer orders;
private String seoTitle;
@ -76,14 +74,6 @@ public class CourseEntity extends SuperEntity<CourseEntity> implements Serializa
this.pointNum = pointNum;
}
/**
 * Returns the value of the {@code icon} field.
 *
 * @return the stored icon string; {@code null} if it was never set
 */
public String getIcon() {
return icon;
}
/**
 * Replaces the value of the {@code icon} field.
 *
 * @param icon new icon string; stored as given, without validation
 */
public void setIcon(String icon) {
this.icon = icon;
}
/**
 * Returns the value of the {@code orders} field.
 *
 * @return the stored orders value; {@code null} if it was never set
 */
public Integer getOrders() {
return orders;
}

@ -8,7 +8,6 @@
<result column="subject_id" property="subjectId" jdbcType="INTEGER" />
<result column="point_num" property="pointNum" jdbcType="INTEGER" />
<result column="question_num" property="questionNum" jdbcType="INTEGER" />
<result column="icon" property="icon" jdbcType="VARCHAR" />
<result column="orders" property="orders" jdbcType="INTEGER" />
<result column="seo_title" property="seoTitle" jdbcType="VARCHAR" />
<result column="seo_keywords" property="seoKeywords" jdbcType="VARCHAR" />
@ -24,7 +23,6 @@
c.point_num,
c.question_num,
c.subject_id,
c.icon
FROM
tiku_course c
WHERE
@ -34,7 +32,6 @@
<select id="queryPageByName" resultType="CourseEntity">
SELECT
c.icon,
c.`name`,
c.orders,
c.point_num,

@ -8,8 +8,7 @@ $(function () {
{ label: '科目名称', name: 'name', width: 60 },
{ label: '排序', name: 'orders', width: 60 },
{ label: '题目数量', name: 'questionNum', width: 60 },
{ label: '知识点数量', name: 'pointNum', width: 100 },
{ label: '图标', name: 'icon', width: 50}
{ label: '知识点数量', name: 'pointNum', width: 100 }
],
viewrecords: true,
height: 385,
@ -101,7 +100,6 @@ var vm = new Vue({
subjectId:null,
pointNum:null,
questionNum:null,
icon:null,
orders:0,
chapterList:[]
}
@ -120,7 +118,7 @@ var vm = new Vue({
add: function(){
vm.showList = false;
vm.title = "新增";
vm.course = {uid:null,name:null,subjectId:null,pointNum:null,questionNum:null,icon:null,orders:0};
vm.course = {uid:null,name:null,subjectId:null,pointNum:null,questionNum:null,orders:0};
axios.all([vm.getMenu() , vm.getSubjectList()]).then(axios.spread(function (mResponse,sResponse) {
ztree = $.fn.zTree.init($("#menuTree"), setting, mResponse.data.result);

@ -0,0 +1,9 @@
package com.tamguo.dao;
import com.tamguo.config.dao.SuperMapper;
import com.tamguo.model.CourseEntity;
/**
 * Mapper interface for {@link CourseEntity}.
 *
 * <p>Declares no methods of its own; everything callable on it is inherited from
 * {@link SuperMapper}.
 */
public interface CourseMapper extends SuperMapper<CourseEntity>{
}

@ -0,0 +1,115 @@
package com.tamguo.model;
import java.io.Serializable;
import com.baomidou.mybatisplus.annotations.TableName;
import com.tamguo.config.dao.SuperEntity;
import java.math.BigInteger;
/**
 * Persistent model for one row of the {@code tiku_course} database table.
 *
 * <p>A plain mutable bean: every field is exposed through a getter/setter pair and no
 * accessor performs validation or copying.
 */
@TableName(value = "tiku_course")
public class CourseEntity extends SuperEntity<CourseEntity> implements Serializable {

    private static final long serialVersionUID = 1L;

    private String name;

    private String subjectId;

    private BigInteger pointNum;

    private BigInteger questionNum;

    private String icon;

    private Integer orders;

    private String seoTitle;

    private String seoKeywords;

    private String seoDescription;

    /** Creates an entity with every field left unset. */
    public CourseEntity() {
        super();
    }

    /** @return the course name */
    public String getName() {
        return this.name;
    }

    /** @param name the course name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the id of the subject this course is attached to */
    public String getSubjectId() {
        return this.subjectId;
    }

    /** @param subjectId the subject id to store */
    public void setSubjectId(String subjectId) {
        this.subjectId = subjectId;
    }

    /** @return the stored question count */
    public BigInteger getQuestionNum() {
        return this.questionNum;
    }

    /** @param questionNum the question count to store */
    public void setQuestionNum(BigInteger questionNum) {
        this.questionNum = questionNum;
    }

    /** @return the stored point count */
    public BigInteger getPointNum() {
        return this.pointNum;
    }

    /** @param pointNum the point count to store */
    public void setPointNum(BigInteger pointNum) {
        this.pointNum = pointNum;
    }

    /** @return the stored icon string */
    public String getIcon() {
        return this.icon;
    }

    /** @param icon the icon string to store */
    public void setIcon(String icon) {
        this.icon = icon;
    }

    /** @return the stored orders value */
    public Integer getOrders() {
        return this.orders;
    }

    /** @param orders the orders value to store */
    public void setOrders(Integer orders) {
        this.orders = orders;
    }

    /** @return the SEO title */
    public String getSeoTitle() {
        return this.seoTitle;
    }

    /** @param seoTitle the SEO title to store */
    public void setSeoTitle(String seoTitle) {
        this.seoTitle = seoTitle;
    }

    /** @return the SEO keywords */
    public String getSeoKeywords() {
        return this.seoKeywords;
    }

    /** @param seoKeywords the SEO keywords to store */
    public void setSeoKeywords(String seoKeywords) {
        this.seoKeywords = seoKeywords;
    }

    /** @return the SEO description */
    public String getSeoDescription() {
        return this.seoDescription;
    }

    /** @param seoDescription the SEO description to store */
    public void setSeoDescription(String seoDescription) {
        this.seoDescription = seoDescription;
    }

    /** @return the serialization version constant of this class */
    public static long getSerialversionuid() {
        return CourseEntity.serialVersionUID;
    }
}

@ -14,6 +14,9 @@ public class SubjectVo {
@PageFieldSelect(cssQuery=".course-list-container .course-list .course-item")
private List<String> courseName;
@PageFieldSelect(cssQuery=".submenu-contain .contain-title")
private String subjectName;
/**
 * Returns the list held in the {@code name} field.
 *
 * @return the stored list of names, exactly as it was set (no copy is made)
 */
public List<String> getName() {
return name;
}
@ -22,4 +25,20 @@ public class SubjectVo {
this.name = name;
}
/**
 * Returns the course names bound to this page model; the field is annotated with the CSS
 * query {@code .course-list-container .course-list .course-item}.
 *
 * @return the stored list of course names, exactly as it was set (no copy is made)
 */
public List<String> getCourseName() {
return courseName;
}
/**
 * Replaces the list of course names.
 *
 * @param courseName new list; the reference is stored as given, without copying
 */
public void setCourseName(List<String> courseName) {
this.courseName = courseName;
}
/**
 * Returns the subject name bound to this page model; the field is annotated with the CSS
 * query {@code .submenu-contain .contain-title}.
 *
 * @return the stored subject name
 */
public String getSubjectName() {
return subjectName;
}
/**
 * Replaces the subject name.
 *
 * @param subjectName new subject name; stored as given, without validation
 */
public void setSubjectName(String subjectName) {
this.subjectName = subjectName;
}
}

@ -1,12 +1,19 @@
package com.tamguo.service.impl;
import java.math.BigInteger;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.tamguo.dao.CourseMapper;
import com.tamguo.dao.SubjectMapper;
import com.tamguo.model.CourseEntity;
import com.tamguo.model.SubjectEntity;
import com.tamguo.model.vo.SubjectVo;
import com.tamguo.service.ISubjectService;
@ -19,6 +26,10 @@ public class SubjectService implements ISubjectService{
@Autowired
SubjectMapper subjectMapper;
@Autowired
CourseMapper courseMapper;
private Logger logger = LoggerFactory.getLogger(getClass());
private RunData runData;
@ -26,33 +37,58 @@ public class SubjectService implements ISubjectService{
public void crawlerSubject() {
XxlCrawler crawler = new XxlCrawler.Builder()
.setUrls("https://tiku.baidu.com/")
.setWhiteUrlRegexs("https://tiku\\.baidu\\.com/")
.setWhiteUrlRegexs("https://tiku\\.baidu\\.com/tikupc/homepage/\\w+" , "https://tiku.baidu.com/")
.setPageParser(new PageParser<SubjectVo>() {
@Override
public void parse(Document html, Element pageVoElement, SubjectVo subjectVo) {
// 解析封装 PageVo 对象
String pageUrl = html.baseUri();
System.out.println(pageUrl + "" + subjectVo.toString());
for(int i=0 ; i<subjectVo.getName().size() ; i++) {
String name = subjectVo.getName().get(i);
SubjectEntity subject = subjectMapper.findByName(name);
if(subject != null) {
continue;
}
SubjectEntity entity = new SubjectEntity();
entity.setName(name);
subjectMapper.insert(entity);
// 获取Course
Elements elements = pageVoElement.getElementsByClass("all-list-li");
for(int k=0 ; k<elements.size() ; k++) {
Element element = elements.get(k);
String url = element.child(0).attr("href");
runData.addUrl(url);
}
if(pageUrl.equals("https://tiku.baidu.com/")) {
logger.info("开始解析考试分类:{}" , pageUrl);
for(int i=0 ; i<subjectVo.getName().size() ; i++) {
String name = subjectVo.getName().get(i);
SubjectEntity subject = subjectMapper.findByName(name);
if(subject != null) {
continue;
}
SubjectEntity entity = new SubjectEntity();
if(name.equals("国考")) {
name = "公务员(国考)";
}
entity.setName(name);
subjectMapper.insert(entity);
// 获取Course
Elements elements = pageVoElement.getElementsByClass("all-list-li");
for(int k=0 ; k<elements.size() ; k++) {
Element element = elements.get(k);
String url = element.child(0).attr("href");
runData.addUrl(url);
}
}
}
if(pageUrl.contains("https://tiku.baidu.com/tikupc/homepage/")) {
logger.info("开始解析科目分类:{}" , pageUrl);
for(int i=0 ; i<subjectVo.getCourseName().size() ; i++) {
logger.info("科目名称:{}" , subjectVo.getCourseName().get(i));
SubjectEntity subject = subjectMapper.findByName(subjectVo.getSubjectName());
CourseEntity course = new CourseEntity();
course.setIcon(StringUtils.EMPTY);
course.setName(subjectVo.getCourseName().get(i));
course.setOrders(i+1);
course.setPointNum(BigInteger.ZERO);
course.setQuestionNum(BigInteger.ZERO);
course.setSeoDescription(subjectVo.getCourseName().get(i));
course.setSeoKeywords(subjectVo.getCourseName().get(i));
course.setSeoTitle(subjectVo.getCourseName().get(i));
course.setSubjectId(subject.getUid());
courseMapper.insert(course);
}
}
}

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<!-- MyBatis mapper bound to com.tamguo.dao.CourseMapper; no statements are declared here. -->
<mapper namespace="com.tamguo.dao.CourseMapper">
</mapper>

@ -25,8 +25,6 @@ public class CourseEntity extends SuperEntity<CourseEntity> implements Serializa
private BigInteger questionNum;
private String icon;
private Integer orders;
private String seoTitle;
@ -76,14 +74,6 @@ public class CourseEntity extends SuperEntity<CourseEntity> implements Serializa
this.pointNum = pointNum;
}
/**
 * Returns the value of the {@code icon} field.
 *
 * @return the stored icon string; {@code null} if it was never set
 */
public String getIcon() {
return icon;
}
/**
 * Replaces the value of the {@code icon} field.
 *
 * @param icon new icon string; stored as given, without validation
 */
public void setIcon(String icon) {
this.icon = icon;
}
/**
 * Returns the value of the {@code orders} field.
 *
 * @return the stored orders value; {@code null} if it was never set
 */
public Integer getOrders() {
return orders;
}

@ -19,7 +19,7 @@ spring.datasource.testOnReturn=false
spring.datasource.testWhileIdle=true
spring.datasource.timeBetweenEvictionRunsMillis=60000
spring.datasource.type=com.alibaba.druid.pool.DruidDataSource
spring.datasource.url=jdbc:mysql://47.100.175.14:3306/tiku?useUnicode=true&characterEncoding=UTF-8&useSSL=false
spring.datasource.url=jdbc:mysql://47.100.175.14:3306/tamguo?useUnicode=true&characterEncoding=UTF-8&useSSL=false
spring.datasource.username=root
spring.datasource.validationQuery=SELECT 1 FROM DUAL

@ -9,8 +9,7 @@
c.orders,
c.point_num,
c.question_num,
c.subject_id,
c.icon
c.subject_id
FROM
tiku_course c
WHERE

Loading…
Cancel
Save