tiku_book管理

main
cff 7 years ago
parent d328257550
commit b1a9cdfc89

@ -0,0 +1,8 @@
package com.tamguo.dao;
import com.tamguo.config.dao.SuperMapper;
import com.tamguo.model.BookEntity;
/**
 * Data-access interface for {@link BookEntity} records.
 *
 * <p>Declares no methods of its own; every operation available on this mapper is inherited from
 * {@code SuperMapper}.
 */
public interface BookMapper extends SuperMapper<BookEntity> {
}

@ -0,0 +1,92 @@
package com.tamguo.model;
import com.baomidou.mybatisplus.annotations.TableName;
import com.tamguo.config.dao.SuperEntity;
import java.io.Serializable;
/**
 * The persistent class for the {@code tiku_book} database table.
 *
 * <p>Plain data holder: each field is exposed through a getter/setter pair and no validation is
 * performed on assignment.
 */
@TableName(value = "tiku_book")
public class BookEntity extends SuperEntity<BookEntity> implements Serializable {
private static final long serialVersionUID = 1L;
// Identifier of the owning subject (presumably a key into the subject table; confirm).
private String subjectId;
// Identifier of the owning course (presumably a key into the course table; confirm).
private String courseId;
// Display name of the book.
private String name;
// Publisher of the book (presumably the publisher's name; confirm).
private String publishingHouse;
// Number of questions attached to the book (presumably a cached count; confirm).
private Integer questionNum;
// Number of knowledge points attached to the book (presumably a cached count; confirm).
private Integer pointNum;
// NOTE(review): looks like a sort/display position; confirm against the queries that read it.
private Integer orders;
/** Creates an entity with every field left unset ({@code null}). */
public BookEntity() {
}
/**
 * Returns the serialization version constant of this class.
 *
 * @return the value of {@code serialVersionUID}
 */
public static long getSerialVersionUID() {
return serialVersionUID;
}
/** @return the subject identifier, or {@code null} if unset */
public String getSubjectId() {
return subjectId;
}
/** @param subjectId the subject identifier to store */
public void setSubjectId(String subjectId) {
this.subjectId = subjectId;
}
/** @return the course identifier, or {@code null} if unset */
public String getCourseId() {
return courseId;
}
/** @param courseId the course identifier to store */
public void setCourseId(String courseId) {
this.courseId = courseId;
}
/** @return the book name, or {@code null} if unset */
public String getName() {
return name;
}
/** @param name the book name to store */
public void setName(String name) {
this.name = name;
}
/** @return the publishing house, or {@code null} if unset */
public String getPublishingHouse() {
return publishingHouse;
}
/** @param publishingHouse the publishing house to store */
public void setPublishingHouse(String publishingHouse) {
this.publishingHouse = publishingHouse;
}
/** @return the question count, or {@code null} if unset */
public Integer getQuestionNum() {
return questionNum;
}
/** @param questionNum the question count to store */
public void setQuestionNum(Integer questionNum) {
this.questionNum = questionNum;
}
/** @return the point count, or {@code null} if unset */
public Integer getPointNum() {
return pointNum;
}
/** @param pointNum the point count to store */
public void setPointNum(Integer pointNum) {
this.pointNum = pointNum;
}
/** @return the value of the {@code orders} field, or {@code null} if unset */
public Integer getOrders() {
return orders;
}
/** @param orders the value to store in the {@code orders} field */
public void setOrders(Integer orders) {
this.orders = orders;
}
}

@ -9,59 +9,8 @@ import java.util.List;
@PageSelect(cssQuery = "body")
public class BookVo {
@PageFieldSelect(cssQuery = ".all-list-li")
@PageFieldSelect(cssQuery = ".ih3")
private List<String> name;
// 类型名称
@PageFieldSelect(cssQuery=".submenu-contain .contain-title")
private String subjectName;
// 科目信息
@PageFieldSelect(cssQuery=".course-list-container .course-list .course-item")
private List<String> courseName;
// 带采集的科目URLs
@PageFieldSelect(cssQuery = ".all-list-li a", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:href")
private List<String> courseUrls;
@PageFieldSelect(cssQuery=".screening .selected a")
private String chapterPageCourseName;
@PageFieldSelect(cssQuery=".screening .selected a")
private String chapterCurrName;
// 带采集的章节URLs缓存
@PageFieldSelect(cssQuery = ".main-submenu .contain-ul .contain-li:eq(1) a", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:href")
private List<String> chapterUrlsTemp;
// 待采集的章节URLs
@PageFieldSelect(cssQuery = ".screening .sc-subject li:not(.selected) a", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:href")
private List<String> chapterUrls;
// 待采集的问题URLs
@PageFieldSelect(cssQuery = ".list-right .detail-chapter .detail-kpoint-1 .detail-kpoint-2 .mask a", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:href")
private List<String> questionUrlsTemp;
// 待采集问题URLs
@PageFieldSelect(cssQuery = ".bd-content .question-box .question-box-inner .view-analyse a", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:href")
private List<String> questionUrls;
// 单个题目数据
@PageFieldSelect(cssQuery=".question-box-inner .questem-inner", selectType = XxlCrawlerConf.SelectType.HTML)
private String content;
@PageFieldSelect(cssQuery=".exam-answer-content", selectType = XxlCrawlerConf.SelectType.HTML)
private List<String> answer;
@PageFieldSelect(cssQuery=".exam-analysis .exam-analysis-content", selectType = XxlCrawlerConf.SelectType.HTML)
private String analysis;
@PageFieldSelect(cssQuery=".que-title span:eq(0)",selectType = XxlCrawlerConf.SelectType.TEXT)
private String questionType;
@PageFieldSelect(cssQuery=".que-title span:eq(1)",selectType = XxlCrawlerConf.SelectType.TEXT)
private String score;
public List<String> getName() {
return name;
@ -70,117 +19,4 @@ public class BookVo {
public void setName(List<String> name) {
this.name = name;
}
public List<String> getCourseName() {
return courseName;
}
public void setCourseName(List<String> courseName) {
this.courseName = courseName;
}
public String getSubjectName() {
return subjectName;
}
public void setSubjectName(String subjectName) {
this.subjectName = subjectName;
}
public List<String> getCourseUrls() {
return courseUrls;
}
public void setCourseUrls(List<String> courseUrls) {
this.courseUrls = courseUrls;
}
public List<String> getChapterUrls() {
return chapterUrls;
}
public void setChapterUrls(List<String> chapterUrls) {
this.chapterUrls = chapterUrls;
}
public List<String> getChapterUrlsTemp() {
return chapterUrlsTemp;
}
public void setChapterUrlsTemp(List<String> chapterUrlsTemp) {
this.chapterUrlsTemp = chapterUrlsTemp;
}
public String getChapterPageCourseName() {
return chapterPageCourseName;
}
public void setChapterPageCourseName(String chapterPageCourseName) {
this.chapterPageCourseName = chapterPageCourseName;
}
public String getChapterCurrName() {
return chapterCurrName;
}
public void setChapterCurrName(String chapterCurrName) {
this.chapterCurrName = chapterCurrName;
}
public List<String> getQuestionUrlsTemp() {
return questionUrlsTemp;
}
public void setQuestionUrlsTemp(List<String> questionUrlsTemp) {
this.questionUrlsTemp = questionUrlsTemp;
}
public List<String> getQuestionUrls() {
return questionUrls;
}
public void setQuestionUrls(List<String> questionUrls) {
this.questionUrls = questionUrls;
}
public String getAnalysis() {
return analysis;
}
public void setAnalysis(String analysis) {
this.analysis = analysis;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public String getQuestionType() {
return questionType;
}
public void setQuestionType(String questionType) {
this.questionType = questionType;
}
public String getScore() {
return score;
}
public void setScore(String score) {
this.score = score;
}
public List<String> getAnswer() {
return answer;
}
public void setAnswer(List<String> answer) {
this.answer = answer;
}
}

@ -1,14 +1,11 @@
package com.tamguo.service.impl;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.dao.CourseMapper;
import com.tamguo.dao.CrawlerQuestionMapper;
import com.tamguo.dao.SubjectMapper;
import com.tamguo.dao.BookMapper;
import com.tamguo.model.BookEntity;
import com.tamguo.model.vo.BookVo;
import com.tamguo.service.IBookService;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.rundata.RunData;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
@ -16,33 +13,17 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.List;
@Service
public class BookService implements IBookService {
@Autowired
SubjectMapper subjectMapper;
@Autowired
CourseMapper courseMapper;
@Autowired
ChapterMapper chapterMapper;
@Autowired
CrawlerQuestionMapper crawlerQuestionMapper;
BookMapper bookMapper;
private Logger logger = LoggerFactory.getLogger(getClass());
private Set<String> urls = new HashSet<>();
private Set<String> questionUrls = new HashSet<String>();
private Map<String, Object> chapterQuestionListMap = new HashMap<>();
private RunData runData;
@Override
public void crawlerBook() {
@ -56,38 +37,23 @@ public class BookService implements IBookService {
public void parse(Document html, Element pageVoElement, BookVo bookVo) {
// Page callback: inspects the URL of the parsed document and, for the book-listing page,
// inserts one BookEntity per name carried by bookVo. pageVoElement is not used here.
// NOTE(review): this span contains two `if` openers (tiku.baidu.com and dzkbw.com) but only
// one is visibly closed before the method ends; it looks like both sides of a change are
// present. Confirm which branch is the live one.
// Parse and populate the PageVo object
String pageUrl = html.baseUri();
if (pageUrl.equals("https://tiku.baidu.com/")) {
logger.info("开始解析考试分类:{}", pageUrl);
// Disabled subject-import logic, kept for reference:
// for (int i = 0; i < subjectVo.getName().size(); i++) {
// String name = subjectVo.getName().get(i);
//
// SubjectEntity subject = subjectMapper.findByName(name);
// if (subject != null) {
// continue;
// }
// SubjectEntity entity = new SubjectEntity();
// if (name.equals("高考")) {
// name = "高考";
// entity.setName(name);
// subjectMapper.insert(entity);
// // Queue the course URLs for crawling
// for (String url : subjectVo.getCourseUrls()) {
// runData.addUrl(url);
// }
//
// }
//
//
// }
// NOTE(review): exact string comparison with no trailing slash; if baseUri() reports the
// page as "http://www.dzkbw.com/" (or with another scheme) this branch never runs. Confirm
// against the crawler's seed URL.
if (pageUrl.equals("http://www.dzkbw.com")) {
logger.info("开始解析书本信息:{}", pageUrl);
// NOTE(review): getName() is dereferenced without a null check; confirm the crawler always
// populates it, even when the selector matches nothing.
List<String> books = bookVo.getName();
books.forEach(item -> {
// Each scraped name becomes a new row; both counters start at zero and the remaining
// fields (subjectId, courseId, publishingHouse, orders) are left unset.
BookEntity bookEntity = new BookEntity();
bookEntity.setName(item);
bookEntity.setQuestionNum(0);
bookEntity.setPointNum(0);
bookMapper.insert(bookEntity);
});
}
}
}).build();
runData = crawler.getRunData();
// runData = crawler.getRunData();
// 获取科目
crawler.start(true);
}

Loading…
Cancel
Save