tamguo 7 years ago
commit af64ccd6ca

@ -0,0 +1,8 @@
package com.tamguo.dao;
import com.tamguo.config.dao.SuperMapper;
import com.tamguo.model.BookEntity;
/**
 * Data-access mapper for {@link BookEntity}.
 *
 * <p>Declares no methods of its own; every operation available on it comes from
 * {@code SuperMapper}.
 */
public interface BookMapper extends SuperMapper<BookEntity> {
}

@ -0,0 +1,8 @@
package com.tamguo.dao;
import com.tamguo.config.dao.SuperMapper;
import com.tamguo.model.CrawlerBookEntity;
/**
 * Data-access mapper for {@link CrawlerBookEntity}.
 *
 * <p>Declares no methods of its own; every operation available on it comes from
 * {@code SuperMapper}.
 */
public interface CrawlerBookMapper extends SuperMapper<CrawlerBookEntity> {
}

@ -0,0 +1,102 @@
package com.tamguo.model;
import com.baomidou.mybatisplus.annotations.TableName;
import com.tamguo.config.dao.SuperEntity;
import java.io.Serializable;
/**
 * The persistent class for the tiku_book database table.
 *
 * <p>A plain mutable bean: every property has a getter and a setter and no
 * validation is performed here.
 */
@TableName(value = "tiku_book")
public class BookEntity extends SuperEntity<BookEntity> implements Serializable {

    private static final long serialVersionUID = 1L;

    // Field declaration order is kept as in the original on purpose.
    private String subjectId;
    private String courseId;
    private String name;
    private String publishingHouse;
    private Integer questionNum;
    private Integer pointNum;
    private Integer orders;
    private String reserveField1;

    /** Creates an entity with every property unset. */
    public BookEntity() {
        super();
    }

    /** @return the serialization version constant of this class */
    public static long getSerialVersionUID() {
        return BookEntity.serialVersionUID;
    }

    /** @return the subject id, or {@code null} if unset */
    public String getSubjectId() {
        return this.subjectId;
    }

    /** @param subjectId the subject id to store */
    public void setSubjectId(String subjectId) {
        this.subjectId = subjectId;
    }

    /** @return the course id, or {@code null} if unset */
    public String getCourseId() {
        return this.courseId;
    }

    /** @param courseId the course id to store */
    public void setCourseId(String courseId) {
        this.courseId = courseId;
    }

    /** @return the book name, or {@code null} if unset */
    public String getName() {
        return this.name;
    }

    /** @param name the book name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the publishing house, or {@code null} if unset */
    public String getPublishingHouse() {
        return this.publishingHouse;
    }

    /** @param publishingHouse the publishing house to store */
    public void setPublishingHouse(String publishingHouse) {
        this.publishingHouse = publishingHouse;
    }

    /** @return the stored question count, or {@code null} if unset */
    public Integer getQuestionNum() {
        return this.questionNum;
    }

    /** @param questionNum the question count to store */
    public void setQuestionNum(Integer questionNum) {
        this.questionNum = questionNum;
    }

    /** @return the stored point count, or {@code null} if unset */
    public Integer getPointNum() {
        return this.pointNum;
    }

    /** @param pointNum the point count to store */
    public void setPointNum(Integer pointNum) {
        this.pointNum = pointNum;
    }

    /** @return the stored ordering value, or {@code null} if unset */
    public Integer getOrders() {
        return this.orders;
    }

    /** @param orders the ordering value to store */
    public void setOrders(Integer orders) {
        this.orders = orders;
    }

    /**
     * @return the value of the spare column; NOTE(review): the crawler service
     *     appears to treat this as the book's source page URL - confirm
     */
    public String getReserveField1() {
        return this.reserveField1;
    }

    /** @param reserveField1 the spare-column value to store */
    public void setReserveField1(String reserveField1) {
        this.reserveField1 = reserveField1;
    }
}

@ -0,0 +1,51 @@
package com.tamguo.model;
import com.baomidou.mybatisplus.annotations.TableName;
import com.tamguo.config.dao.SuperEntity;
import java.io.Serializable;
/**
 * The persistent class for the crawler_book database table.
 *
 * <p>A plain mutable bean holding a URL, the uid of the book it belongs to and
 * an ordering value.
 */
@TableName(value = "crawler_book")
public class CrawlerBookEntity extends SuperEntity<CrawlerBookEntity> implements Serializable {

    private static final long serialVersionUID = 1L;

    private String bookUrl;
    private String bookUid;
    private Integer orders;

    /** Creates an entity with every property unset. */
    public CrawlerBookEntity() {
        super();
    }

    /** @return the serialization version constant of this class */
    public static long getSerialVersionUID() {
        return CrawlerBookEntity.serialVersionUID;
    }

    /** @return the stored URL, or {@code null} if unset */
    public String getBookUrl() {
        return this.bookUrl;
    }

    /** @param bookUrl the URL to store */
    public void setBookUrl(String bookUrl) {
        this.bookUrl = bookUrl;
    }

    /** @return the uid of the owning book, or {@code null} if unset */
    public String getBookUid() {
        return this.bookUid;
    }

    /** @param bookUid the uid of the owning book */
    public void setBookUid(String bookUid) {
        this.bookUid = bookUid;
    }

    /** @return the stored ordering value, or {@code null} if unset */
    public Integer getOrders() {
        return this.orders;
    }

    /** @param orders the ordering value to store */
    public void setOrders(Integer orders) {
        this.orders = orders;
    }
}

@ -0,0 +1,32 @@
package com.tamguo.model.vo;
import com.xuxueli.crawler.annotation.PageFieldSelect;
import com.xuxueli.crawler.annotation.PageSelect;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
/**
 * Crawler page object selected with the CSS query {@code .pic_right li}.
 *
 * <p>Carries the text matched by {@code .text} and the absolute {@code href}
 * of the {@code a} element inside the selected node.
 */
@PageSelect(cssQuery = ".pic_right li")
public class BookVo {

    /** Value selected by the {@code .text} query. */
    @PageFieldSelect(cssQuery = ".text")
    private String name;

    /** Absolute link target ({@code abs:href}) of the {@code a} element. */
    @PageFieldSelect(cssQuery = "a", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:href")
    private String bookUrl;

    /** @return the extracted name, or {@code null} if unset */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the extracted link, or {@code null} if unset */
    public String getBookUrl() {
        return this.bookUrl;
    }

    /** @param bookUrl the link to store */
    public void setBookUrl(String bookUrl) {
        this.bookUrl = bookUrl;
    }
}

@ -0,0 +1,32 @@
package com.tamguo.model.vo;
import com.xuxueli.crawler.annotation.PageFieldSelect;
import com.xuxueli.crawler.annotation.PageSelect;
import java.util.List;
/**
 * Crawler page object selected with the CSS query {@code .out-chapter}.
 *
 * <p>Carries the chapter heading ({@code h3}) and the entries matched by
 * {@code .out-list li} beneath it.
 */
@PageSelect(cssQuery = ".out-chapter")
public class ChapterVo {

    /** Value selected by the {@code h3} query. */
    @PageFieldSelect(cssQuery = "h3")
    private String name;

    /**
     * Values selected by the {@code .out-list li} query.
     * NOTE(review): may be left {@code null} by the crawler when nothing matches - confirm.
     */
    @PageFieldSelect(cssQuery = ".out-list li")
    private List<String> sonChapters;

    /** @return the chapter heading, or {@code null} if unset */
    public String getName() {
        return this.name;
    }

    /** @param name the chapter heading to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the sub-chapter entries exactly as stored (no copy is made) */
    public List<String> getSonChapters() {
        return this.sonChapters;
    }

    /** @param sonChapters the sub-chapter entries; stored by reference */
    public void setSonChapters(List<String> sonChapters) {
        this.sonChapters = sonChapters;
    }
}

@ -0,0 +1,20 @@
package com.tamguo.model.vo;
import com.xuxueli.crawler.annotation.PageFieldSelect;
import com.xuxueli.crawler.annotation.PageSelect;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
/**
 * Crawler page object selected with the CSS query {@code body}.
 *
 * <p>Carries the absolute {@code src} of the image matched by {@code .con .pic img}.
 */
@PageSelect(cssQuery = "body")
public class CrawlerBookVo {

    /** Absolute image address ({@code abs:src}) of the matched {@code img} element. */
    @PageFieldSelect(cssQuery = ".con .pic img", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:src")
    private String bookImage;

    /** @return the extracted image address, or {@code null} if unset */
    public String getBookImage() {
        return this.bookImage;
    }

    /** @param bookImage the image address to store */
    public void setBookImage(String bookImage) {
        this.bookImage = bookImage;
    }
}

@ -0,0 +1,11 @@
package com.tamguo.service;
/**
 * Service contract exposing a single crawl operation.
 */
public interface IBookService {
/**
 * Runs the crawl. Takes no arguments and returns nothing, so any result is
 * visible only through the implementation's side effects.
 */
void crawlerBook();
}

@ -0,0 +1,11 @@
package com.tamguo.service;
/**
 * Service contract exposing a single crawl operation.
 */
public interface ICrawlerBookService {
/**
 * Runs the crawl. Takes no arguments and returns nothing, so any result is
 * visible only through the implementation's side effects.
 */
void crawlerBook();
}

@ -0,0 +1,74 @@
package com.tamguo.service.impl;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.model.ChapterEntity;
import com.tamguo.model.vo.ChapterVo;
import com.tamguo.service.IBookService;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.parser.PageParser;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.List;
import java.util.UUID;
/**
 * Crawls one chapter-list page and stores every chapter found on it through
 * {@link ChapterMapper}.
 *
 * <p>Each {@link ChapterVo} produced by the crawler becomes one top-level chapter
 * row (parent id {@code "-1"}) plus one row per sub-chapter, linked to the parent
 * by the parent's uid.
 */
@Service
public class BookService implements IBookService {

    /** The single page that is crawled; link spreading is disabled. */
    private static final String CHAPTER_LIST_URL =
            "https://tiku.baidu.com/tikupc/chapterlist/1bfd700abb68a98271fefa04-27-jiaocai-11";

    /** Parent id written on top-level chapters. */
    private static final String ROOT_PARENT_ID = "-1";

    @Autowired
    ChapterMapper chapterMapper;

    private Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Builds the crawler for {@link #CHAPTER_LIST_URL} and starts it, persisting
     * every parsed chapter block.
     */
    @Override
    public void crawlerBook() {
        XxlCrawler crawler = new XxlCrawler.Builder()
                .setUrls(CHAPTER_LIST_URL)
                .setAllowSpread(false)
                .setFailRetryCount(5)
                .setThreadCount(20)
                .setPageParser(new PageParser<ChapterVo>() {
                    @Override
                    public void parse(Document html, Element pageVoElement, ChapterVo chapterVo) {
                        saveChapter(chapterVo);
                    }
                }).build();

        // NOTE(review): 'true' presumably makes start() block until the crawl ends - confirm.
        crawler.start(true);
    }

    /**
     * Inserts the parent chapter described by {@code chapterVo} followed by its
     * sub-chapters.
     *
     * @param chapterVo one chapter block extracted from the page
     */
    private void saveChapter(ChapterVo chapterVo) {
        // The uid is generated here so the children can reference it as parent id.
        String parentUid = UUID.randomUUID().toString().replace("-", "");
        ChapterEntity parent = newChapter(chapterVo.getName(), ROOT_PARENT_ID);
        parent.setUid(parentUid);
        chapterMapper.insert(parent);

        List<String> sonChapters = chapterVo.getSonChapters();
        if (sonChapters == null) {
            // A chapter without list items must not fail after its parent row was
            // already written; previously this dereferenced null.
            logger.debug("Chapter '{}' has no sub-chapters", chapterVo.getName());
            return;
        }
        for (String sonName : sonChapters) {
            // NOTE(review): children get no explicit uid, as in the original code;
            // this relies on the persistence layer generating one - confirm.
            chapterMapper.insert(newChapter(sonName, parentUid));
        }
    }

    /**
     * Creates a chapter entity carrying the defaults shared by parent and child rows.
     *
     * @param name     chapter title as extracted from the page
     * @param parentId uid of the parent chapter, or {@link #ROOT_PARENT_ID}
     * @return a new entity that has not been persisted yet
     */
    private static ChapterEntity newChapter(String name, String parentId) {
        ChapterEntity chapter = new ChapterEntity();
        chapter.setName(name);
        // NOTE(review): the original called setCourseId("0") twice in a row; the
        // second call may have been meant for a different id field - confirm.
        chapter.setCourseId("0");
        chapter.setParentId(parentId);
        chapter.setQuestionNum(0);
        chapter.setPointNum(0);
        return chapter;
    }
}

@ -0,0 +1,77 @@
package com.tamguo.service.impl;
import com.baomidou.mybatisplus.mapper.Condition;
import com.tamguo.dao.BookMapper;
import com.tamguo.dao.CrawlerBookMapper;
import com.tamguo.model.BookEntity;
import com.tamguo.model.CrawlerBookEntity;
import com.tamguo.model.vo.CrawlerBookVo;
import com.tamguo.service.ICrawlerBookService;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.parser.PageParser;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.List;
/**
 * For every stored {@link BookEntity}, crawls pages whose URL matches a pattern
 * derived from the book's {@code reserveField1} value and stores one
 * {@link CrawlerBookEntity} per page image found.
 */
@Service
public class CrawlerBookService implements ICrawlerBookService {

    /** Entry page every per-book crawl starts from. */
    private static final String SEED_URL = "http://www.ruiwen.com/jiaocai/";

    @Autowired
    CrawlerBookMapper crawlerBookMapper;

    @Autowired
    BookMapper bookMapper;

    private Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Runs one crawl per book. Books without a URL in {@code reserveField1} are
     * skipped with a warning instead of aborting the whole run.
     */
    @Override
    public void crawlerBook() {
        List<BookEntity> bookEntities = bookMapper.selectList(Condition.EMPTY);
        for (BookEntity bookEntity : bookEntities) {
            String url = bookEntity.getReserveField1();
            String bookId = bookEntity.getUid();
            if (StringUtils.isBlank(url)) {
                // Previously a null URL threw and stopped processing of all remaining books.
                logger.warn("Skipping book {}: reserveField1 holds no URL", bookId);
                continue;
            }

            XxlCrawler crawler = new XxlCrawler.Builder()
                    .setUrls(SEED_URL)
                    .setWhiteUrlRegexs(toWhiteUrlRegex(url))
                    .setAllowSpread(true)
                    .setFailRetryCount(5)
                    .setThreadCount(20)
                    .setPageParser(new PageParser<CrawlerBookVo>() {
                        @Override
                        public void parse(Document html, Element pageVoElement, CrawlerBookVo crawlerBookVo) {
                            savePageImage(bookId, html.baseUri(), crawlerBookVo.getBookImage());
                        }
                    }).build();

            // NOTE(review): 'true' presumably makes start() block until the crawl ends - confirm.
            crawler.start(true);
        }
    }

    /**
     * Turns a concrete page URL into the whitelist pattern for its sibling pages:
     * every digit run becomes {@code \d+} and every dot is escaped.
     *
     * <p>Example: {@code http://www.ruiwen.com/jiaocai/yuwen/renjiaoban/yinianjishangce/shangce1.html}
     * becomes {@code http://www\.ruiwen\.com/jiaocai/yuwen/renjiaoban/yinianjishangce/shangce\d+\.html}.
     *
     * @param url non-null page URL
     * @return the regular expression used as the crawler's URL whitelist
     */
    private static String toWhiteUrlRegex(String url) {
        return url.replaceAll("\\d+", "\\\\d+").replaceAll("\\.", "\\\\.");
    }

    /**
     * Stores one page image for a book, using the numeric file name of the image
     * as its ordering value. Pages without an image, or whose image file name is
     * not numeric, are skipped.
     *
     * @param bookId  uid of the book being crawled
     * @param pageUrl address of the page the image was found on (for logging only)
     * @param img     absolute image address extracted from the page, may be blank
     */
    private void savePageImage(String bookId, String pageUrl, String img) {
        if (!StringUtils.isNotBlank(img)) {
            return;
        }
        Integer order = parsePageOrder(img);
        if (order == null) {
            logger.warn("Skipping image {} on {}: file name is not a page number", img, pageUrl);
            return;
        }
        CrawlerBookEntity crawlerBookEntity = new CrawlerBookEntity();
        crawlerBookEntity.setBookUid(bookId);
        crawlerBookEntity.setBookUrl(img);
        crawlerBookEntity.setOrders(order);
        crawlerBookMapper.insert(crawlerBookEntity);
    }

    /**
     * Extracts the number between the last {@code /} and the last {@code .} of an
     * image address, e.g. {@code 12} from {@code http://host/dir/12.jpg}.
     *
     * @param imageUrl non-null image address
     * @return the parsed number, or {@code null} when that segment is missing or not an integer
     */
    private static Integer parsePageOrder(String imageUrl) {
        int nameStart = imageUrl.lastIndexOf("/") + 1;
        int extensionStart = imageUrl.lastIndexOf(".");
        if (extensionStart <= nameStart) {
            // No extension after the last slash, or an empty file name.
            return null;
        }
        try {
            return Integer.parseInt(imageUrl.substring(nameStart, extensionStart));
        } catch (NumberFormatException e) {
            return null;
        }
    }
}

@ -0,0 +1,23 @@
package com.tamguo;
import com.tamguo.service.IBookService;
import com.tamguo.service.ISubjectService;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
/**
 * Spring Boot test harness that triggers {@link IBookService#crawlerBook()}.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class BookCrawler {

    @Autowired
    IBookService bookService;

    /**
     * Invokes the crawl once. There are no assertions, so the test only fails
     * when the call throws.
     */
    @Test
    public void crawlerBook() throws Exception {
        this.bookService.crawlerBook();
    }
}

@ -0,0 +1,23 @@
package com.tamguo;
import com.tamguo.service.IBookService;
import com.tamguo.service.ICrawlerBookService;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
/**
 * Spring Boot test harness that triggers {@link ICrawlerBookService#crawlerBook()}.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class CrawlerBookCrawler {

    @Autowired
    ICrawlerBookService crawlerBookService;

    /**
     * Invokes the crawl once. There are no assertions, so the test only fails
     * when the call throws.
     */
    @Test
    public void crawlerBook() throws Exception {
        this.crawlerBookService.crawlerBook();
    }
}
Loading…
Cancel
Save