diff --git a/tamguo-crawler/src/main/java/com/tamguo/dao/CrawlerBookMapper.java b/tamguo-crawler/src/main/java/com/tamguo/dao/CrawlerBookMapper.java
new file mode 100644
index 0000000..bf312b8
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/dao/CrawlerBookMapper.java
@@ -0,0 +1,11 @@
+package com.tamguo.dao;
+
+import com.tamguo.config.dao.SuperMapper;
+import com.tamguo.model.CrawlerBookEntity;
+
+/**
+ * Data-access mapper for {@link CrawlerBookEntity}.
+ */
+public interface CrawlerBookMapper extends SuperMapper<CrawlerBookEntity> {
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/BookEntity.java b/tamguo-crawler/src/main/java/com/tamguo/model/BookEntity.java
index 0d3967b..8803011 100644
--- a/tamguo-crawler/src/main/java/com/tamguo/model/BookEntity.java
+++ b/tamguo-crawler/src/main/java/com/tamguo/model/BookEntity.java
@@ -6,7 +6,7 @@ import com.tamguo.config.dao.SuperEntity;
 import java.io.Serializable;
 
 /**
- * The persistent class for the tiku_course database table.
+ * The persistent class for the tiku_book database table.
  */
 @TableName(value = "tiku_book")
 public class BookEntity extends SuperEntity implements Serializable {
diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/CrawlerBookEntity.java b/tamguo-crawler/src/main/java/com/tamguo/model/CrawlerBookEntity.java
new file mode 100644
index 0000000..79c5041
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/model/CrawlerBookEntity.java
@@ -0,0 +1,54 @@
+package com.tamguo.model;
+
+import com.baomidou.mybatisplus.annotations.TableName;
+import com.tamguo.config.dao.SuperEntity;
+
+import java.io.Serializable;
+
+/**
+ * The persistent class for the crawler_book database table.
+ */
+@TableName(value = "crawler_book")
+public class CrawlerBookEntity extends SuperEntity implements Serializable {
+    private static final long serialVersionUID = 1L;
+
+    /** URL of the crawled page image. */
+    private String bookUrl;
+
+    /** UID of the book the page belongs to. */
+    private String bookUid;
+
+    /** Position of the page, taken from the numeric image file name. */
+    private Integer orders;
+
+    public CrawlerBookEntity() {
+    }
+
+    public static long getSerialVersionUID() {
+        return serialVersionUID;
+    }
+
+    public String getBookUrl() {
+        return bookUrl;
+    }
+
+    public void setBookUrl(String bookUrl) {
+        this.bookUrl = bookUrl;
+    }
+
+    public String getBookUid() {
+        return bookUid;
+    }
+
+    public void setBookUid(String bookUid) {
+        this.bookUid = bookUid;
+    }
+
+    public Integer getOrders() {
+        return orders;
+    }
+
+    public void setOrders(Integer orders) {
+        this.orders = orders;
+    }
+}
\ No newline at end of file
diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/vo/CrawlerBookVo.java b/tamguo-crawler/src/main/java/com/tamguo/model/vo/CrawlerBookVo.java
new file mode 100644
index 0000000..0c348a1
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/model/vo/CrawlerBookVo.java
@@ -0,0 +1,24 @@
+package com.tamguo.model.vo;
+
+import com.xuxueli.crawler.annotation.PageFieldSelect;
+import com.xuxueli.crawler.annotation.PageSelect;
+import com.xuxueli.crawler.conf.XxlCrawlerConf;
+
+/**
+ * Value object populated from a crawled page; selection is rooted at the page's {@code body}.
+ */
+@PageSelect(cssQuery = "body")
+public class CrawlerBookVo {
+
+    /** Absolute {@code src} of the image matched by {@code .con .pic img}. */
+    @PageFieldSelect(cssQuery = ".con .pic img", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:src")
+    private String bookImage;
+
+    public String getBookImage() {
+        return bookImage;
+    }
+
+    public void setBookImage(String bookImage) {
+        this.bookImage = bookImage;
+    }
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/ICrawlerBookService.java b/tamguo-crawler/src/main/java/com/tamguo/service/ICrawlerBookService.java
new file mode 100644
index 0000000..f2c223f
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/service/ICrawlerBookService.java
@@ -0,0 +1,13 @@
+package com.tamguo.service;
+
+/**
+ * Service that crawls book pages.
+ */
+public interface ICrawlerBookService {
+
+    /**
+     * Crawls book data.
+     */
+    void crawlerBook();
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/CrawlerBookService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/CrawlerBookService.java
new file mode 100644
index 0000000..3a92131
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/CrawlerBookService.java
@@ -0,0 +1,107 @@
+package com.tamguo.service.impl;
+
+import com.tamguo.config.redis.CacheService;
+import com.tamguo.dao.CrawlerBookMapper;
+import com.tamguo.model.CrawlerBookEntity;
+import com.tamguo.model.vo.CrawlerBookVo;
+import com.tamguo.service.ICrawlerBookService;
+import com.xuxueli.crawler.XxlCrawler;
+import com.xuxueli.crawler.parser.PageParser;
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+/**
+ * Crawls textbook page images from ruiwen.com and stores one
+ * {@link CrawlerBookEntity} per usable image.
+ */
+@Service
+public class CrawlerBookService implements ICrawlerBookService {
+
+    /** Page the crawl starts from. */
+    private static final String START_URL = "http://www.ruiwen.com/jiaocai/";
+
+    /** White-list regex handed to the crawler (first-grade Chinese, volume one pages). */
+    private static final String PAGE_URL_REGEX =
+            "http://www\\.ruiwen\\.com/jiaocai/yuwen/bubianban/yinianjishangce/shangce\\d+\\.html";
+
+    /** Book UID written to every stored row; NOTE(review): hard-coded to a single book. */
+    private static final String BOOK_UID = "1019238600753074178";
+
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+
+    @Autowired
+    CrawlerBookMapper crawlerBookMapper;
+
+    @Autowired
+    CacheService cacheService;
+
+    /**
+     * Crawls the configured pages and inserts one row per page image whose
+     * file name is a number.
+     */
+    @Override
+    public void crawlerBook() {
+        XxlCrawler crawler = new XxlCrawler.Builder()
+                .setUrls(START_URL)
+                .setWhiteUrlRegexs(PAGE_URL_REGEX)
+                .setAllowSpread(true)
+                .setFailRetryCount(5)
+                .setThreadCount(20)
+                .setPageParser(new PageParser<CrawlerBookVo>() {
+                    @Override
+                    public void parse(Document html, Element pageVoElement, CrawlerBookVo crawlerBookVo) {
+                        saveBookPage(crawlerBookVo.getBookImage());
+                    }
+                }).build();
+
+        // NOTE(review): true presumably requests a synchronous run -- confirm against XxlCrawler#start.
+        crawler.start(true);
+    }
+
+    /**
+     * Stores one crawled page image.
+     *
+     * @param img image URL taken from the page; blank values are ignored
+     */
+    private void saveBookPage(String img) {
+        if (StringUtils.isBlank(img)) {
+            return;
+        }
+        Integer order = parseOrder(img);
+        if (order == null) {
+            // Skip rather than let a parsing exception escape into the crawler.
+            logger.warn("Skipping image without a numeric file name: {}", img);
+            return;
+        }
+        CrawlerBookEntity crawlerBookEntity = new CrawlerBookEntity();
+        crawlerBookEntity.setBookUid(BOOK_UID);
+        crawlerBookEntity.setBookUrl(img);
+        crawlerBookEntity.setOrders(order);
+        crawlerBookMapper.insert(crawlerBookEntity);
+    }
+
+    /**
+     * Reads the page order from the image file name, e.g. {@code .../12.jpg} yields 12.
+     *
+     * @param img image URL
+     * @return the order, or {@code null} when the file name is not an integer
+     */
+    private static Integer parseOrder(String img) {
+        int start = img.lastIndexOf('/') + 1;
+        int end = img.lastIndexOf('.');
+        if (end <= start) {
+            return null;
+        }
+        try {
+            return Integer.parseInt(img.substring(start, end));
+        } catch (NumberFormatException e) {
+            return null;
+        }
+    }
+
+}
diff --git a/tamguo-crawler/src/test/java/com/tamguo/CrawlerBookCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/CrawlerBookCrawler.java
new file mode 100644
index 0000000..0a0c2da
--- /dev/null
+++ b/tamguo-crawler/src/test/java/com/tamguo/CrawlerBookCrawler.java
@@ -0,0 +1,26 @@
+package com.tamguo;
+
+import com.tamguo.service.IBookService;
+import com.tamguo.service.ICrawlerBookService;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.test.context.junit4.SpringRunner;
+
+/**
+ * Runs {@link ICrawlerBookService#crawlerBook()} inside a Spring Boot test context.
+ */
+@RunWith(SpringRunner.class)
+@SpringBootTest
+public class CrawlerBookCrawler {
+
+    @Autowired
+    ICrawlerBookService crawlerBookService;
+
+    @Test
+    public void crawlerBook() throws Exception {
+        crawlerBookService.crawlerBook();
+    }
+
+}