From ab57a9ceb4eb4ae3bb246f22791fadd2f7c78441 Mon Sep 17 00:00:00 2001
From: sh00859 <302959274@qq.com>
Date: Wed, 18 Jul 2018 17:39:39 +0800
Subject: [PATCH] =?UTF-8?q?=E7=AB=A0=E8=8A=82=E4=BF=AE=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (this section sits after the three-dash line and is not part of
the commit message):

- BookService.crawlerBook() now crawls a chapter-list page and stores one
  ChapterEntity per ".out-chapter" block (parentId "-1") plus one ChapterEntity
  per sub-chapter entry, whose parentId is the uid generated for the parent.
- ChapterVo.sonChapters and the anonymous PageParser carry explicit type
  arguments (List<String>, PageParser<ChapterVo>); with raw types the
  @Override on parse(Document, Element, ChapterVo) and setName(s) do not
  type-check.
- setCourseId("0") is called once per entity instead of twice in a row.
- sonChapters is checked for null before iterating, so a chapter block without
  list items does not abort the parse callback with a NullPointerException.
- NOTE(review): sub-chapter rows are inserted without an explicit uid (only the
  parent gets a UUID). Confirm that ChapterMapper or the table generates one.
- NOTE(review): indentation and blank context lines in the hunks below were
  reconstructed, and the blob ids on the "index" lines predate these edits.
  Run `git apply --check` against the target tree before applying.

 .../java/com/tamguo/model/vo/ChapterVo.java   | 36 ++++++++++++
 .../com/tamguo/service/impl/BookService.java  | 56 ++++++++++++-------
 2 files changed, 71 insertions(+), 21 deletions(-)
 create mode 100644 tamguo-crawler/src/main/java/com/tamguo/model/vo/ChapterVo.java

diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/vo/ChapterVo.java b/tamguo-crawler/src/main/java/com/tamguo/model/vo/ChapterVo.java
new file mode 100644
index 0000000..70378fc
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/model/vo/ChapterVo.java
@@ -0,0 +1,36 @@
+package com.tamguo.model.vo;
+
+import com.xuxueli.crawler.annotation.PageFieldSelect;
+import com.xuxueli.crawler.annotation.PageSelect;
+
+import java.util.List;
+
+/**
+ * Page VO bound to one ".out-chapter" element: {@code name} is selected from
+ * its "h3" and {@code sonChapters} from its ".out-list li" items.
+ */
+@PageSelect(cssQuery = ".out-chapter")
+public class ChapterVo {
+
+    @PageFieldSelect(cssQuery = "h3")
+    private String name;
+
+    @PageFieldSelect(cssQuery = ".out-list li")
+    private List<String> sonChapters;
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public List<String> getSonChapters() {
+        return sonChapters;
+    }
+
+    public void setSonChapters(List<String> sonChapters) {
+        this.sonChapters = sonChapters;
+    }
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/BookService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/BookService.java
index 2b27c9c..4d37f18 100644
--- a/tamguo-crawler/src/main/java/com/tamguo/service/impl/BookService.java
+++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/BookService.java
@@ -1,8 +1,8 @@
 package com.tamguo.service.impl;
 
-import com.tamguo.dao.BookMapper;
-import com.tamguo.model.BookEntity;
-import com.tamguo.model.vo.BookVo;
+import com.tamguo.dao.ChapterMapper;
+import com.tamguo.model.ChapterEntity;
+import com.tamguo.model.vo.ChapterVo;
 import com.tamguo.service.IBookService;
 import com.xuxueli.crawler.XxlCrawler;
 import com.xuxueli.crawler.parser.PageParser;
@@ -13,12 +13,15 @@ import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 
+import java.util.List;
+import java.util.UUID;
+
 @Service
 public class BookService implements IBookService {
 
 
     @Autowired
-    BookMapper bookMapper;
+    ChapterMapper chapterMapper;
 
     private Logger logger = LoggerFactory.getLogger(getClass());
 
@@ -26,33 +29,44 @@ public class BookService implements IBookService {
     @Override
     public void crawlerBook() {
         XxlCrawler crawler = new XxlCrawler.Builder()
-                .setUrls("http://www.ruiwen.com/jiaocai/")
+                .setUrls("https://tiku.baidu.com/tikupc/chapterlist/1bfd700abb68a98271fefa04-27-jiaocai-11")
                 .setAllowSpread(false)
                 .setFailRetryCount(5)
                 .setThreadCount(20)
-                .setPageParser(new PageParser<BookVo>() {
+                .setPageParser(new PageParser<ChapterVo>() {
                     @Override
-                    public void parse(Document html, Element pageVoElement, BookVo bookVo) {
+                    public void parse(Document html, Element pageVoElement, ChapterVo chapterVo) {
                         // 解析封装 PageVo 对象
-                        String pageUrl = html.baseUri();
-                        if (pageUrl.equals("http://www.ruiwen.com/jiaocai/")) {
-                            logger.info("开始解析书本信息:{}", pageUrl);
-                            String name = bookVo.getName();
-                            String url = bookVo.getBookUrl();
-
-                            BookEntity bookEntity = new BookEntity();
-                            bookEntity.setName(name);
-                            bookEntity.setReserveField1(url);
-                            bookEntity.setQuestionNum(0);
-                            bookEntity.setPointNum(0);
-                            bookMapper.insert(bookEntity);
-                        }
+                        // Insert the top-level chapter first; its uid is the parentId of the sub-chapters below.
+                        String parentName = chapterVo.getName();
+                        ChapterEntity chapterEntity = new ChapterEntity();
+                        String uid = UUID.randomUUID().toString().replace("-", "");
+                        chapterEntity.setUid(uid);
+                        chapterEntity.setName(parentName);
+                        chapterEntity.setCourseId("0");
+                        chapterEntity.setParentId("-1");
+                        chapterEntity.setQuestionNum(0);
+                        chapterEntity.setPointNum(0);
+                        chapterMapper.insert(chapterEntity);
+                        List<String> sonChapters = chapterVo.getSonChapters();
+                        // The list may be null when ".out-list li" matches nothing; there is nothing more to store then.
+                        if (sonChapters == null) {
+                            return;
+                        }
+                        sonChapters.forEach(s -> {
+                            ChapterEntity sonChapterEntity = new ChapterEntity();
+                            sonChapterEntity.setName(s);
+                            sonChapterEntity.setCourseId("0");
+                            sonChapterEntity.setParentId(uid);
+                            sonChapterEntity.setQuestionNum(0);
+                            sonChapterEntity.setPointNum(0);
+                            chapterMapper.insert(sonChapterEntity);
+                        });
 
                     }
 
                 }).build();
 
-//        runData = crawler.getRunData();
         // 获取科目
         crawler.start(true);
     }