From 4fe94c1cf0cf7a03e756a1f79aed343755130ad7 Mon Sep 17 00:00:00 2001 From: cff <302959274@qq.com> Date: Tue, 17 Jul 2018 23:43:13 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=80=E5=B9=B4=E7=BA=A7=E8=AF=AD=E6=96=87?= =?UTF-8?q?=E4=B8=8A=E5=86=8C+=E4=B8=80=E5=B9=B4=E7=BA=A7=E8=AF=AD?= =?UTF-8?q?=E6=96=87=E4=B8=8B=E5=86=8C+=E4=B8=80=E5=B9=B4=E7=BA=A7?= =?UTF-8?q?=E8=8B=B1=E8=AF=AD=E4=B8=8A=E5=86=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/main/java/com/tamguo/model/vo/BookVo.java | 2 +- .../java/com/tamguo/service/impl/CrawlerBookService.java | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/vo/BookVo.java b/tamguo-crawler/src/main/java/com/tamguo/model/vo/BookVo.java index 19f9dcd..8d337c0 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/model/vo/BookVo.java +++ b/tamguo-crawler/src/main/java/com/tamguo/model/vo/BookVo.java @@ -9,7 +9,7 @@ import java.util.List; @PageSelect(cssQuery = "body") public class BookVo { - @PageFieldSelect(cssQuery = ".text") + @PageFieldSelect(cssQuery = ".pic_right .text") private List name; public List getName() { diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/CrawlerBookService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/CrawlerBookService.java index 3a92131..4d54fe7 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/service/impl/CrawlerBookService.java +++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/CrawlerBookService.java @@ -36,7 +36,9 @@ public class CrawlerBookService implements ICrawlerBookService { public void crawlerBook() { XxlCrawler crawler = new XxlCrawler.Builder() .setUrls("http://www.ruiwen.com/jiaocai/") - .setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yuwen/bubianban/yinianjishangce/shangce\\d+\\.html") + .setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yuwen/bubianban/yinianjishangce/shangce\\d+\\.html")//一年级语文上册 +// .setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yuwen/bubianban/yinianjixiace/xiace\\d+\\.html")//一年级语文下册 +// .setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yingyu/bubianban/yinianjixiace/xiace\\d+\\.html")//一年级英语上册 .setAllowSpread(true) .setFailRetryCount(5) .setThreadCount(20) @@ -47,7 +49,9 @@ public class CrawlerBookService implements ICrawlerBookService { String img = crawlerBookVo.getBookImage(); if (StringUtils.isNoneBlank(img)) { CrawlerBookEntity crawlerBookEntity = new CrawlerBookEntity(); - crawlerBookEntity.setBookUid("1019238600753074178"); + crawlerBookEntity.setBookUid("1019244094196551682");//一年级语文上册 +// crawlerBookEntity.setBookUid("1019244094704062466");//一年级语文下册 +// crawlerBookEntity.setBookUid("1019244096797020162");//一年级英语上册 crawlerBookEntity.setBookUrl(crawlerBookVo.getBookImage()); crawlerBookEntity.setOrders(Integer.parseInt(img.substring(img.lastIndexOf("/") + 1, img.lastIndexOf(".")))); crawlerBookMapper.insert(crawlerBookEntity);