逻辑修改

main
sh00859 7 years ago
parent 4fe94c1cf0
commit 90bccde83e

@ -27,6 +27,8 @@ public class BookEntity extends SuperEntity<BookEntity> implements Serializable
private Integer orders; private Integer orders;
private String reserveField1;
public BookEntity() { public BookEntity() {
} }
@ -89,4 +91,12 @@ public class BookEntity extends SuperEntity<BookEntity> implements Serializable
public void setOrders(Integer orders) { public void setOrders(Integer orders) {
this.orders = orders; this.orders = orders;
} }
public String getReserveField1() {
return reserveField1;
}
public void setReserveField1(String reserveField1) {
this.reserveField1 = reserveField1;
}
} }

@ -4,19 +4,29 @@ import com.xuxueli.crawler.annotation.PageFieldSelect;
import com.xuxueli.crawler.annotation.PageSelect; import com.xuxueli.crawler.annotation.PageSelect;
import com.xuxueli.crawler.conf.XxlCrawlerConf; import com.xuxueli.crawler.conf.XxlCrawlerConf;
import java.util.List; @PageSelect(cssQuery = ".pic_right li")
@PageSelect(cssQuery = "body")
public class BookVo { public class BookVo {
@PageFieldSelect(cssQuery = ".pic_right .text") @PageFieldSelect(cssQuery = ".text")
private List<String> name; private String name;
@PageFieldSelect(cssQuery = "a", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:href")
private String bookUrl;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public List<String> getName() { public String getBookUrl() {
return name; return bookUrl;
} }
public void setName(List<String> name) { public void setBookUrl(String bookUrl) {
this.name = name; this.bookUrl = bookUrl;
} }
} }

@ -13,8 +13,6 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.List;
@Service @Service
public class BookService implements IBookService { public class BookService implements IBookService {
@ -39,14 +37,15 @@ public class BookService implements IBookService {
String pageUrl = html.baseUri(); String pageUrl = html.baseUri();
if (pageUrl.equals("http://www.ruiwen.com/jiaocai/")) { if (pageUrl.equals("http://www.ruiwen.com/jiaocai/")) {
logger.info("开始解析书本信息:{}", pageUrl); logger.info("开始解析书本信息:{}", pageUrl);
List<String> books = bookVo.getName(); String name = bookVo.getName();
books.forEach(item -> { String url = bookVo.getBookUrl();
BookEntity bookEntity = new BookEntity();
bookEntity.setName(item); BookEntity bookEntity = new BookEntity();
bookEntity.setQuestionNum(0); bookEntity.setName(name);
bookEntity.setPointNum(0); bookEntity.setReserveField1(url);
bookMapper.insert(bookEntity); bookEntity.setQuestionNum(0);
}); bookEntity.setPointNum(0);
bookMapper.insert(bookEntity);
} }

@ -1,7 +1,9 @@
package com.tamguo.service.impl; package com.tamguo.service.impl;
import com.tamguo.config.redis.CacheService; import com.baomidou.mybatisplus.mapper.Condition;
import com.tamguo.dao.BookMapper;
import com.tamguo.dao.CrawlerBookMapper; import com.tamguo.dao.CrawlerBookMapper;
import com.tamguo.model.BookEntity;
import com.tamguo.model.CrawlerBookEntity; import com.tamguo.model.CrawlerBookEntity;
import com.tamguo.model.vo.CrawlerBookVo; import com.tamguo.model.vo.CrawlerBookVo;
import com.tamguo.service.ICrawlerBookService; import com.tamguo.service.ICrawlerBookService;
@ -15,52 +17,61 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.List;
@Service @Service
public class CrawlerBookService implements ICrawlerBookService { public class CrawlerBookService implements ICrawlerBookService {
@Autowired @Autowired
CrawlerBookMapper crawlerBookMapper; CrawlerBookMapper crawlerBookMapper;
@Autowired
BookMapper bookMapper;
private Logger logger = LoggerFactory.getLogger(getClass()); private Logger logger = LoggerFactory.getLogger(getClass());
private static final String FILES_NO_FORMAT = "000000";
private static final String FILES_PREFIX = "FPIMAGE";
private static final String DOMAIN = "http://www.tamguo.com";
@Autowired
CacheService cacheService;
//一年级语文上册
@Override @Override
public void crawlerBook() { public void crawlerBook() {
XxlCrawler crawler = new XxlCrawler.Builder() List<BookEntity> bookEntities = bookMapper.selectList(Condition.EMPTY);
.setUrls("http://www.ruiwen.com/jiaocai/") for (BookEntity bookEntity : bookEntities) {
.setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yuwen/bubianban/yinianjishangce/shangce\\d+\\.html")//一年级语文上册 String url = bookEntity.getReserveField1();
// .setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yuwen/bubianban/yinianjixiace/xiace\\d+\\.html")//一年级语文下册 String bookId = bookEntity.getUid();
// .setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yingyu/bubianban/yinianjixiace/xiace\\d+\\.html")//一年级英语上册 String regexs = url.replaceAll("\\d+", "\\\\d+").replaceAll("\\.","\\\\.");
.setAllowSpread(true)
.setFailRetryCount(5) XxlCrawler crawler = new XxlCrawler.Builder()
.setThreadCount(20) .setUrls("http://www.ruiwen.com/jiaocai/")
.setPageParser(new PageParser<CrawlerBookVo>() { .setWhiteUrlRegexs(regexs)//
@Override .setAllowSpread(true)
public void parse(Document html, Element pageVoElement, CrawlerBookVo crawlerBookVo) { .setFailRetryCount(5)
// 解析封装 PageVo 对象 .setThreadCount(20)
String img = crawlerBookVo.getBookImage(); .setPageParser(new PageParser<CrawlerBookVo>() {
if (StringUtils.isNoneBlank(img)) { @Override
CrawlerBookEntity crawlerBookEntity = new CrawlerBookEntity(); public void parse(Document html, Element pageVoElement, CrawlerBookVo crawlerBookVo) {
crawlerBookEntity.setBookUid("1019244094196551682");//一年级语文上册 String pageUrl = html.baseUri();
// crawlerBookEntity.setBookUid("1019244094704062466");//一年级语文下册 // 解析封装 PageVo 对象
// crawlerBookEntity.setBookUid("1019244096797020162");//一年级英语上册 String img = crawlerBookVo.getBookImage();
crawlerBookEntity.setBookUrl(crawlerBookVo.getBookImage()); if (StringUtils.isNoneBlank(img)) {
crawlerBookEntity.setOrders(Integer.parseInt(img.substring(img.lastIndexOf("/") + 1, img.lastIndexOf(".")))); CrawlerBookEntity crawlerBookEntity = new CrawlerBookEntity();
crawlerBookMapper.insert(crawlerBookEntity); crawlerBookEntity.setBookUid(bookId);
crawlerBookEntity.setBookUrl(crawlerBookVo.getBookImage());
crawlerBookEntity.setOrders(Integer.parseInt(img.substring(img.lastIndexOf("/") + 1, img.lastIndexOf("."))));
crawlerBookMapper.insert(crawlerBookEntity);
}
} }
} }).build();
}).build();
// 获取科目
crawler.start(true);
}
// 获取科目
crawler.start(true);
} }
// public static void main(String[] args) {
// String url = "http://www.ruiwen.com/jiaocai/yuwen/renjiaoban/yinianjishangce/shangce1.html";
// System.out.println(url.replaceAll("\\d+", "\\\\d+").replaceAll("\\.","\\\\."));
// }
} }

Loading…
Cancel
Save