逻辑修改

main
sh00859 7 years ago
parent 4fe94c1cf0
commit 90bccde83e

@ -27,6 +27,8 @@ public class BookEntity extends SuperEntity<BookEntity> implements Serializable
private Integer orders;
private String reserveField1;
/**
 * Creates an empty {@code BookEntity}; the constructor body sets no fields.
 */
public BookEntity() {
}
@ -89,4 +91,12 @@ public class BookEntity extends SuperEntity<BookEntity> implements Serializable
/**
 * Stores the given value in the {@code orders} field.
 *
 * @param orders the value to assign to {@code orders}
 */
public void setOrders(Integer orders) {
this.orders = orders;
}
/**
 * Returns the value of the {@code reserveField1} field.
 *
 * In this change set, BookService writes the crawled book URL into this field and
 * CrawlerBookService reads it back as the URL used to build its crawl whitelist.
 *
 * @return the current {@code reserveField1} value
 */
public String getReserveField1() {
return reserveField1;
}
/**
 * Stores the given value in the {@code reserveField1} field.
 *
 * @param reserveField1 the value to assign to {@code reserveField1}
 */
public void setReserveField1(String reserveField1) {
this.reserveField1 = reserveField1;
}
}

@ -4,19 +4,29 @@ import com.xuxueli.crawler.annotation.PageFieldSelect;
import com.xuxueli.crawler.annotation.PageSelect;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
import java.util.List;
/**
 * Page value object for the crawler, carrying a book name and a book URL.
 *
 * NOTE(review): this listing appears to be a diff view with the +/- markers stripped, so
 * several declarations show up twice (two {@code @PageSelect} annotations, two
 * {@code name} fields, two getName/setName headers). Presumably the
 * {@code List<String>} / {@code "body"} variants are the removed lines and the
 * {@code String} / {@code ".pic_right li"} variants are the added ones. Confirm against
 * the repository before relying on either.
 */
@PageSelect(cssQuery = "body")
@PageSelect(cssQuery = ".pic_right li")
public class BookVo {
// NOTE(review): presumably the pre-change declaration (list of names selected from the whole body); verify.
@PageFieldSelect(cssQuery = ".pic_right .text")
private List<String> name;
// Book name, annotated with the ".text" CSS query.
@PageFieldSelect(cssQuery = ".text")
private String name;
// Book link, annotated to select the "abs:href" attribute value of an "a" element.
@PageFieldSelect(cssQuery = "a", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:href")
private String bookUrl;
/**
 * Returns the {@code name} field.
 * NOTE(review): two method headers follow; presumably only the {@code String} one is live. Confirm.
 */
public List<String> getName() {
public String getName() {
return name;
}
/**
 * Stores the given value in the {@code name} field.
 * NOTE(review): two method headers follow; presumably only the {@code String} one is live. Confirm.
 */
public void setName(List<String> name) {
public void setName(String name) {
this.name = name;
}
/**
 * Returns the {@code bookUrl} field.
 *
 * @return the current {@code bookUrl} value
 */
public String getBookUrl() {
return bookUrl;
}
/**
 * Stores the given value in the {@code bookUrl} field.
 *
 * @param bookUrl the value to assign to {@code bookUrl}
 */
public void setBookUrl(String bookUrl) {
this.bookUrl = bookUrl;
}
}

@ -13,8 +13,6 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.List;
@Service
public class BookService implements IBookService {
@ -39,14 +37,15 @@ public class BookService implements IBookService {
String pageUrl = html.baseUri();
if (pageUrl.equals("http://www.ruiwen.com/jiaocai/")) {
logger.info("开始解析书本信息:{}", pageUrl);
List<String> books = bookVo.getName();
books.forEach(item -> {
String name = bookVo.getName();
String url = bookVo.getBookUrl();
BookEntity bookEntity = new BookEntity();
bookEntity.setName(item);
bookEntity.setName(name);
bookEntity.setReserveField1(url);
bookEntity.setQuestionNum(0);
bookEntity.setPointNum(0);
bookMapper.insert(bookEntity);
});
}

@ -1,7 +1,9 @@
package com.tamguo.service.impl;
import com.tamguo.config.redis.CacheService;
import com.baomidou.mybatisplus.mapper.Condition;
import com.tamguo.dao.BookMapper;
import com.tamguo.dao.CrawlerBookMapper;
import com.tamguo.model.BookEntity;
import com.tamguo.model.CrawlerBookEntity;
import com.tamguo.model.vo.CrawlerBookVo;
import com.tamguo.service.ICrawlerBookService;
@ -15,47 +17,49 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.List;
@Service
public class CrawlerBookService implements ICrawlerBookService {
@Autowired
CrawlerBookMapper crawlerBookMapper;
@Autowired
BookMapper bookMapper;
private Logger logger = LoggerFactory.getLogger(getClass());
private static final String FILES_NO_FORMAT = "000000";
private static final String FILES_PREFIX = "FPIMAGE";
private static final String DOMAIN = "http://www.tamguo.com";
@Autowired
CacheService cacheService;
//一年级语文上册
@Override
public void crawlerBook() {
List<BookEntity> bookEntities = bookMapper.selectList(Condition.EMPTY);
for (BookEntity bookEntity : bookEntities) {
String url = bookEntity.getReserveField1();
String bookId = bookEntity.getUid();
String regexs = url.replaceAll("\\d+", "\\\\d+").replaceAll("\\.","\\\\.");
XxlCrawler crawler = new XxlCrawler.Builder()
.setUrls("http://www.ruiwen.com/jiaocai/")
.setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yuwen/bubianban/yinianjishangce/shangce\\d+\\.html")//一年级语文上册
// .setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yuwen/bubianban/yinianjixiace/xiace\\d+\\.html")//一年级语文下册
// .setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yingyu/bubianban/yinianjixiace/xiace\\d+\\.html")//一年级英语上册
.setWhiteUrlRegexs(regexs)//
.setAllowSpread(true)
.setFailRetryCount(5)
.setThreadCount(20)
.setPageParser(new PageParser<CrawlerBookVo>() {
/**
 * Handles one parsed page: when the page VO's book image value passes
 * {@code StringUtils.isNoneBlank}, builds a {@code CrawlerBookEntity} from it and
 * inserts it through {@code crawlerBookMapper}. Otherwise does nothing.
 *
 * @param html the parsed document; only its base URI is read here
 * @param pageVoElement not used in this method
 * @param crawlerBookVo page VO whose {@code getBookImage()} value drives the insert
 */
@Override
public void parse(Document html, Element pageVoElement, CrawlerBookVo crawlerBookVo) {
// NOTE(review): pageUrl is assigned but never read in this method.
String pageUrl = html.baseUri();
// Parse and wrap the PageVo object
String img = crawlerBookVo.getBookImage();
if (StringUtils.isNoneBlank(img)) {
CrawlerBookEntity crawlerBookEntity = new CrawlerBookEntity();
// NOTE(review): this hard-coded uid is overwritten by setBookUid(bookId) a few lines below;
// presumably a removed line left over from the diff view. Confirm.
crawlerBookEntity.setBookUid("1019244094196551682");// Grade 1 Chinese, volume 1
// crawlerBookEntity.setBookUid("1019244094704062466");// Grade 1 Chinese, volume 2
// crawlerBookEntity.setBookUid("1019244096797020162");// Grade 1 English, volume 1
// bookId is captured from the enclosing crawlerBook() loop iteration.
crawlerBookEntity.setBookUid(bookId);
crawlerBookEntity.setBookUrl(crawlerBookVo.getBookImage());
// The order is the text between the last '/' and the last '.' of the image value, parsed as an int.
// Integer.parseInt throws NumberFormatException when that text is not numeric, and substring
// throws when the last '.' comes before the last '/'; neither is caught here.
crawlerBookEntity.setOrders(Integer.parseInt(img.substring(img.lastIndexOf("/") + 1, img.lastIndexOf("."))));
crawlerBookMapper.insert(crawlerBookEntity);
}
}
}).build();
@ -63,4 +67,11 @@ public class CrawlerBookService implements ICrawlerBookService {
crawler.start(true);
}
}
// public static void main(String[] args) {
// String url = "http://www.ruiwen.com/jiaocai/yuwen/renjiaoban/yinianjishangce/shangce1.html";
// System.out.println(url.replaceAll("\\d+", "\\\\d+").replaceAll("\\.","\\\\."));
// }
}

Loading…
Cancel
Save