parent
7c6f116beb
commit
55e1a53ace
@ -0,0 +1,8 @@
|
||||
package com.tamguo.dao;
|
||||
|
||||
import com.tamguo.config.dao.SuperMapper;
|
||||
import com.tamguo.model.CrawlerBookEntity;
|
||||
|
||||
public interface CrawlerBookMapper extends SuperMapper<CrawlerBookEntity> {
|
||||
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
package com.tamguo.model;
|
||||
|
||||
import com.baomidou.mybatisplus.annotations.TableName;
|
||||
import com.tamguo.config.dao.SuperEntity;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* The persistent class for the crawler_book database table.
|
||||
*/
|
||||
@TableName(value = "crawler_book")
|
||||
public class CrawlerBookEntity extends SuperEntity<CrawlerBookEntity> implements Serializable {
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private String bookUrl;
|
||||
|
||||
private String bookUid;
|
||||
|
||||
private Integer orders;
|
||||
|
||||
public CrawlerBookEntity() {
|
||||
}
|
||||
|
||||
public static long getSerialVersionUID() {
|
||||
return serialVersionUID;
|
||||
}
|
||||
|
||||
public String getBookUrl() {
|
||||
return bookUrl;
|
||||
}
|
||||
|
||||
public void setBookUrl(String bookUrl) {
|
||||
this.bookUrl = bookUrl;
|
||||
}
|
||||
|
||||
public String getBookUid() {
|
||||
return bookUid;
|
||||
}
|
||||
|
||||
public void setBookUid(String bookUid) {
|
||||
this.bookUid = bookUid;
|
||||
}
|
||||
|
||||
public Integer getOrders() {
|
||||
return orders;
|
||||
}
|
||||
|
||||
public void setOrders(Integer orders) {
|
||||
this.orders = orders;
|
||||
}
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package com.tamguo.model.vo;
|
||||
|
||||
import com.xuxueli.crawler.annotation.PageFieldSelect;
|
||||
import com.xuxueli.crawler.annotation.PageSelect;
|
||||
import com.xuxueli.crawler.conf.XxlCrawlerConf;
|
||||
|
||||
@PageSelect(cssQuery = "body")
|
||||
public class CrawlerBookVo {
|
||||
|
||||
@PageFieldSelect(cssQuery = ".con .pic img", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:src")
|
||||
private String bookImage;
|
||||
|
||||
public String getBookImage() {
|
||||
return bookImage;
|
||||
}
|
||||
|
||||
public void setBookImage(String bookImage) {
|
||||
this.bookImage = bookImage;
|
||||
}
|
||||
}
|
@ -0,0 +1,11 @@
|
||||
package com.tamguo.service;
|
||||
|
||||
/**
 * Service contract for crawling book data.
 */
public interface ICrawlerBookService {

    /**
     * Crawls book data.
     */
    void crawlerBook();
}
|
@ -0,0 +1,62 @@
|
||||
package com.tamguo.service.impl;
|
||||
|
||||
import com.tamguo.config.redis.CacheService;
|
||||
import com.tamguo.dao.CrawlerBookMapper;
|
||||
import com.tamguo.model.CrawlerBookEntity;
|
||||
import com.tamguo.model.vo.CrawlerBookVo;
|
||||
import com.tamguo.service.ICrawlerBookService;
|
||||
import com.xuxueli.crawler.XxlCrawler;
|
||||
import com.xuxueli.crawler.parser.PageParser;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
public class CrawlerBookService implements ICrawlerBookService {
|
||||
|
||||
|
||||
@Autowired
|
||||
CrawlerBookMapper crawlerBookMapper;
|
||||
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private static final String FILES_NO_FORMAT = "000000";
|
||||
private static final String FILES_PREFIX = "FPIMAGE";
|
||||
private static final String DOMAIN = "http://www.tamguo.com";
|
||||
@Autowired
|
||||
CacheService cacheService;
|
||||
|
||||
|
||||
//一年级语文上册
|
||||
@Override
|
||||
public void crawlerBook() {
|
||||
XxlCrawler crawler = new XxlCrawler.Builder()
|
||||
.setUrls("http://www.ruiwen.com/jiaocai/")
|
||||
.setWhiteUrlRegexs("http://www\\.ruiwen\\.com/jiaocai/yuwen/bubianban/yinianjishangce/shangce\\d+\\.html")
|
||||
.setAllowSpread(true)
|
||||
.setFailRetryCount(5)
|
||||
.setThreadCount(20)
|
||||
.setPageParser(new PageParser<CrawlerBookVo>() {
|
||||
@Override
|
||||
public void parse(Document html, Element pageVoElement, CrawlerBookVo crawlerBookVo) {
|
||||
// 解析封装 PageVo 对象
|
||||
String img = crawlerBookVo.getBookImage();
|
||||
if (StringUtils.isNoneBlank(img)) {
|
||||
CrawlerBookEntity crawlerBookEntity = new CrawlerBookEntity();
|
||||
crawlerBookEntity.setBookUid("1019238600753074178");
|
||||
crawlerBookEntity.setBookUrl(crawlerBookVo.getBookImage());
|
||||
crawlerBookEntity.setOrders(Integer.parseInt(img.substring(img.lastIndexOf("/") + 1, img.lastIndexOf("."))));
|
||||
crawlerBookMapper.insert(crawlerBookEntity);
|
||||
}
|
||||
}
|
||||
}).build();
|
||||
|
||||
// 获取科目
|
||||
crawler.start(true);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
package com.tamguo;
|
||||
|
||||
import com.tamguo.service.IBookService;
|
||||
import com.tamguo.service.ICrawlerBookService;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.test.context.junit4.SpringRunner;
|
||||
|
||||
@RunWith(SpringRunner.class)
|
||||
@SpringBootTest
|
||||
public class CrawlerBookCrawler {
|
||||
|
||||
@Autowired
|
||||
ICrawlerBookService crawlerBookService;
|
||||
|
||||
@Test
|
||||
public void crawlerBook() throws Exception {
|
||||
crawlerBookService.crawlerBook();
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue