爬取数据

main
tamguo 7 years ago
parent d684906312
commit c9c4f889e4

@ -52,8 +52,8 @@ public class QuestionService implements IQuestionService{
@Autowired @Autowired
CacheService cacheService; CacheService cacheService;
private static final String FILES_NO_FORMAT = "000000000"; private static final String FILES_NO_FORMAT = "000000000";
private static final String FILES_PREFIX = "shengwu"; private static final String FILES_PREFIX = "zz";
private static final String COURSE_ID = "shengwu"; private static final String COURSE_ID = "zhengzhi";
private static final String DOMAIN = "http://www.tamguo.com"; private static final String DOMAIN = "http://www.tamguo.com";
private RunData runData; private RunData runData;

@ -30,8 +30,8 @@ import com.xuxueli.crawler.rundata.RunData;
@Service @Service
public class SubjectService implements ISubjectService{ public class SubjectService implements ISubjectService{
private final static String COURSE_ID = "shengwu"; private final static String COURSE_ID = "zhengzhi";
private final static String BOOK_ID = "1025976567395184645"; private final static String BOOK_ID = "1025976567395184649";
private final static String SUBJECT_ID = "gaokao"; private final static String SUBJECT_ID = "gaokao";
@Autowired @Autowired
SubjectMapper subjectMapper; SubjectMapper subjectMapper;
@ -53,7 +53,7 @@ public class SubjectService implements ISubjectService{
@Override @Override
public void crawlerSubject() { public void crawlerSubject() {
XxlCrawler crawler = new XxlCrawler.Builder() XxlCrawler crawler = new XxlCrawler.Builder()
.setUrls("https://tiku.baidu.com/tikupc/chapterlist/1bfd700abb68a98271fefa04-20-knowpoint-11") .setUrls("https://tiku.baidu.com/tikupc/chapterlist/1bfd700abb68a98271fefa04-26-knowpoint-11")
.setAllowSpread(false) .setAllowSpread(false)
.setFailRetryCount(5) .setFailRetryCount(5)
.setThreadCount(20) .setThreadCount(20)
@ -63,7 +63,7 @@ public class SubjectService implements ISubjectService{
public void parse(Document html, Element pageVoElement, SubjectVo subjectVo) { public void parse(Document html, Element pageVoElement, SubjectVo subjectVo) {
// 解析封装 PageVo 对象 // 解析封装 PageVo 对象
String pageUrl = html.baseUri(); String pageUrl = html.baseUri();
if(pageUrl.contains("https://tiku.baidu.com/tikupc/chapterlist/1bfd700abb68a98271fefa04-20-knowpoint-11")) { if(pageUrl.contains("https://tiku.baidu.com/tikupc/chapterlist/1bfd700abb68a98271fefa04-26-knowpoint-11")) {
logger.info("开始解析书籍:{}" , pageUrl); logger.info("开始解析书籍:{}" , pageUrl);
ChapterEntity chapterCondition = new ChapterEntity(); ChapterEntity chapterCondition = new ChapterEntity();
chapterCondition.setName(subjectVo.getChapterCurrName()); chapterCondition.setName(subjectVo.getChapterCurrName());

Loading…
Cancel
Save