From da866c5dcc0046eced8f24ca9d3984f8c9ee677d Mon Sep 17 00:00:00 2001 From: tamguo Date: Mon, 2 Jul 2018 16:43:44 +0800 Subject: [PATCH] =?UTF-8?q?=E7=88=AC=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/tamguo/model/vo/CourseVo.java | 22 +++++++++++++++++++ .../java/com/tamguo/model/vo/SubjectVo.java | 6 +++-- .../com/tamguo/service/ICourseService.java | 10 --------- .../tamguo/service/impl/SubjectService.java | 22 +++++++++++++++++-- 4 files changed, 46 insertions(+), 14 deletions(-) create mode 100644 tamguo-crawler/src/main/java/com/tamguo/model/vo/CourseVo.java delete mode 100644 tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/vo/CourseVo.java b/tamguo-crawler/src/main/java/com/tamguo/model/vo/CourseVo.java new file mode 100644 index 0000000..f53c8e5 --- /dev/null +++ b/tamguo-crawler/src/main/java/com/tamguo/model/vo/CourseVo.java @@ -0,0 +1,22 @@ +package com.tamguo.model.vo; + +import java.util.List; + +import com.xuxueli.crawler.annotation.PageFieldSelect; +import com.xuxueli.crawler.annotation.PageSelect; + +@PageSelect(cssQuery = "body") +public class CourseVo { + + @PageFieldSelect(cssQuery = ".course-item") + private List name; + + public List getName() { + return name; + } + + public void setName(List name) { + this.name = name; + } + +} diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java b/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java index 7fdefc6..4a75956 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java +++ b/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java @@ -10,7 +10,10 @@ public class SubjectVo { @PageFieldSelect(cssQuery = ".all-list-li") private List name; - + + @PageFieldSelect(cssQuery=".course-list-container .course-list .course-item") + private List courseName; + public List getName() { return name; } @@ -19,5 +22,4 @@ public class SubjectVo { this.name = name; } - } diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java b/tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java deleted file mode 100644 index 489e65c..0000000 --- a/tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.tamguo.service; - -public interface ICourseService { - - /** - * 爬取科目数据 - */ - void crawlerCourse(); - -} diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java index 0dc7448..1bfaaba 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java +++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java @@ -2,6 +2,7 @@ package com.tamguo.service.impl; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -11,6 +12,7 @@ import com.tamguo.model.vo.SubjectVo; import com.tamguo.service.ISubjectService; import com.xuxueli.crawler.XxlCrawler; import com.xuxueli.crawler.parser.PageParser; +import com.xuxueli.crawler.rundata.RunData; @Service public class SubjectService implements ISubjectService{ @@ -18,12 +20,15 @@ public class SubjectService implements ISubjectService{ @Autowired SubjectMapper subjectMapper; + private RunData runData; + @Override public void crawlerSubject() { XxlCrawler crawler = new XxlCrawler.Builder() .setUrls("https://tiku.baidu.com/") .setWhiteUrlRegexs("https://tiku\\.baidu\\.com/") .setPageParser(new PageParser() { + @Override public void parse(Document html, Element pageVoElement, SubjectVo subjectVo) { // 解析封装 PageVo 对象 @@ -40,11 +45,24 @@ public class SubjectService implements ISubjectService{ SubjectEntity entity = new SubjectEntity(); entity.setName(name); subjectMapper.insert(entity); + + // 获取Course + Elements elements = pageVoElement.getElementsByClass("all-list-li"); + for(int k=0 ; k