From 7ee525e183d4d887cf7e324a2f76d621b2a61dbc Mon Sep 17 00:00:00 2001 From: tamguo Date: Mon, 2 Jul 2018 14:33:08 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AF=BC=E5=85=A5=E8=80=83=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tamguo-crawler/pom.xml | 68 +------------------ .../com/tamguo/TamguoCrawlerApplication.java | 22 ------ .../config/dao/MyMetaObjectHandler.java | 3 +- .../java/com/tamguo/dao/SubjectMapper.java | 12 ++++ .../java/com/tamguo/model/SubjectEntity.java | 42 ++++++++++++ .../java/com/tamguo/model/vo/SubjectVo.java | 23 +++++++ .../com/tamguo/service/IChapterService.java | 9 +++ .../com/tamguo/service/ICourseService.java | 10 +++ .../com/tamguo/service/ISubjectService.java | 10 +++ .../tamguo/service/impl/SubjectService.java | 51 ++++++++++++++ .../src/main/resources/application.properties | 2 +- .../main/resources/mappers/SubjectMapper.xml | 9 +++ .../test/java/com/tamguo/SubjectCrawler.java | 23 +++++++ 13 files changed, 194 insertions(+), 90 deletions(-) create mode 100644 tamguo-crawler/src/main/java/com/tamguo/dao/SubjectMapper.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/model/SubjectEntity.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/service/IChapterService.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/service/ISubjectService.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java create mode 100644 tamguo-crawler/src/main/resources/mappers/SubjectMapper.xml create mode 100644 tamguo-crawler/src/test/java/com/tamguo/SubjectCrawler.java diff --git a/tamguo-crawler/pom.xml b/tamguo-crawler/pom.xml index 7eefd94..1c82634 100644 --- a/tamguo-crawler/pom.xml +++ b/tamguo-crawler/pom.xml @@ -20,14 +20,6 @@ - - org.springframework.boot - spring-boot-starter-web - - - org.springframework.boot - spring-boot-starter-thymeleaf - org.springframework.boot spring-boot-starter-jdbc @@ -45,10 +37,6 @@ - - net.sourceforge.nekohtml - nekohtml - org.springframework.boot spring-boot-starter-test @@ -59,50 +47,10 @@ fastjson 1.2.32 - - org.apache.shiro - shiro-spring - 1.2.5 - - - org.apache.shiro - shiro-ehcache - 1.2.5 - - - com.github.theborakompanioni - thymeleaf-extras-shiro - 1.2.1 - - - cn.songxinqiang - com.baidu.ueditor - 1.1.2-edit-1.0 - - - commons-codec - commons-codec - - - commons-fileupload - commons-fileupload - 1.3.1 - commons-io commons-io - - com.github.penggle - kaptcha - 2.3.2 - - - javax.servlet-api - javax.servlet - - - com.alibaba druid @@ -118,19 +66,9 @@ 3.6 - com.aliyun - aliyun-java-sdk-dysmsapi - 1.0.0 - - - com.aliyun - aliyun-java-sdk-core - 3.2.8 - - - org.apache.commons - commons-email - 1.5 + com.xuxueli + xxl-crawler + 1.2.1 diff --git a/tamguo-crawler/src/main/java/com/tamguo/TamguoCrawlerApplication.java b/tamguo-crawler/src/main/java/com/tamguo/TamguoCrawlerApplication.java index 40e17a0..64ddefc 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/TamguoCrawlerApplication.java +++ b/tamguo-crawler/src/main/java/com/tamguo/TamguoCrawlerApplication.java @@ -1,15 +1,8 @@ package com.tamguo; import org.springframework.boot.autoconfigure.SpringBootApplication; -import org.springframework.boot.autoconfigure.web.HttpMessageConverters; import org.springframework.boot.builder.SpringApplicationBuilder; -import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.ComponentScan; -import org.springframework.http.converter.HttpMessageConverter; - -import com.alibaba.fastjson.serializer.SerializerFeature; -import com.alibaba.fastjson.support.config.FastJsonConfig; -import com.alibaba.fastjson.support.spring.FastJsonHttpMessageConverter; @SpringBootApplication @ComponentScan("com.tamguo") @@ -19,19 +12,4 @@ public class TamguoCrawlerApplication { new SpringApplicationBuilder(TamguoCrawlerApplication.class).web(true).run(args); } - /** - * FastJson替代Jackson - * @return - */ - @Bean - public HttpMessageConverters fastJsonHttpMessageConverters() { - FastJsonHttpMessageConverter fastConverter = new FastJsonHttpMessageConverter(); - FastJsonConfig fastJsonConfig = new FastJsonConfig(); - fastJsonConfig.setDateFormat("yyyy-MM-dd HH:mm:ss"); - fastJsonConfig.setSerializerFeatures(SerializerFeature.DisableCircularReferenceDetect); - fastConverter.setFastJsonConfig(fastJsonConfig); - HttpMessageConverter converter = fastConverter; - return new HttpMessageConverters(converter); - } - } diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/dao/MyMetaObjectHandler.java b/tamguo-crawler/src/main/java/com/tamguo/config/dao/MyMetaObjectHandler.java index 3c4417d..dad62b1 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/config/dao/MyMetaObjectHandler.java +++ b/tamguo-crawler/src/main/java/com/tamguo/config/dao/MyMetaObjectHandler.java @@ -1,7 +1,6 @@ package com.tamguo.config.dao; import com.baomidou.mybatisplus.mapper.MetaObjectHandler; -import com.tamguo.TamguoCrawlerApplication; import org.apache.ibatis.reflection.MetaObject; import org.slf4j.Logger; @@ -13,7 +12,7 @@ import org.slf4j.LoggerFactory; //@Component public class MyMetaObjectHandler extends MetaObjectHandler { - protected final static Logger logger = LoggerFactory.getLogger(TamguoCrawlerApplication.class); + protected final static Logger logger = LoggerFactory.getLogger(MyMetaObjectHandler.class); @Override public void insertFill(MetaObject metaObject) { diff --git a/tamguo-crawler/src/main/java/com/tamguo/dao/SubjectMapper.java b/tamguo-crawler/src/main/java/com/tamguo/dao/SubjectMapper.java new file mode 100644 index 0000000..8065ecd --- /dev/null +++ b/tamguo-crawler/src/main/java/com/tamguo/dao/SubjectMapper.java @@ -0,0 +1,12 @@ +package com.tamguo.dao; + +import org.apache.ibatis.annotations.Param; + +import com.tamguo.config.dao.SuperMapper; +import com.tamguo.model.SubjectEntity; + +public interface SubjectMapper extends SuperMapper{ + + SubjectEntity findByName(@Param(value="name")String name); + +} diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/SubjectEntity.java b/tamguo-crawler/src/main/java/com/tamguo/model/SubjectEntity.java new file mode 100644 index 0000000..7bc9cf9 --- /dev/null +++ b/tamguo-crawler/src/main/java/com/tamguo/model/SubjectEntity.java @@ -0,0 +1,42 @@ +package com.tamguo.model; + +import java.io.Serializable; +import com.baomidou.mybatisplus.annotations.TableName; +import com.tamguo.config.dao.SuperEntity; + +@TableName(value="tiku_subject") +public class SubjectEntity extends SuperEntity implements Serializable { + + private static final long serialVersionUID = 1L; + + private String name; + + private String courseId; + + private String courseName; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getCourseId() { + return courseId; + } + + public void setCourseId(String courseId) { + this.courseId = courseId; + } + + public String getCourseName() { + return courseName; + } + + public void setCourseName(String courseName) { + this.courseName = courseName; + } + +} diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java b/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java new file mode 100644 index 0000000..7fdefc6 --- /dev/null +++ b/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java @@ -0,0 +1,23 @@ +package com.tamguo.model.vo; + +import java.util.List; + +import com.xuxueli.crawler.annotation.PageFieldSelect; +import com.xuxueli.crawler.annotation.PageSelect; + +@PageSelect(cssQuery = "body") +public class SubjectVo { + + @PageFieldSelect(cssQuery = ".all-list-li") + private List name; + + public List getName() { + return name; + } + + public void setName(List name) { + this.name = name; + } + + +} diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/IChapterService.java b/tamguo-crawler/src/main/java/com/tamguo/service/IChapterService.java new file mode 100644 index 0000000..33fa4a4 --- /dev/null +++ b/tamguo-crawler/src/main/java/com/tamguo/service/IChapterService.java @@ -0,0 +1,9 @@ +package com.tamguo.service; + +public interface IChapterService { + + /** + * 爬取章节数据 + */ + void crawlerChapter(); +} diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java b/tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java new file mode 100644 index 0000000..489e65c --- /dev/null +++ b/tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java @@ -0,0 +1,10 @@ +package com.tamguo.service; + +public interface ICourseService { + + /** + * 爬取科目数据 + */ + void crawlerCourse(); + +} diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/ISubjectService.java b/tamguo-crawler/src/main/java/com/tamguo/service/ISubjectService.java new file mode 100644 index 0000000..b6ad7ea --- /dev/null +++ b/tamguo-crawler/src/main/java/com/tamguo/service/ISubjectService.java @@ -0,0 +1,10 @@ +package com.tamguo.service; + +public interface ISubjectService { + + /** + * 爬取考试数据 + */ + void crawlerSubject(); + +} diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java new file mode 100644 index 0000000..0dc7448 --- /dev/null +++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java @@ -0,0 +1,51 @@ +package com.tamguo.service.impl; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import com.tamguo.dao.SubjectMapper; +import com.tamguo.model.SubjectEntity; +import com.tamguo.model.vo.SubjectVo; +import com.tamguo.service.ISubjectService; +import com.xuxueli.crawler.XxlCrawler; +import com.xuxueli.crawler.parser.PageParser; + +@Service +public class SubjectService implements ISubjectService{ + + @Autowired + SubjectMapper subjectMapper; + + @Override + public void crawlerSubject() { + XxlCrawler crawler = new XxlCrawler.Builder() + .setUrls("https://tiku.baidu.com/") + .setWhiteUrlRegexs("https://tiku\\.baidu\\.com/") + .setPageParser(new PageParser() { + @Override + public void parse(Document html, Element pageVoElement, SubjectVo subjectVo) { + // 解析封装 PageVo 对象 + String pageUrl = html.baseUri(); + System.out.println(pageUrl + ":" + subjectVo.toString()); + + for(int i=0 ; i + + + + + + \ No newline at end of file diff --git a/tamguo-crawler/src/test/java/com/tamguo/SubjectCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/SubjectCrawler.java new file mode 100644 index 0000000..c7522f8 --- /dev/null +++ b/tamguo-crawler/src/test/java/com/tamguo/SubjectCrawler.java @@ -0,0 +1,23 @@ +package com.tamguo; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.junit4.SpringRunner; + +import com.tamguo.service.ISubjectService; + +@RunWith(SpringRunner.class) +@SpringBootTest +public class SubjectCrawler { + + @Autowired + ISubjectService iSubjectService; + + @Test + public void crawlerSubject() throws Exception { + iSubjectService.crawlerSubject(); + } + +}