diff --git a/tamguo-crawler/pom.xml b/tamguo-crawler/pom.xml
index 7eefd94..1c82634 100644
--- a/tamguo-crawler/pom.xml
+++ b/tamguo-crawler/pom.xml
@@ -20,14 +20,6 @@
-
- org.springframework.boot
- spring-boot-starter-web
-
-
- org.springframework.boot
- spring-boot-starter-thymeleaf
-
org.springframework.boot
spring-boot-starter-jdbc
@@ -45,10 +37,6 @@
-
- net.sourceforge.nekohtml
- nekohtml
-
org.springframework.boot
spring-boot-starter-test
@@ -59,50 +47,10 @@
fastjson
1.2.32
-
- org.apache.shiro
- shiro-spring
- 1.2.5
-
-
- org.apache.shiro
- shiro-ehcache
- 1.2.5
-
-
- com.github.theborakompanioni
- thymeleaf-extras-shiro
- 1.2.1
-
-
- cn.songxinqiang
- com.baidu.ueditor
- 1.1.2-edit-1.0
-
-
- commons-codec
- commons-codec
-
-
- commons-fileupload
- commons-fileupload
- 1.3.1
-
commons-io
commons-io
-
- com.github.penggle
- kaptcha
- 2.3.2
-
-
- javax.servlet-api
- javax.servlet
-
-
-
com.alibaba
druid
@@ -118,19 +66,9 @@
3.6
- com.aliyun
- aliyun-java-sdk-dysmsapi
- 1.0.0
-
-
- com.aliyun
- aliyun-java-sdk-core
- 3.2.8
-
-
- org.apache.commons
- commons-email
- 1.5
+ com.xuxueli
+ xxl-crawler
+ 1.2.1
diff --git a/tamguo-crawler/src/main/java/com/tamguo/TamguoCrawlerApplication.java b/tamguo-crawler/src/main/java/com/tamguo/TamguoCrawlerApplication.java
index 40e17a0..64ddefc 100644
--- a/tamguo-crawler/src/main/java/com/tamguo/TamguoCrawlerApplication.java
+++ b/tamguo-crawler/src/main/java/com/tamguo/TamguoCrawlerApplication.java
@@ -1,15 +1,8 @@
package com.tamguo;
import org.springframework.boot.autoconfigure.SpringBootApplication;
-import org.springframework.boot.autoconfigure.web.HttpMessageConverters;
import org.springframework.boot.builder.SpringApplicationBuilder;
-import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.ComponentScan;
-import org.springframework.http.converter.HttpMessageConverter;
-
-import com.alibaba.fastjson.serializer.SerializerFeature;
-import com.alibaba.fastjson.support.config.FastJsonConfig;
-import com.alibaba.fastjson.support.spring.FastJsonHttpMessageConverter;
@SpringBootApplication
@ComponentScan("com.tamguo")
@@ -19,19 +12,4 @@ public class TamguoCrawlerApplication {
new SpringApplicationBuilder(TamguoCrawlerApplication.class).web(true).run(args);
}
- /**
- * FastJson替代Jackson
- * @return
- */
- @Bean
- public HttpMessageConverters fastJsonHttpMessageConverters() {
- FastJsonHttpMessageConverter fastConverter = new FastJsonHttpMessageConverter();
- FastJsonConfig fastJsonConfig = new FastJsonConfig();
- fastJsonConfig.setDateFormat("yyyy-MM-dd HH:mm:ss");
- fastJsonConfig.setSerializerFeatures(SerializerFeature.DisableCircularReferenceDetect);
- fastConverter.setFastJsonConfig(fastJsonConfig);
- HttpMessageConverter> converter = fastConverter;
- return new HttpMessageConverters(converter);
- }
-
}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/dao/MyMetaObjectHandler.java b/tamguo-crawler/src/main/java/com/tamguo/config/dao/MyMetaObjectHandler.java
index 3c4417d..dad62b1 100644
--- a/tamguo-crawler/src/main/java/com/tamguo/config/dao/MyMetaObjectHandler.java
+++ b/tamguo-crawler/src/main/java/com/tamguo/config/dao/MyMetaObjectHandler.java
@@ -1,7 +1,6 @@
package com.tamguo.config.dao;
import com.baomidou.mybatisplus.mapper.MetaObjectHandler;
-import com.tamguo.TamguoCrawlerApplication;
import org.apache.ibatis.reflection.MetaObject;
import org.slf4j.Logger;
@@ -13,7 +12,7 @@ import org.slf4j.LoggerFactory;
//@Component
public class MyMetaObjectHandler extends MetaObjectHandler {
- protected final static Logger logger = LoggerFactory.getLogger(TamguoCrawlerApplication.class);
+ protected final static Logger logger = LoggerFactory.getLogger(MyMetaObjectHandler.class);
@Override
public void insertFill(MetaObject metaObject) {
diff --git a/tamguo-crawler/src/main/java/com/tamguo/dao/SubjectMapper.java b/tamguo-crawler/src/main/java/com/tamguo/dao/SubjectMapper.java
new file mode 100644
index 0000000..8065ecd
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/dao/SubjectMapper.java
@@ -0,0 +1,12 @@
+package com.tamguo.dao;
+
+import org.apache.ibatis.annotations.Param;
+
+import com.tamguo.config.dao.SuperMapper;
+import com.tamguo.model.SubjectEntity;
+/** Mapper interface for SubjectEntity, built on the project's SuperMapper. NOTE(review): SuperMapper appears here without type arguments — confirm against its declaration whether this should read {@code SuperMapper<SubjectEntity>}. */
+public interface SubjectMapper extends SuperMapper{
+ /** Finder keyed on a name, bound as the MyBatis parameter "name"; returns a SubjectEntity. NOTE(review): the backing statement is not visible here, so the no-match result is unconfirmed. */
+ SubjectEntity findByName(@Param(value="name")String name);
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/SubjectEntity.java b/tamguo-crawler/src/main/java/com/tamguo/model/SubjectEntity.java
new file mode 100644
index 0000000..7bc9cf9
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/model/SubjectEntity.java
@@ -0,0 +1,42 @@
+package com.tamguo.model;
+
+import java.io.Serializable;
+import com.baomidou.mybatisplus.annotations.TableName;
+import com.tamguo.config.dao.SuperEntity;
+/** Entity mapped to the "tiku_subject" table via @TableName; carries name, courseId and courseName. NOTE(review): SuperEntity appears without type arguments — confirm against its declaration. */
+@TableName(value="tiku_subject")
+public class SubjectEntity extends SuperEntity implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+ /** Name of this subject. */
+ private String name;
+ /** Identifier of the associated course — presumably references a course record; TODO confirm. */
+ private String courseId;
+ /** Name of the associated course. NOTE(review): whether this is a persisted column of tiku_subject is not shown here. */
+ private String courseName;
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getCourseId() {
+ return courseId;
+ }
+
+ public void setCourseId(String courseId) {
+ this.courseId = courseId;
+ }
+
+ public String getCourseName() {
+ return courseName;
+ }
+
+ public void setCourseName(String courseName) {
+ this.courseName = courseName;
+ }
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java b/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java
new file mode 100644
index 0000000..7fdefc6
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/model/vo/SubjectVo.java
@@ -0,0 +1,23 @@
+package com.tamguo.model.vo;
+
+import java.util.List;
+
+import com.xuxueli.crawler.annotation.PageFieldSelect;
+import com.xuxueli.crawler.annotation.PageSelect;
+/** Page value object for the crawler: @PageSelect scopes extraction to the page's "body" element. */
+@PageSelect(cssQuery = "body")
+public class SubjectVo {
+ /** One entry per element matched by the CSS query ".all-list-li"; parameterized so the element type is explicit instead of a raw List. */
+ @PageFieldSelect(cssQuery = ".all-list-li")
+ private List<String> name;
+
+ public List<String> getName() {
+ return name;
+ }
+
+ public void setName(List<String> name) {
+ this.name = name;
+ }
+
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/IChapterService.java b/tamguo-crawler/src/main/java/com/tamguo/service/IChapterService.java
new file mode 100644
index 0000000..33fa4a4
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/service/IChapterService.java
@@ -0,0 +1,9 @@
+package com.tamguo.service;
+
+public interface IChapterService {
+
+ /**
+ * Crawls chapter data.
+ */
+ void crawlerChapter();
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java b/tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java
new file mode 100644
index 0000000..489e65c
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/service/ICourseService.java
@@ -0,0 +1,10 @@
+package com.tamguo.service;
+
+public interface ICourseService {
+
+ /**
+ * Crawls course data.
+ */
+ void crawlerCourse();
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/ISubjectService.java b/tamguo-crawler/src/main/java/com/tamguo/service/ISubjectService.java
new file mode 100644
index 0000000..b6ad7ea
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/service/ISubjectService.java
@@ -0,0 +1,10 @@
+package com.tamguo.service;
+
+public interface ISubjectService {
+
+ /**
+ * Crawls exam data.
+ */
+ void crawlerSubject();
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java
new file mode 100644
index 0000000..0dc7448
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java
@@ -0,0 +1,51 @@
+package com.tamguo.service.impl;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import com.tamguo.dao.SubjectMapper;
+import com.tamguo.model.SubjectEntity;
+import com.tamguo.model.vo.SubjectVo;
+import com.tamguo.service.ISubjectService;
+import com.xuxueli.crawler.XxlCrawler;
+import com.xuxueli.crawler.parser.PageParser;
+
+@Service
+public class SubjectService implements ISubjectService{
+
+ @Autowired
+ SubjectMapper subjectMapper;
+
+ @Override
+ public void crawlerSubject() {
+ XxlCrawler crawler = new XxlCrawler.Builder()
+ .setUrls("https://tiku.baidu.com/")
+ .setWhiteUrlRegexs("https://tiku\\.baidu\\.com/")
+ .setPageParser(new PageParser() {
+ @Override
+ public void parse(Document html, Element pageVoElement, SubjectVo subjectVo) {
+ // 解析封装 PageVo 对象
+ String pageUrl = html.baseUri();
+ System.out.println(pageUrl + ":" + subjectVo.toString());
+
+ for(int i=0 ; i
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tamguo-crawler/src/test/java/com/tamguo/SubjectCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/SubjectCrawler.java
new file mode 100644
index 0000000..c7522f8
--- /dev/null
+++ b/tamguo-crawler/src/test/java/com/tamguo/SubjectCrawler.java
@@ -0,0 +1,23 @@
+package com.tamguo;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.test.context.junit4.SpringRunner;
+
+import com.tamguo.service.ISubjectService;
+/** Spring Boot test (run with SpringRunner) that drives the subject crawl through the autowired ISubjectService. */
+@RunWith(SpringRunner.class)
+@SpringBootTest
+public class SubjectCrawler {
+ /** Service under test, injected from the Spring test context. */
+ @Autowired
+ ISubjectService iSubjectService;
+ /** Invokes crawlerSubject() once; makes no assertions, so it passes unless the call throws. */
+ @Test
+ public void crawlerSubject() throws Exception {
+ iSubjectService.crawlerSubject();
+ }
+
+}