From bd694c90431f9f5673b2f9891b5f64579cf8b7c0 Mon Sep 17 00:00:00 2001 From: "smiletocandy@qq.com" Date: Wed, 23 Jan 2019 14:05:14 +0800 Subject: [PATCH] IDEA --- tamguo-crawler/.classpath | 31 ----------- tamguo-crawler/.gitignore | 1 - tamguo-crawler/.project | 23 -------- .../org.eclipse.core.resources.prefs | 6 --- .../.settings/org.eclipse.jdt.core.prefs | 5 -- .../.settings/org.eclipse.m2e.core.prefs | 4 -- tamguo-crawler/pom.xml | 2 +- .../tamguo/config/dao/MybatisPlusConfig.java | 16 +++--- .../com/tamguo/service/impl/BookService.java | 32 +++++------ .../tamguo/service/impl/ChapterService.java | 33 +++++------- .../service/impl/CrawlerBookService.java | 2 +- .../tamguo/service/impl/QuestionService.java | 47 +++++++--------- .../tamguo/service/impl/SubjectService.java | 29 +++++----- .../src/main/resources/application.properties | 2 +- tamguo-crawler/src/main/resources/redis.xml | 2 +- .../java/com/tamguo/ModifyQuestionImage.java | 13 +++-- .../test/java/com/tamguo/PaperCrawler.java | 10 ++-- .../java/com/tamguo/PaperQuestionCrawler.java | 54 +++++++------------ .../com/tamguo/SingleQuestionCrawler.java | 51 ++++++++---------- 19 files changed, 121 insertions(+), 242 deletions(-) delete mode 100644 tamguo-crawler/.classpath delete mode 100644 tamguo-crawler/.gitignore delete mode 100644 tamguo-crawler/.project delete mode 100644 tamguo-crawler/.settings/org.eclipse.core.resources.prefs delete mode 100644 tamguo-crawler/.settings/org.eclipse.jdt.core.prefs delete mode 100644 tamguo-crawler/.settings/org.eclipse.m2e.core.prefs diff --git a/tamguo-crawler/.classpath b/tamguo-crawler/.classpath deleted file mode 100644 index 3553992..0000000 --- a/tamguo-crawler/.classpath +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tamguo-crawler/.gitignore b/tamguo-crawler/.gitignore deleted file mode 100644 index b83d222..0000000 --- a/tamguo-crawler/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target/ diff --git a/tamguo-crawler/.project b/tamguo-crawler/.project deleted file mode 100644 index b487433..0000000 --- a/tamguo-crawler/.project +++ /dev/null @@ -1,23 +0,0 @@ - - - tamguo-crawler - - - - - - org.eclipse.jdt.core.javabuilder - - - - - org.eclipse.m2e.core.maven2Builder - - - - - - org.eclipse.jdt.core.javanature - org.eclipse.m2e.core.maven2Nature - - diff --git a/tamguo-crawler/.settings/org.eclipse.core.resources.prefs b/tamguo-crawler/.settings/org.eclipse.core.resources.prefs deleted file mode 100644 index 04cfa2c..0000000 --- a/tamguo-crawler/.settings/org.eclipse.core.resources.prefs +++ /dev/null @@ -1,6 +0,0 @@ -eclipse.preferences.version=1 -encoding//src/main/java=UTF-8 -encoding//src/main/resources=UTF-8 -encoding//src/test/java=UTF-8 -encoding//src/test/resources=UTF-8 -encoding/=UTF-8 diff --git a/tamguo-crawler/.settings/org.eclipse.jdt.core.prefs b/tamguo-crawler/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index d59e09c..0000000 --- a/tamguo-crawler/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,5 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 -org.eclipse.jdt.core.compiler.compliance=1.8 -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.8 diff --git a/tamguo-crawler/.settings/org.eclipse.m2e.core.prefs b/tamguo-crawler/.settings/org.eclipse.m2e.core.prefs deleted file mode 100644 index 14b697b..0000000 --- a/tamguo-crawler/.settings/org.eclipse.m2e.core.prefs +++ /dev/null @@ -1,4 +0,0 @@ -activeProfiles= -eclipse.preferences.version=1 -resolveWorkspaceProjects=true -version=1 diff --git a/tamguo-crawler/pom.xml b/tamguo-crawler/pom.xml index 6395bb5..0fd0c42 100644 --- a/tamguo-crawler/pom.xml +++ b/tamguo-crawler/pom.xml @@ -69,7 +69,7 @@ com.xuxueli xxl-crawler - 1.2.1 + 1.2.2 diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/dao/MybatisPlusConfig.java b/tamguo-crawler/src/main/java/com/tamguo/config/dao/MybatisPlusConfig.java index ab5ec42..ce1a53c 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/config/dao/MybatisPlusConfig.java +++ b/tamguo-crawler/src/main/java/com/tamguo/config/dao/MybatisPlusConfig.java @@ -1,12 +1,5 @@ package com.tamguo.config.dao; -import java.util.ArrayList; -import java.util.List; - -import org.mybatis.spring.annotation.MapperScan; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; - import com.baomidou.mybatisplus.mapper.ISqlInjector; import com.baomidou.mybatisplus.mapper.LogicSqlInjector; import com.baomidou.mybatisplus.mapper.MetaObjectHandler; @@ -15,8 +8,13 @@ import com.baomidou.mybatisplus.plugins.PerformanceInterceptor; import com.baomidou.mybatisplus.plugins.parser.ISqlParser; import com.baomidou.mybatisplus.plugins.parser.tenant.TenantHandler; import com.baomidou.mybatisplus.plugins.parser.tenant.TenantSqlParser; - import net.sf.jsqlparser.expression.Expression; +import org.mybatis.spring.annotation.MapperScan; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.util.ArrayList; +import java.util.List; @Configuration @MapperScan("com.tamguo.dao*") @@ -39,7 +37,7 @@ public class MybatisPlusConfig { * 【测试多租户】 SQL 解析处理拦截器
* 这里固定写成住户 1 实际情况你可以从cookie读取,因此数据看不到 【 麻花藤 】 这条记录( 注意观察 SQL )
*/ - List sqlParserList = new ArrayList<>(); + List sqlParserList = new ArrayList(); TenantSqlParser tenantSqlParser = new TenantSqlParser(); tenantSqlParser.setTenantHandler(new TenantHandler() { @Override diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/BookService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/BookService.java index fe573b4..dd7996a 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/service/impl/BookService.java +++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/BookService.java @@ -2,25 +2,22 @@ package com.tamguo.service.impl; import com.baomidou.mybatisplus.plugins.Page; import com.tamguo.config.redis.CacheService; -import com.tamguo.dao.ChapterMapper; -import com.tamguo.dao.CourseMapper; -import com.tamguo.dao.CrawlerQuestionMapper; -import com.tamguo.dao.QuestionMapper; -import com.tamguo.dao.SubjectMapper; -import com.tamguo.model.ChapterEntity; -import com.tamguo.model.CourseEntity; -import com.tamguo.model.CrawlerQuestionEntity; -import com.tamguo.model.QuestionEntity; -import com.tamguo.model.SubjectEntity; +import com.tamguo.dao.*; +import com.tamguo.model.*; import com.tamguo.model.enums.QuestionType; import com.tamguo.model.vo.QuestionVo; import com.tamguo.service.IBookService; import com.xuxueli.crawler.XxlCrawler; import com.xuxueli.crawler.conf.XxlCrawlerConf; +import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader; import com.xuxueli.crawler.parser.PageParser; -import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader; import com.xuxueli.crawler.rundata.RunData; import com.xuxueli.crawler.util.FileUtil; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; import java.io.File; import java.text.DecimalFormat; @@ -30,12 +27,6 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -import org.apache.commons.lang3.StringUtils; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - @Service public class BookService implements IBookService { @@ -59,6 +50,7 @@ public class BookService implements IBookService { @Override public void crawlerBook() { + new HtmlUnitPageLoader(); XxlCrawler crawler = new XxlCrawler.Builder() .setAllowSpread(false) .setThreadCount(20) @@ -122,7 +114,7 @@ public class BookService implements IBookService { question.setSubjectId(subject.getId()); if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getAnswerImages()); + Set imagesSet = new HashSet(questionVo.getAnswerImages()); for (String img: imagesSet) { // 下载图片文件 @@ -140,7 +132,7 @@ public class BookService implements IBookService { } if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getAnalysisImages()); + Set imagesSet = new HashSet(questionVo.getAnalysisImages()); for (String img: imagesSet) { // 下载图片文件 @@ -158,7 +150,7 @@ public class BookService implements IBookService { } if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getContentImages()); + Set imagesSet = new HashSet(questionVo.getContentImages()); for (String img: imagesSet) { // 下载图片文件 diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/ChapterService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/ChapterService.java index ca931fc..f093c74 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/service/impl/ChapterService.java +++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/ChapterService.java @@ -1,25 +1,9 @@ package com.tamguo.service.impl; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - import com.alibaba.fastjson.JSONObject; import com.baomidou.mybatisplus.mapper.Condition; import com.baomidou.mybatisplus.plugins.Page; -import com.tamguo.dao.ChapterMapper; -import com.tamguo.dao.CourseMapper; -import com.tamguo.dao.CrawlerChapterMapper; -import com.tamguo.dao.CrawlerQuestionMapper; -import com.tamguo.dao.SubjectMapper; +import com.tamguo.dao.*; import com.tamguo.model.ChapterEntity; import com.tamguo.model.CourseEntity; import com.tamguo.model.CrawlerChapterEntity; @@ -28,6 +12,17 @@ import com.tamguo.service.IChapterService; import com.xuxueli.crawler.XxlCrawler; import com.xuxueli.crawler.parser.PageParser; import com.xuxueli.crawler.rundata.RunData; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; @Service public class ChapterService implements IChapterService{ @@ -43,7 +38,7 @@ public class ChapterService implements IChapterService{ private Logger logger = LoggerFactory.getLogger(getClass()); - private Set urls = new HashSet<>(); + private Set urls = new HashSet(); private RunData runData; @@ -56,7 +51,7 @@ public class ChapterService implements IChapterService{ int page = 1; int pageSize = 100; while(true) { - Page chapterPage = new Page<>(page, pageSize); + Page chapterPage = new Page(page, pageSize); List chapterList = chapterMapper.selectPage(chapterPage, Condition.create().orderAsc(Arrays.asList("id"))); for(int i=0 ;i bookEntities = bookMapper.selectList(Condition.EMPTY); for (BookEntity bookEntity : bookEntities) { String url = bookEntity.getReserveField1(); - String bookId = bookEntity.getId(); + final String bookId = bookEntity.getId(); String regexs = url.replaceAll("\\d+", "\\\\d+").replaceAll("\\.","\\\\."); XxlCrawler crawler = new XxlCrawler.Builder() diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/QuestionService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/QuestionService.java index ee6a7fc..58bf237 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/service/impl/QuestionService.java +++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/QuestionService.java @@ -1,41 +1,32 @@ package com.tamguo.service.impl; -import java.io.File; -import java.text.DecimalFormat; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.apache.commons.lang3.StringUtils; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - import com.baomidou.mybatisplus.plugins.Page; import com.baomidou.mybatisplus.service.impl.ServiceImpl; import com.tamguo.config.redis.CacheService; -import com.tamguo.dao.ChapterMapper; -import com.tamguo.dao.CourseMapper; -import com.tamguo.dao.CrawlerQuestionMapper; -import com.tamguo.dao.QuestionMapper; -import com.tamguo.dao.SubjectMapper; -import com.tamguo.model.ChapterEntity; -import com.tamguo.model.CourseEntity; -import com.tamguo.model.CrawlerQuestionEntity; -import com.tamguo.model.QuestionEntity; -import com.tamguo.model.SubjectEntity; +import com.tamguo.dao.*; +import com.tamguo.model.*; import com.tamguo.model.enums.QuestionType; import com.tamguo.model.vo.QuestionVo; import com.tamguo.service.IQuestionService; import com.xuxueli.crawler.XxlCrawler; import com.xuxueli.crawler.conf.XxlCrawlerConf; +import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader; import com.xuxueli.crawler.parser.PageParser; -import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader; import com.xuxueli.crawler.rundata.RunData; import com.xuxueli.crawler.util.FileUtil; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.io.File; +import java.text.DecimalFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; @Service public class QuestionService extends ServiceImpl implements IQuestionService{ @@ -125,7 +116,7 @@ public class QuestionService extends ServiceImpl question.setSubjectId(subject.getId()); if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getAnswerImages()); + Set imagesSet = new HashSet(questionVo.getAnswerImages()); for (String img: imagesSet) { // 下载图片文件 @@ -146,7 +137,7 @@ public class QuestionService extends ServiceImpl } if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getAnalysisImages()); + Set imagesSet = new HashSet(questionVo.getAnalysisImages()); for (String img: imagesSet) { // 下载图片文件 @@ -167,7 +158,7 @@ public class QuestionService extends ServiceImpl } if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getContentImages()); + Set imagesSet = new HashSet(questionVo.getContentImages()); for (String img: imagesSet) { // 下载图片文件 diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java index 97cab14..2f5426b 100644 --- a/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java +++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/SubjectService.java @@ -1,19 +1,5 @@ package com.tamguo.service.impl; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.apache.commons.lang3.StringUtils; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - import com.alibaba.fastjson.JSONObject; import com.tamguo.dao.ChapterMapper; import com.tamguo.dao.CourseMapper; @@ -26,6 +12,19 @@ import com.tamguo.service.ISubjectService; import com.xuxueli.crawler.XxlCrawler; import com.xuxueli.crawler.parser.PageParser; import com.xuxueli.crawler.rundata.RunData; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; @Service public class SubjectService implements ISubjectService{ @@ -46,7 +45,7 @@ public class SubjectService implements ISubjectService{ private Set questionUrls = new HashSet(); - private Map chapterQuestionListMap = new HashMap<>(); + private Map chapterQuestionListMap = new HashMap(); private RunData runData; diff --git a/tamguo-crawler/src/main/resources/application.properties b/tamguo-crawler/src/main/resources/application.properties index 26dd5ef..ac51a49 100644 --- a/tamguo-crawler/src/main/resources/application.properties +++ b/tamguo-crawler/src/main/resources/application.properties @@ -8,7 +8,7 @@ spring.datasource.maxPoolPreparedStatementPerConnectionSize=20 spring.datasource.maxWait=60000 spring.datasource.minEvictableIdleTimeMillis=300000 spring.datasource.minIdle=5 -spring.datasource.password=123456 +spring.datasource.password=tamguo spring.datasource.poolPreparedStatements=true spring.datasource.testOnBorrow=false spring.datasource.testOnReturn=false diff --git a/tamguo-crawler/src/main/resources/redis.xml b/tamguo-crawler/src/main/resources/redis.xml index b68365a..7838db3 100644 --- a/tamguo-crawler/src/main/resources/redis.xml +++ b/tamguo-crawler/src/main/resources/redis.xml @@ -4,6 +4,6 @@ - + \ No newline at end of file diff --git a/tamguo-crawler/src/test/java/com/tamguo/ModifyQuestionImage.java b/tamguo-crawler/src/test/java/com/tamguo/ModifyQuestionImage.java index 3cd0acd..3f2a9e9 100644 --- a/tamguo-crawler/src/test/java/com/tamguo/ModifyQuestionImage.java +++ b/tamguo-crawler/src/test/java/com/tamguo/ModifyQuestionImage.java @@ -1,17 +1,16 @@ package com.tamguo; -import java.util.Arrays; - +import com.baomidou.mybatisplus.mapper.Condition; +import com.baomidou.mybatisplus.plugins.Page; +import com.tamguo.model.QuestionEntity; +import com.tamguo.service.IQuestionService; import org.junit.Test; import org.junit.runner.RunWith; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.context.junit4.SpringRunner; -import com.baomidou.mybatisplus.mapper.Condition; -import com.baomidou.mybatisplus.plugins.Page; -import com.tamguo.model.QuestionEntity; -import com.tamguo.service.IQuestionService; +import java.util.Arrays; /** * Test - 修改用户图片 @@ -33,7 +32,7 @@ public class ModifyQuestionImage { Integer size = 100; while(true) { - Page page = new Page<>(current , size); + Page page = new Page(current , size); Page entitys = iQuestionService.selectPage(page , Condition.create().orderAsc(Arrays.asList("id"))); if(entitys.getCurrent() > 759) { break; diff --git a/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java index 0bb96dd..2c83d5e 100644 --- a/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java +++ b/tamguo-crawler/src/test/java/com/tamguo/PaperCrawler.java @@ -1,5 +1,6 @@ package com.tamguo; +import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.junit.Test; @@ -18,7 +19,6 @@ import com.tamguo.model.enums.QuestionType; import com.tamguo.model.vo.PaperVo; import com.xuxueli.crawler.XxlCrawler; import com.xuxueli.crawler.parser.PageParser; -import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader; import com.xuxueli.crawler.rundata.RunData; // 北京模拟试卷,真题试卷已经爬取完毕 @@ -34,13 +34,13 @@ public class PaperCrawler { // 140000 山西 | 350000 福建 | 340000 安徽 | 220000 吉林 | 150000 内蒙古 | 640000 宁夏 | 650000 新疆 | 广西 450000 | 210000 辽宁 // 230000 黑龙江 | 610000 陕西 | 360000 江西 | 440000 广东 | 430000 湖南 | 460000 海南 | 530000 云南 | 510000 四川 | 630000 青海 // 620000 甘肃 | 130000 河北 | 540000 西藏 | 贵州 520000 - private final String AREA_ID = "610000"; + private final String AREA_ID = "360000"; // 年份 - private final String YEAR = "2017"; + private final String YEAR = "2016"; // 真题试卷 类型(1:真题试卷,2:模拟试卷,3:押题预测,4:名校精品) - private final String PAPER_TYPE = "2"; + private final String PAPER_TYPE = "4"; // 开始采集的URL - private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-20-4-2017-1306-1-download"; + private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-20-7-2016-1360-1-download"; private RunData runData; diff --git a/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java index 23bd5ef..84300dc 100644 --- a/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java +++ b/tamguo-crawler/src/test/java/com/tamguo/PaperQuestionCrawler.java @@ -1,47 +1,31 @@ package com.tamguo; -import java.io.File; -import java.text.DecimalFormat; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Date; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.commons.lang3.StringUtils; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.test.context.junit4.SpringRunner; - import com.baomidou.mybatisplus.mapper.Condition; import com.baomidou.mybatisplus.plugins.Page; import com.tamguo.config.redis.CacheService; -import com.tamguo.dao.ChapterMapper; -import com.tamguo.dao.CourseMapper; -import com.tamguo.dao.CrawlerPaperMapper; -import com.tamguo.dao.CrawlerQuestionMapper; -import com.tamguo.dao.PaperMapper; -import com.tamguo.dao.QuestionMapper; -import com.tamguo.dao.SubjectMapper; -import com.tamguo.model.CourseEntity; -import com.tamguo.model.CrawlerPaperEntity; -import com.tamguo.model.PaperEntity; -import com.tamguo.model.QuestionEntity; -import com.tamguo.model.SubjectEntity; +import com.tamguo.dao.*; +import com.tamguo.model.*; import com.tamguo.model.enums.QuestionType; import com.tamguo.model.vo.QuestionVo; import com.xuxueli.crawler.XxlCrawler; import com.xuxueli.crawler.conf.XxlCrawlerConf; +import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader; import com.xuxueli.crawler.parser.PageParser; -import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader; import com.xuxueli.crawler.rundata.RunData; import com.xuxueli.crawler.util.FileUtil; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.junit4.SpringRunner; + +import java.io.File; +import java.text.DecimalFormat; +import java.text.SimpleDateFormat; +import java.util.*; @RunWith(SpringRunner.class) @SpringBootTest @@ -137,7 +121,7 @@ public class PaperQuestionCrawler { question.setSubjectId(subject.getId()); if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getAnswerImages()); + Set imagesSet = new HashSet(questionVo.getAnswerImages()); for (String img: imagesSet) { // 下载图片文件 @@ -158,7 +142,7 @@ public class PaperQuestionCrawler { } if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getAnalysisImages()); + Set imagesSet = new HashSet(questionVo.getAnalysisImages()); for (String img: imagesSet) { // 下载图片文件 @@ -179,7 +163,7 @@ public class PaperQuestionCrawler { } if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getContentImages()); + Set imagesSet = new HashSet(questionVo.getContentImages()); for (String img: imagesSet) { // 下载图片文件 diff --git a/tamguo-crawler/src/test/java/com/tamguo/SingleQuestionCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/SingleQuestionCrawler.java index 0f855b2..2437231 100644 --- a/tamguo-crawler/src/test/java/com/tamguo/SingleQuestionCrawler.java +++ b/tamguo-crawler/src/test/java/com/tamguo/SingleQuestionCrawler.java @@ -1,13 +1,16 @@ package com.tamguo; -import java.io.File; -import java.text.DecimalFormat; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - +import com.baomidou.mybatisplus.plugins.Page; +import com.tamguo.config.redis.CacheService; +import com.tamguo.dao.*; +import com.tamguo.model.*; +import com.tamguo.model.vo.QuestionVo; +import com.xuxueli.crawler.XxlCrawler; +import com.xuxueli.crawler.conf.XxlCrawlerConf; +import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader; +import com.xuxueli.crawler.parser.PageParser; +import com.xuxueli.crawler.rundata.RunData; +import com.xuxueli.crawler.util.FileUtil; import org.apache.commons.lang3.StringUtils; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -18,25 +21,13 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.test.context.junit4.SpringRunner; -import com.baomidou.mybatisplus.plugins.Page; -import com.tamguo.config.redis.CacheService; -import com.tamguo.dao.ChapterMapper; -import com.tamguo.dao.CourseMapper; -import com.tamguo.dao.CrawlerQuestionMapper; -import com.tamguo.dao.QuestionMapper; -import com.tamguo.dao.SubjectMapper; -import com.tamguo.model.ChapterEntity; -import com.tamguo.model.CourseEntity; -import com.tamguo.model.CrawlerQuestionEntity; -import com.tamguo.model.QuestionEntity; -import com.tamguo.model.SubjectEntity; -import com.tamguo.model.vo.QuestionVo; -import com.xuxueli.crawler.XxlCrawler; -import com.xuxueli.crawler.conf.XxlCrawlerConf; -import com.xuxueli.crawler.parser.PageParser; -import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader; -import com.xuxueli.crawler.rundata.RunData; -import com.xuxueli.crawler.util.FileUtil; +import java.io.File; +import java.text.DecimalFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; @RunWith(SpringRunner.class) @SpringBootTest @@ -94,7 +85,7 @@ public class SingleQuestionCrawler { question.setYear(questionVo.getYear()); if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getAnswerImages()); + Set imagesSet = new HashSet(questionVo.getAnswerImages()); for (String img: imagesSet) { // 下载图片文件 @@ -113,7 +104,7 @@ public class SingleQuestionCrawler { if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getAnalysisImages()); + Set imagesSet = new HashSet(questionVo.getAnalysisImages()); for (String img: imagesSet) { // 下载图片文件 @@ -131,7 +122,7 @@ public class SingleQuestionCrawler { question.setAnalysis(questionVo.getAnalysis()); if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) { - Set imagesSet = new HashSet<>(questionVo.getContentImages()); + Set imagesSet = new HashSet(questionVo.getContentImages()); for (String img: imagesSet) { // 下载图片文件