smiletocandy@qq.com 7 years ago
parent b6a830a772
commit bd694c9043

@ -1,31 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>

@ -1 +0,0 @@
/target/

@ -1,23 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>tamguo-crawler</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

@ -1,6 +0,0 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8

@ -1,5 +0,0 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.8

@ -1,4 +0,0 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

@ -69,7 +69,7 @@
<dependency>
<groupId>com.xuxueli</groupId>
<artifactId>xxl-crawler</artifactId>
<version>1.2.1</version>
<version>1.2.2</version>
</dependency>
<!-- htmlunit -->
<dependency>

@ -1,12 +1,5 @@
package com.tamguo.config.dao;
import java.util.ArrayList;
import java.util.List;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import com.baomidou.mybatisplus.mapper.ISqlInjector;
import com.baomidou.mybatisplus.mapper.LogicSqlInjector;
import com.baomidou.mybatisplus.mapper.MetaObjectHandler;
@ -15,8 +8,13 @@ import com.baomidou.mybatisplus.plugins.PerformanceInterceptor;
import com.baomidou.mybatisplus.plugins.parser.ISqlParser;
import com.baomidou.mybatisplus.plugins.parser.tenant.TenantHandler;
import com.baomidou.mybatisplus.plugins.parser.tenant.TenantSqlParser;
import net.sf.jsqlparser.expression.Expression;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.ArrayList;
import java.util.List;
@Configuration
@MapperScan("com.tamguo.dao*")
@ -39,7 +37,7 @@ public class MybatisPlusConfig {
* SQL <br>
* 1 cookie SQL <br>
*/
List<ISqlParser> sqlParserList = new ArrayList<>();
List<ISqlParser> sqlParserList = new ArrayList<ISqlParser>();
TenantSqlParser tenantSqlParser = new TenantSqlParser();
tenantSqlParser.setTenantHandler(new TenantHandler() {
@Override

@ -2,25 +2,22 @@ package com.tamguo.service.impl;
import com.baomidou.mybatisplus.plugins.Page;
import com.tamguo.config.redis.CacheService;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.dao.CourseMapper;
import com.tamguo.dao.CrawlerQuestionMapper;
import com.tamguo.dao.QuestionMapper;
import com.tamguo.dao.SubjectMapper;
import com.tamguo.model.ChapterEntity;
import com.tamguo.model.CourseEntity;
import com.tamguo.model.CrawlerQuestionEntity;
import com.tamguo.model.QuestionEntity;
import com.tamguo.model.SubjectEntity;
import com.tamguo.dao.*;
import com.tamguo.model.*;
import com.tamguo.model.enums.QuestionType;
import com.tamguo.model.vo.QuestionVo;
import com.tamguo.service.IBookService;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.rundata.RunData;
import com.xuxueli.crawler.util.FileUtil;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.File;
import java.text.DecimalFormat;
@ -30,12 +27,6 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@Service
public class BookService implements IBookService {
@ -59,6 +50,7 @@ public class BookService implements IBookService {
@Override
public void crawlerBook() {
new HtmlUnitPageLoader();
XxlCrawler crawler = new XxlCrawler.Builder()
.setAllowSpread(false)
.setThreadCount(20)
@ -122,7 +114,7 @@ public class BookService implements IBookService {
question.setSubjectId(subject.getId());
if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getAnswerImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getAnswerImages());
for (String img: imagesSet) {
// 下载图片文件
@ -140,7 +132,7 @@ public class BookService implements IBookService {
}
if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getAnalysisImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getAnalysisImages());
for (String img: imagesSet) {
// 下载图片文件
@ -158,7 +150,7 @@ public class BookService implements IBookService {
}
if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getContentImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getContentImages());
for (String img: imagesSet) {
// 下载图片文件

@ -1,25 +1,9 @@
package com.tamguo.service.impl;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.mapper.Condition;
import com.baomidou.mybatisplus.plugins.Page;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.dao.CourseMapper;
import com.tamguo.dao.CrawlerChapterMapper;
import com.tamguo.dao.CrawlerQuestionMapper;
import com.tamguo.dao.SubjectMapper;
import com.tamguo.dao.*;
import com.tamguo.model.ChapterEntity;
import com.tamguo.model.CourseEntity;
import com.tamguo.model.CrawlerChapterEntity;
@ -28,6 +12,17 @@ import com.tamguo.service.IChapterService;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.rundata.RunData;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@Service
public class ChapterService implements IChapterService{
@ -43,7 +38,7 @@ public class ChapterService implements IChapterService{
private Logger logger = LoggerFactory.getLogger(getClass());
private Set<String> urls = new HashSet<>();
private Set<String> urls = new HashSet<String>();
private RunData runData;
@ -56,7 +51,7 @@ public class ChapterService implements IChapterService{
int page = 1;
int pageSize = 100;
while(true) {
Page<ChapterEntity> chapterPage = new Page<>(page, pageSize);
Page<ChapterEntity> chapterPage = new Page<ChapterEntity>(page, pageSize);
List<ChapterEntity> chapterList = chapterMapper.selectPage(chapterPage, Condition.create().orderAsc(Arrays.asList("id")));
for(int i=0 ;i<chapterList.size() ; i++) {
// 处理数据

@ -38,7 +38,7 @@ public class CrawlerBookService implements ICrawlerBookService {
List<BookEntity> bookEntities = bookMapper.selectList(Condition.EMPTY);
for (BookEntity bookEntity : bookEntities) {
String url = bookEntity.getReserveField1();
String bookId = bookEntity.getId();
final String bookId = bookEntity.getId();
String regexs = url.replaceAll("\\d+", "\\\\d+").replaceAll("\\.","\\\\.");
XxlCrawler crawler = new XxlCrawler.Builder()

@ -1,41 +1,32 @@
package com.tamguo.service.impl;
import java.io.File;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.baomidou.mybatisplus.plugins.Page;
import com.baomidou.mybatisplus.service.impl.ServiceImpl;
import com.tamguo.config.redis.CacheService;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.dao.CourseMapper;
import com.tamguo.dao.CrawlerQuestionMapper;
import com.tamguo.dao.QuestionMapper;
import com.tamguo.dao.SubjectMapper;
import com.tamguo.model.ChapterEntity;
import com.tamguo.model.CourseEntity;
import com.tamguo.model.CrawlerQuestionEntity;
import com.tamguo.model.QuestionEntity;
import com.tamguo.model.SubjectEntity;
import com.tamguo.dao.*;
import com.tamguo.model.*;
import com.tamguo.model.enums.QuestionType;
import com.tamguo.model.vo.QuestionVo;
import com.tamguo.service.IQuestionService;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.rundata.RunData;
import com.xuxueli.crawler.util.FileUtil;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.io.File;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@Service
public class QuestionService extends ServiceImpl<QuestionMapper, QuestionEntity> implements IQuestionService{
@ -125,7 +116,7 @@ public class QuestionService extends ServiceImpl<QuestionMapper, QuestionEntity>
question.setSubjectId(subject.getId());
if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getAnswerImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getAnswerImages());
for (String img: imagesSet) {
// 下载图片文件
@ -146,7 +137,7 @@ public class QuestionService extends ServiceImpl<QuestionMapper, QuestionEntity>
}
if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getAnalysisImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getAnalysisImages());
for (String img: imagesSet) {
// 下载图片文件
@ -167,7 +158,7 @@ public class QuestionService extends ServiceImpl<QuestionMapper, QuestionEntity>
}
if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getContentImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getContentImages());
for (String img: imagesSet) {
// 下载图片文件

@ -1,19 +1,5 @@
package com.tamguo.service.impl;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.alibaba.fastjson.JSONObject;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.dao.CourseMapper;
@ -26,6 +12,19 @@ import com.tamguo.service.ISubjectService;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.rundata.RunData;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@Service
public class SubjectService implements ISubjectService{
@ -46,7 +45,7 @@ public class SubjectService implements ISubjectService{
private Set<String> questionUrls = new HashSet<String>();
private Map<String, Object> chapterQuestionListMap = new HashMap<>();
private Map<String, Object> chapterQuestionListMap = new HashMap<String, Object>();
private RunData runData;

@ -8,7 +8,7 @@ spring.datasource.maxPoolPreparedStatementPerConnectionSize=20
spring.datasource.maxWait=60000
spring.datasource.minEvictableIdleTimeMillis=300000
spring.datasource.minIdle=5
spring.datasource.password=123456
spring.datasource.password=tamguo
spring.datasource.poolPreparedStatements=true
spring.datasource.testOnBorrow=false
spring.datasource.testOnReturn=false

@ -4,6 +4,6 @@
<pool maxActive="50" maxIdle="20" maxWait="1000" />
<servers>
<!-- test -->
<server ip="127.0.0.1" port="6379" needAuth="true" auth="reaps" connectTimeOut="2000"/>
<server ip="127.0.0.1" port="6379" connectTimeOut="2000"/>
</servers>
</redis>

@ -1,17 +1,16 @@
package com.tamguo;
import java.util.Arrays;
import com.baomidou.mybatisplus.mapper.Condition;
import com.baomidou.mybatisplus.plugins.Page;
import com.tamguo.model.QuestionEntity;
import com.tamguo.service.IQuestionService;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
import com.baomidou.mybatisplus.mapper.Condition;
import com.baomidou.mybatisplus.plugins.Page;
import com.tamguo.model.QuestionEntity;
import com.tamguo.service.IQuestionService;
import java.util.Arrays;
/**
* Test -
@ -33,7 +32,7 @@ public class ModifyQuestionImage {
Integer size = 100;
while(true) {
Page<QuestionEntity> page = new Page<>(current , size);
Page<QuestionEntity> page = new Page<QuestionEntity>(current , size);
Page<QuestionEntity> entitys = iQuestionService.selectPage(page , Condition.create().orderAsc(Arrays.asList("id")));
if(entitys.getCurrent() > 759) {
break;

@ -1,5 +1,6 @@
package com.tamguo;
import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.junit.Test;
@ -18,7 +19,6 @@ import com.tamguo.model.enums.QuestionType;
import com.tamguo.model.vo.PaperVo;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.rundata.RunData;
// 北京模拟试卷,真题试卷已经爬取完毕
@ -34,13 +34,13 @@ public class PaperCrawler {
// 140000 山西 | 350000 福建 | 340000 安徽 | 220000 吉林 | 150000 内蒙古 | 640000 宁夏 | 650000 新疆 | 广西 450000 | 210000 辽宁
// 230000 黑龙江 | 610000 陕西 | 360000 江西 | 440000 广东 | 430000 湖南 | 460000 海南 | 530000 云南 | 510000 四川 | 630000 青海
// 620000 甘肃 | 130000 河北 | 540000 西藏 | 贵州 520000
private final String AREA_ID = "610000";
private final String AREA_ID = "360000";
// 年份
private final String YEAR = "2017";
private final String YEAR = "2016";
// 真题试卷 类型(1:真题试卷,2:模拟试卷,3:押题预测,4:名校精品)
private final String PAPER_TYPE = "2";
private final String PAPER_TYPE = "4";
// 开始采集的URL
private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-20-4-2017-1306-1-download";
private final String START_URL = "https://tiku.baidu.com/tikupc/paperlist/1bfd700abb68a98271fefa04-20-7-2016-1360-1-download";
private RunData runData;

@ -1,47 +1,31 @@
package com.tamguo;
import java.io.File;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
import com.baomidou.mybatisplus.mapper.Condition;
import com.baomidou.mybatisplus.plugins.Page;
import com.tamguo.config.redis.CacheService;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.dao.CourseMapper;
import com.tamguo.dao.CrawlerPaperMapper;
import com.tamguo.dao.CrawlerQuestionMapper;
import com.tamguo.dao.PaperMapper;
import com.tamguo.dao.QuestionMapper;
import com.tamguo.dao.SubjectMapper;
import com.tamguo.model.CourseEntity;
import com.tamguo.model.CrawlerPaperEntity;
import com.tamguo.model.PaperEntity;
import com.tamguo.model.QuestionEntity;
import com.tamguo.model.SubjectEntity;
import com.tamguo.dao.*;
import com.tamguo.model.*;
import com.tamguo.model.enums.QuestionType;
import com.tamguo.model.vo.QuestionVo;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.rundata.RunData;
import com.xuxueli.crawler.util.FileUtil;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
import java.io.File;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.*;
@RunWith(SpringRunner.class)
@SpringBootTest
@ -137,7 +121,7 @@ public class PaperQuestionCrawler {
question.setSubjectId(subject.getId());
if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getAnswerImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getAnswerImages());
for (String img: imagesSet) {
// 下载图片文件
@ -158,7 +142,7 @@ public class PaperQuestionCrawler {
}
if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getAnalysisImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getAnalysisImages());
for (String img: imagesSet) {
// 下载图片文件
@ -179,7 +163,7 @@ public class PaperQuestionCrawler {
}
if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getContentImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getContentImages());
for (String img: imagesSet) {
// 下载图片文件

@ -1,13 +1,16 @@
package com.tamguo;
import java.io.File;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.baomidou.mybatisplus.plugins.Page;
import com.tamguo.config.redis.CacheService;
import com.tamguo.dao.*;
import com.tamguo.model.*;
import com.tamguo.model.vo.QuestionVo;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.rundata.RunData;
import com.xuxueli.crawler.util.FileUtil;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
@ -18,25 +21,13 @@ import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
import com.baomidou.mybatisplus.plugins.Page;
import com.tamguo.config.redis.CacheService;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.dao.CourseMapper;
import com.tamguo.dao.CrawlerQuestionMapper;
import com.tamguo.dao.QuestionMapper;
import com.tamguo.dao.SubjectMapper;
import com.tamguo.model.ChapterEntity;
import com.tamguo.model.CourseEntity;
import com.tamguo.model.CrawlerQuestionEntity;
import com.tamguo.model.QuestionEntity;
import com.tamguo.model.SubjectEntity;
import com.tamguo.model.vo.QuestionVo;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.rundata.RunData;
import com.xuxueli.crawler.util.FileUtil;
import java.io.File;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@RunWith(SpringRunner.class)
@SpringBootTest
@ -94,7 +85,7 @@ public class SingleQuestionCrawler {
question.setYear(questionVo.getYear());
if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getAnswerImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getAnswerImages());
for (String img: imagesSet) {
// 下载图片文件
@ -113,7 +104,7 @@ public class SingleQuestionCrawler {
if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getAnalysisImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getAnalysisImages());
for (String img: imagesSet) {
// 下载图片文件
@ -131,7 +122,7 @@ public class SingleQuestionCrawler {
question.setAnalysis(questionVo.getAnalysis());
if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) {
Set<String> imagesSet = new HashSet<>(questionVo.getContentImages());
Set<String> imagesSet = new HashSet<String>(questionVo.getContentImages());
for (String img: imagesSet) {
// 下载图片文件

Loading…
Cancel
Save