|
|
|
@ -1,47 +1,31 @@
|
|
|
|
|
package com.tamguo;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.text.DecimalFormat;
|
|
|
|
|
import java.text.SimpleDateFormat;
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
|
import java.util.Date;
|
|
|
|
|
import java.util.HashSet;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.Set;
|
|
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
|
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
|
import org.jsoup.nodes.Element;
|
|
|
|
|
import org.junit.Test;
|
|
|
|
|
import org.junit.runner.RunWith;
|
|
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
|
import org.springframework.boot.test.context.SpringBootTest;
|
|
|
|
|
import org.springframework.test.context.junit4.SpringRunner;
|
|
|
|
|
|
|
|
|
|
import com.baomidou.mybatisplus.mapper.Condition;
|
|
|
|
|
import com.baomidou.mybatisplus.plugins.Page;
|
|
|
|
|
import com.tamguo.config.redis.CacheService;
|
|
|
|
|
import com.tamguo.dao.ChapterMapper;
|
|
|
|
|
import com.tamguo.dao.CourseMapper;
|
|
|
|
|
import com.tamguo.dao.CrawlerPaperMapper;
|
|
|
|
|
import com.tamguo.dao.CrawlerQuestionMapper;
|
|
|
|
|
import com.tamguo.dao.PaperMapper;
|
|
|
|
|
import com.tamguo.dao.QuestionMapper;
|
|
|
|
|
import com.tamguo.dao.SubjectMapper;
|
|
|
|
|
import com.tamguo.model.CourseEntity;
|
|
|
|
|
import com.tamguo.model.CrawlerPaperEntity;
|
|
|
|
|
import com.tamguo.model.PaperEntity;
|
|
|
|
|
import com.tamguo.model.QuestionEntity;
|
|
|
|
|
import com.tamguo.model.SubjectEntity;
|
|
|
|
|
import com.tamguo.dao.*;
|
|
|
|
|
import com.tamguo.model.*;
|
|
|
|
|
import com.tamguo.model.enums.QuestionType;
|
|
|
|
|
import com.tamguo.model.vo.QuestionVo;
|
|
|
|
|
import com.xuxueli.crawler.XxlCrawler;
|
|
|
|
|
import com.xuxueli.crawler.conf.XxlCrawlerConf;
|
|
|
|
|
import com.xuxueli.crawler.loader.strategy.HtmlUnitPageLoader;
|
|
|
|
|
import com.xuxueli.crawler.parser.PageParser;
|
|
|
|
|
import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader;
|
|
|
|
|
import com.xuxueli.crawler.rundata.RunData;
|
|
|
|
|
import com.xuxueli.crawler.util.FileUtil;
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
|
import org.jsoup.nodes.Element;
|
|
|
|
|
import org.junit.Test;
|
|
|
|
|
import org.junit.runner.RunWith;
|
|
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
|
import org.springframework.boot.test.context.SpringBootTest;
|
|
|
|
|
import org.springframework.test.context.junit4.SpringRunner;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.text.DecimalFormat;
|
|
|
|
|
import java.text.SimpleDateFormat;
|
|
|
|
|
import java.util.*;
|
|
|
|
|
|
|
|
|
|
@RunWith(SpringRunner.class)
|
|
|
|
|
@SpringBootTest
|
|
|
|
@ -137,7 +121,7 @@ public class PaperQuestionCrawler {
|
|
|
|
|
question.setSubjectId(subject.getId());
|
|
|
|
|
|
|
|
|
|
if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) {
|
|
|
|
|
Set<String> imagesSet = new HashSet<>(questionVo.getAnswerImages());
|
|
|
|
|
Set<String> imagesSet = new HashSet<String>(questionVo.getAnswerImages());
|
|
|
|
|
for (String img: imagesSet) {
|
|
|
|
|
|
|
|
|
|
// Download the image file
|
|
|
|
@ -158,7 +142,7 @@ public class PaperQuestionCrawler {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) {
|
|
|
|
|
Set<String> imagesSet = new HashSet<>(questionVo.getAnalysisImages());
|
|
|
|
|
Set<String> imagesSet = new HashSet<String>(questionVo.getAnalysisImages());
|
|
|
|
|
for (String img: imagesSet) {
|
|
|
|
|
|
|
|
|
|
// Download the image file
|
|
|
|
@ -179,7 +163,7 @@ public class PaperQuestionCrawler {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) {
|
|
|
|
|
Set<String> imagesSet = new HashSet<>(questionVo.getContentImages());
|
|
|
|
|
Set<String> imagesSet = new HashSet<String>(questionVo.getContentImages());
|
|
|
|
|
for (String img: imagesSet) {
|
|
|
|
|
|
|
|
|
|
// Download the image file
|
|
|
|
|