diff --git a/tamguo-admin/src/main/resources/application.properties b/tamguo-admin/src/main/resources/application.properties index 3cf7c64..253e8b9 100644 --- a/tamguo-admin/src/main/resources/application.properties +++ b/tamguo-admin/src/main/resources/application.properties @@ -1,5 +1,5 @@ -domain.name=http://localhost/ -server.port=80 +domain.name=http://admin.tamguo.com/ +server.port=8082 jasypt.encryptor.password=tamguo spring.datasource.connectionProperties=druid.stat.mergeSql=true;druid.stat.slowSqlMillis=5000 @@ -11,14 +11,14 @@ spring.datasource.maxPoolPreparedStatementPerConnectionSize=20 spring.datasource.maxWait=60000 spring.datasource.minEvictableIdleTimeMillis=300000 spring.datasource.minIdle=5 -spring.datasource.password=Tanguo +spring.datasource.password= spring.datasource.poolPreparedStatements=true spring.datasource.testOnBorrow=false spring.datasource.testOnReturn=false spring.datasource.testWhileIdle=true spring.datasource.timeBetweenEvictionRunsMillis=60000 spring.datasource.type=com.alibaba.druid.pool.DruidDataSource -spring.datasource.url=jdbc:mysql://47.100.175.14:3306/tamguo?useUnicode=true&characterEncoding=UTF-8&useSSL=false +spring.datasource.url=jdbc:mysql://127.0.0.1:3306/tiku?useUnicode=true&characterEncoding=UTF-8&useSSL=false spring.datasource.username=root spring.datasource.validationQuery=SELECT 1 FROM DUAL @@ -46,7 +46,7 @@ spring.thymeleaf.encoding=UTF-8 spring.thymeleaf.content-type=text/html spring.thymeleaf.cache=false -redis.hostname=47.100.175.14 +redis.hostname=127.0.0.1 redis.port=6379 redis.password= diff --git a/tamguo-admin/src/main/resources/redis.xml b/tamguo-admin/src/main/resources/redis.xml index 4055e0d..ed04f74 100644 --- a/tamguo-admin/src/main/resources/redis.xml +++ b/tamguo-admin/src/main/resources/redis.xml @@ -4,6 +4,6 @@ - + \ No newline at end of file diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/enums/QuestionType.java b/tamguo-crawler/src/main/java/com/tamguo/model/enums/QuestionType.java 
import java.io.Serializable;

/**
 * Question types, each pairing a short code with its Chinese display label.
 *
 * <ul>
 *   <li>{@code "1"} - single-choice question</li>
 *   <li>{@code "2"} - multiple-choice question</li>
 *   <li>{@code "3"} - fill-in-the-blank question</li>
 *   <li>{@code "4"} - true/false question</li>
 *   <li>{@code "5"} - question-and-answer (free text) question</li>
 * </ul>
 *
 * @author tamguo
 */
public enum QuestionType {

    DANXUANTI("1", "单选题"),
    DUOXUANTI("2", "多选题"),
    TIANKONGTI("3", "填空题"),
    PANDUANTI("4", "判断题"),
    WENDATI("5", "问答题");

    /** Short code of the type; also what {@link #toString()} returns. */
    private final String value;

    /** Chinese display label of the type. */
    private final String desc;

    QuestionType(final String value, final String desc) {
        this.value = value;
        this.desc = desc;
    }

    /**
     * Resolves a question type from its display label.
     *
     * <p>Despite the parameter name, the argument is compared with the label
     * returned by {@link #getDesc()}, not with the code returned by
     * {@link #getValue()}. The labels are read from the constants themselves,
     * so the lookup cannot drift from the declarations above.
     *
     * @param value display label to look up; may be {@code null}
     * @return the constant whose label equals {@code value}, or
     *         {@link #DANXUANTI} when no label matches (including {@code null})
     */
    public static QuestionType getQuestionType(String value) {
        for (QuestionType type : values()) {
            // desc is never null, so a null argument simply matches nothing.
            if (type.desc.equals(value)) {
                return type;
            }
        }
        // Unknown labels fall back to single choice.
        return DANXUANTI;
    }

    /**
     * Returns the short code of this type.
     *
     * @return the code, {@code "1"} to {@code "5"}
     */
    public Serializable getValue() {
        return this.value;
    }

    /**
     * Returns the display label of this type.
     *
     * @return the Chinese label
     */
    public String getDesc() {
        return this.desc;
    }

    /**
     * Returns the short code rather than the constant name.
     *
     * @return the same string as {@link #getValue()}
     */
    @Override
    public String toString() {
        return this.value;
    }

}
XxlCrawlerConf.SelectType.HTML) private String answer; @@ -35,21 +41,6 @@ public class QuestionVo { @PageFieldSelect(cssQuery=".kpoint-contain point point-item",selectType = XxlCrawlerConf.SelectType.TEXT) private List reviewPoint; - public String getContent() { - return content; - } - - public void setContent(String content) { - this.content = content; - } - - public String getAnalysis() { - return analysis; - } - - public void setAnalysis(String analysis) { - this.analysis = analysis; - } public String getQuestionType() { return questionType; @@ -106,5 +97,37 @@ public class QuestionVo { public void setAnalysisImages(List analysisImages) { this.analysisImages = analysisImages; } - + + public String getQueoptions() { + return queoptions; + } + + public void setQueoptions(String queoptions) { + this.queoptions = queoptions; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + public List getContentImages() { + return contentImages; + } + + public void setContentImages(List contentImages) { + this.contentImages = contentImages; + } + + public String getAnalysis() { + return analysis; + } + + public void setAnalysis(String analysis) { + this.analysis = analysis; + } + } diff --git a/tamguo-crawler/src/main/resources/application.properties b/tamguo-crawler/src/main/resources/application.properties index 684bdc5..6c38d28 100644 --- a/tamguo-crawler/src/main/resources/application.properties +++ b/tamguo-crawler/src/main/resources/application.properties @@ -1,3 +1,4 @@ +domain.name=http://www.tamguo.com/ spring.datasource.connectionProperties=druid.stat.mergeSql=true;druid.stat.slowSqlMillis=5000 spring.datasource.driver-class-name=com.mysql.jdbc.Driver spring.datasource.filters=stat,wall,log4j diff --git a/tamguo-crawler/src/test/java/com/tamguo/SingleQuestionCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/SingleQuestionCrawler.java new file mode 100644 index 0000000..a73916b --- 
/dev/null +++ b/tamguo-crawler/src/test/java/com/tamguo/SingleQuestionCrawler.java @@ -0,0 +1,200 @@ +package com.tamguo; + +import java.io.File; +import java.text.DecimalFormat; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang3.StringUtils; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.junit4.SpringRunner; + +import com.baomidou.mybatisplus.plugins.Page; +import com.tamguo.config.redis.CacheService; +import com.tamguo.dao.ChapterMapper; +import com.tamguo.dao.CourseMapper; +import com.tamguo.dao.CrawlerQuestionMapper; +import com.tamguo.dao.QuestionMapper; +import com.tamguo.dao.SubjectMapper; +import com.tamguo.model.ChapterEntity; +import com.tamguo.model.CourseEntity; +import com.tamguo.model.CrawlerQuestionEntity; +import com.tamguo.model.QuestionEntity; +import com.tamguo.model.SubjectEntity; +import com.tamguo.model.vo.QuestionVo; +import com.xuxueli.crawler.XxlCrawler; +import com.xuxueli.crawler.conf.XxlCrawlerConf; +import com.xuxueli.crawler.parser.PageParser; +import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader; +import com.xuxueli.crawler.rundata.RunData; +import com.xuxueli.crawler.util.FileUtil; + +@RunWith(SpringRunner.class) +@SpringBootTest +public class SingleQuestionCrawler { + + private RunData runData; + @Autowired + CrawlerQuestionMapper crawlerQuestionMapper; + @Autowired + ChapterMapper chapterMapper; + @Autowired + CourseMapper courseMapper; + @Autowired + SubjectMapper subjectMapper; + @Autowired + CacheService cacheService; + @Autowired + QuestionMapper questionMapper; + + private static final String 
FILES_NO_FORMAT = "000000"; + private static final String FILES_PREFIX = "FP"; + @Value(value="${domain.name}") + public String DOMAIN; + + @Test + public void crawlerSubject() throws Exception { + XxlCrawler crawler = new XxlCrawler.Builder() + .setAllowSpread(false) + .setThreadCount(20) + .setPageLoader(new HtmlUnitPageLoader()) + .setPageParser(new PageParser() { + @Override + public void parse(Document html, Element pageVoElement, QuestionVo questionVo) { + CrawlerQuestionEntity condition = new CrawlerQuestionEntity(); + condition.setQuestionUrl(html.baseUri()); + CrawlerQuestionEntity crawlerQuestion = crawlerQuestionMapper.selectOne(condition); + ChapterEntity chapter = chapterMapper.selectById(crawlerQuestion.getChapterId()); + CourseEntity course = courseMapper.selectById(chapter.getCourseId()); + SubjectEntity subject = subjectMapper.selectById(course.getSubjectId()); + + QuestionEntity question = new QuestionEntity(); + question.setAnalysis(questionVo.getAnalysis()); + question.setAnswer(questionVo.getAnswer()); + question.setAuditStatus("1"); + question.setChapterId(chapter.getUid()); + question.setContent(questionVo.getContent()); + question.setCourseId(course.getUid()); + question.setPaperId(null); + question.setQuestionType("1"); + if(questionVo.getReviewPoint() != null && questionVo.getReviewPoint().size() > 0) { + question.setReviewPoint(StringUtils.join(questionVo.getReviewPoint().toArray(), ",")); + } + question.setScore(questionVo.getScore()); + question.setSubjectId(subject.getUid()); + question.setYear(questionVo.getYear()); + + if (questionVo.getAnswerImages()!=null && questionVo.getAnswerImages().size() > 0) { + Set imagesSet = new HashSet<>(questionVo.getAnswerImages()); + for (String img: imagesSet) { + + // 下载图片文件 + String fileName = getFileName(img); + File dir = new File(getFilePath()); + if (!dir.exists()) + dir.mkdirs(); + boolean ret = FileUtil.downFile(img, XxlCrawlerConf.TIMEOUT_MILLIS_DEFAULT, getFilePath(), fileName); + 
System.out.println("down images " + (ret?"success":"fail") + ":" + img); + + // 替换URL + questionVo.setAnswer(questionVo.getAnswer().replace(img, DOMAIN + getFilePaths() + fileName)); + } + question.setAnswer(questionVo.getAnswer()); + } + + + if (questionVo.getAnalysisImages()!=null && questionVo.getAnalysisImages().size() > 0) { + Set imagesSet = new HashSet<>(questionVo.getAnalysisImages()); + for (String img: imagesSet) { + + // 下载图片文件 + String fileName = getFileName(img); + File dir = new File(getFilePath()); + if (!dir.exists()) + dir.mkdirs(); + boolean ret = FileUtil.downFile(img, XxlCrawlerConf.TIMEOUT_MILLIS_DEFAULT, getFilePath(), fileName); + System.out.println("down images " + (ret?"success":"fail") + ":" + img); + + // 替换URL + questionVo.setAnalysis(questionVo.getAnalysis().replace(img, DOMAIN + getFilePaths() + fileName)); + } + } + question.setAnalysis(questionVo.getAnalysis()); + + if (questionVo.getContentImages()!=null && questionVo.getContentImages().size() > 0) { + Set imagesSet = new HashSet<>(questionVo.getContentImages()); + for (String img: imagesSet) { + + // 下载图片文件 + String fileName = getFileName(img); + File dir = new File(getFilePath()); + if (!dir.exists()) + dir.mkdirs(); + boolean ret = FileUtil.downFile(img, XxlCrawlerConf.TIMEOUT_MILLIS_DEFAULT, getFilePath(), fileName); + System.out.println("down images " + (ret?"success":"fail") + ":" + img); + + // 替换URL + questionVo.setContent(questionVo.getContent().replace(img, DOMAIN + getFilePaths() + fileName)); + } + } + question.setContent(questionVo.getContent()); + questionMapper.insert(question); + } + + public String getFileName(String img) { + return getFileNo() + img.substring(img.lastIndexOf(".")); + } + + private String getFilePath() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); + String format = sdf.format(new Date()); + return "/home/webdata/files/" + format + "/"; + } + + private String getFilePaths() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); + 
String format = sdf.format(new Date()); + return "/files/" + format + "/"; + } + + private String getFileNo() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); + String format = sdf.format(new Date()); + DecimalFormat df = new DecimalFormat(FILES_NO_FORMAT); + String key = FILES_PREFIX + format; + Long incr = cacheService.incr(key); + String avatorNo = FILES_PREFIX + df.format(incr); + return avatorNo; + } + + }).build(); + + runData = crawler.getRunData(); + int page = 1; + int pageSize = 100; + while(true) { + Page questionPage = new Page(page , pageSize); + List questionList = crawlerQuestionMapper.queryPageOrderUid(questionPage); + for(int i=0 ;i - + \ No newline at end of file diff --git a/tamguo/src/main/resources/templates/index.html b/tamguo/src/main/resources/templates/index.html index 64b13a0..4c127f2 100644 --- a/tamguo/src/main/resources/templates/index.html +++ b/tamguo/src/main/resources/templates/index.html @@ -106,14 +106,14 @@

试卷资源 一考知底,高分必刷,全面提分 当前位置: - 更多地区 > + 更多地区 >