导入数据

main
tamguo 7 years ago
parent 6598526d91
commit 623cc0e84a

@ -14,6 +14,7 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.alibaba.fastjson.JSONObject;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.dao.CourseMapper;
import com.tamguo.dao.CrawlerQuestionMapper;
@ -55,6 +56,8 @@ public class SubjectService implements ISubjectService{
XxlCrawler crawler = new XxlCrawler.Builder()
.setUrls("https://tiku.baidu.com/")
.setAllowSpread(false)
.setFailRetryCount(5)
.setThreadCount(20)
.setPageParser(new PageParser<SubjectVo>() {
@Override
@ -200,18 +203,24 @@ public class SubjectService implements ISubjectService{
// 剔除已经爬取的数据
urls.add(pageUrl);
for(String url : subjectVo.getChapterUrls()) {
if(url.equals(pageVoElement.getElementsByClass("main-inner").get(0).getElementsByClass("selected").get(0).getElementsByTag("a").attr("abs:href"))) {
continue;
}
if(!urls.contains(url)) {
runData.addUrl(url);
}
logger.info("url:{}" ,pageUrl );
logger.info("subjectVo:{}" ,JSONObject.toJSON(subjectVo) );
if(subjectVo.getChapterUrls() != null) {
for(String url : subjectVo.getChapterUrls()) {
if(url.equals(pageVoElement.getElementsByClass("main-inner").get(0).getElementsByClass("selected").get(0).getElementsByTag("a").attr("abs:href"))) {
continue;
}
if(!urls.contains(url)) {
runData.addUrl(url);
}
}
}
}
if(pageUrl.contains("https://tiku.baidu.com/tikupc/chapterdetail")) {
// 加入待解析题目列表
logger.info("url : {}" , pageUrl);
logger.info("subjectVo : {}" , JSONObject.toJSON(subjectVo));
for(String questionUrl : subjectVo.getQuestionUrls()) {
if(!questionUrls.contains(questionUrl)) {
// 处理URL

@ -15,7 +15,7 @@ spring.datasource.testOnReturn=false
spring.datasource.testWhileIdle=true
spring.datasource.timeBetweenEvictionRunsMillis=60000
spring.datasource.type=com.alibaba.druid.pool.DruidDataSource
spring.datasource.url=jdbc:mysql://47.100.175.14:3306/tamguo?useUnicode=true&characterEncoding=UTF-8&useSSL=false
spring.datasource.url=jdbc:mysql://47.100.175.14:3306/tiku?useUnicode=true&characterEncoding=UTF-8&useSSL=false
spring.datasource.username=root
spring.datasource.validationQuery=SELECT 1 FROM DUAL
@ -33,4 +33,10 @@ mybatis-plus.global-config.sql-injector=com.baomidou.mybatisplus.mapper.LogicSql
mybatis-plus.global-config.meta-object-handler=com.tamguo.config.dao.MyMetaObjectHandler
mybatis-plus.global-config.sql-parser-cache=true
mybatis-plus.configuration.map-underscore-to-camel-case=true
mybatis-plus.configuration.cache-enabled=false
mybatis-plus.configuration.cache-enabled=false
logging.level.root=INFO
logging.level.org.springframework.web=INFO
logging.file=/home/webdata/log/tamguo-crawler.log
logging.pattern.console=%d{yyyy/MM/dd-HH:mm:ss} [%thread] %-5level %logger- %msg%n
logging.pattern.file=%d{yyyy/MM/dd-HH:mm} [%thread] %-5level %logger- %msg%n
Loading…
Cancel
Save