main
tamguo 7 years ago
parent 7ee525e183
commit da866c5dcc

@ -0,0 +1,22 @@
package com.tamguo.model.vo;
import java.util.List;
import com.xuxueli.crawler.annotation.PageFieldSelect;
import com.xuxueli.crawler.annotation.PageSelect;
/**
 * Page value object mapped onto the {@code body} element of a crawled page.
 *
 * <p>Its single field is annotated with the CSS query {@code .course-item}.
 * NOTE(review): presumably the crawler library fills the field with one string
 * per matching element — confirm against the xxl-crawler documentation.
 */
@PageSelect(cssQuery = "body")
public class CourseVo {

    /** Values selected with the CSS query {@code .course-item}. */
    @PageFieldSelect(cssQuery = ".course-item")
    private List<String> name;

    /**
     * Returns the stored list.
     *
     * @return the list reference exactly as stored; {@code null} when it has
     *         never been assigned, since the field has no initializer
     */
    public List<String> getName() {
        return this.name;
    }

    /**
     * Replaces the stored list.
     *
     * @param name the list to store; kept by reference, not copied
     */
    public void setName(List<String> name) {
        this.name = name;
    }
}

@ -10,7 +10,10 @@ public class SubjectVo {
@PageFieldSelect(cssQuery = ".all-list-li") @PageFieldSelect(cssQuery = ".all-list-li")
private List<String> name; private List<String> name;
@PageFieldSelect(cssQuery=".course-list-container .course-list .course-item")
private List<String> courseName;
public List<String> getName() { public List<String> getName() {
return name; return name;
} }
@ -19,5 +22,4 @@ public class SubjectVo {
this.name = name; this.name = name;
} }
} }

@ -1,10 +0,0 @@
package com.tamguo.service;
/**
 * Service contract exposing a single, parameterless crawl operation.
 */
public interface ICourseService {

    /**
     * Runs the course crawl.
     *
     * <p>NOTE(review): the behavior is inferred from the method name only; no
     * implementation is visible here — confirm against the implementing class.
     */
    void crawlerCourse();
}

@ -2,6 +2,7 @@ package com.tamguo.service.impl;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@ -11,6 +12,7 @@ import com.tamguo.model.vo.SubjectVo;
import com.tamguo.service.ISubjectService; import com.tamguo.service.ISubjectService;
import com.xuxueli.crawler.XxlCrawler; import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.parser.PageParser; import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.rundata.RunData;
@Service @Service
public class SubjectService implements ISubjectService{ public class SubjectService implements ISubjectService{
@ -18,12 +20,15 @@ public class SubjectService implements ISubjectService{
@Autowired @Autowired
SubjectMapper subjectMapper; SubjectMapper subjectMapper;
private RunData runData;
@Override @Override
public void crawlerSubject() { public void crawlerSubject() {
XxlCrawler crawler = new XxlCrawler.Builder() XxlCrawler crawler = new XxlCrawler.Builder()
.setUrls("https://tiku.baidu.com/") .setUrls("https://tiku.baidu.com/")
.setWhiteUrlRegexs("https://tiku\\.baidu\\.com/") .setWhiteUrlRegexs("https://tiku\\.baidu\\.com/")
.setPageParser(new PageParser<SubjectVo>() { .setPageParser(new PageParser<SubjectVo>() {
@Override @Override
public void parse(Document html, Element pageVoElement, SubjectVo subjectVo) { public void parse(Document html, Element pageVoElement, SubjectVo subjectVo) {
// 解析封装 PageVo 对象 // 解析封装 PageVo 对象
@ -40,11 +45,24 @@ public class SubjectService implements ISubjectService{
SubjectEntity entity = new SubjectEntity(); SubjectEntity entity = new SubjectEntity();
entity.setName(name); entity.setName(name);
subjectMapper.insert(entity); subjectMapper.insert(entity);
// 获取Course
Elements elements = pageVoElement.getElementsByClass("all-list-li");
for(int k=0 ; k<elements.size() ; k++) {
Element element = elements.get(k);
String url = element.child(0).attr("href");
runData.addUrl(url);
}
} }
} }
})
.build(); }).build();
runData = crawler.getRunData();
// 获取科目
crawler.start(true); crawler.start(true);
} }

Loading…
Cancel
Save