爬取数据

main
tamguo 7 years ago
parent c67654ac57
commit 0c1acdc345

@ -0,0 +1,488 @@
package com.tamguo.config.redis;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import redis.clients.jedis.ShardedJedis;
/**
 * Redis-backed cache facade.
 *
 * <p>Every public method prefixes the caller's key (see {@link #getPreKey(String)}),
 * borrows a {@link ShardedJedis} connection from {@link RedisXMLConfigure}, runs one
 * or two Redis commands and hands the connection back in a {@code finally} block.
 */
@Service("cacheService")
public class CacheService {
    private final static String REDIS_PRE_KEY = "TAMGUO:";
    private SerializeTranscoder objectSerialize = new ObjectUtil();
    @Autowired
    private RedisXMLConfigure redisXMLConfigure;

    /**
     * Reads a string value.
     *
     * @param key un-prefixed cache key
     * @return the value returned by Redis GET for the prefixed key
     */
    public String get(String key) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.get(fullKey);
        } finally {
            release(jedis);
        }
    }

    /**
     * Stores a string value without expiry.
     *
     * @param key   un-prefixed cache key
     * @param value value to store
     */
    public void set(String key, String value) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            jedis.set(fullKey, value);
        } finally {
            release(jedis);
        }
    }

    /**
     * Stores a string value with a time-to-live.
     *
     * @param key   un-prefixed cache key
     * @param value value to store
     * @param time  time-to-live in seconds
     */
    public void set(String key, String value, int time) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            if (time > 0) {
                // Single SETEX so the key can never be left behind without an expiry
                // (matches setObject(key, value, time)).
                jedis.setex(fullKey, time, value);
            } else {
                // Non-positive TTL: keep the historical SET + EXPIRE sequence.
                jedis.set(fullKey, value);
                jedis.expire(fullKey, time);
            }
        } finally {
            release(jedis);
        }
    }

    /**
     * Serializes an object and stores it without expiry. Failures are printed and swallowed.
     *
     * @param key   un-prefixed cache key
     * @param value object to serialize and store
     */
    public void setObject(String key, Object value) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            jedis.set(keyBytes(fullKey), objectSerialize.serialize(value));
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            release(jedis);
        }
    }

    /**
     * Serializes an object and stores it with a time-to-live. Failures are printed and swallowed.
     *
     * @param key   un-prefixed cache key
     * @param value object to serialize and store
     * @param time  time-to-live in seconds
     */
    public void setObject(String key, Object value, int time) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            jedis.setex(keyBytes(fullKey), time, objectSerialize.serialize(value));
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            release(jedis);
        }
    }

    /**
     * Reads and deserializes an object.
     *
     * @param key un-prefixed cache key
     * @return the deserialized object, or {@code null} when the key is absent or any error occurs
     */
    public Object getObject(String key) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            byte[] raw = jedis.get(keyBytes(fullKey));
            return raw == null ? null : objectSerialize.deserialize(raw);
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            release(jedis);
        }
        return null;
    }

    /**
     * Deletes an entry addressed by its binary key (the form used by the {@code *Object} methods).
     *
     * @param key un-prefixed cache key
     * @return {@code true} when Redis DEL reports exactly one removed key; {@code false} otherwise or on error
     */
    public boolean deleteObject(String key) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.del(keyBytes(fullKey)) == 1L;
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            release(jedis);
        }
        return false;
    }

    /**
     * Checks whether a key exists.
     *
     * @param key un-prefixed cache key
     * @return the result of Redis EXISTS, or {@code false} on error
     */
    public boolean isExist(String key) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.exists(fullKey);
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            release(jedis);
        }
        return false;
    }

    /**
     * Negation of {@link #isExist(String)}; note that it therefore returns {@code true} on error.
     *
     * @param key un-prefixed cache key
     */
    public boolean notExist(String key) {
        return !isExist(key);
    }

    /**
     * Deletes an entry addressed by its string key.
     *
     * @param key un-prefixed cache key
     * @return {@code true} when Redis DEL reports exactly one removed key; {@code false} otherwise or on error
     */
    public boolean delete(String key) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.del(fullKey) == 1;
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            release(jedis);
        }
        return false;
    }

    /**
     * Appends a value to the tail of a list (RPUSH).
     *
     * @param key   un-prefixed list key
     * @param value value to append
     * @return the number returned by RPUSH
     */
    public long putToListEnd(String key, String value) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.rpush(fullKey, value);
        } finally {
            release(jedis);
        }
    }

    /**
     * Adds a member to a sorted set and then sets an expiry on the whole set.
     *
     * @author zhangxin
     * @param key     un-prefixed sorted-set key
     * @param value   member to add
     * @param seconds time-to-live applied to the set
     * @param score   score of the member
     * @return the number returned by ZADD
     */
    public long addToSortedSetAndExpire(String key, String value, int seconds, double score) {
        return addToSortedSet(key, value, seconds, true, score);
    }

    /**
     * Increments the score of a sorted-set member (ZINCRBY).
     *
     * @author zhangxin
     * @param key   un-prefixed sorted-set key
     * @param value member whose score is incremented
     * @param score increment to apply
     * @return the score returned by ZINCRBY
     */
    public double addToSortedSetScore(String key, String value, double score) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            Double updated = jedis.zincrby(fullKey, score, value);
            return updated;
        } finally {
            release(jedis);
        }
    }

    /**
     * Reads the score of a sorted-set member (ZSCORE).
     *
     * @param key    un-prefixed sorted-set key
     * @param member member to look up
     * @return the member's score, or {@code 0.0} when ZSCORE returns nothing
     */
    public Double getMemberScore(String key, String member) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            Double score = jedis.zscore(fullKey, member);
            return score == null ? Double.valueOf(0) : score;
        } finally {
            release(jedis);
        }
    }

    /**
     * Adds a member to a sorted set without touching the set's expiry.
     *
     * @author zhangxin
     * @param key   un-prefixed sorted-set key
     * @param value member to add
     * @param score score of the member
     * @return the number returned by ZADD
     */
    public long addToSortedSet(String key, String value, double score) {
        return addToSortedSet(key, value, -1, false, score);
    }

    /**
     * Checks whether a member is present in a sorted set.
     *
     * @param key    un-prefixed sorted-set key
     * @param member member to look up
     * @return {@code true} when ZRANK returns a rank for the member
     */
    public boolean isExistSortedSet(String key, String member) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.zrank(fullKey, member) != null;
        } finally {
            release(jedis);
        }
    }

    /**
     * Removes members from a sorted set (ZREM).
     *
     * @param key    un-prefixed sorted-set key
     * @param member members to remove
     * @return {@code true} when ZREM reports at least one removed member
     */
    public boolean delSortedSetMember(String key, String[] member) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            Long removed = jedis.zrem(fullKey, member);
            return removed >= 1;
        } finally {
            release(jedis);
        }
    }

    /**
     * Adds a member to a sorted set; when {@code setExpire} is true, additionally
     * applies {@code seconds} as the expiry of the set (separate EXPIRE command).
     *
     * @return the number returned by ZADD
     */
    private long addToSortedSet(String key, String value, int seconds, boolean setExpire, double score) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            long added = jedis.zadd(fullKey, score, value);
            if (setExpire) {
                jedis.expire(fullKey, seconds);
            }
            return added;
        } finally {
            release(jedis);
        }
    }

    /**
     * Returns one page of a sorted set in descending score order (ZREVRANGE).
     *
     * @author zhangxin
     * @param key      un-prefixed sorted-set key
     * @param pageNo   1-based page number; values below 1 are treated as 1
     * @param pageSize page size; values below 1 are treated as 1
     * @return the members of the requested page, or {@code null} on error
     */
    public Set<String> getSortedSetByPage(String key, int pageNo, int pageSize) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            int page = Math.max(pageNo, 1);
            int size = Math.max(pageSize, 1);
            // Computed in long so large page numbers cannot overflow int.
            long start = (long) (page - 1) * size;
            jedis = redisXMLConfigure.getConnection();
            return jedis.zrevrange(fullKey, start, start + size - 1);
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            release(jedis);
        }
        return null;
    }

    /**
     * Pops the head of a list with a blocking BLPOP.
     *
     * <p>NOTE(review): the first BLPOP argument is 1000; Redis BLPOP timeouts are in
     * seconds, so this may hold a pooled connection far longer than intended — confirm.
     *
     * @param key un-prefixed list key
     * @return the list returned by BLPOP, or {@code null} when it is null or empty
     */
    public List<String> getListHead(String key) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            List<String> popped = jedis.blpop(1000, fullKey);
            if (popped == null || popped.size() == 0) {
                return null;
            }
            return popped;
        } finally {
            release(jedis);
        }
    }

    /**
     * Sets a single field of a hash (HSET).
     *
     * @param key   un-prefixed hash key
     * @param field map field
     * @param value map value
     * @return if the field already existed 0, otherwise 1
     */
    public Long hset(String key, String field, String value) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.hset(fullKey, field, value);
        } finally {
            release(jedis);
        }
    }

    /**
     * Sets several fields of a hash at once (HMSET).
     *
     * @param key    un-prefixed hash key
     * @param values field/value pairs to store
     * @return the status string returned by HMSET
     */
    public String hset(String key, Map<String, String> values) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.hmset(fullKey, values);
        } finally {
            release(jedis);
        }
    }

    /**
     * Sets several fields of a hash (HMSET) and then applies an expiry to the hash.
     *
     * @param key    un-prefixed hash key
     * @param values field/value pairs to store
     * @param time   time-to-live in seconds
     * @return the status string returned by HMSET
     */
    public String hset(String key, Map<String, String> values, int time) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            String status = jedis.hmset(fullKey, values);
            jedis.expire(fullKey, time);
            return status;
        } finally {
            release(jedis);
        }
    }

    /**
     * Reads a single field of a hash (HGET).
     *
     * @param key   un-prefixed hash key
     * @param field map field
     * @return the value returned by HGET
     */
    public String hget(String key, String field) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.hget(fullKey, field);
        } finally {
            release(jedis);
        }
    }

    /**
     * Decrements the numeric string stored at the key by 1 (DECR).
     *
     * @param key un-prefixed cache key
     * @return the value returned by DECR
     */
    public Long decr(String key) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.decr(fullKey);
        } finally {
            release(jedis);
        }
    }

    /**
     * Increments the numeric string stored at the key by 1 (INCR).
     *
     * @param key un-prefixed cache key
     * @return the value returned by INCR
     */
    public Long incr(String key) {
        final String fullKey = getPreKey(key);
        ShardedJedis jedis = null;
        try {
            jedis = redisXMLConfigure.getConnection();
            return jedis.incr(fullKey);
        } finally {
            release(jedis);
        }
    }

    /**
     * Prepends the configured key prefix, falling back to {@code REDIS_PRE_KEY}
     * when the configuration reports no prefix ({@code null}).
     */
    private String getPreKey(String key) {
        String configuredPrefix = redisXMLConfigure.getPreKey();
        return (configuredPrefix == null ? REDIS_PRE_KEY : configuredPrefix) + key;
    }

    /**
     * Encodes a prefixed key as UTF-8 instead of the platform default charset, so
     * binary-key commands do not depend on the JVM's default encoding.
     */
    private static byte[] keyBytes(String prefixedKey) {
        return prefixedKey.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Returns a connection to the pool. Skips {@code null} so that a failed
     * {@code getConnection()} is not masked by a NullPointerException thrown from
     * the {@code finally} block.
     */
    private void release(ShardedJedis jedis) {
        if (jedis != null) {
            redisXMLConfigure.closeConnection(jedis);
        }
    }
}

@ -0,0 +1,69 @@
package com.tamguo.config.redis;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
/**
 * {@link SerializeTranscoder} based on standard Java serialization
 * ({@link ObjectOutputStream} / {@link ObjectInputStream}).
 *
 * <p>NOTE(review): {@link #deserialize(byte[])} uses native Java deserialization;
 * it should only ever be fed bytes this application wrote itself.
 */
public class ObjectUtil extends SerializeTranscoder {

    /**
     * Serializes an object to a byte array.
     *
     * @param value object to serialize; must not be {@code null}
     * @return the serialized bytes
     * @throws NullPointerException     when {@code value} is {@code null}
     * @throws IllegalArgumentException when writing the object fails with an {@link IOException}
     */
    @Override
    public byte[] serialize(Object value) {
        if (value == null) {
            throw new NullPointerException("Can't serialize null");
        }
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (ObjectOutputStream out = new ObjectOutputStream(buffer)) {
            out.writeObject(value);
        } catch (IOException e) {
            throw new IllegalArgumentException("Non-serializable object", e);
        }
        // The object stream is closed (and therefore flushed) before the bytes are taken.
        return buffer.toByteArray();
    }

    /**
     * Deserializes a byte array produced by {@link #serialize(Object)}.
     *
     * @param in serialized bytes, may be {@code null}
     * @return the object read, or {@code null} when {@code in} is {@code null} or
     *         reading fails (the failure is printed, not rethrown)
     */
    @Override
    public Object deserialize(byte[] in) {
        if (in == null) {
            return null;
        }
        Object decoded = null;
        try (ObjectInputStream source = new ObjectInputStream(new ByteArrayInputStream(in))) {
            decoded = source.readObject();
        } catch (IOException | ClassNotFoundException e) {
            e.printStackTrace();
        }
        return decoded;
    }

    /**
     * Null-safe equality check.
     *
     * @return {@code true} when both references are identical (including both
     *         {@code null}) or {@code o1.equals(o2)} holds
     */
    public static boolean equals(Object o1, Object o2) {
        return o1 == o2 || (o1 != null && o2 != null && o1.equals(o2));
    }
}

@ -0,0 +1,47 @@
package com.tamguo.config.redis;
/**
 * Plain holder for the connection-pool settings: max active, max idle and max wait.
 */
public class PoolConfigBean {
    private int max_active;
    private int max_idle;
    private long max_wait;

    /** Creates a bean with all settings at their Java defaults (0). */
    public PoolConfigBean() {
    }

    /**
     * @param max_active maximum number of active connections
     * @param max_idle   maximum number of idle connections
     * @param max_wait   maximum wait
     */
    public PoolConfigBean(int max_active, int max_idle, long max_wait) {
        this.max_active = max_active;
        this.max_idle = max_idle;
        this.max_wait = max_wait;
    }

    public int getMax_active() {
        return this.max_active;
    }

    public void setMax_active(int max_active) {
        this.max_active = max_active;
    }

    public int getMax_idle() {
        return this.max_idle;
    }

    public void setMax_idle(int max_idle) {
        this.max_idle = max_idle;
    }

    public long getMax_wait() {
        return this.max_wait;
    }

    public void setMax_wait(long max_wait) {
        this.max_wait = max_wait;
    }

    /** Renders all three settings for logging. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("PoolConfig [max_active=");
        text.append(max_active);
        text.append(", max_idle=").append(max_idle);
        text.append(", max_wait=").append(max_wait);
        text.append("]");
        return text.toString();
    }
}

@ -0,0 +1,53 @@
package com.tamguo.config.redis;
/**
 * Connection settings of a single Redis server node: address, port and optional password.
 */
public class RedisServerNodeBean {
    private String ip;
    private int port;
    private boolean needAuth;
    private String auth;

    /**
     * @param ip       host or IP address of the node
     * @param port     TCP port of the node
     * @param needAuth whether the node requires a password
     * @param auth     the password, may be {@code null}
     */
    public RedisServerNodeBean(String ip, int port, boolean needAuth, String auth) {
        this.ip = ip;
        this.port = port;
        this.needAuth = needAuth;
        this.auth = auth;
    }

    public String getIp() {
        return ip;
    }

    public void setIp(String ip) {
        this.ip = ip;
    }

    public int getPort() {
        return port;
    }

    public void setPort(int port) {
        this.port = port;
    }

    public boolean isNeedAuth() {
        return needAuth;
    }

    public void setNeedAuth(boolean needAuth) {
        this.needAuth = needAuth;
    }

    public String getAuth() {
        return auth;
    }

    public void setAuth(String auth) {
        this.auth = auth;
    }

    /**
     * Renders the node for logging. The password is masked so it cannot leak into
     * log files; only whether one is set remains visible.
     */
    @Override
    public String toString() {
        String maskedAuth = (auth == null) ? "null" : "******";
        return "RedisServer [ip=" + ip + ", port=" + port + ", needAuth=" + needAuth + ", auth=" + maskedAuth + "]";
    }
}

@ -0,0 +1,173 @@
package com.tamguo.config.redis;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.stereotype.Component;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import redis.clients.jedis.JedisPoolConfig;
import redis.clients.jedis.JedisShardInfo;
import redis.clients.jedis.ShardedJedis;
import redis.clients.jedis.ShardedJedisPool;
/**
 * Reads {@code redis.xml} from the classpath at bean initialization and builds the
 * {@link ShardedJedisPool} from which connections are handed out.
 */
@Component("redisConfigure")
public class RedisXMLConfigure implements InitializingBean {
    private static final Logger logger = Logger.getLogger(RedisXMLConfigure.class);
    private static String preKey;
    private static Document document = null;
    private ShardedJedisPool shardedJedisPool;

    /**
     * Loads {@code redis.xml}, reads the key prefix, pool settings and server list
     * from it, and creates the sharded pool.
     *
     * @throws RuntimeException when the file is missing, cannot be parsed, or
     *                          contains no usable server definition
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        XMLConfiguration xmlConfiguration = new XMLConfiguration();
        String REDIS_PATH = "redis.xml";
        InputStream stream = null;
        try {
            stream = this.getClass().getClassLoader().getResourceAsStream(REDIS_PATH);
            if (stream == null) {
                logger.error("load redis.xml failed!!!" + REDIS_PATH);
                throw new RuntimeException("load redis.xml failed");
            }
            logger.info("Redis XML config path:" + REDIS_PATH);
            if (!xmlConfiguration.readConfigFile(stream)) {
                // Fail fast: everything below dereferences the parsed document.
                logger.error("load redis.xml failed!!!");
                throw new RuntimeException("load redis.xml failed");
            }
            document = xmlConfiguration.getDocument();
        } finally {
            if (null != stream)
                stream.close();
        }
        // initialize parameters
        initPreKey();
        PoolConfigBean pcb = initPoolConfigBean();
        List<JedisShardInfo> shards = getJedisShardInfo(initRedisServerNodeBeans());
        if (shards == null) {
            // The helpers have already logged the precise reason.
            throw new RuntimeException("config redis.xml error");
        }
        // build the shardedJedisPool
        JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
        // no maxActive config
        // NOTE(review): max_active is parsed from the XML but never applied to the pool — confirm intent.
        jedisPoolConfig.setMaxIdle(pcb.getMax_idle());
        jedisPoolConfig.setMaxWaitMillis(pcb.getMax_wait());
        shardedJedisPool = new ShardedJedisPool(jedisPoolConfig, shards);
    }

    /**
     * Reads the {@code maxActive}, {@code maxIdle} and {@code maxWait} attributes of
     * the first {@code <pool>} element. A missing attribute, or a missing element,
     * yields -1 for that setting.
     */
    private PoolConfigBean initPoolConfigBean() {
        Element poolElement = (Element) document.getElementsByTagName("pool").item(0);
        int max_active = -1;
        int max_idle = -1;
        long max_wait = -1;
        if (poolElement != null) {
            if (poolElement.hasAttribute("maxActive")) {
                max_active = Integer.parseInt(poolElement.getAttribute("maxActive"));
            }
            if (poolElement.hasAttribute("maxIdle")) {
                max_idle = Integer.parseInt(poolElement.getAttribute("maxIdle"));
            }
            if (poolElement.hasAttribute("maxWait")) {
                max_wait = Long.parseLong(poolElement.getAttribute("maxWait"));
            }
        }
        PoolConfigBean poolConfigBean = new PoolConfigBean();
        poolConfigBean.setMax_active(max_active);
        poolConfigBean.setMax_idle(max_idle);
        poolConfigBean.setMax_wait(max_wait);
        return poolConfigBean;
    }

    /**
     * Reads every {@code <server>} element into a {@link RedisServerNodeBean}.
     * {@code port} defaults to 6379 and {@code needAuth} to false.
     *
     * @return the configured servers, or {@code null} (after logging the reason)
     *         when there is no server, an {@code ip} is missing, {@code auth} is
     *         missing although required, or a port is not a number
     */
    private List<RedisServerNodeBean> initRedisServerNodeBeans() {
        NodeList serverElements = document.getElementsByTagName("server");
        int serverLen = serverElements.getLength();
        if (serverLen < 1) {
            logger.error("redis.servers.server must have one !");
            return null;
        }
        List<RedisServerNodeBean> redisServers = new ArrayList<RedisServerNodeBean>();
        for (int i = 0; i < serverLen; i++) {
            Element serverElement = (Element) serverElements.item(i);
            if (!serverElement.hasAttribute("ip")) {
                logger.error("redis.servers.server.ip must be supplied!");
                return null;
            }
            String ip = serverElement.getAttribute("ip");
            String portText = serverElement.hasAttribute("port") ? serverElement.getAttribute("port") : "6379";
            String needAuthText = serverElement.hasAttribute("needAuth") ? serverElement.getAttribute("needAuth") : "false";
            // Parse once and use the same value for the check and for the bean, so
            // e.g. needAuth="TRUE" cannot enable auth while skipping the password.
            boolean needAuth = Boolean.parseBoolean(needAuthText);
            String auth = null;
            if (needAuth) {
                if (!serverElement.hasAttribute("auth")) {
                    logger.error("since needAuth is true,auth must be supplied!");
                    return null;
                }
                auth = serverElement.getAttribute("auth");
            }
            try {
                redisServers.add(new RedisServerNodeBean(ip, Integer.parseInt(portText), needAuth, auth));
            } catch (NumberFormatException e) {
                logger.error("port must be a number!\n" + e.getMessage(), e);
                return null;
            }
        }
        return redisServers;
    }

    /**
     * Converts server beans into {@link JedisShardInfo} entries, setting the
     * password on those that need authentication.
     *
     * @param redisServers servers read from the configuration
     * @return one shard per server, or {@code null} (after logging) when the list is null or empty
     */
    private List<JedisShardInfo> getJedisShardInfo(List<RedisServerNodeBean> redisServers) {
        if (redisServers == null) {
            logger.error("redisServers must not be empty null");
            return null;
        }
        if (redisServers.size() < 1) {
            logger.error("redisServers must not be empty ");
            return null;
        }
        List<JedisShardInfo> servers = new ArrayList<JedisShardInfo>(redisServers.size());
        for (RedisServerNodeBean redisServer : redisServers) {
            JedisShardInfo jedisShardInfo = new JedisShardInfo(redisServer.getIp(), redisServer.getPort());
            if (redisServer.isNeedAuth()) {
                jedisShardInfo.setPassword(redisServer.getAuth());
            }
            servers.add(jedisShardInfo);
        }
        return servers;
    }

    /**
     * Reads the key prefix from the {@code value} attribute of the first
     * {@code <preKey>} element; a missing attribute or element yields "".
     */
    private void initPreKey() {
        Element preKeyElement = (Element) document.getElementsByTagName("preKey").item(0);
        if (preKeyElement != null && preKeyElement.hasAttribute("value")) {
            preKey = preKeyElement.getAttribute("value");
        } else {
            preKey = "";
        }
    }

    /**
     * @return the key prefix read from the configuration
     */
    public String getPreKey() {
        return preKey;
    }

    /**
     * Borrows a connection from the sharded pool.
     *
     * @return a pooled connection; hand it back with {@link #closeConnection(ShardedJedis)}
     */
    public ShardedJedis getConnection() {
        return shardedJedisPool.getResource();
    }

    /**
     * Closes a connection obtained from {@link #getConnection()}.
     *
     * @param resource connection to close; {@code null} is ignored so callers can
     *                 invoke this unconditionally from a {@code finally} block
     */
    public void closeConnection(ShardedJedis resource) {
        if (resource != null) {
            resource.close();
        }
    }
}

@ -0,0 +1,20 @@
package com.tamguo.config.redis;
import java.io.Closeable;
/**
 * Base type for object/byte-array transcoders, with a quiet-close helper for subclasses.
 */
public abstract class SerializeTranscoder {

    /**
     * Converts an object into bytes.
     *
     * @param value object to encode
     * @return the encoded form
     */
    public abstract byte[] serialize(Object value);

    /**
     * Converts bytes back into an object.
     *
     * @param in encoded form
     * @return the decoded object
     */
    public abstract Object deserialize(byte[] in);

    /**
     * Closes the given resource; {@code null} is ignored and any exception raised
     * while closing is printed instead of propagated.
     *
     * @param closeable resource to close, may be {@code null}
     */
    public void close(Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

@ -0,0 +1,53 @@
package com.tamguo.config.redis;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
/**
 * Parses an XML configuration into a DOM {@link Document} and keeps the result
 * of the most recent read.
 */
public class XMLConfiguration {
    private Document document = null;

    /**
     * Parses the XML found at the given location.
     *
     * @param configFilename location handed to {@code DocumentBuilder.parse(String)}
     * @return {@code true} when parsing succeeded; {@code false} otherwise (the
     *         failure is printed and {@link #getDocument()} then returns {@code null})
     */
    public boolean readConfigFile(String configFilename) {
        // Forget any earlier result so a failed read cannot report a stale document.
        document = null;
        try {
            DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            document = db.parse(configFilename);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return document != null;
    }

    /**
     * Parses the XML read from the given stream. The stream is not closed here.
     *
     * @param stream source of the XML
     * @return {@code true} when parsing succeeded; {@code false} otherwise (the
     *         failure is printed and {@link #getDocument()} then returns {@code null})
     */
    public boolean readConfigFile(InputStream stream) {
        // Forget any earlier result so a failed read cannot report a stale document.
        document = null;
        try {
            DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            document = db.parse(stream);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return document != null;
    }

    /**
     * @return the document produced by the last successful read, or {@code null}
     */
    public Document getDocument() {
        return document;
    }

    /**
     * Replaces the held document.
     *
     * @param document document to hold
     */
    protected void setDocument(Document document) {
        this.document = document;
    }
}

@ -1,9 +1,13 @@
package com.tamguo.dao; package com.tamguo.dao;
import java.util.List;
import com.baomidou.mybatisplus.plugins.pagination.Pagination;
import com.tamguo.config.dao.SuperMapper; import com.tamguo.config.dao.SuperMapper;
import com.tamguo.model.CrawlerQuestionEntity; import com.tamguo.model.CrawlerQuestionEntity;
public interface CrawlerQuestionMapper extends SuperMapper<CrawlerQuestionEntity>{ public interface CrawlerQuestionMapper extends SuperMapper<CrawlerQuestionEntity>{
List<CrawlerQuestionEntity> queryPageOrderUid(Pagination page);
} }

@ -0,0 +1,8 @@
package com.tamguo.dao;
import com.tamguo.config.dao.SuperMapper;
import com.tamguo.model.QuestionEntity;
/**
 * Mapper for {@link QuestionEntity}; declares no methods of its own beyond
 * what it inherits from {@link SuperMapper}.
 */
public interface QuestionMapper extends SuperMapper<QuestionEntity>{
}

@ -0,0 +1,139 @@
package com.tamguo.model;
import java.io.Serializable;
import com.baomidou.mybatisplus.annotations.TableName;
import com.tamguo.config.dao.SuperEntity;
/**
 * The persistent class for the tiku_question database table.
 *
 * <p>All attributes are plain strings exposed through getters and setters.
 */
@TableName(value="tiku_question")
public class QuestionEntity extends SuperEntity<QuestionEntity> implements Serializable {
    private static final long serialVersionUID = 1L;

    private String analysis;
    private String paperId;
    private String answer;
    private String chapterId;
    private String questionType;
    private String content;
    private String subjectId;
    private String courseId;
    private String reviewPoint;
    private String year;
    private String score;
    private String auditStatus;

    public QuestionEntity() {
    }

    public String getAnalysis() {
        return this.analysis;
    }

    public void setAnalysis(String analysis) {
        this.analysis = analysis;
    }

    public String getPaperId() {
        return this.paperId;
    }

    public void setPaperId(String paperId) {
        this.paperId = paperId;
    }

    public String getAnswer() {
        return this.answer;
    }

    public void setAnswer(String answer) {
        this.answer = answer;
    }

    public String getChapterId() {
        return this.chapterId;
    }

    public void setChapterId(String chapterId) {
        this.chapterId = chapterId;
    }

    public String getQuestionType() {
        return this.questionType;
    }

    public void setQuestionType(String questionType) {
        this.questionType = questionType;
    }

    public String getContent() {
        return this.content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getSubjectId() {
        return this.subjectId;
    }

    public void setSubjectId(String subjectId) {
        this.subjectId = subjectId;
    }

    public String getCourseId() {
        return this.courseId;
    }

    public void setCourseId(String courseId) {
        this.courseId = courseId;
    }

    public String getReviewPoint() {
        return this.reviewPoint;
    }

    public void setReviewPoint(String reviewPoint) {
        this.reviewPoint = reviewPoint;
    }

    public String getYear() {
        return this.year;
    }

    public void setYear(String year) {
        this.year = year;
    }

    public String getScore() {
        return this.score;
    }

    public void setScore(String score) {
        this.score = score;
    }

    public String getAuditStatus() {
        return this.auditStatus;
    }

    public void setAuditStatus(String auditStatus) {
        this.auditStatus = auditStatus;
    }
}

@ -0,0 +1,110 @@
package com.tamguo.model.vo;
import java.util.List;
import com.xuxueli.crawler.annotation.PageFieldSelect;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
/**
 * Page value object for a single crawled question. Each field is filled from
 * the page by the CSS query given in its {@link PageFieldSelect} annotation.
 */
public class QuestionVo {

    // Data scraped for a single question.

    /** Question body, selected as HTML. */
    @PageFieldSelect(cssQuery=".question-box-inner .questem-inner", selectType = XxlCrawlerConf.SelectType.HTML)
    private String content;

    /** Answer, selected as HTML. */
    @PageFieldSelect(cssQuery=".exam-answer-content", selectType = XxlCrawlerConf.SelectType.HTML)
    private String answer;

    /** {@code abs:src} attribute of every image inside the answer. */
    @PageFieldSelect(cssQuery = ".exam-answer-content img", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:src")
    private List<String> answerImages;

    /** Analysis, selected as HTML. */
    @PageFieldSelect(cssQuery=".exam-analysis .exam-analysis-content", selectType = XxlCrawlerConf.SelectType.HTML)
    private String analysis;

    /** {@code abs:src} attribute of every image inside the analysis. */
    @PageFieldSelect(cssQuery = ".exam-analysis .exam-analysis-content img", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:src")
    private List<String> analysisImages;

    /** Text of the first span of the question title. */
    @PageFieldSelect(cssQuery=".que-title span:eq(0)",selectType = XxlCrawlerConf.SelectType.TEXT)
    private String questionType;

    /** Text of the second span of the question title. */
    @PageFieldSelect(cssQuery=".que-title span:eq(1)",selectType = XxlCrawlerConf.SelectType.TEXT)
    private String score;

    /** Text of the third span of the question title. */
    @PageFieldSelect(cssQuery=".que-title span:eq(2)",selectType = XxlCrawlerConf.SelectType.TEXT)
    private String year;

    // NOTE(review): "point" and "point-item" carry no leading dot, so they match
    // element names rather than CSS classes — confirm against the crawled markup.
    /** Review points, selected as text. */
    @PageFieldSelect(cssQuery=".kpoint-contain point point-item",selectType = XxlCrawlerConf.SelectType.TEXT)
    private List<String> reviewPoint;

    public String getContent() {
        return this.content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getAnswer() {
        return this.answer;
    }

    public void setAnswer(String answer) {
        this.answer = answer;
    }

    public List<String> getAnswerImages() {
        return this.answerImages;
    }

    public void setAnswerImages(List<String> answerImages) {
        this.answerImages = answerImages;
    }

    public String getAnalysis() {
        return this.analysis;
    }

    public void setAnalysis(String analysis) {
        this.analysis = analysis;
    }

    public List<String> getAnalysisImages() {
        return this.analysisImages;
    }

    public void setAnalysisImages(List<String> analysisImages) {
        this.analysisImages = analysisImages;
    }

    public String getQuestionType() {
        return this.questionType;
    }

    public void setQuestionType(String questionType) {
        this.questionType = questionType;
    }

    public String getScore() {
        return this.score;
    }

    public void setScore(String score) {
        this.score = score;
    }

    public String getYear() {
        return this.year;
    }

    public void setYear(String year) {
        this.year = year;
    }

    public List<String> getReviewPoint() {
        return this.reviewPoint;
    }

    public void setReviewPoint(List<String> reviewPoint) {
        this.reviewPoint = reviewPoint;
    }
}

@ -0,0 +1,8 @@
package com.tamguo.service;
public interface IQuestionService {
/**
* Crawls question pages. The implementation in QuestionService queues every
* URL stored in the crawler-question table, parses each page and inserts the
* resulting question record.
*/
void crawlerQuestion();
}

@ -0,0 +1,171 @@
package com.tamguo.service.impl;
import java.io.File;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import com.baomidou.mybatisplus.plugins.Page;
import com.tamguo.config.redis.CacheService;
import com.tamguo.dao.ChapterMapper;
import com.tamguo.dao.CourseMapper;
import com.tamguo.dao.CrawlerQuestionMapper;
import com.tamguo.dao.QuestionMapper;
import com.tamguo.dao.SubjectMapper;
import com.tamguo.model.ChapterEntity;
import com.tamguo.model.CourseEntity;
import com.tamguo.model.CrawlerQuestionEntity;
import com.tamguo.model.QuestionEntity;
import com.tamguo.model.SubjectEntity;
import com.tamguo.model.vo.QuestionVo;
import com.tamguo.service.IQuestionService;
import com.xuxueli.crawler.XxlCrawler;
import com.xuxueli.crawler.conf.XxlCrawlerConf;
import com.xuxueli.crawler.parser.PageParser;
import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader;
import com.xuxueli.crawler.rundata.RunData;
import com.xuxueli.crawler.util.FileUtil;
/**
 * Crawls the question pages listed in the crawler-question table and stores
 * each parsed question, downloading the images embedded in answers and analyses.
 */
@Service
public class QuestionService implements IQuestionService{
    @Autowired
    QuestionMapper questionMapper;
    @Autowired
    CrawlerQuestionMapper crawlerQuestionMapper;
    @Autowired
    ChapterMapper chapterMapper;
    @Autowired
    CourseMapper courseMapper;
    @Autowired
    SubjectMapper subjectMapper;
    @Autowired
    CacheService cacheService;
    private static final String FILES_NO_FORMAT = "00000";
    private static final String FILES_PREFIX = "FP";
    private static final String DOMAIN = "http://static.tamguo.com";
    private RunData runData;

    /**
     * Builds a crawler, queues every question URL found in the crawler-question
     * table (read page by page), and starts the crawl. Each crawled page is
     * turned into a {@link QuestionEntity} and inserted.
     */
    @Override
    public void crawlerQuestion() {
        XxlCrawler crawler = new XxlCrawler.Builder()
                .setAllowSpread(false)
                .setThreadCount(10)
                .setPageLoader(new HtmlUnitPageLoader())
                .setPageParser(new PageParser<QuestionVo>() {
                    @Override
                    public void parse(Document html, Element pageVoElement, QuestionVo questionVo) {
                        saveQuestion(html.baseUri(), questionVo);
                    }
                }).build();
        runData = crawler.getRunData();
        int page = 1;
        final int pageSize = 100;
        while (true) {
            Page<CrawlerQuestionEntity> questionPage = new Page<CrawlerQuestionEntity>(page, pageSize);
            List<CrawlerQuestionEntity> questionList = crawlerQuestionMapper.queryPageOrderUid(questionPage);
            for (CrawlerQuestionEntity crawlerQuestion : questionList) {
                runData.addUrl(crawlerQuestion.getQuestionUrl());
            }
            page++;
            // A short page means the table is exhausted.
            if (questionList.size() < pageSize) {
                break;
            }
        }
        crawler.start(true);
    }

    /**
     * Converts one crawled page into a question row and inserts it. Pages whose
     * crawler record, chapter, course or subject cannot be found are skipped.
     */
    private void saveQuestion(String questionUrl, QuestionVo questionVo) {
        CrawlerQuestionEntity condition = new CrawlerQuestionEntity();
        condition.setQuestionUrl(questionUrl);
        CrawlerQuestionEntity crawlerQuestion = crawlerQuestionMapper.selectOne(condition);
        if (crawlerQuestion == null) {
            System.out.println("skip question, no crawler record for " + questionUrl);
            return;
        }
        ChapterEntity chapter = chapterMapper.selectById(crawlerQuestion.getChapterId());
        if (chapter == null) {
            System.out.println("skip question, chapter not found for " + questionUrl);
            return;
        }
        CourseEntity course = courseMapper.selectById(chapter.getCourseId());
        if (course == null) {
            System.out.println("skip question, course not found for " + questionUrl);
            return;
        }
        SubjectEntity subject = subjectMapper.selectById(course.getSubjectId());
        if (subject == null) {
            System.out.println("skip question, subject not found for " + questionUrl);
            return;
        }

        // Rewrite image URLs BEFORE copying the HTML into the entity; otherwise
        // the rewritten HTML would never reach the database.
        questionVo.setAnswer(localizeImages(questionVo.getAnswer(), questionVo.getAnswerImages()));
        questionVo.setAnalysis(localizeImages(questionVo.getAnalysis(), questionVo.getAnalysisImages()));

        QuestionEntity question = new QuestionEntity();
        question.setAnalysis(questionVo.getAnalysis());
        question.setAnswer(questionVo.getAnswer());
        question.setAuditStatus("1");
        question.setChapterId(chapter.getUid());
        question.setContent(questionVo.getContent());
        question.setCourseId(course.getUid());
        question.setPaperId(null);
        question.setQuestionType("1");
        if (questionVo.getReviewPoint() != null && questionVo.getReviewPoint().size() > 0) {
            question.setReviewPoint(StringUtils.join(questionVo.getReviewPoint().toArray(), ","));
        }
        question.setScore(questionVo.getScore());
        question.setSubjectId(subject.getUid());
        question.setYear(questionVo.getYear());
        questionMapper.insert(question);
    }

    /**
     * Downloads every distinct image and replaces its URL inside the HTML with
     * the URL under {@code DOMAIN}. An image whose download fails keeps its
     * original URL, so the HTML never points at a file that was not stored.
     *
     * <p>NOTE(review): the image list holds absolute URLs ({@code abs:src}); if
     * the HTML uses relative {@code src} values the replacement will not match — confirm.
     *
     * @return the rewritten HTML, or {@code html} unchanged when there is nothing to do
     */
    private String localizeImages(String html, List<String> images) {
        if (html == null || images == null || images.isEmpty()) {
            return html;
        }
        // Resolve the month directory once so all images of one question share it.
        String filePath = getFilePath();
        File dir = new File(filePath);
        if (!dir.exists()) {
            dir.mkdirs();
        }
        String rewritten = html;
        Set<String> distinctImages = new HashSet<String>(images);
        for (String img : distinctImages) {
            if (img == null) {
                continue;
            }
            // download the image file
            String fileName = getFileName(img);
            boolean ret = FileUtil.downFile(img, XxlCrawlerConf.TIMEOUT_MILLIS_DEFAULT, filePath, fileName);
            System.out.println("down images " + (ret?"success":"fail") + "" + img);
            // replace the URL
            if (ret) {
                rewritten = rewritten.replace(img, DOMAIN + filePath + fileName);
            }
        }
        return rewritten;
    }

    /**
     * Builds the local file name: the next file number plus the extension of the
     * image URL. A query string is ignored, and a URL whose last path segment has
     * no dot yields a name without extension.
     */
    private String getFileName(String img) {
        String path = img;
        int queryStart = path.indexOf('?');
        if (queryStart >= 0) {
            path = path.substring(0, queryStart);
        }
        int dot = path.lastIndexOf('.');
        String extension = dot > path.lastIndexOf('/') ? path.substring(dot) : "";
        return getFileNo() + extension;
    }

    /** Returns the image directory of the current month, {@code /images/question/yyyyMM/}. */
    private String getFilePath() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMM");
        String format = sdf.format(new Date());
        return "/images/question/" + format + "/";
    }

    /**
     * Returns the next file number: the prefix followed by a counter kept in the
     * cache under a per-month key and padded to at least five digits.
     */
    private String getFileNo() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMM");
        String format = sdf.format(new Date());
        DecimalFormat df = new DecimalFormat(FILES_NO_FORMAT);
        String key = FILES_PREFIX + format;
        Long incr = cacheService.incr(key);
        return FILES_PREFIX + df.format(incr);
    }
}

@ -2,5 +2,7 @@
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.tamguo.dao.CrawlerQuestionMapper">
<select id="queryPageOrderUid" resultType="CrawlerQuestionEntity">
select * from crawler_question order by uid desc
</select>
</mapper>

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.tamguo.dao.QuestionMapper">
</mapper>

@ -0,0 +1,24 @@
package com.tamguo;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
import com.tamguo.service.IQuestionService;
/**
 * Spring Boot test used as a manual entry point for the question crawler.
 */
@RunWith(SpringRunner.class)
@SpringBootTest
public class QuestionCrawler {
@Autowired
IQuestionService iQuestionService;
/**
 * Runs the question crawl; despite the method name it calls crawlerQuestion().
 */
@Test
public void crawlerSubject() throws Exception {
iQuestionService.crawlerQuestion();
}
}
Loading…
Cancel
Save