From 0c1acdc34568e3309144a9e3644c063bc37a4afc Mon Sep 17 00:00:00 2001 From: tamguo Date: Fri, 6 Jul 2018 09:14:37 +0800 Subject: [PATCH] =?UTF-8?q?=E7=88=AC=E5=8F=96=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/tamguo/config/redis/CacheService.java | 488 ++++++++++++++++++ .../com/tamguo/config/redis/ObjectUtil.java | 69 +++ .../tamguo/config/redis/PoolConfigBean.java | 47 ++ .../config/redis/RedisServerNodeBean.java | 53 ++ .../config/redis/RedisXMLConfigure.java | 173 +++++++ .../config/redis/SerializeTranscoder.java | 20 + .../tamguo/config/redis/XMLConfiguration.java | 53 ++ .../com/tamguo/dao/CrawlerQuestionMapper.java | 6 +- .../java/com/tamguo/dao/QuestionMapper.java | 8 + .../java/com/tamguo/model/QuestionEntity.java | 139 +++++ .../java/com/tamguo/model/vo/QuestionVo.java | 110 ++++ .../com/tamguo/service/IQuestionService.java | 8 + .../tamguo/service/impl/QuestionService.java | 171 ++++++ .../mappers/CrawlerQuestionMapper.xml | 4 +- .../main/resources/mappers/QuestionMapper.xml | 5 + .../test/java/com/tamguo/QuestionCrawler.java | 24 + 16 files changed, 1376 insertions(+), 2 deletions(-) create mode 100644 tamguo-crawler/src/main/java/com/tamguo/config/redis/CacheService.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/config/redis/ObjectUtil.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/config/redis/PoolConfigBean.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/config/redis/RedisServerNodeBean.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/config/redis/RedisXMLConfigure.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/config/redis/SerializeTranscoder.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/config/redis/XMLConfiguration.java create mode 100644 tamguo-crawler/src/main/java/com/tamguo/dao/QuestionMapper.java create mode 100644 
tamguo-crawler/src/main/java/com/tamguo/model/QuestionEntity.java
 create mode 100644 tamguo-crawler/src/main/java/com/tamguo/model/vo/QuestionVo.java
 create mode 100644 tamguo-crawler/src/main/java/com/tamguo/service/IQuestionService.java
 create mode 100644 tamguo-crawler/src/main/java/com/tamguo/service/impl/QuestionService.java
 create mode 100644 tamguo-crawler/src/main/resources/mappers/QuestionMapper.xml
 create mode 100644 tamguo-crawler/src/test/java/com/tamguo/QuestionCrawler.java

diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/redis/CacheService.java b/tamguo-crawler/src/main/java/com/tamguo/config/redis/CacheService.java
new file mode 100644
index 0000000..8498300
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/config/redis/CacheService.java
@@ -0,0 +1,488 @@
+package com.tamguo.config.redis;
+
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import redis.clients.jedis.ShardedJedis;
+
+/**
+ * Cache facade over the sharded Redis pool exposed by {@link RedisXMLConfigure}.
+ *
+ * <p>Every public method prepends the configured key prefix (see
+ * {@link #getPreKey(String)}), borrows one connection from the pool and always
+ * returns it in a {@code finally} block.
+ */
+@Service("cacheService")
+public class CacheService {
+    private static final Logger logger = Logger.getLogger(CacheService.class);
+    private final static String REDIS_PRE_KEY = "TAMGUO:";
+    private SerializeTranscoder objectSerialize = new ObjectUtil();
+
+    @Autowired
+    private RedisXMLConfigure redisXMLConfigure;
+
+    /**
+     * Reads a string value.
+     *
+     * @param key un-prefixed cache key
+     * @return the stored value, or {@code null} when the key does not exist
+     */
+    public String get(String key) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.get(key);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Stores a string value without expiry.
+     *
+     * @param key   un-prefixed cache key
+     * @param value value to store
+     */
+    public void set(String key, String value) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            conn.set(key, value);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Stores a string value that expires after {@code time} seconds.
+     *
+     * @param key   un-prefixed cache key
+     * @param value value to store
+     * @param time  time to live in seconds
+     */
+    public void set(String key, String value, int time) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            // SETEX writes value and TTL in one command, so a failure between
+            // SET and EXPIRE can no longer leave a key that never expires.
+            conn.setex(key, time, value);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Stores a serializable object without expiry. Failures are logged, not thrown.
+     *
+     * @param key   un-prefixed cache key
+     * @param value serializable object
+     */
+    public void setObject(String key, Object value) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            conn.set(toBytes(key), objectSerialize.serialize(value));
+        } catch (Exception ex) {
+            logger.error("setObject failed for key " + key, ex);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Stores a serializable object that expires after {@code time} seconds.
+     * Failures are logged, not thrown.
+     *
+     * @param key   un-prefixed cache key
+     * @param value serializable object
+     * @param time  time to live in seconds
+     */
+    public void setObject(String key, Object value, int time) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            conn.setex(toBytes(key), time, objectSerialize.serialize(value));
+        } catch (Exception ex) {
+            logger.error("setObject failed for key " + key, ex);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Reads an object previously stored with {@code setObject}.
+     *
+     * @param key un-prefixed cache key
+     * @return the deserialized object, or {@code null} when the key is missing
+     *         or any error occurs (the error is logged)
+     */
+    public Object getObject(String key) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            byte[] obj = conn.get(toBytes(key));
+            if (null == obj) {
+                return null;
+            }
+            return objectSerialize.deserialize(obj);
+        } catch (Exception ex) {
+            logger.error("getObject failed for key " + key, ex);
+        } finally {
+            release(conn);
+        }
+        return null;
+    }
+
+    /**
+     * Deletes an object stored with {@code setObject}.
+     *
+     * @param key un-prefixed cache key
+     * @return {@code true} when exactly one key was removed; {@code false}
+     *         otherwise or on error (the error is logged)
+     */
+    public boolean deleteObject(String key) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.del(toBytes(key)) == 1L;
+        } catch (Exception ex) {
+            logger.error("deleteObject failed for key " + key, ex);
+        } finally {
+            release(conn);
+        }
+        return false;
+    }
+
+    /**
+     * Tests whether a key exists.
+     *
+     * @param key un-prefixed cache key
+     * @return {@code true} when the key exists; {@code false} otherwise or on
+     *         error (the error is logged)
+     */
+    public boolean isExist(String key) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.exists(key);
+        } catch (Exception ex) {
+            logger.error("isExist failed for key " + key, ex);
+        } finally {
+            release(conn);
+        }
+        return false;
+    }
+
+    /** Negation of {@link #isExist(String)}. */
+    public boolean notExist(String key) {
+        return !isExist(key);
+    }
+
+    /**
+     * Deletes a string key.
+     *
+     * @param key un-prefixed cache key
+     * @return {@code true} when exactly one key was removed; {@code false}
+     *         otherwise or on error (the error is logged)
+     */
+    public boolean delete(String key) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.del(key) == 1;
+        } catch (Exception ex) {
+            logger.error("delete failed for key " + key, ex);
+        } finally {
+            release(conn);
+        }
+        return false;
+    }
+
+    /**
+     * Appends {@code value} to the tail (right end) of the list stored at {@code key}.
+     *
+     * @param key   un-prefixed cache key
+     * @param value element to append
+     * @return the value returned by RPUSH
+     */
+    public long putToListEnd(String key, String value) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.rpush(key, value);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Adds {@code value} to the sorted set at {@code key} and sets the key's expiry.
+     *
+     * @param key     un-prefixed cache key
+     * @param value   member to add
+     * @param seconds time to live of the whole set, in seconds
+     * @param score   score of the member
+     * @return number of newly added members (updated members are not counted)
+     */
+    public long addToSortedSetAndExpire(String key, String value, int seconds, double score) {
+        return addToSortedSet(key, value, seconds, true, score);
+    }
+
+    /**
+     * Increments the score of {@code value} in the sorted set at {@code key}.
+     *
+     * @param key   un-prefixed cache key
+     * @param value member whose score is incremented
+     * @param score increment to apply
+     * @return the value returned by ZINCRBY
+     */
+    public double addToSortedSetScore(String key, String value, double score) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.zincrby(key, score, value);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Reads the score of a sorted-set member.
+     *
+     * @param key    un-prefixed cache key
+     * @param member member to look up
+     * @return the member's score, or {@code 0} when ZSCORE returns nothing
+     */
+    public Double getMemberScore(String key, String member) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            Double zscore = conn.zscore(key, member);
+            return zscore == null ? Double.valueOf(0) : zscore;
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Adds {@code value} to the sorted set at {@code key} without touching expiry.
+     *
+     * @param key   un-prefixed cache key
+     * @param value member to add
+     * @param score score of the member
+     * @return number of newly added members (updated members are not counted)
+     */
+    public long addToSortedSet(String key, String value, double score) {
+        return addToSortedSet(key, value, -1, false, score);
+    }
+
+    /**
+     * Tests whether {@code member} belongs to the sorted set at {@code key}.
+     *
+     * @return {@code true} when ZRANK finds the member
+     */
+    public boolean isExistSortedSet(String key, String member) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.zrank(key, member) != null;
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Removes members from the sorted set at {@code key}.
+     *
+     * @return {@code true} when at least one member was removed
+     */
+    public boolean delSortedSetMember(String key, String[] member) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            Long zrem = conn.zrem(key, member);
+            return zrem >= 1;
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Adds {@code value} to the sorted set at {@code key}; {@code seconds} is
+     * ignored unless {@code setExpire} is {@code true}.
+     *
+     * @return number of newly added members (updated members are not counted)
+     */
+    private long addToSortedSet(String key, String value, int seconds, boolean setExpire, double score) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            long addNum = conn.zadd(key, score, value);
+            if (setExpire) {
+                conn.expire(key, seconds);
+            }
+            return addNum;
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Returns one page of the sorted set at {@code key}, highest score first.
+     *
+     * @param key      un-prefixed cache key
+     * @param pageNo   1-based page number; values below 1 are treated as 1
+     * @param pageSize page size; values below 1 are treated as 1
+     * @return the members of the page, or {@code null} on error (the error is logged)
+     */
+    public Set<String> getSortedSetByPage(String key, int pageNo, int pageSize) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            if (pageNo < 1) {
+                pageNo = 1;
+            }
+            if (pageSize < 1) {
+                pageSize = 1;
+            }
+            // Widen before multiplying: the product can exceed Integer.MAX_VALUE.
+            long start = (long) (pageNo - 1) * pageSize;
+            conn = redisXMLConfigure.getConnection();
+            return conn.zrevrange(key, start, start + pageSize - 1);
+        } catch (Exception ex) {
+            logger.error("getSortedSetByPage failed for key " + key, ex);
+        } finally {
+            release(conn);
+        }
+        return null;
+    }
+
+    /**
+     * Pops the head of the list at {@code key} with BLPOP (timeout argument 1000).
+     *
+     * @return the BLPOP reply, or {@code null} when the reply is null or empty
+     */
+    public List<String> getListHead(String key) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            List<String> result = conn.blpop(1000, key);
+            if (null == result || result.size() == 0) {
+                return null;
+            }
+            return result;
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Sets one field of the hash at {@code key}.
+     *
+     * @param key   un-prefixed cache key
+     * @param field hash field
+     * @param value field value
+     * @return 0 if the field already existed, 1 otherwise
+     */
+    public Long hset(String key, String field, String value) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.hset(key, field, value);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Sets several fields of the hash at {@code key} with HMSET.
+     *
+     * @return the HMSET status reply
+     */
+    public String hset(String key, Map<String, String> values) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.hmset(key, values);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Sets several fields of the hash at {@code key} and then sets the key's
+     * expiry to {@code time} seconds.
+     *
+     * @return the HMSET status reply
+     */
+    public String hset(String key, Map<String, String> values, int time) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            String hmset = conn.hmset(key, values);
+            conn.expire(key, time);
+            return hmset;
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Reads one field of the hash at {@code key}.
+     *
+     * @param key   un-prefixed cache key
+     * @param field hash field
+     * @return the HGET reply
+     */
+    public String hget(String key, String field) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.hget(key, field);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Decrements the integer stored at {@code key} by one.
+     *
+     * @return the DECR reply
+     */
+    public Long decr(String key) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.decr(key);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Increments the integer stored at {@code key} by one.
+     *
+     * @return the INCR reply
+     */
+    public Long incr(String key) {
+        key = getPreKey(key);
+        ShardedJedis conn = null;
+        try {
+            conn = redisXMLConfigure.getConnection();
+            return conn.incr(key);
+        } finally {
+            release(conn);
+        }
+    }
+
+    /**
+     * Prepends the prefix from the Redis configuration, or {@code "TAMGUO:"}
+     * when the configuration supplies none.
+     */
+    private String getPreKey(String key) {
+        String temp_pre = redisXMLConfigure.getPreKey();
+        if (null == temp_pre) {
+            return REDIS_PRE_KEY + key;
+        }
+        return temp_pre + key;
+    }
+
+    /**
+     * Encodes a key as UTF-8 so binary commands address the same key on every
+     * platform instead of depending on the JVM default charset.
+     */
+    private static byte[] toBytes(String key) {
+        return key.getBytes(StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Returns a connection to the pool. {@code conn} is still null when
+     * {@code getConnection()} itself failed; skipping it keeps the original
+     * exception from being replaced by a NullPointerException in {@code finally}.
+     */
+    private void release(ShardedJedis conn) {
+        if (conn != null) {
+            redisXMLConfigure.closeConnection(conn);
+        }
+    }
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/redis/ObjectUtil.java b/tamguo-crawler/src/main/java/com/tamguo/config/redis/ObjectUtil.java
new file mode 100644
index 0000000..724149a
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/config/redis/ObjectUtil.java
@@ -0,0 +1,69 @@
+package com.tamguo.config.redis;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
+import org.apache.log4j.Logger;
+
+/**
+ * {@link SerializeTranscoder} backed by Java native serialization.
+ *
+ * <p>NOTE(review): {@link #deserialize(byte[])} uses {@code ObjectInputStream};
+ * it must only be fed bytes this application wrote itself.
+ */
+public class ObjectUtil extends SerializeTranscoder {
+    private static final Logger logger = Logger.getLogger(ObjectUtil.class);
+
+    /**
+     * Serializes {@code value} to a byte array.
+     *
+     * @throws NullPointerException     if {@code value} is null
+     * @throws IllegalArgumentException if the object cannot be serialized
+     */
+    @Override
+    public byte[] serialize(Object value) {
+        if (value == null) {
+            throw new NullPointerException("Can't serialize null");
+        }
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        // The object stream is closed (and therefore flushed) before the bytes are read.
+        try (ObjectOutputStream os = new ObjectOutputStream(bos)) {
+            os.writeObject(value);
+        } catch (IOException e) {
+            throw new IllegalArgumentException("Non-serializable object", e);
+        }
+        return bos.toByteArray();
+    }
+
+    /**
+     * Deserializes an object written by {@link #serialize(Object)}.
+     *
+     * @return the object, or {@code null} when {@code in} is null or cannot be
+     *         read (the failure is logged)
+     */
+    @Override
+    public Object deserialize(byte[] in) {
+        if (in == null) {
+            return null;
+        }
+        try (ObjectInputStream is = new ObjectInputStream(new ByteArrayInputStream(in))) {
+            return is.readObject();
+        } catch (IOException | ClassNotFoundException e) {
+            logger.error("deserialize failed", e);
+            return null;
+        }
+    }
+
+    /** Null-safe equality: two nulls are equal, one null is not equal to anything else. */
+    public static boolean equals(Object o1, Object o2) {
+        if (o1 == o2) {
+            return true;
+        }
+        return o1 != null && o2 != null && o1.equals(o2);
+    }
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/redis/PoolConfigBean.java b/tamguo-crawler/src/main/java/com/tamguo/config/redis/PoolConfigBean.java
new file mode 100644
index 0000000..c1b9bc1
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/config/redis/PoolConfigBean.java
@@ -0,0 +1,47 @@
+package com.tamguo.config.redis;
+
+/** Holder for the pool settings (max active, max idle, max wait) of the Redis configuration. */
+public class PoolConfigBean {
+    private int max_active;
+    private int max_idle;
+    private long max_wait;
+
+    public PoolConfigBean() {
+    }
+
+    public PoolConfigBean(int max_active, int max_idle, long max_wait) {
+        this.max_active = max_active;
+        this.max_idle = max_idle;
+        this.max_wait = max_wait;
+    }
+
+    public int getMax_active() {
+        return max_active;
+    }
+
+    public void setMax_active(int max_active) {
+        this.max_active = max_active;
+    }
+
+    public int getMax_idle() {
+        return max_idle;
+    }
+
+    public void setMax_idle(int max_idle) {
+        this.max_idle = max_idle;
+    }
+
+    public long getMax_wait() {
+        return max_wait;
+    }
+
+    public void setMax_wait(long max_wait) {
+        this.max_wait = max_wait;
+    }
+
+    @Override
+    public String toString() {
+        return "PoolConfig [max_active=" + max_active + ", max_idle=" + max_idle + ", max_wait=" + max_wait + "]";
+    }
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/redis/RedisServerNodeBean.java b/tamguo-crawler/src/main/java/com/tamguo/config/redis/RedisServerNodeBean.java
new file mode 100644
index 0000000..d8b4297
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/config/redis/RedisServerNodeBean.java
@@ -0,0 +1,53 @@
+package com.tamguo.config.redis;
+
+/** One Redis server entry: address, port and optional password. */
+public class RedisServerNodeBean {
+    private String ip;
+    private int port;
+    private boolean needAuth;
+    private String auth;
+
+    public RedisServerNodeBean(String ip, int port, boolean needAuth, String auth) {
+        this.ip = ip;
+        this.port = port;
+        this.needAuth = needAuth;
+        this.auth = auth;
+    }
+
+    public String getIp() {
+        return ip;
+    }
+
+    public void setIp(String ip) {
+        this.ip = ip;
+    }
+
+    public int getPort() {
+        return port;
+    }
+
+    public void setPort(int port) {
+        this.port = port;
+    }
+
+    public boolean isNeedAuth() {
+        return needAuth;
+    }
+
+    public void setNeedAuth(boolean needAuth) {
+        this.needAuth = needAuth;
+    }
+
+    public String getAuth() {
+        return auth;
+    }
+
+    public void setAuth(String auth) {
+        this.auth = auth;
+    }
+
+    /** Describes the node; the password is masked so it cannot leak into logs. */
+    @Override
+    public String toString() {
+        return "RedisServer [ip=" + ip + ", port=" + port + ", needAuth=" + needAuth + ", auth="
+                + (auth == null ? "null" : "******") + "]";
+    }
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/redis/RedisXMLConfigure.java b/tamguo-crawler/src/main/java/com/tamguo/config/redis/RedisXMLConfigure.java
new file mode 100644
index 0000000..e58c66a
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/config/redis/RedisXMLConfigure.java
@@ -0,0 +1,173 @@
+package com.tamguo.config.redis;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+import org.springframework.beans.factory.InitializingBean;
+import org.springframework.stereotype.Component;
+import 
org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+import redis.clients.jedis.JedisPoolConfig;
+import redis.clients.jedis.JedisShardInfo;
+import redis.clients.jedis.ShardedJedis;
+import redis.clients.jedis.ShardedJedisPool;
+
+/**
+ * Builds the sharded Jedis pool from the classpath resource {@code redis.xml}
+ * once Spring has created the bean, and hands out pooled connections.
+ */
+@Component("redisConfigure")
+public class RedisXMLConfigure implements InitializingBean {
+    private static final Logger logger = Logger.getLogger(RedisXMLConfigure.class);
+    private static final String REDIS_PATH = "redis.xml";
+    private static String preKey;
+    private static Document document = null;
+    private ShardedJedisPool shardedJedisPool;
+
+    /**
+     * Loads and parses {@code redis.xml}, then creates the connection pool.
+     *
+     * @throws RuntimeException if the resource is missing, cannot be parsed, or
+     *                          declares no usable server
+     */
+    @Override
+    public void afterPropertiesSet() throws Exception {
+        XMLConfiguration xmlConfiguration = new XMLConfiguration();
+        InputStream stream = null;
+        try {
+            stream = this.getClass().getClassLoader().getResourceAsStream(REDIS_PATH);
+            if (stream == null) {
+                logger.error("load redis.xml failed!!!" + REDIS_PATH);
+                throw new RuntimeException("load redis.xml failed");
+            }
+            logger.info("Redis XML config path:" + REDIS_PATH);
+            if (!xmlConfiguration.readConfigFile(stream)) {
+                logger.error("load redis.xml failed!!!");
+                // Continuing without a document would only fail later with a
+                // NullPointerException; stop with a clear cause instead.
+                throw new RuntimeException("parse redis.xml failed");
+            }
+            document = xmlConfiguration.getDocument();
+        } finally {
+            if (null != stream) {
+                stream.close();
+            }
+        }
+        // Read the settings.
+        initPreKey();
+        PoolConfigBean pcb = initPoolConfigBean();
+        List<JedisShardInfo> shards = getJedisShardInfo(initRedisServerNodeBeans());
+        if (shards == null) {
+            throw new RuntimeException("config redis.xml error");
+        }
+        // Build the pool.
+        JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
+        if (pcb.getMax_active() > 0) {
+            // "maxActive" is called maxTotal in the pool API used here.
+            jedisPoolConfig.setMaxTotal(pcb.getMax_active());
+        }
+        jedisPoolConfig.setMaxIdle(pcb.getMax_idle());
+        jedisPoolConfig.setMaxWaitMillis(pcb.getMax_wait());
+        shardedJedisPool = new ShardedJedisPool(jedisPoolConfig, shards);
+    }
+
+    /**
+     * Reads the {@code maxActive}, {@code maxIdle} and {@code maxWait}
+     * attributes of the first {@code <pool>} element; a missing attribute, or a
+     * missing element, yields -1.
+     */
+    private PoolConfigBean initPoolConfigBean() {
+        Element poolElement = (Element) document.getElementsByTagName("pool").item(0);
+        int max_active = -1;
+        int max_idle = -1;
+        long max_wait = -1;
+        if (poolElement != null) {
+            if (poolElement.hasAttribute("maxActive")) {
+                max_active = Integer.parseInt(poolElement.getAttribute("maxActive"));
+            }
+            if (poolElement.hasAttribute("maxIdle")) {
+                max_idle = Integer.parseInt(poolElement.getAttribute("maxIdle"));
+            }
+            if (poolElement.hasAttribute("maxWait")) {
+                max_wait = Long.parseLong(poolElement.getAttribute("maxWait"));
+            }
+        }
+        return new PoolConfigBean(max_active, max_idle, max_wait);
+    }
+
+    /**
+     * Parses every {@code <server>} element.
+     *
+     * @return the configured servers, or {@code null} when the list is empty or
+     *         any entry is invalid (the reason is logged)
+     */
+    private List<RedisServerNodeBean> initRedisServerNodeBeans() {
+        NodeList serverElements = document.getElementsByTagName("server");
+        int serverLen = serverElements.getLength();
+        if (serverLen < 1) {
+            logger.error("redis.servers.server must have one !");
+            return null;
+        }
+        List<RedisServerNodeBean> redisServers = new ArrayList<RedisServerNodeBean>(serverLen);
+        for (int i = 0; i < serverLen; i++) {
+            Element serverElement = (Element) serverElements.item(i);
+            if (!serverElement.hasAttribute("ip")) {
+                logger.error("redis.servers.server.ip must be supplied!");
+                return null;
+            }
+            String temp_ip = serverElement.getAttribute("ip");
+            String temp_port = serverElement.hasAttribute("port") ? serverElement.getAttribute("port") : "6379";
+            // Parse once so the "auth required" check and the stored flag
+            // cannot disagree on values such as "TRUE".
+            boolean needAuth = serverElement.hasAttribute("needAuth")
+                    && Boolean.parseBoolean(serverElement.getAttribute("needAuth"));
+            String temp_auth = null;
+            if (needAuth) {
+                if (!serverElement.hasAttribute("auth")) {
+                    logger.error("since needAuth is true,auth must be supplied!");
+                    return null;
+                }
+                temp_auth = serverElement.getAttribute("auth");
+            }
+            try {
+                redisServers.add(new RedisServerNodeBean(temp_ip, Integer.parseInt(temp_port), needAuth, temp_auth));
+            } catch (NumberFormatException e) {
+                logger.error("port must be a number!\n" + e.getMessage());
+                return null;
+            }
+        }
+        return redisServers;
+    }
+
+    /**
+     * Converts the parsed server entries into {@code JedisShardInfo} objects.
+     *
+     * @param redisServers parsed servers, may be null
+     * @return the shard descriptors, or {@code null} when the input is null or empty
+     */
+    private List<JedisShardInfo> getJedisShardInfo(List<RedisServerNodeBean> redisServers) {
+        if (redisServers == null) {
+            logger.error("redisServers must not be empty null");
+            return null;
+        }
+        if (redisServers.isEmpty()) {
+            logger.error("redisServers must not be empty ");
+            return null;
+        }
+        List<JedisShardInfo> servers = new ArrayList<JedisShardInfo>(redisServers.size());
+        for (RedisServerNodeBean redisServer : redisServers) {
+            JedisShardInfo jedisShardInfo = new JedisShardInfo(redisServer.getIp(), redisServer.getPort());
+            if (redisServer.isNeedAuth()) {
+                jedisShardInfo.setPassword(redisServer.getAuth());
+            }
+            servers.add(jedisShardInfo);
+        }
+        return servers;
+    }
+
+    /**
+     * Reads the key prefix from the first {@code <preKey>} element: its
+     * {@code value} attribute, or "" when the attribute is absent. When the
+     * element itself is absent the prefix stays null.
+     */
+    private void initPreKey() {
+        Element preKeyElement = (Element) document.getElementsByTagName("preKey").item(0);
+        if (preKeyElement == null) {
+            preKey = null;
+            return;
+        }
+        preKey = preKeyElement.hasAttribute("value") ? preKeyElement.getAttribute("value") : "";
+    }
+
+    /** @return the configured key prefix; null when redis.xml has no preKey element */
+    public String getPreKey() {
+        return preKey;
+    }
+
+    /**
+     * Borrows a connection from the pool.
+     *
+     * @return a pooled connection that must be given back with {@link #closeConnection(ShardedJedis)}
+     */
+    public ShardedJedis getConnection() {
+        return shardedJedisPool.getResource();
+    }
+
+    /**
+     * Gives a connection back to the pool.
+     *
+     * @param resource the connection; null is ignored because callers release
+     *                 in {@code finally} even when borrowing failed
+     */
+    public void closeConnection(ShardedJedis resource) {
+        if (resource != null) {
+            resource.close();
+        }
+    }
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/redis/SerializeTranscoder.java b/tamguo-crawler/src/main/java/com/tamguo/config/redis/SerializeTranscoder.java
new file mode 100644
index 0000000..63f928f
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/config/redis/SerializeTranscoder.java
@@ -0,0 +1,20 @@
+package com.tamguo.config.redis;
+
+import java.io.Closeable;
+
+/** Converts objects to and from the byte arrays stored in the cache. */
+public abstract class SerializeTranscoder {
+
+    /** Encodes {@code value} as bytes. */
+    public abstract byte[] serialize(Object value);
+
+    /** Decodes bytes produced by {@link #serialize(Object)}. */
+    public abstract Object deserialize(byte[] in);
+
+    /**
+     * Best-effort close: null is ignored and a failure is printed, never thrown.
+     */
+    public void close(Closeable closeable) {
+        if (closeable == null) {
+            return;
+        }
+        try {
+            closeable.close();
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/config/redis/XMLConfiguration.java b/tamguo-crawler/src/main/java/com/tamguo/config/redis/XMLConfiguration.java
new file mode 100644
index 0000000..43b5a56
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/config/redis/XMLConfiguration.java
@@ -0,0 +1,53 @@
+package com.tamguo.config.redis;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+
+import org.apache.log4j.Logger;
+import org.w3c.dom.Document;
+
+/** Parses an XML configuration into a DOM {@link Document}. */
+public class XMLConfiguration {
+    private static final Logger logger = Logger.getLogger(XMLConfiguration.class);
+    private Document document = null;
+
+    /**
+     * Parses the XML document at the given URI.
+     *
+     * @param configFilename URI passed to {@code DocumentBuilder.parse(String)}
+     * @return {@code true} when a document was produced; failures are logged
+     */
+    public boolean readConfigFile(String configFilename) {
+        // Forget any earlier result so a failed parse cannot report success
+        // with a stale document.
+        document = null;
+        try {
+            DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+            document = db.parse(configFilename);
+        } catch (IOException e) {
+            logger.error("cannot read XML config " + configFilename, e);
+        } catch (Exception e) {
+            logger.error("cannot parse XML config " + configFilename, e);
+        }
+        return document != null;
+    }
+
+    /**
+     * Parses the XML document read from {@code stream}; the caller closes the stream.
+     *
+     * @return {@code true} when a document was produced; failures are logged
+     */
+    public boolean readConfigFile(InputStream stream) {
+        document = null;
+        try {
+            DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+            document = db.parse(stream);
+        } catch (IOException e) {
+            logger.error("cannot read XML config stream", e);
+        } catch (Exception e) {
+            logger.error("cannot parse XML config stream", e);
+        }
+        return document != null;
+    }
+
+    /** @return the document of the last successful parse, or null */
+    public Document getDocument() {
+        return document;
+    }
+
+    protected void setDocument(Document document) {
+        this.document = document;
+    }
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/dao/CrawlerQuestionMapper.java b/tamguo-crawler/src/main/java/com/tamguo/dao/CrawlerQuestionMapper.java
index 1c645f6..d9c03fc 100644
--- a/tamguo-crawler/src/main/java/com/tamguo/dao/CrawlerQuestionMapper.java
+++ b/tamguo-crawler/src/main/java/com/tamguo/dao/CrawlerQuestionMapper.java
@@ -1,9 +1,13 @@
 package com.tamguo.dao;
 
+import java.util.List;
+
+import com.baomidou.mybatisplus.plugins.pagination.Pagination;
 import com.tamguo.config.dao.SuperMapper;
 import com.tamguo.model.CrawlerQuestionEntity;
 
 public interface CrawlerQuestionMapper extends SuperMapper<CrawlerQuestionEntity>{
-
+	/** Returns one page of crawler questions; the query lives in CrawlerQuestionMapper.xml. */
+	List<CrawlerQuestionEntity> queryPageOrderUid(Pagination page);
+	
 }
diff --git a/tamguo-crawler/src/main/java/com/tamguo/dao/QuestionMapper.java b/tamguo-crawler/src/main/java/com/tamguo/dao/QuestionMapper.java
new file mode 100644
index 0000000..632622b
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/dao/QuestionMapper.java
@@ -0,0 +1,8 @@
+package com.tamguo.dao;
+
+import com.tamguo.config.dao.SuperMapper;
+import com.tamguo.model.QuestionEntity;
+
+/** MyBatis mapper for {@link QuestionEntity}; adds nothing to the base mapper. */
+public interface QuestionMapper extends SuperMapper<QuestionEntity>{
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/QuestionEntity.java b/tamguo-crawler/src/main/java/com/tamguo/model/QuestionEntity.java
new file mode 100644
index 0000000..82ff201
--- /dev/null
+++ 
b/tamguo-crawler/src/main/java/com/tamguo/model/QuestionEntity.java
@@ -0,0 +1,139 @@
+package com.tamguo.model;
+
+import java.io.Serializable;
+import com.baomidou.mybatisplus.annotations.TableName;
+import com.tamguo.config.dao.SuperEntity;
+
+
+/**
+ * The persistent class for the tiku_question database table.
+ *
+ * <p>All columns are carried as strings; the identifier comes from the
+ * {@code SuperEntity} base class.
+ */
+@TableName(value="tiku_question")
+public class QuestionEntity extends SuperEntity implements Serializable {
+    private static final long serialVersionUID = 1L;
+
+    private String analysis;
+
+    private String paperId;
+
+    private String answer;
+
+    private String chapterId;
+
+    private String questionType;
+
+    private String content;
+
+    private String subjectId;
+
+    private String courseId;
+
+    private String reviewPoint;
+
+    private String year;
+
+    private String score;
+
+    private String auditStatus;
+
+    public QuestionEntity() {
+    }
+
+    public String getAnalysis() {
+        return this.analysis;
+    }
+
+    public void setAnalysis(String analysis) {
+        this.analysis = analysis;
+    }
+
+    public String getAnswer() {
+        return this.answer;
+    }
+
+    public void setAnswer(String answer) {
+        this.answer = answer;
+    }
+
+    public String getChapterId() {
+        return this.chapterId;
+    }
+
+    public void setChapterId(String chapterId) {
+        this.chapterId = chapterId;
+    }
+
+    public String getQuestionType() {
+        return this.questionType;
+    }
+
+    public void setQuestionType(String questionType) {
+        this.questionType = questionType;
+    }
+
+    public String getContent() {
+        return content;
+    }
+
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    public String getReviewPoint() {
+        return reviewPoint;
+    }
+
+    public void setReviewPoint(String reviewPoint) {
+        this.reviewPoint = reviewPoint;
+    }
+
+    public String getYear() {
+        return year;
+    }
+
+    public void setYear(String year) {
+        this.year = year;
+    }
+
+    public String getScore() {
+        return score;
+    }
+
+    public void setScore(String score) {
+        this.score = score;
+    }
+
+    public String getPaperId() {
+        return paperId;
+    }
+
+    public void setPaperId(String paperId) {
+        this.paperId = paperId;
+    }
+
+    public String getCourseId() {
+        return courseId;
+    }
+
+    public void setCourseId(String courseId) {
+        this.courseId = courseId;
+    }
+
+    public String getSubjectId() {
+        return subjectId;
+    }
+
+    public void setSubjectId(String subjectId) {
+        this.subjectId = subjectId;
+    }
+
+    public String getAuditStatus() {
+        return auditStatus;
+    }
+
+    public void setAuditStatus(String auditStatus) {
+        this.auditStatus = auditStatus;
+    }
+
+}
\ No newline at end of file
diff --git a/tamguo-crawler/src/main/java/com/tamguo/model/vo/QuestionVo.java b/tamguo-crawler/src/main/java/com/tamguo/model/vo/QuestionVo.java
new file mode 100644
index 0000000..60b3f5d
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/model/vo/QuestionVo.java
@@ -0,0 +1,110 @@
+package com.tamguo.model.vo;
+
+import java.util.List;
+
+import com.xuxueli.crawler.annotation.PageFieldSelect;
+import com.xuxueli.crawler.conf.XxlCrawlerConf;
+
+/**
+ * Fields scraped from one question page. Each field names the CSS query and
+ * selection type the crawler uses to fill it.
+ */
+public class QuestionVo {
+
+    // Question body (HTML).
+    @PageFieldSelect(cssQuery=".question-box-inner .questem-inner", selectType = XxlCrawlerConf.SelectType.HTML)
+    private String content;
+
+    // Answer (HTML).
+    @PageFieldSelect(cssQuery=".exam-answer-content", selectType = XxlCrawlerConf.SelectType.HTML)
+    private String answer;
+
+    // Absolute URLs of the images inside the answer.
+    @PageFieldSelect(cssQuery = ".exam-answer-content img", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:src")
+    private List<String> answerImages;
+
+    // Analysis / explanation (HTML).
+    @PageFieldSelect(cssQuery=".exam-analysis .exam-analysis-content", selectType = XxlCrawlerConf.SelectType.HTML)
+    private String analysis;
+
+    // Absolute URLs of the images inside the analysis.
+    @PageFieldSelect(cssQuery = ".exam-analysis .exam-analysis-content img", selectType = XxlCrawlerConf.SelectType.ATTR, selectVal = "abs:src")
+    private List<String> analysisImages;
+
+    @PageFieldSelect(cssQuery=".que-title span:eq(0)",selectType = XxlCrawlerConf.SelectType.TEXT)
+    private String questionType;
+
+    @PageFieldSelect(cssQuery=".que-title span:eq(1)",selectType = XxlCrawlerConf.SelectType.TEXT)
+    private String score;
+
+    @PageFieldSelect(cssQuery=".que-title span:eq(2)",selectType = XxlCrawlerConf.SelectType.TEXT)
+    private String year;
+
+    // NOTE(review): "point" and "point-item" have no leading dot, so they match
+    // element names rather than classes - confirm against the page markup.
+    @PageFieldSelect(cssQuery=".kpoint-contain point point-item",selectType = XxlCrawlerConf.SelectType.TEXT)
+    private List<String> reviewPoint;
+
+    public String getContent() {
+        return content;
+    }
+
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    public String getAnalysis() {
+        return analysis;
+    }
+
+    public void setAnalysis(String analysis) {
+        this.analysis = analysis;
+    }
+
+    public String getQuestionType() {
+        return questionType;
+    }
+
+    public void setQuestionType(String questionType) {
+        this.questionType = questionType;
+    }
+
+    public String getScore() {
+        return score;
+    }
+
+    public void setScore(String score) {
+        this.score = score;
+    }
+
+    public String getAnswer() {
+        return answer;
+    }
+
+    public void setAnswer(String answer) {
+        this.answer = answer;
+    }
+
+    public List<String> getReviewPoint() {
+        return reviewPoint;
+    }
+
+    public void setReviewPoint(List<String> reviewPoint) {
+        this.reviewPoint = reviewPoint;
+    }
+
+    public String getYear() {
+        return year;
+    }
+
+    public void setYear(String year) {
+        this.year = year;
+    }
+
+    public List<String> getAnswerImages() {
+        return answerImages;
+    }
+
+    public void setAnswerImages(List<String> answerImages) {
+        this.answerImages = answerImages;
+    }
+
+    public List<String> getAnalysisImages() {
+        return analysisImages;
+    }
+
+    public void setAnalysisImages(List<String> analysisImages) {
+        this.analysisImages = analysisImages;
+    }
+
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/IQuestionService.java b/tamguo-crawler/src/main/java/com/tamguo/service/IQuestionService.java
new file mode 100644
index 0000000..72d6ab8
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/service/IQuestionService.java
@@ -0,0 +1,8 @@
+package com.tamguo.service;
+
+public interface IQuestionService {
+    /**
+     * Crawls question data.
+     */
+    void crawlerQuestion();
+}
diff --git a/tamguo-crawler/src/main/java/com/tamguo/service/impl/QuestionService.java b/tamguo-crawler/src/main/java/com/tamguo/service/impl/QuestionService.java
new file mode 100644
index 0000000..1481845
--- /dev/null
+++ b/tamguo-crawler/src/main/java/com/tamguo/service/impl/QuestionService.java
@@ -0,0 +1,171 @@
+package com.tamguo.service.impl;
+
+import java.io.File;
+import java.text.DecimalFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import com.baomidou.mybatisplus.plugins.Page;
+import com.tamguo.config.redis.CacheService;
+import com.tamguo.dao.ChapterMapper;
+import com.tamguo.dao.CourseMapper;
+import com.tamguo.dao.CrawlerQuestionMapper;
+import com.tamguo.dao.QuestionMapper;
+import com.tamguo.dao.SubjectMapper;
+import com.tamguo.model.ChapterEntity;
+import com.tamguo.model.CourseEntity;
+import com.tamguo.model.CrawlerQuestionEntity;
+import com.tamguo.model.QuestionEntity;
+import com.tamguo.model.SubjectEntity;
+import com.tamguo.model.vo.QuestionVo;
+import com.tamguo.service.IQuestionService;
+import com.xuxueli.crawler.XxlCrawler;
+import com.xuxueli.crawler.conf.XxlCrawlerConf;
+import com.xuxueli.crawler.parser.PageParser;
+import com.xuxueli.crawler.parser.strategy.HtmlUnitPageLoader;
+import com.xuxueli.crawler.rundata.RunData;
+import com.xuxueli.crawler.util.FileUtil;
+
+/**
+ * Question crawler service: turns each crawled question page (a {@link QuestionVo})
+ * into a {@link QuestionEntity}, downloads the images it references and inserts
+ * the entity through {@link QuestionMapper}.
+ */
+@Service
+public class QuestionService implements IQuestionService{
+
+    @Autowired
+    QuestionMapper questionMapper;
+    @Autowired
+    CrawlerQuestionMapper crawlerQuestionMapper;
+    @Autowired
+    ChapterMapper chapterMapper;
+    @Autowired
+    CourseMapper courseMapper;
+    @Autowired
+    SubjectMapper subjectMapper;
+    @Autowired
+    CacheService cacheService;
+
+    /** DecimalFormat pattern used to zero-pad the image sequence number. */
+    private static final String FILES_NO_FORMAT = "00000";
+    /** Prefix of both the cache counter key and the generated image file names. */
+    private static final String FILES_PREFIX = "FP";
+    /** Prefix put in front of the local image path when image URLs are rewritten. */
+    private static final String DOMAIN = "http://static.tamguo.com";
+
+    private RunData runData;
+
+    /**
+     * Builds the crawler (spreading disabled, 10 threads, HtmlUnit page loader) with a
+     * parser that stores every parsed question, then pages through
+     * {@code crawlerQuestionMapper.queryPageOrderUid} 100 records at a time.
+     */
+    @Override
+    public void crawlerQuestion() {
+
+        XxlCrawler crawler = new XxlCrawler.Builder()
+            .setAllowSpread(false)
+            .setThreadCount(10)
+            .setPageLoader(new HtmlUnitPageLoader())
+            .setPageParser(new PageParser<QuestionVo>() {
+
+                /**
+                 * Stores one crawled question.
+                 *
+                 * @param html          the loaded page; its base URI identifies the crawler record
+                 * @param pageVoElement the element the value object was extracted from (unused)
+                 * @param questionVo    the extracted question fields
+                 */
+                @Override
+                public void parse(Document html, Element pageVoElement, QuestionVo questionVo) {
+                    CrawlerQuestionEntity condition = new CrawlerQuestionEntity();
+                    condition.setQuestionUrl(html.baseUri());
+                    CrawlerQuestionEntity crawlerQuestion = crawlerQuestionMapper.selectOne(condition);
+                    // The lookups may return null when no row matches; skip the page instead of failing with a NullPointerException.
+                    if (crawlerQuestion == null) {
+                        System.out.println("skip question, no crawler record for url:" + html.baseUri());
+                        return;
+                    }
+                    ChapterEntity chapter = chapterMapper.selectById(crawlerQuestion.getChapterId());
+                    CourseEntity course = chapter == null ? null : courseMapper.selectById(chapter.getCourseId());
+                    SubjectEntity subject = course == null ? null : subjectMapper.selectById(course.getSubjectId());
+                    if (subject == null) {
+                        System.out.println("skip question, chapter/course/subject not found for url:" + html.baseUri());
+                        return;
+                    }
+
+                    // Process the images BEFORE the entity is filled. Previously the rewritten text was only
+                    // written back to questionVo after the entity had copied the old text, so the stored
+                    // question kept the remote image URLs.
+                    questionVo.setAnswer(localizeImages(questionVo.getAnswer(), questionVo.getAnswerImages()));
+                    questionVo.setAnalysis(localizeImages(questionVo.getAnalysis(), questionVo.getAnalysisImages()));
+
+                    QuestionEntity question = new QuestionEntity();
+                    question.setAnalysis(questionVo.getAnalysis());
+                    question.setAnswer(questionVo.getAnswer());
+                    question.setAuditStatus("1");
+                    question.setChapterId(chapter.getUid());
+                    question.setContent(questionVo.getContent());
+                    question.setCourseId(course.getUid());
+                    question.setPaperId(null);
+                    question.setQuestionType("1");
+                    if(questionVo.getReviewPoint() != null && questionVo.getReviewPoint().size() > 0) {
+                        question.setReviewPoint(StringUtils.join(questionVo.getReviewPoint().toArray(), ","));
+                    }
+                    question.setScore(questionVo.getScore());
+                    question.setSubjectId(subject.getUid());
+                    question.setYear(questionVo.getYear());
+
+                    questionMapper.insert(question);
+
+                }
+
+                /**
+                 * Downloads every distinct image and points its URL in {@code text} at the local copy.
+                 *
+                 * @param text   HTML/text that references the images; may be null
+                 * @param images image URLs found in the text; may be null or empty
+                 * @return the text with the URL of each successfully downloaded image replaced,
+                 *         or {@code text} unchanged when there is nothing to do
+                 */
+                private String localizeImages(String text, List<String> images) {
+                    if (text == null || images == null || images.isEmpty()) {
+                        return text;
+                    }
+                    // Resolve the target directory once so every image of this call lands in the same folder.
+                    String filePath = getFilePath();
+                    File dir = new File(filePath);
+                    if (!dir.exists()) {
+                        dir.mkdirs();
+                    }
+                    String localized = text;
+                    // A set, so an image referenced several times is downloaded only once.
+                    Set<String> imagesSet = new HashSet<>(images);
+                    for (String img : imagesSet) {
+                        if (StringUtils.isBlank(img)) {
+                            continue;
+                        }
+                        // Download the image file
+                        String fileName = getFileName(img);
+                        boolean ret = FileUtil.downFile(img, XxlCrawlerConf.TIMEOUT_MILLIS_DEFAULT, filePath, fileName);
+                        System.out.println("down images " + (ret?"success":"fail") + ":" + img);
+
+                        // Replace the URL - only when the file really exists locally, otherwise keep the remote link.
+                        if (ret) {
+                            localized = localized.replace(img, DOMAIN + filePath + fileName);
+                        }
+                    }
+                    return localized;
+                }
+
+                /**
+                 * Builds the local file name for an image: next sequence number plus the extension
+                 * of the URL's last path segment (query string and fragment ignored, empty when
+                 * that segment has no extension).
+                 */
+                public String getFileName(String img) {
+                    String path = img;
+                    int queryStart = path.indexOf('?');
+                    if (queryStart >= 0) {
+                        path = path.substring(0, queryStart);
+                    }
+                    int fragmentStart = path.indexOf('#');
+                    if (fragmentStart >= 0) {
+                        path = path.substring(0, fragmentStart);
+                    }
+                    int dot = path.lastIndexOf('.');
+                    // A dot before the last '/' belongs to the host or a directory, not to the file name.
+                    String extension = dot > path.lastIndexOf('/') ? path.substring(dot) : "";
+                    return getFileNo() + extension;
+                }
+
+                /** Returns the current month as yyyyMM; a new formatter per call because SimpleDateFormat is not thread-safe. */
+                private String currentMonth() {
+                    return new SimpleDateFormat("yyyyMM").format(new Date());
+                }
+
+                /** Returns the image directory of the current month, e.g. /images/question/201807/. */
+                private String getFilePath() {
+                    return "/images/question/" + currentMonth() + "/";
+                }
+
+                /** Returns FILES_PREFIX followed by the zero-padded value of the cache counter kept for the current month. */
+                private String getFileNo() {
+                    DecimalFormat df = new DecimalFormat(FILES_NO_FORMAT);
+                    Long incr = cacheService.incr(FILES_PREFIX + currentMonth());
+                    return FILES_PREFIX + df.format(incr);
+                }
+            }).build();
+
+        runData = crawler.getRunData();
+        int page = 1;
+        int pageSize = 100;
+        while(true) {
+            Page questionPage = new Page(page , pageSize);
+            List questionList = crawlerQuestionMapper.queryPageOrderUid(questionPage);
+            // NOTE(review): this copy of the patch is truncated from here on - the rest of the loop, the end of the class and most of the CrawlerQuestionMapper.xml hunk are missing; the remaining text is kept as found.
+            for(int i=0 ;i
-
+
\ No newline at end of file
diff --git a/tamguo-crawler/src/main/resources/mappers/QuestionMapper.xml b/tamguo-crawler/src/main/resources/mappers/QuestionMapper.xml
new file mode 100644
index
0000000..cb56a1f
--- /dev/null
+++ b/tamguo-crawler/src/main/resources/mappers/QuestionMapper.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/tamguo-crawler/src/test/java/com/tamguo/QuestionCrawler.java b/tamguo-crawler/src/test/java/com/tamguo/QuestionCrawler.java
new file mode 100644
index 0000000..9b157ed
--- /dev/null
+++ b/tamguo-crawler/src/test/java/com/tamguo/QuestionCrawler.java
@@ -0,0 +1,24 @@
+package com.tamguo;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.test.context.junit4.SpringRunner;
+
+import com.tamguo.service.IQuestionService;
+/** Spring Boot test (SpringRunner + @SpringBootTest) used as a launcher for IQuestionService#crawlerQuestion(); it makes no assertions. */
+@RunWith(SpringRunner.class)
+@SpringBootTest
+public class QuestionCrawler {
+
+    @Autowired
+    IQuestionService iQuestionService;
+    /** Runs the question crawler. NOTE(review): the method is named crawlerSubject although it calls crawlerQuestion() - presumably copied from a subject crawler test; confirm before renaming. */
+    @Test
+    public void crawlerSubject() throws Exception {
+        iQuestionService.crawlerQuestion();
+    }
+
+
+}