ADD file via upload

3 years ago · acf4b3d896
parent cd55150cb9
commit acf4b3d896
1 changed files with 14 additions and 0 deletions
--- a/washer.py
+++ b/washer.py
@@ -0,0 +1,14 @@
+# Clean the scraped data with regular expressions (XPath would work as well).
+import re,numpy
+def process(resp):  # Extract (title, summary, author, like-count) string tuples from the HTML text `resp` via regexes.
+# Titles: captured text of every <h2 class="popularem-title"> element ('.' does not match newlines, so multi-line text is skipped).
+    title = re.findall('<h2 class="popularem-title">(.+?)</h2>', resp)
+# Summaries: captured text of every <p class="popularem-abs padshow"> element.
+    content = re.findall('<p class="popularem-abs padshow">(.+?)</p>', resp)
+# Authors: link text of every <a href="author_<digits>" class="column">. NOTE(review): pattern is not a raw string, so "\d" is an invalid str escape.
+    author = re.findall('<a href="author_\d+?" class="column">(.+?)</a>', resp)
+# Like counts: captured text of every <span class="fav" data-id="<digits>"> element; values stay strings, no int conversion here.
+    favNum = re.findall('<span class="fav" data-id="\d+?">(.+?)</span>', resp)
+# Pair the four lists positionally into a list of tuples; zip stops at the shortest list. NOTE(review): a missed match in one list would misalign rows - confirm.
+    mess = list(zip(title, content, author, favNum))
+    return mess