ADD file via upload

master
hnu202109060319 2 years ago
parent cd55150cb9
commit acf4b3d896

@ -0,0 +1,14 @@
# Clean the scraped data with regular expressions (XPath could be used instead).
import re,numpy
def process(resp: str) -> list:
    """Extract article records from a page of HTML using regular expressions.

    Four fields are pulled out of ``resp`` independently (title, summary,
    author, like count) and then combined positionally into records.

    Args:
        resp: The HTML text to scan.

    Returns:
        A list of ``(title, content, author, fav_num)`` tuples of strings.
        The list is empty when any one of the fields has no match.

    Note:
        ``.`` does not match a newline here, so a field whose text spans
        several lines is not captured. Because the fields are matched
        independently and combined with ``zip``, the result is cut to the
        shortest list of matches; if a field is missing for one article,
        later records pair up values from different articles.
    """
    # Raw strings keep ``\d`` as a regex token rather than an (invalid)
    # string escape sequence.
    # Title
    title = re.findall(r'<h2 class="popularem-title">(.+?)</h2>', resp)
    # Summary
    content = re.findall(r'<p class="popularem-abs padshow">(.+?)</p>', resp)
    # Author
    author = re.findall(r'<a href="author_\d+?" class="column">(.+?)</a>', resp)
    # Number of likes (kept as the matched string, not converted to int)
    fav_num = re.findall(r'<span class="fav" data-id="\d+?">(.+?)</span>', resp)
    # Pair the fields up by position and materialize them as a list of tuples.
    return list(zip(title, content, author, fav_num))
Loading…
Cancel
Save