You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

14 lines
566 B

# 通过正则来对数据进行清洗也可以使用xpath
import re,numpy
def process(resp):
    """Extract article records from a page's HTML using regular expressions.

    Four independent pattern searches are run over ``resp`` and their results
    are combined positionally: the i-th title is paired with the i-th summary,
    the i-th author and the i-th like count.

    Args:
        resp: HTML text of the page to scan.

    Returns:
        A list of ``(title, summary, author, like_count)`` tuples. Every
        element is the raw captured string; the like count is not converted
        to a number. An input with no matches yields an empty list.

    Note:
        ``zip`` stops at the shortest of the four result lists, so if an
        entry on the page lacks one of the fields, the later records are
        paired out of alignment and the surplus values are dropped.
        ``.`` does not match a newline here, so a field whose text spans
        several lines is not captured.
    """
    # Raw string literals keep ``\d`` as a regex token instead of an invalid
    # string escape (which newer Python versions warn about).

    # Title
    titles = re.findall(r'<h2 class="popularem-title">(.+?)</h2>', resp)
    # Summary
    summaries = re.findall(r'<p class="popularem-abs padshow">(.+?)</p>', resp)
    # Author
    authors = re.findall(r'<a href="author_\d+?" class="column">(.+?)</a>', resp)
    # Like count
    like_counts = re.findall(r'<span class="fav" data-id="\d+?">(.+?)</span>', resp)

    # Pack the four parallel lists into one list of per-article tuples.
    return list(zip(titles, summaries, authors, like_counts))