|
|
|
|
@ -1,2 +1,13 @@
|
|
|
|
|
# python
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
import requests
|
|
|
|
|
# Fetch a WeChat article page, parse it, and print the combined text of its
# <p> elements.

# URL of the news article to scrape. The timeout keeps the script from
# hanging forever on a stalled connection, and raise_for_status() stops early
# on an HTTP error instead of silently parsing an error page.
res = requests.get(
    'https://mp.weixin.qq.com/s/K0u_qPFQtWuH4hk5K2xWfQ',
    timeout=10,
)
res.raise_for_status()

# Decode res.text with the encoding requests detects from the response body.
res.encoding = res.apparent_encoding

# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup
# choose whichever HTML parser happens to be installed, so the parsed tree
# could differ from one machine to another.
soup = BeautifulSoup(res.text, 'html.parser')
print(soup)  # dump the parsed page source for inspection

# CSS element selector: collect every <p> element on the page.
data = soup.select('p')

# Concatenate the whitespace-stripped text of each paragraph.
text = ''.join(p.text.strip() for p in data)
print(text)
|
|
|
|
|
|