# python
"""Download one web page and print the text of its <p> elements.

This script was carried by a patch that appended it to README.md under the
"# python" heading. It performs a single HTTP GET, prints the parsed page,
then prints the stripped text of every paragraph joined together with no
separator.
"""
from bs4 import BeautifulSoup
import requests

# URL of the news article to fetch.
ARTICLE_URL = 'https://mp.weixin.qq.com/s/K0u_qPFQtWuH4hk5K2xWfQ'

# requests applies no timeout unless one is given, so an unresponsive server
# would block the script forever; give up after a bounded wait instead.
res = requests.get(ARTICLE_URL, timeout=10)

# Fail loudly on a 4xx/5xx answer instead of scraping the error page.
res.raise_for_status()

# Decode the body with the encoding detected from its content rather than
# the one derived from the response headers.
res.encoding = res.apparent_encoding

# Name the parser explicitly. The generic 'html' feature lets BeautifulSoup
# pick whichever HTML parser happens to be installed (and warn that it had
# to guess), so the resulting tree could differ between machines.
# 'html.parser' ships with Python and is always available.
soup = BeautifulSoup(res.text, 'html.parser')
print(soup)  # show the parsed page source for inspection

data = soup.select('p')  # CSS selector: every <p> element in the document

# Join the stripped paragraph texts in one pass instead of growing a string
# with += inside a loop.
text = ''.join(p.text.strip() for p in data)
print(text)