Update README.md

main
pp3zah5lx 6 months ago
parent 4542cb49de
commit 411cf95ab0

@ -1,2 +1,44 @@
# BeautifulSoup
import requests
from bs4 import BeautifulSoup
def urlList():
    """Return the job-fair links scraped from the listing page.

    Downloads the job-fair index page, selects every ``div.td2 a`` anchor
    and collects its ``href`` attribute.  The last collected link is
    dropped (presumably it is not a job-fair entry -- confirm against the
    live page).

    Returns:
        list: The ``href`` values in document order without the final one;
        an empty list when no anchor matches.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    url = "https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index"
    head = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    }
    # Without a timeout an unresponsive server would block the script forever.
    r = requests.get(url, headers=head, timeout=10)
    r.encoding = 'utf-8'
    bea = BeautifulSoup(r.text, 'lxml')
    links = [anchor.get("href") for anchor in bea.select("div.td2 a")]
    # Drop the trailing link; slicing, unlike pop(), is safe on an empty list.
    return links[:-1]
def text(url):
    """Fetch one job-fair page and print its text fields on a single line.

    The stripped text of every ``div.jobfairshow div.txt`` element, followed
    by that of every ``div a strong`` element, is printed with a ``,`` after
    each item (so the line ends with a trailing comma).

    Args:
        url: Address of the page to download.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    }
    # Without a timeout an unresponsive server would block the script forever.
    r = requests.get(url, headers=head, timeout=10)
    r.encoding = 'utf-8'
    bea = BeautifulSoup(r.text, 'lxml')
    parts = []
    # Selector order matters: it fixes the order of the printed fields.
    for selector in ("div.jobfairshow div.txt", "div a strong"):
        parts.extend(node.get_text(strip=True) for node in bea.select(selector))
    # Each item keeps its own trailing comma, matching the original output.
    print("".join(part + "," for part in parts))
# Scrape the listing, show the collected links, then print the details of
# the first 8 job fairs (fewer when the listing is shorter).
arr = urlList()
print(arr)
# Slicing instead of indexing over range(0, 8) avoids an IndexError when
# fewer than 8 links were collected.
for page_url in arr[:8]:
    text(page_url)

Loading…
Cancel
Save