You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
# BeautifulSoup
|
|
|
|
|
|
|
|
import requests
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
# Fetch the list of job-fair URLs
|
|
|
|
def urlList():
    """Collect link targets from the ncrczpw.com job-fair index page.

    Downloads the index page, selects every ``<a>`` element nested inside a
    ``div.td2`` element, and returns their ``href`` values with the final
    match dropped.

    Returns:
        list: ``href`` attribute values in document order, without the last
        match. An entry is ``None`` for an anchor that has no ``href``.
        The list is empty when the selector matches zero or one element.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    url = "https://www.ncrczpw.com/index.php?m=jobfair&c=index&a=index"
    head = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    r = requests.get(url, headers=head, timeout=10)
    # Decode the body as UTF-8 regardless of what the response headers claim.
    r.encoding = 'utf-8'
    bea = BeautifulSoup(r.text, 'lxml')
    links = [anchor.get("href") for anchor in bea.select("div.td2 a")]
    # The last match is discarded.
    # NOTE(review): presumably it is not a job-fair entry - confirm against the page.
    # A slice is used instead of pop() so an empty result does not raise IndexError.
    return links[:-1]
|
|
|
|
|
|
|
|
# Fetch the content of the first 8 entries
|
|
|
|
def text(url):
    """Fetch one page and print selected text fragments as a single line.

    The stripped text of every element matching ``div.jobfairshow div.txt``
    is emitted first, followed by the stripped text of every element matching
    ``div a strong``. Each fragment is followed by a comma (including the
    last one), and the combined string is printed to stdout.

    Args:
        url: Address of the page to download.

    Returns:
        None. The result is only printed.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    head = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    r = requests.get(url, headers=head, timeout=10)
    # Decode the body as UTF-8 regardless of what the response headers claim.
    r.encoding = 'utf-8'
    bea = BeautifulSoup(r.text, 'lxml')

    fragments = []
    # Selector order determines output order: description blocks first,
    # then the <strong> link captions.
    for selector in ("div.jobfairshow div.txt", "div a strong"):
        fragments.extend(
            node.get_text(strip=True) for node in bea.select(selector)
        )

    # Every fragment keeps its trailing comma, so the printed line is
    # identical to what step-by-step concatenation produced.
    print("".join(fragment + "," for fragment in fragments))
|
|
|
|
|
|
|
|
# Script entry: fetch the listing URLs, show them, then print the text of
# at most the first 8 pages.
arr = urlList()
print(arr)
# Slicing caps the run at 8 pages and, unlike indexing arr[0]..arr[7],
# does not raise IndexError when fewer than 8 links were found.
for page_url in arr[:8]:
    text(page_url)
|