Delete 'Controller/DataFetch.py'

main
ptf2oq859 6 months ago
parent bd97ccc542
commit 1f78412472

@ -1,97 +0,0 @@
'''
Crawler business logic (web-scraping helpers).
'''
import requests,re,json
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
'''Fetch and parse targets via CSS selectors.'''
class Scrawler():
    """Scraping helpers that locate and parse page targets via CSS selectors.

    All request-based methods share a browser-like User-Agent and follow a
    best-effort contract: on any request/parse failure they return None
    instead of raising (callers are expected to handle None).
    """

    def __init__(self):
        # Browser-like UA so sites that block obvious bots still respond.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 Edg/100.0.1185.36'
        }

    def reqbsGetText(self, url=None, cssselector=None):
        """Return the stripped text of every element matching *cssselector*.

        Returns None on any failure (best-effort).
        """
        try:
            rs = requests.get(url, headers=self.headers)
            rs.encoding = rs.apparent_encoding  # guess charset from body
            bs = BeautifulSoup(rs.text, 'html.parser')
            return [str(item.text).strip() for item in bs.select(selector=cssselector)]
        except Exception:
            return None

    def reqbsGetJSON(self, url=None):
        """Fetch a JSONP-style payload and return it as a dict.

        Strips a fixed 13-character prefix and 2-character suffix (the
        ``var xxx={...};`` wrapper) before json.loads.
        NOTE(review): these offsets are tied to one specific feed — confirm
        before reusing against a different endpoint.
        Returns None on failure.
        """
        try:
            rs = requests.get(url, headers=self.headers).text
            rs = str(rs)[13:][:-2]  # drop JSONP wrapper
            return dict(json.loads(rs))
        except Exception:
            return None

    def reqbsGetImg(self, url=None, cssselector=None):
        """Return (src_urls, alt_titles) for all elements matching *cssselector*.

        Returns None on failure.
        """
        try:
            rs = requests.get(url, headers=self.headers)
            rs.encoding = rs.apparent_encoding
            bs = BeautifulSoup(rs.text, 'html.parser')
            matches = bs.select(selector=cssselector)
            urls = [item.attrs['src'] for item in matches]
            # BUG FIX: the original read ``item.atrrs['alt']`` (typo), which
            # raised AttributeError — swallowed by the bare except — so the
            # method always returned None. Also removed a debug print(urls).
            titles = [item.attrs['alt'] for item in matches]
            return urls, titles
        except Exception:
            return None

    def reqbsGetHref(self, url=None, cssselector=None):
        """Return the href attribute of every element matching *cssselector*.

        Returns None on failure.
        """
        try:
            rs = requests.get(url, headers=self.headers)
            rs.encoding = rs.apparent_encoding
            bs = BeautifulSoup(rs.text, 'html.parser')
            return [item.attrs['href'] for item in bs.select(selector=cssselector)]
        except Exception:
            return None

    def seleniumGetText(self, url, cssselector=None):
        """Render *url* in Chrome and return stripped text of matching elements.

        Uses a 3-second implicit wait so dynamically-inserted content has a
        chance to appear. The driver is always quit afterwards — the original
        leaked one Chrome process per call.
        """
        chrome = webdriver.Chrome()
        try:
            chrome.get(url)
            chrome.implicitly_wait(3)
            elements = chrome.find_elements(by=By.CSS_SELECTOR, value=cssselector)
            return [str(el.text).strip() for el in elements]
        finally:
            chrome.quit()
# Fetch the current local conditions for Zhuzhou (station 101250304, Hetang).
def getWeatherInfo():
    """Return (observation_time, temperature) from weather.com.cn.

    The endpoint answers with a ``var dataSK={...}`` JSONP payload (see the
    sample in ``__main__``); parsing strips the quotes and splits on commas.
    Either element of the tuple is None if its key is absent.
    """
    # BUG FIX: the original URL literal began with a stray space — removed,
    # since some HTTP stacks reject or mangle such URLs.
    url = "http://d1.weather.com.cn/sk_2d/101250304.html?_=1716271809611"
    headers = {
        "Host": "d1.weather.com.cn",
        "Referer": "http://www.weather.com.cn/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36",
    }
    # Renamed from ``str`` — the original shadowed the builtin.
    payload = requests.get(url, headers=headers).text.replace('"', '')
    # Pre-initialise so a payload missing either key cannot raise NameError.
    timeinfo = temp = None
    for item in payload.split(","):
        if 'time' in item:
            timeinfo = item[5:]  # drop the leading 'time:'
        elif 'temp:' in item:
            temp = item.split(":")[1]
    return timeinfo, temp
# NOTE(review): stale comment — originally "get the journal title", but the code below parses weather data.
if __name__ == '__main__':
    # print(getWeatherInfo())
    # Offline sample of the weather.com.cn dataSK payload, used to exercise
    # the quote-strip/split parsing without a network call.
    # Renamed from ``str`` — the original shadowed the builtin.
    sample = 'var dataSK={"nameen":"hetang","cityname":"荷塘","city":"101250304","temp":"27.6","tempf":"81.7","WD":"东北风","wde":"NE","WS":"1级","wse":"4km\/h","SD":"69%","sd":"69%","qy":"1002","njd":"12km","time":"14:25","rain":"0","rain24h":"0","aqi":"49","aqi_pm25":"49","weather":"阴","weathere":"Overcast","weathercode":"d02","limitnumber":"","date":"05月21日(星期二)"}'
    cleaned = sample.replace('"', '')
    # Pre-initialise so missing keys cannot raise NameError at the print.
    timeinfo = temp = None
    for item in cleaned.split(","):
        if 'time' in item:
            timeinfo = item[5:]  # drop the leading 'time:'
        elif 'temp:' in item:
            temp = item.split(":")[1]
    print(timeinfo, temp)
Loading…
Cancel
Save