diff --git a/Controller/DataFetch.py b/Controller/DataFetch.py
deleted file mode 100644
index 2f94f93..0000000
--- a/Controller/DataFetch.py
+++ /dev/null
@@ -1,97 +0,0 @@
-'''
-Crawler business logic
-'''
-import requests,re,json
-from bs4 import BeautifulSoup
-from selenium import webdriver
-from selenium.webdriver.common.by import By
-
-'''Fetch and parse targets via CSS selectors'''
-class Scrawler():
-    def __init__(self):
-        self.headers={
-            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36 Edg/100.0.1185.36'
-        }
-    def reqbsGetText(self,url=None,cssselector=None):
-        '''Fetch text content'''
-        try:
-            rs = requests.get(url,headers=self.headers)
-            rs.encoding=rs.apparent_encoding
-            bs = BeautifulSoup(rs.text,'html.parser')
-            return [str(item.text).strip() for item in bs.select(selector=cssselector)]
-        except:pass
-
-    def reqbsGetJSON(self,url=None):
-        '''Fetch JSON text'''
-        try:
-            rs = requests.get(url,headers=self.headers).text
-            rs = str(rs)[13:][:-2]  # strip the JSONP-style wrapper around the JSON payload
-            return dict(json.loads(rs))
-        except:pass
-
-    def reqbsGetImg(self,url=None,cssselector=None):
-        '''Fetch images'''
-        try:
-            rs = requests.get(url, headers=self.headers)
-            rs.encoding = rs.apparent_encoding
-            bs = BeautifulSoup(rs.text, 'html.parser')
-            urls = [item.attrs['src'] for item in bs.select(selector=cssselector)]
-            print(urls)
-            titles = [item.attrs['alt'] for item in bs.select(selector=cssselector)]
-            return urls,titles
-        except:pass
-
-    def reqbsGetHref(self, url=None, cssselector=None):
-        try:
-            rs = requests.get(url, headers=self.headers)
-            rs.encoding = rs.apparent_encoding
-            bs = BeautifulSoup(rs.text, 'html.parser')
-            urls = [item.attrs['href'] for item in bs.select(selector=cssselector)]
-            return urls
-        except:
-            pass
-
-    def seleniumGetText(self,url,cssselector=None):
-        chrome = webdriver.Chrome()
-        chrome.get(url)
-        chrome.implicitly_wait(3)
-        res = chrome.find_elements(by=By.CSS_SELECTOR,value=cssselector)
-        return [str(item.text).strip() for item in res]
-
-# Fetch the local temperature for Zhuzhou
-def getWeatherInfo():
-    url="http://d1.weather.com.cn/sk_2d/101250304.html?_=1716271809611"
-    headers={
-        "Host": "d1.weather.com.cn",
-        "Referer": "http://www.weather.com.cn/",
-        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36"
-    }
-    str = requests.get(url,headers=headers).text.replace('"','')
-    for item in str.split(","):
-        if 'time' in item:
-            timeinfo=item[5:]
-        elif 'temp:' in item:temp=item.split(":")[1]
-    return timeinfo,temp
-
-# Fetch journal titles
-
-
-
-if __name__=='__main__':
-    # print(getWeatherInfo())
-    str='var dataSK={"nameen":"hetang","cityname":"荷塘","city":"101250304","temp":"27.6","tempf":"81.7","WD":"东北风","wde":"NE","WS":"1级","wse":"4km\/h","SD":"69%","sd":"69%","qy":"1002","njd":"12km","time":"14:25","rain":"0","rain24h":"0","aqi":"49","aqi_pm25":"49","weather":"阴","weathere":"Overcast","weathercode":"d02","limitnumber":"","date":"05月21日(星期二)"}'
-    str1=str.replace('"','')
-    items=str1.split(",")
-    for item in items:
-        if 'time' in item:
-            timeinfo=item[5:]
-        elif 'temp:' in item:temp=item.split(":")[1]
-    print(timeinfo,temp)
-
-
-
-
-
-
-
-
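
For context, a minimal sketch of how the removed module was presumably called before this deletion. The import path mirrors the deleted file's location; the target URL and CSS selector are illustrative assumptions, not values taken from the project.

    from Controller.DataFetch import Scrawler, getWeatherInfo

    scrawler = Scrawler()

    # Grab the text of every element matched by a CSS selector
    # (URL and selector below are hypothetical examples)
    links = scrawler.reqbsGetText(url='http://www.weather.com.cn/', cssselector='a')
    print(links)

    # Observation time and temperature for Zhuzhou (Hetang district),
    # parsed from the weather.com.cn real-time endpoint hard-coded above
    timeinfo, temp = getWeatherInfo()
    print(timeinfo, temp)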