diff --git a/SpiderWebsite/demo/views.py b/SpiderWebsite/demo/views.py
index 5d53aa1..2e463d2 100644
--- a/SpiderWebsite/demo/views.py
+++ b/SpiderWebsite/demo/views.py
@@ -207,3 +207,80 @@ def word_cloud(request):
# Write your spider functions from here on, e.g.:
# def spider_fun(url, web_name):
# pass
+
+from bs4 import BeautifulSoup
+import urllib.request, urllib.error  # urllib stands in for the requests library here
+import os
+import re
+import pandas as pd
+
+# Regex patterns that pull each field out of a listing's HTML. NOTE: the
+# HTML tags inside these patterns were mangled in the original patch; the
+# tags below are reconstructed assumptions and should be verified against
+# the live page markup.
+#findLink = re.compile(r'href="(.*?)"')  # extract the listing URL
+findTitle = re.compile(r'target="_blank">(.*?)</a>')  # extract the title
+findPrice = re.compile(r'<em>(.*?)</em>')  # extract the price (assumed <em> tag)
+findTag = re.compile(r'/" target="_blank">(.*?)</a>')  # extract the item category
+findPlace = re.compile(r'<li>(.*?)</li>', re.S)  # extract the location (assumed <li> tag)
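+# Illustrative only: given markup like
+#   <a href="/m12345/" target="_blank">二手自行车</a>
+# findTitle would capture "二手自行车". The href and tag names here are
+# made-up examples, not the site's actual markup.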
+
+def askURL(url):  # fetch a page and return its HTML as a string
+    head = {
+        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
+    }
+ request = urllib.request.Request(url,headers=head)
+ html = ""
+ try:
+ response = urllib.request.urlopen(request)
+ html = response.read().decode('utf-8')
+ #print(html) #test
+ except urllib.error.URLError as e:
+ if hasattr(e,'code'):
+ print(e.code)
+ if hasattr(e,'reason'):
+ print(e.reason)
+ return html
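+# Usage sketch (assumes the baixing URL scheme used in main() below):
+#   html = askURL('https://guilin.baixing.com/ershou/?page=1')
+# askURL returns '' when the request fails, so callers should expect to
+# handle an empty string.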
+
+def getData(baseurl):  # extract the fields we need from each listing page
+    datalist = []
+    for i in range(1, 4):  # fetch pages 1-3; widen the range for more pages
+        url = baseurl + str(i)
+        html = askURL(url)
+
+        soup = BeautifulSoup(html, "html.parser")
+        for item in soup.find_all('div', class_="media-body"):  # one block per listing
+            #print(item)  # test
+            item = str(item)
+            title = re.findall(findTitle, item)
+            place = re.findall(findPlace, item)  # this is the location, not a link
+            price = re.findall(findPrice, item)
+            tag = re.findall(findTag, item)
+            if not (title and place and price and tag):
+                continue  # skip listings where any pattern failed to match
+            datalist.append([title[0], place[0], price[0], tag[0]])
+
+    return datalist
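+# datalist is a list of [title, place, price, tag] rows, in exactly the
+# column order expected by the DataFrame built in saveData below.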
+
+def saveData(savepath, datalist, web_name):  # write the scraped rows to a CSV file
+    name = ["标题", "地址", "价格", "类型"]  # columns: title, location, price, category
+    file = pd.DataFrame(columns=name, data=datalist)  # combine header and rows
+    # save into savepath as lyh_tiaozaomarket.csv
+    file.to_csv(os.path.join(savepath, 'lyh_tiaozaomarket.csv'), index=False, encoding='utf-8-sig')
+    print('Saved data from %s' % web_name)
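+# Design note: encoding='utf-8-sig' keeps the Chinese headers readable when
+# the CSV is opened in Excel; plain utf-8 (the pandas default) also works
+# for programmatic round-trips.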
+
+
+'''--------- entry point ---------'''
+
+def begin_spider(url, web_name):
+    savepath = os.getcwd()  # use the current working directory as the save path
+    datalist = getData(url)
+    saveData(savepath, datalist, web_name)
+
+def main():
+    url = 'https://guilin.baixing.com/ershou/?page='
+    web_name = '桂林百姓网二手市场'
+    begin_spider(url, web_name)
+
+if __name__ == "__main__":
+ main()