class Home():
    """Main window of the Beijing home-buying recommendation tool.

    The window shows one button per district.  Clicking a district scrapes
    the matching new-house listing page and shows the result in a pop-up
    table with previous/next paging.  Two extra buttons render the district
    price map and the price-trend line chart and open them in the browser.

    Attributes:
        Max_index: Last page number of the current district as a string,
            or '' while it is still unknown.
        Current_index: 1-based number of the page currently displayed.
        all_info: ``(name, price, address)`` tuples of the displayed page.
        text: Display name (button label) of the selected district.
        region: URL slug of the selected district, used when paging.
    """

    # Button label of each district -> slug used in the listing URL.
    # Insertion order is also the order of the buttons in the window.
    REGION_SLUGS = {
        "海淀": "haidian",
        "朝阳": "chaoyang",
        "丰台": "fengtai",
        "西城": "xicheng",
        "东城": "dongcheng",
        "昌平": "changping",
        "大兴": "daxing",
        "通州": "tongzhou",
        "房山": "fangshan",
        "顺义": "shunyi",
        "石景山": "shijingshan",
        "密云": "miyun",
    }

    # Seconds to wait for the listing site before giving up, so a stalled
    # connection cannot freeze the Tk event loop forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Build the main window and run the Tk main loop (blocks)."""
        self.root = tk.Tk()
        self.root.title("购房推荐系统")
        self.root.geometry("650x350+500+200")

        self.Max_index = ''
        self.Current_index = 1
        # Each scraped listing is stored as one (name, price, address) record.
        self.all_info = []
        self.text = ''
        self.region = ''

        self.label = tk.Label(self.root, text="欢迎来到北京市购房推荐系统,请选择要查询的地区",
                              font=('宋体', 20), background="#00ffff")
        self.label.grid(row=0, column=0, columnspan=4, pady=(20, 0))

        self.button_texts = list(self.REGION_SLUGS)
        for i, text in enumerate(self.button_texts):
            # Bind the label as a default argument so every button keeps its
            # own district instead of the last loop value.
            self.button = tk.Button(self.root, text=text,
                                    command=lambda txt=text: self.on_button_click(txt))
            # Four buttons per row, starting below the title label.
            self.button.grid(row=i // 4 + 1, column=i % 4, padx=10, pady=10)

        self.btn_map = tk.Button(self.root, text='区位房价', command=self.open_map)
        self.btn_map.place(relx=0.65, rely=0.8, relheight=0.10, relwidth=0.13)

        self.btn_line = tk.Button(self.root, text="同比房价", command=self.open_line)
        self.btn_line.place(relx=0.2, rely=0.8, relheight=0.10, relwidth=0.13)

        self.root.mainloop()

    def open_map(self):
        """Render the district price map, open it, then empty the CSV cache."""
        map()
        webbrowser.open('map.html')
        self.clear_csv()

    def open_line(self):
        """Render the price-trend chart, open it, then empty the CSV cache."""
        line()
        webbrowser.open('line.html')
        self.clear_csv()

    @staticmethod
    def _page_url(region, index):
        """Return the listing URL of page *index* for the district slug."""
        return f"https://newhouse.fang.com/house/s/{region}/b9" + index

    @staticmethod
    def _parse_last_page(href):
        """Extract the last page number (as a string) from a "last" link.

        Page links end in ``b9<page>`` with an optional trailing slash, so
        the whole digit run is taken; reading a single character would
        truncate page counts of 10 and above.  Falls back to the digit
        before the final character, and finally to ``'1'``, when the link
        does not follow that layout.
        """
        segment = href.rstrip('/').rsplit('/', 1)[-1]
        if segment.startswith('b9') and segment[2:].isdigit():
            return segment[2:]
        if len(href) >= 2 and href[-2].isdigit():
            return href[-2]
        return '1'

    # Scrape one listing page and pair up its fields.
    def fetch_data(self, region, index='1'):
        """Download one listing page and append its records to ``all_info``.

        Args:
            region: URL slug of the district (e.g. ``"haidian"``).
            index: Page number as a string.

        Returns:
            ``self.all_info`` after the page's ``(name, price, address)``
            records were appended, or the string ``'False'`` when the first
            page of a district contains neither listings nor paging links.

        Raises:
            requests.RequestException: The page could not be downloaded.
        """
        response = requests.get(self._page_url(region, index),
                                timeout=self.REQUEST_TIMEOUT)
        response.encoding = "utf-8"
        page = etree.HTML(response.text)
        if page is None:
            # An empty body cannot be parsed; treat it as a page without data.
            names, addresses, prices, lasts = [], [], [], []
        else:
            names = [n.strip() for n in page.xpath('//div[@class="nlcd_name"]/a/text()')]
            addresses = page.xpath('//div[@class="address"]/a/@title')
            prices = [d.xpath('string(.)').strip()
                      for d in page.xpath('//div[@class="nhouse_price"]')]
            lasts = page.xpath('//a[@class="last"]/@href')

        # The page count is only discovered once, on the first page.
        if index == '1' and self.Max_index == '':
            if lasts:
                self.Max_index = self._parse_last_page(lasts[-1])
            elif names:
                # Listings without a "last" link: a single page of results.
                self.Max_index = '1'
            else:
                return 'False'

        self.all_info.extend(
            (name, price, address)
            for name, address, price in zip(names, addresses, prices)
        )
        return self.all_info

    # Show the scraped records in a pop-up table.
    def show_data_in_table(self, data):
        """Open a pop-up window listing *data* with paging buttons.

        Args:
            data: Iterable of ``(name, price, address)`` records.
        """
        self.popup = tk.Toplevel(self.root)
        self.popup.title("购房信息详情")

        # Explicit column ids; headings and widths are addressed by position.
        tree = ttk.Treeview(self.popup, columns=("name", "price", "area"), show="headings")
        for position, title in enumerate(("楼盘名称", "价格", "地区"), start=1):
            tree.heading(f"#{position}", text=title)
            tree.column(f"#{position}", width=100, anchor='center')

        for record in data:
            tree.insert('', 'end', values=list(record))

        # Vertical scrollbar coupled to the table.
        vsb = ttk.Scrollbar(self.popup, orient="vertical", command=tree.yview)
        tree.configure(yscrollcommand=vsb.set)
        vsb.pack(side='right', fill='y')

        tree.pack(fill='both', expand=True)

        # Bottom bar holding the paging buttons.
        footer_frame = ttk.Frame(self.popup)
        footer_frame.pack(side='bottom', fill='x')

        prev_button = ttk.Button(footer_frame, text="上一页", command=self.go_to_previous_page)
        prev_button.pack(side='left', padx=5, pady=5)

        next_button = ttk.Button(footer_frame, text="下一页", command=self.go_to_next_page)
        next_button.pack(side='right', padx=5, pady=5)

        self.center_window()

    def _turn_page(self, page_number):
        """Fetch *page_number* of the current district and redisplay it.

        The old pop-up is only destroyed after the download succeeded, so a
        network failure leaves the current page on screen.
        """
        self.all_info.clear()
        try:
            # Paging must use the URL slug; ``self.text`` holds the label.
            data = self.fetch_data(self.region or self.text, str(page_number))
        except requests.RequestException:
            messagebox.showinfo(message='网络请求失败,请稍后重试!')
            return
        self.popup.destroy()
        self.Current_index = page_number
        self.show_data_in_table(data)

    # Previous-page handler.
    def go_to_previous_page(self):
        """Show the previous page, or tell the user this is the first one."""
        if self.Current_index > 1:
            self._turn_page(self.Current_index - 1)
        else:
            messagebox.showinfo(message='这是第一页')

    # Next-page handler.
    def go_to_next_page(self):
        """Show the next page, or tell the user the end was reached."""
        last_page = int(self.Max_index) if str(self.Max_index).isdigit() else 1
        if self.Current_index + 1 <= last_page:
            self._turn_page(self.Current_index + 1)
        else:
            messagebox.showinfo(message='末尾')

    # District-button handler.
    def on_button_click(self, text, index='1'):
        """Load the listings of the clicked district and display them.

        Args:
            text: Button label (Chinese district name).
            index: Page number to load, as a string.
        """
        self.Max_index = ''
        self.Current_index = 1
        self.all_info = []
        self.text = text

        slug = self.REGION_SLUGS.get(text)
        if slug is None:
            messagebox.showinfo(message='该地区暂无数据!')
            return
        self.region = slug

        # Fetch exactly once: a second call would append every record again.
        try:
            data = self.fetch_data(slug, index)
        except requests.RequestException:
            messagebox.showinfo(message='网络请求失败,请稍后重试!')
            return

        if data == "False":
            messagebox.showinfo(message='该地区暂无数据!')
        else:
            self.show_data_in_table(data)

    def center_window(self, width=1200, height=600):
        """Resize the pop-up to *width* x *height* and centre it on screen."""
        screen_width = self.popup.winfo_screenwidth()
        screen_height = self.popup.winfo_screenheight()

        # Top-left corner that puts the window in the middle of the screen.
        left = (screen_width / 2) - (width / 2)
        top = (screen_height / 2) - (height / 2)

        self.popup.geometry("{0}x{1}+{2}+{3}".format(width, height, int(left), int(top)))

    def clear_csv(self):
        """Truncate the cached price CSV so the next chart starts fresh."""
        with open('北京市区房价.csv', 'w', encoding='utf-8', newline=''):
            pass
def getData():
    """Scrape the Beijing district price ranking into ``北京市区房价.csv``.

    Downloads the for-sale ranking page, takes the first ``<tbody>`` and
    writes one CSV row per table row: the ``<th>`` texts of the first row
    as the header, then the ``<td>`` texts of every following row, cut to
    the header's column count.

    The file is rewritten on every call.  Appending would stack a second
    header and duplicate rows whenever the file was not emptied in between,
    which breaks the numeric parsing done by the chart modules.

    Raises:
        requests.RequestException: The page could not be downloaded or the
            server answered with an error status.
        RuntimeError: The page contains no usable table.
    """
    url = 'https://bj.cityhouse.cn/market/rankforsale.html'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Content-Type': 'text/html; charset=utf-8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0',
    }
    # A timeout keeps a stalled server from hanging the calling GUI forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    tbody = soup.find('tbody')
    if tbody is None:
        raise RuntimeError('price ranking table (<tbody>) not found in the page')

    # Query the row list once instead of re-scanning the table per cell.
    table_rows = tbody.find_all('tr')
    if not table_rows:
        raise RuntimeError('price ranking table contains no rows')
    column_count = len(table_rows[0].find_all('th'))

    with open('北京市区房价.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        for position, table_row in enumerate(table_rows):
            # Only the first row is the header and therefore built from <th>.
            cells = table_row.find_all('th' if position == 0 else 'td')
            if len(cells) < column_count:
                # Skip separator/short rows instead of failing on a missing cell.
                continue
            writer.writerow([cell.text.strip() for cell in cells[:column_count]])
    print('shu数据获取完毕')


if __name__ == '__main__':
    getData()
new file mode 100644 index 0000000..823d33a --- /dev/null +++ b/pythonProject1/line.html @@ -0,0 +1,424 @@ + + + + + 多折线 + + + + +
+ + + diff --git a/pythonProject1/line.py b/pythonProject1/line.py new file mode 100644 index 0000000..548bda2 --- /dev/null +++ b/pythonProject1/line.py @@ -0,0 +1,43 @@ +import pyecharts +from pyecharts.charts import Line +from pyecharts import options as opts +import pandas as pd +from getData import getData +def line(): + getData() + # 读取数据 + data = pd.read_csv('北京市区房价.csv', encoding='utf-8') + datas = data['行政区'] + compare_month = data['环比上月'] + compare_year = data['同比上年'] + + # 移除百分号并转换为浮点数 + mothon_list_float = [float(x.strip('%'))for x in compare_month] # 转换为小数(百分比转为0-1之间) + year_list_float = [float(x.strip('%'))for x in compare_year] # 转换为小数(百分比转为0-1之间) + + # 转换为列表 + name_list = datas.tolist() + + # 打印转换后的列表以验证 + print(mothon_list_float) + + # 创建Line对象并设置数据 + line = Line( + init_opts=opts.InitOpts(width='1000px',height='500px',page_title="多折线") + ) + line.add_xaxis(xaxis_data=name_list) # x轴 + line.add_yaxis(series_name='环比上月', y_axis=mothon_list_float) # 使用转换后的数据 + line.add_yaxis(series_name='同比上年', y_axis=year_list_float) # 使用转换后的数据 + + line.set_global_opts( + title_opts=opts.TitleOpts( + title="北京市区房价趋势图" + ), + toolbox_opts=opts.ToolboxOpts( + is_show=True + ) + ) + + # 渲染图表到HTML文件 + line.render('line.html') + diff --git a/pythonProject1/login.py b/pythonProject1/login.py new file mode 100644 index 0000000..1f94e28 --- /dev/null +++ b/pythonProject1/login.py @@ -0,0 +1,45 @@ +from tkinter import * +import Home + +user_login={'aaa':'123456','bbb':'888888','ccc':'333333'} +count=0 +def login(): + global count + username=entry_username.get() + if username not in user_login: + lable_message.config(text="账号错误!") + else: + password=entry_password.get() + if(password==user_login[username]): + lable_message.config(text="登陆成功!") + window.destroy() + h=Home.Home() + else: + lable_message.config(text="你还可以尝试{}次".format(2-count)) + count+=1 + if count==3: + lable_message.config(text="登陆失败!") + btn_login.config(state='disabled') +window = Tk() +window.title('用户登陆') 
+window.geometry("300x200") + +lable_usename=Label(window,text="账号:") +lable_usename.pack() +entry_username=Entry(window) +entry_username.pack() + +lable_password=Label(window,text="密码") +lable_password.pack() +entry_password=Entry(window) +entry_password.pack() + +#按钮登陆绑定login函数 +btn_login=Button(window,text="登陆",command=login) +btn_login.pack() +lable_message=Label(window,text="") +lable_message.pack() + +window.mainloop() + + diff --git a/pythonProject1/map.html b/pythonProject1/map.html new file mode 100644 index 0000000..d7d89ca --- /dev/null +++ b/pythonProject1/map.html @@ -0,0 +1,219 @@ + + + + + 地图 + + + + + +
+ + + diff --git a/pythonProject1/map.py b/pythonProject1/map.py new file mode 100644 index 0000000..54aca7d --- /dev/null +++ b/pythonProject1/map.py @@ -0,0 +1,43 @@ +from pyecharts.charts import Map +from pyecharts import options as opts +import pandas as pd +from getData import getData + +def map(): + getData() + data = pd.read_csv('北京市区房价.csv',encoding='utf-8') + info = data[['行政区','均价(元/㎡)']].copy() + # 移除千分号,转换为浮点数,然后四舍五入到最接近的整数(或者直接使用astype(int)向下取整) + info['均价(元/㎡)'] = info['均价(元/㎡)'].str.replace(',', '').astype(float).round().astype(int) + info_list = info.values.tolist() + + print(info_list) + + map=Map( + init_opts=opts.InitOpts( + page_title="地图" + ) + + ) + map.add( + series_name="平均房价", + data_pair=info_list, + maptype="北京", + zoom=1, + ) + map.set_global_opts( + title_opts=opts.TitleOpts( + title="北京市市区", + pos_left='center', + ), + legend_opts=opts.LegendOpts( + pos_left="left", # 图例在左边 + orient="vertical" # 垂直排列图例 + ), + visualmap_opts=opts.VisualMapOpts( + max_=130000, + min_=19000, + range_color=['#1E9600','#fff200','#ff0000'] + ) + ) + map.render('map.html') \ No newline at end of file diff --git a/pythonProject1/北京市区房价.csv b/pythonProject1/北京市区房价.csv new file mode 100644 index 0000000..e69de29