import json import random import re import os from tkinter import * from tkinter import messagebox from tkinter import ttk import requests import threading from PIL import Image, ImageTk """ 说明: 1.使用“check button”实现下载完打开文件夹操作,注册了enter、esc热键,优化体验 2.实现关键字、磁盘、用户判断逻辑 3.利用多线程来执行下载操作 """ class WeiBo_pics_Spider(object): def __init__(self, start_url): self.start_url = start_url # 解析出图片地址 def get_pics_url(self): i = 1 while True: url = self.start_url + '&page={}'.format(i) headers = {'User-Agent': get_ua()} r = requests.get(url, headers=headers) _json = json.loads(r.text) items = _json["data"]["cards"] flag = _json['ok'] if flag == 1: # 爬取数据标志+一个手动控制标志 for v in items: picslist = v.get('mblog') if picslist is not None: img_urls = picslist.get('pics') if img_urls is not None: for img_url_ in img_urls: img_url = img_url_['large']['url'] yield img_url else: t1.insert(END, f'***在第{i}页终止***\n') t1.see(END) t1.update() if r1_var.get() == 1: big_dir = disk + ':/WeiBo_Pics' os.startfile(big_dir) break i += 1 # 下载图片 def download_pics(self, url, filename): headers = {'User-Agent': get_ua()} r = requests.get(url, headers=headers) big_dir = disk + ':/WeiBo_Pics' aim_path = big_dir + '/' + user_name_selected try: os.makedirs(aim_path) except: pass with open(aim_path + '\\' + filename, 'wb') as f: f.write(r.content) # 保证焦点始终在最下 t1.see(END) # 下载完一张刷新一次 防止界面卡死崩溃 t1.insert(END, f'{filename}\n') window.update() def get_ua(): first_num = random.randint(55, 62) third_num = random.randint(0, 3200) fourth_num = random.randint(0, 140) os_type = [ '(Windows NT 6.1; WOW64)', '(Windows NT 10.0; WOW64)', '(X11; Linux x86_64)', '(Macintosh; Intel Mac OS X 10_12_6)' ] chrome_version = 'Chrome/{}.0.{}.{}'.format(first_num, third_num, fourth_num) ua = ' '.join(['Mozilla/5.0', random.choice(os_type), 'AppleWebKit/537.36', '(KHTML, like Gecko)', chrome_version, 'Safari/537.36'] ) return ua def wb_search(): # 先清空lsibox1内容,便于新内容显示 listb1.delete(0, END) url1 = 'https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D3%26q%3D{}%26t%3D0' headers = {'User-Agent': get_ua()} key_word = e1.get() global user_id_list user_id_list = list() if len(key_word) != 0: # 若用户输入了user_id,则去获取screen_name if re.match('\\d{10}', key_word): user_id_list.append(key_word) try: url2 = f'https://m.weibo.cn/api/container/getIndex?uid={key_word}&containerid=100505{key_word}' r1 = requests.get(url2, headers=headers) _data = json.loads(r1.text) screen_name = _data['data']['userInfo'].get('screen_name') l3.place(x=120, y=42) l3_var.set(f'搜索成功') l3['background'] = 'green' listb1.insert(END, screen_name) except KeyError: messagebox.showinfo(title='提示', message='没有检索到相关用户,请检查用户id或使用关键字搜索!') l3.place(x=65, y=42) l3_var.set(f'请检查用户id或使用关键字搜索!') l3['background'] = 'yellow' # 没有检索到用户的话,提示之后,e1获得焦点之后,清除用户之前输入 e1.bind('WM_TAKE_FOCUS', e1_clear()) # 否则根据关键字去搜索用户信息,显示在listbox中 else: aim_url = url1.format(key_word) r = requests.get(aim_url, headers=headers) _json = json.loads(r.text) try: # 若出现了IndexError则表明没有检索到用户信息 users = _json['data']['cards'][1].get('card_group') relevant_num = len(users) l3.place(x=105, y=42) l3_var.set(f'搜索到了 {relevant_num} 个用户') l3['background'] = 'green' for user_ in users: user_info = user_.get('user') user_name = user_info.get('screen_name') id = user_info.get('id') """ 一种思路,使用一个列表存储screen_name和uid,两者用;隔开 当获取Uid时,直接切割字符串,取Listbox所选项索引,按索引在列表表值(uid) #使用字符串拼接 格式:screen_name+';'+str(id) # user_data = user_name + ';' + str(id) """ user_id_list.append(id) listb1.insert(END, user_name) except IndexError: # 如果没有检索到用户,就会报列表索引错误 messagebox.showinfo(title='提示', message='没有检索到相关用户,请更换关键字或使用用户id搜索!') l3.place(x=85, y=42) l3_var.set(f'请更换关键字或用户id搜索!') l3['background'] = 'yellow' # 没有检索到用户的话,提示之后,e1获得焦点之后,清除用户之前输入 e1.bind('WM_TAKE_FOCUS', e1_clear()) else: # 处理没有输入关键字 messagebox.showinfo(title='info', message='请输入关键字!') l3.place(x=110, y=42) l3_var.set(f'请输入关键字!') l3['background'] = 'red' def wb_pics_parse(): key_word = e1.get() select_path = c1.get() # 1.先判断关键字是否输入 if len(key_word) != 0: # 2.再判断是否选择了磁盘 if len(select_path) == 1: # 3.判断所选路径是否存在 if not os.path.exists(select_path): # 4.判断是否在列表框选择了用户名 try: # 直接获取选中项目 global user_name_selected user_name_selected = listb1.get(listb1.curselection()) user_name_index = listb1.curselection()[0] user_id = user_id_list[user_name_index] container_id = '107603' + str(user_id) start_url = f'https://m.weibo.cn/api/container/getIndex?containerid={container_id}' spider = WeiBo_pics_Spider(start_url) t1.config(state='normal') # 将Text开启,置为可读可写状态 l3.place(x=120, y=42) l3_var.set(f'正在运行......') l3['background'] = 'green' for pic_url in spider.get_pics_url(): filename = pic_url.split('/')[-1] # 字符串切割,切割出前10个字符串 filename = filename[10:] thread_it(spider.download_pics, pic_url, filename) # 搜索后,但是没选择用户,会报TclError错误,此except就用来捕获这个异常 except TclError: messagebox.showwarning(title='警告', message='请选择一个用户!') l3.place(x=105, y=42) l3_var.set(f'请选择一个用户!') l3['background'] = 'red' # 获取当前选中项目(使用索引) else: messagebox.showwarning(title='警告', message='请检查路径!') l3.place(x=80, y=42) l3_var.set(f'请检查路径!') l3['background'] = 'red' else: messagebox.showwarning(title='警告', message='您未选择磁盘!') l3.place(x=85, y=42) l3_var.set(f'请检查是否选择了磁盘!') l3['background'] = 'red' else: messagebox.showwarning(title='警告', message='请输入关键字!') l3.place(x=110, y=42) l3_var.set(f'请输入关键字!') l3['background'] = 'red' def open_disk(): disk = c1.get() big_dir = disk + ':/WeiBo_Pics' if len(disk) == 1: try: if not os.path.exists(big_dir): os.mkdir(big_dir) os.startfile(big_dir) except: messagebox.showwarning(title='警告', message='选中的磁盘不存在!') l3.place(x=110, y=42) l3_var.set(f'选中的磁盘不存在!') l3['background'] = 'red' else: messagebox.showwarning(title='警告', message='您未选中磁盘!') l3.place(x=115, y=42) l3_var.set(f'您未选中磁盘!') l3['background'] = 'red' def window_quit(): ret = messagebox.askyesno(title='提示', message='是否要退出?') if ret: window.destroy() window.quit() def e1_clear(): e1.delete(0, END) def print_path(event): # 要使用完整的路径 global disk disk = c1.get() disk_path = c1.get() + ':/' if len(disk) == 1: if os.path.exists(disk_path): messagebox.showinfo(title='提示', message=f'文件将存储到:{disk}:/WeiBo_Pics目录下') else: messagebox.showerror(title='错误', message='选定磁盘不存在!') l3.place(x=100, y=42) l3_var.set(f'选中的磁盘不存在!') l3['background'] = 'red' else: messagebox.showwarning(title='警告', message='请先选定磁盘!') l3.place(x=120, y=42) l3_var.set(f'请先选定磁盘!') l3['background'] = 'red' def switch(): if r1_var.get() == 0: r1_var.set(1) else: r1_var.set(0) def escape(event): window_quit() def enter(event): wb_search() def thread_it(func, *args): """ 解决程序卡死的重要方法,避免子线程和Ui线程在同一个线程,即将函数打包进线程 :param func: :param args: :return: None """ # 创建 t = threading.Thread(target=func, args=args) # 守护 t.setDaemon(True) # 启动 t.start() # 阻塞--卡死界面 # t.join() if __name__ == '__main__': window = Tk() width = 310 height = 395 screenWidth = window.winfo_screenwidth() # 获取显示区域的宽度 screenHeight = window.winfo_screenheight() # 获取显示区域的高度 left = (screenWidth - width) / 2 top = (screenHeight - height) / 2 window.geometry("%dx%d+%d+%d" % (width, height, left, top)) window.resizable(0, 0) window.title('微博图片简单采集软件') # 设置图标 ico_path = r'D:/Python Programs/PythonSpider/WeiboSpider/rely/icon.ico' window.iconbitmap(ico_path) # 插入图片到Label中 photo = Image.open("D:/Python Programs/PythonSpider/WeiboSpider/rely/w_b.png") # 括号里为需要显示在图形化界面里的图片 photo = photo.resize((150, 40)) # 规定图片大小 img0 = ImageTk.PhotoImage(photo) l1 = ttk.Label(window, imag=img0, justify='center') l1.pack() l3_var = StringVar() l3 = ttk.Label(window, background='yellow', textvar=l3_var) l3.place(x=120, y=42) l3_var.set('还没搜索') l1 = ttk.Label(window, text='关键字或\n用户id:') l1.place(x=13, y=60) e1 = ttk.Entry(window, justify='center') e1.place(x=80, y=65) l4 = ttk.Label(window, text='磁盘:') l4.place(x=13, y=100) disk_list = ['C', 'D', 'E', 'F', 'G', 'H', 'I'] c1 = ttk.Combobox(window, justify='center', state='readonly', width=17, value=disk_list) # Combobox默认选中索引为0的项目,即C盘 c1.bind('<>', print_path) c1.place(x=80, y=100) r1_var = IntVar() r1_var.set(1) # 默认选中为1 check1 = Checkbutton(window, text='下载完\n打开文件夹', command=switch) check1.place(x=223, y=90) b1 = ttk.Button(window, text='搜索', command=lambda: thread_it(wb_search), width=7) b1.place(x=230, y=63) l5 = ttk.Label(window, text='用户列表:') l5.place(x=13, y=150) lb1_var = StringVar() listb1 = Listbox(window, justify='center', listvariable=lb1_var, width=20, height=4) listb1.place(x=80, y=135) b2 = ttk.Button(window, text='开始爬取', command=lambda: thread_it(wb_pics_parse, ), width=7) b2.place(x=230, y=160) l6 = ttk.Label(window, text='状态:') l6.place(x=13, y=280) t1 = Text(window, width=23, font=('Times New Roman', 10), state='disable') t1.place(x=80, y=230, height=140) b3 = ttk.Button(window, text=' 打开\n文件夹', width=7, command=open_disk) b3.place(x=230, y=230) b3 = ttk.Button(window, text='退出', width=7, command=window_quit) b3.place(x=230, y=315) f1 = ttk.LabelFrame(window) f1.place(x=65, y=350) l6 = ttk.Label(f1, text='感谢您的使用!', foreground='red') l6.pack(anchor="w", fill=X) # 绑定esc键---退出 window.bind('', escape) # 使用return键给输入框Entry绑定enter事件---search搜索 e1.bind('', enter) # 加入主窗口销毁事件 window.protocol('WM_DELETE_WINDOW', window_quit) window.mainloop()