|
|
import json
|
|
|
import random
|
|
|
import re
|
|
|
import os
|
|
|
from tkinter import *
|
|
|
from tkinter import messagebox
|
|
|
from tkinter import ttk
|
|
|
import requests
|
|
|
import threading
|
|
|
from PIL import Image, ImageTk
|
|
|
|
|
|
"""
|
|
|
说明:
|
|
|
1.使用“check button”实现下载完打开文件夹操作,注册了enter、esc热键,优化体验
|
|
|
2.实现关键字、磁盘、用户判断逻辑
|
|
|
3.利用多线程来执行下载操作
|
|
|
"""
|
|
|
|
|
|
|
|
|
class WeiBo_pics_Spider(object):
    """Collect picture URLs from a Weibo container API URL and download them.

    The methods rely on names that the GUI script creates at module level:
    ``t1`` (status Text widget), ``window``, ``r1_var``, ``disk``,
    ``user_name_selected`` and the helper ``get_ua``.
    """

    # Seconds to wait for the server. Without a timeout ``requests`` can block
    # forever on a stalled connection.
    REQUEST_TIMEOUT = 15

    def __init__(self, start_url):
        """Store the API URL to which ``&page=N`` is appended for each page."""
        self.start_url = start_url

    @staticmethod
    def _iter_large_urls(cards):
        """Yield the ``large`` picture URL of every picture in *cards*.

        Cards without an ``mblog`` entry and posts without ``pics`` are
        skipped. ``None`` or an empty sequence yields nothing.
        """
        for card in cards or ():
            mblog = card.get('mblog')
            if mblog is None:
                continue
            pics = mblog.get('pics')
            if pics is None:
                continue
            for pic in pics:
                yield pic['large']['url']

    # Parse the picture addresses.
    def get_pics_url(self):
        """Yield picture URLs page by page until the API stops returning data.

        Pages are requested starting at 1. When the response's ``ok`` field is
        not 1 the page number is written to ``t1``, the download folder is
        opened if ``r1_var`` is 1, and the generator ends.
        """
        page = 1
        while True:
            url = self.start_url + '&page={}'.format(page)
            headers = {'User-Agent': get_ua()}
            r = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            payload = json.loads(r.text)

            # Check the status flag before touching "data": a response that
            # signals the end must not fail on a missing "cards" entry.
            if payload.get('ok') != 1:
                t1.insert(END, f'***在第{page}页终止***\n')
                t1.see(END)
                t1.update()
                if r1_var.get() == 1:
                    big_dir = disk + ':/WeiBo_Pics'
                    os.startfile(big_dir)
                break

            cards = (payload.get('data') or {}).get('cards')
            yield from self._iter_large_urls(cards)
            page += 1

    # Download one picture.
    def download_pics(self, url, filename):
        """Download *url* and save it as *filename* in the selected user's folder.

        The target directory is ``<disk>:/WeiBo_Pics/<user_name_selected>``.
        After the file is written, its name is appended to ``t1``.
        """
        headers = {'User-Agent': get_ua()}
        r = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)

        aim_path = os.path.join(disk + ':/WeiBo_Pics', user_name_selected)
        # exist_ok replaces the former bare "except: pass": an existing
        # directory is fine, any other failure is no longer hidden.
        os.makedirs(aim_path, exist_ok=True)

        with open(os.path.join(aim_path, filename), 'wb') as f:
            f.write(r.content)

        # Insert first, then scroll, so the newest line is the one shown.
        t1.insert(END, f'{filename}\n')
        t1.see(END)
        # Refresh after every picture so the window keeps repainting.
        window.update()
|
|
|
|
|
|
|
|
|
def get_ua():
    """Return a randomly assembled Chrome-style User-Agent string.

    The Chrome major version is drawn from 55..62, the build number from
    0..3200 and the patch number from 0..140. The platform token is picked
    at random from a fixed list of four.
    """
    major = random.randint(55, 62)
    build = random.randint(0, 3200)
    patch = random.randint(0, 140)

    platforms = [
        '(Windows NT 6.1; WOW64)',
        '(Windows NT 10.0; WOW64)',
        '(X11; Linux x86_64)',
        '(Macintosh; Intel Mac OS X 10_12_6)',
    ]
    chrome_token = 'Chrome/{}.0.{}.{}'.format(major, build, patch)

    # The platform is chosen after the three version numbers, so the sequence
    # of random draws is unchanged.
    pieces = (
        'Mozilla/5.0',
        random.choice(platforms),
        'AppleWebKit/537.36',
        '(KHTML, like Gecko)',
        chrome_token,
        'Safari/537.36',
    )
    return ' '.join(pieces)
|
|
|
|
|
|
|
|
|
def wb_search():
    """Search Weibo users by keyword or 10-digit user id and list the matches.

    Reads the query from ``e1``, rebuilds the global ``user_id_list`` and
    fills ``listb1`` with the matching screen names, so that the index of a
    list entry matches the index of its id in ``user_id_list``. The outcome
    is reported through the status label ``l3`` / ``l3_var`` and, on failure,
    a message box.
    """
    global user_id_list
    from urllib.parse import quote

    # Clear the previous results so only the new matches are shown.
    listb1.delete(0, END)
    url1 = 'https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D3%26q%3D{}%26t%3D0'
    headers = {'User-Agent': get_ua()}
    key_word = e1.get()
    user_id_list = list()

    # Nothing typed: ask for a keyword and stop.
    if not key_word:
        messagebox.showinfo(title='info', message='请输入关键字!')
        l3.place(x=110, y=42)
        l3_var.set('请输入关键字!')
        l3['background'] = 'red'
        return

    # The input is a user id only when it is exactly ten digits. fullmatch
    # is required here: match() would also accept "1234567890abc".
    if re.fullmatch(r'\d{10}', key_word):
        try:
            url2 = f'https://m.weibo.cn/api/container/getIndex?uid={key_word}&containerid=100505{key_word}'
            r1 = requests.get(url2, headers=headers)
            _data = json.loads(r1.text)
            screen_name = _data['data']['userInfo'].get('screen_name')
        except (KeyError, TypeError):
            messagebox.showinfo(title='提示', message='没有检索到相关用户,请检查用户id或使用关键字搜索!')
            l3.place(x=65, y=42)
            l3_var.set('请检查用户id或使用关键字搜索!')
            l3['background'] = 'yellow'
            # Clear the rejected input. The former
            # e1.bind('WM_TAKE_FOCUS', e1_clear()) did exactly this, because
            # it called e1_clear() at once and bound no handler.
            e1_clear()
            return

        # Record the id only once the lookup succeeded.
        user_id_list.append(key_word)
        l3.place(x=120, y=42)
        l3_var.set('搜索成功')
        l3['background'] = 'green'
        listb1.insert(END, screen_name)
        return

    # Otherwise search by keyword. The keyword is percent-encoded so that
    # characters such as '#' or '&' cannot break the query string.
    aim_url = url1.format(quote(key_word, safe=''))
    r = requests.get(aim_url, headers=headers)
    _json = json.loads(r.text)

    try:
        # An IndexError here means the response has no user card.
        card_group = _json['data']['cards'][1].get('card_group') or []
        found = [
            (user_info.get('screen_name'), user_info.get('id'))
            for user_info in (entry.get('user') for entry in card_group)
            if user_info
        ]
    except (IndexError, KeyError, TypeError, AttributeError):
        found = []

    if not found:
        messagebox.showinfo(title='提示', message='没有检索到相关用户,请更换关键字或使用用户id搜索!')
        l3.place(x=85, y=42)
        l3_var.set('请更换关键字或用户id搜索!')
        l3['background'] = 'yellow'
        # Clear the rejected input (see the note in the user-id branch).
        e1_clear()
        return

    relevant_num = len(found)
    l3.place(x=105, y=42)
    l3_var.set(f'搜索到了 {relevant_num} 个用户')
    l3['background'] = 'green'

    # Keep user_id_list and the listbox in the same order so that a listbox
    # index can be used to look up the id.
    for user_name, user_id in found:
        user_id_list.append(user_id)
        listb1.insert(END, user_name)
|
|
|
|
|
|
|
|
|
def wb_pics_parse():
    """Validate the form, then crawl and download the selected user's pictures.

    Checks, in order: a keyword was entered in ``e1``, a one-letter disk was
    chosen in ``c1``, that drive exists, and a user is selected in ``listb1``.
    Each failed check shows a warning box, updates the status label ``l3``
    and returns. On success it sets the global ``user_name_selected`` and
    starts one download thread per picture URL via ``thread_it``.
    """
    global user_name_selected

    def warn(message, status, x):
        """Show a warning box, then put *status* on the red status label."""
        messagebox.showwarning(title='警告', message=message)
        l3.place(x=x, y=42)
        l3_var.set(status)
        l3['background'] = 'red'

    key_word = e1.get()
    select_path = c1.get()

    # 1. A keyword must have been entered.
    if len(key_word) == 0:
        warn('请输入关键字!', '请输入关键字!', 110)
        return

    # 2. A disk (single drive letter) must have been chosen.
    if len(select_path) != 1:
        warn('您未选择磁盘!', '请检查是否选择了磁盘!', 85)
        return

    # 3. The chosen drive must exist. The drive root is tested ("D:/"): the
    #    bare letter would be looked up as a relative path instead.
    if not os.path.exists(select_path + ':/'):
        warn('请检查路径!', '请检查路径!', 80)
        return

    # 4. A user must be selected in the list box.
    selection = listb1.curselection()
    if not selection:
        warn('请选择一个用户!', '请选择一个用户!', 105)
        return

    user_name_index = selection[0]
    user_name_selected = listb1.get(user_name_index)
    user_id = user_id_list[user_name_index]

    container_id = '107603' + str(user_id)
    start_url = f'https://m.weibo.cn/api/container/getIndex?containerid={container_id}'
    spider = WeiBo_pics_Spider(start_url)

    t1.config(state='normal')  # make the Text widget writable
    l3.place(x=120, y=42)
    l3_var.set('正在运行......')
    l3['background'] = 'green'

    for pic_url in spider.get_pics_url():
        filename = pic_url.split('/')[-1]
        # Drop the first 10 characters of the remote file name.
        filename = filename[10:]
        thread_it(spider.download_pics, pic_url, filename)
|
|
|
|
|
|
|
|
|
def open_disk():
    """Open ``<disk>:/WeiBo_Pics`` in the file manager, creating it if needed.

    The disk letter is read from ``c1``. If no single letter is selected, or
    the folder cannot be created or opened, a warning box is shown and the
    status label ``l3`` turns red.
    """
    disk = c1.get()

    if len(disk) != 1:
        messagebox.showwarning(title='警告', message='您未选中磁盘!')
        l3.place(x=115, y=42)
        l3_var.set('您未选中磁盘!')
        l3['background'] = 'red'
        return

    big_dir = disk + ':/WeiBo_Pics'
    try:
        os.makedirs(big_dir, exist_ok=True)
        os.startfile(big_dir)
    except (OSError, AttributeError):
        # OSError: the drive is missing or the folder cannot be created or
        # opened. AttributeError: os.startfile exists only on Windows.
        # Anything else is no longer swallowed.
        messagebox.showwarning(title='警告', message='选中的磁盘不存在!')
        l3.place(x=110, y=42)
        l3_var.set('选中的磁盘不存在!')
        l3['background'] = 'red'
|
|
|
|
|
|
|
|
|
def window_quit():
    """Ask for confirmation and, if confirmed, destroy the window and quit."""
    confirmed = messagebox.askyesno(title='提示', message='是否要退出?')
    if not confirmed:
        return
    window.destroy()
    window.quit()
|
|
|
|
|
|
|
|
|
def e1_clear():
    """Delete the whole content of the entry widget ``e1``."""
    first, last = 0, END
    e1.delete(first, last)
|
|
|
|
|
|
|
|
|
def print_path(event):
    """Handle a disk selection in ``c1``: record it and report the target path.

    Stores the selected value in the global ``disk``. A single letter whose
    drive root exists gets an info box naming the storage directory. Any
    other case gets an error or warning box and a red status label.
    """
    global disk
    disk = c1.get()
    # The existence test needs the full drive root, not the bare letter.
    drive_root = disk + ':/'

    if len(disk) != 1:
        messagebox.showwarning(title='警告', message='请先选定磁盘!')
        l3.place(x=120, y=42)
        l3_var.set('请先选定磁盘!')
        l3['background'] = 'red'
        return

    if not os.path.exists(drive_root):
        messagebox.showerror(title='错误', message='选定磁盘不存在!')
        l3.place(x=100, y=42)
        l3_var.set('选中的磁盘不存在!')
        l3['background'] = 'red'
        return

    messagebox.showinfo(title='提示', message=f'文件将存储到:{disk}:/WeiBo_Pics目录下')
|
|
|
|
|
|
|
|
|
def switch():
    """Toggle ``r1_var``: 0 becomes 1, any other value becomes 0."""
    toggled = 1 if r1_var.get() == 0 else 0
    r1_var.set(toggled)
|
|
|
|
|
|
|
|
|
def escape(event):
    """Event-handler wrapper around ``window_quit``; *event* is not used."""
    window_quit()
    return None
|
|
|
|
|
|
|
|
|
def enter(event):
    """Event handler that starts a user search; *event* is not used.

    The search runs through ``thread_it``, the same way the search button
    starts it, so the HTTP requests in ``wb_search`` do not block the Tk
    event loop.
    """
    thread_it(wb_search)
|
|
|
|
|
|
|
|
|
def thread_it(func, *args):
    """Run ``func(*args)`` on a new daemon thread and return immediately.

    Keeps long-running work off the thread that calls this function, so the
    UI does not freeze while the work runs.

    :param func: callable to execute on the worker thread
    :param args: positional arguments passed to *func*
    :return: None
    """
    # daemon=True replaces the deprecated Thread.setDaemon(True): a daemon
    # thread does not keep the process alive at exit.
    t = threading.Thread(target=func, args=args, daemon=True)
    t.start()
    # The thread is deliberately not joined: joining would block the caller,
    # and with it the UI.
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # ---- main window: fixed size, centred on the screen ----
    window = Tk()
    width = 310
    height = 395
    screenWidth = window.winfo_screenwidth()  # width of the display area
    screenHeight = window.winfo_screenheight()  # height of the display area
    left = (screenWidth - width) / 2
    top = (screenHeight - height) / 2
    window.geometry("%dx%d+%d+%d" % (width, height, left, top))
    window.resizable(0, 0)
    window.title('微博图片简单采集软件')

    # Window icon. The path is machine specific, so a missing or unreadable
    # file must not prevent the application from starting.
    ico_path = r'D:/Python Programs/PythonSpider/WeiboSpider/rely/icon.ico'
    try:
        window.iconbitmap(ico_path)
    except TclError:
        pass  # decorative only: run without an icon

    # Banner image shown in a Label at the top of the window. img0 stays a
    # module-level name so the image object is not garbage collected.
    try:
        photo = Image.open("D:/Python Programs/PythonSpider/WeiboSpider/rely/w_b.png")
        photo = photo.resize((150, 40))  # fixed banner size
        img0 = ImageTk.PhotoImage(photo)
    except OSError:
        img0 = None  # decorative only: run without the banner
    if img0 is not None:
        l1 = ttk.Label(window, image=img0, justify='center')
        l1.pack()

    # Status label, updated by the handlers through l3_var.
    l3_var = StringVar()
    l3 = ttk.Label(window, background='yellow', textvariable=l3_var)
    l3.place(x=120, y=42)
    l3_var.set('还没搜索')

    # Keyword / user id input.
    l1 = ttk.Label(window, text='关键字或\n用户id:')
    l1.place(x=13, y=60)

    e1 = ttk.Entry(window, justify='center')
    e1.place(x=80, y=65)

    # Disk selection.
    l4 = ttk.Label(window, text='磁盘:')
    l4.place(x=13, y=100)

    disk_list = ['C', 'D', 'E', 'F', 'G', 'H', 'I']
    c1 = ttk.Combobox(window, justify='center', state='readonly', width=17, values=disk_list)
    # No entry is preselected: the user has to pick a disk. Picking one
    # triggers print_path, which stores the choice in the global "disk".
    c1.bind('<<ComboboxSelected>>', print_path)
    c1.place(x=80, y=100)

    # "Open the folder when the download finishes" option, on by default.
    r1_var = IntVar()
    r1_var.set(1)
    # The check button is linked to r1_var directly. It used to have no
    # variable and only called switch(), so it was drawn unchecked while
    # r1_var was 1 and showed the opposite of the real setting.
    check1 = Checkbutton(window, text='下载完\n打开文件夹', variable=r1_var)
    check1.place(x=223, y=90)

    # Search button; the search runs on a worker thread.
    b1 = ttk.Button(window, text='搜索', command=lambda: thread_it(wb_search), width=7)
    b1.place(x=230, y=63)

    # List of users found by the search.
    l5 = ttk.Label(window, text='用户列表:')
    l5.place(x=13, y=150)
    lb1_var = StringVar()
    listb1 = Listbox(window, justify='center', listvariable=lb1_var, width=20, height=4)
    listb1.place(x=80, y=135)

    # Start-crawl button; the crawl runs on a worker thread.
    b2 = ttk.Button(window, text='开始爬取', command=lambda: thread_it(wb_pics_parse), width=7)
    b2.place(x=230, y=160)

    # Download log; wb_pics_parse makes it writable before crawling.
    l6 = ttk.Label(window, text='状态:')
    l6.place(x=13, y=280)

    t1 = Text(window, width=23, font=('Times New Roman', 10), state='disabled')
    t1.place(x=80, y=230, height=140)

    # "Open folder" and "Quit" buttons (the name b3 is reused, as before).
    b3 = ttk.Button(window, text=' 打开\n文件夹', width=7, command=open_disk)
    b3.place(x=230, y=230)

    b3 = ttk.Button(window, text='退出', width=7, command=window_quit)
    b3.place(x=230, y=315)

    # Footer.
    f1 = ttk.LabelFrame(window)
    f1.place(x=65, y=350)
    l6 = ttk.Label(f1, text='感谢您的使用!', foreground='red')
    l6.pack(anchor="w", fill=X)

    # Esc asks to quit.
    window.bind('<Escape>', escape)
    # Return inside the entry starts a search.
    e1.bind('<Return>', enter)

    # Closing the window goes through the same confirmation as "Quit".
    window.protocol('WM_DELETE_WINDOW', window_quit)
    window.mainloop()
|