You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

375 lines
14 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import json
import random
import re
import os
from tkinter import *
from tkinter import messagebox
from tkinter import ttk
import requests
import threading
from PIL import Image, ImageTk
"""
说明:
1.使用“check button”实现下载完打开文件夹操作注册了enter、esc热键优化体验
2.实现关键字、磁盘、用户判断逻辑
3.利用多线程来执行下载操作
"""
class WeiBo_pics_Spider(object):
    """Walk the pages of a Weibo container API and download the pictures found.

    The methods read module-level objects that the GUI part of this file
    creates: the widgets ``t1`` and ``window``, the variable ``r1_var`` and
    the globals ``disk`` and ``user_name_selected``.
    """

    def __init__(self, start_url):
        """Store the API URL that the page number is appended to.

        :param start_url: container API URL without the ``&page=`` parameter.
        """
        self.start_url = start_url

    @staticmethod
    def _iter_card_pics(cards):
        """Yield the ``large`` picture URL of every picture in *cards*.

        Cards without an ``mblog`` entry and posts without ``pics`` are
        skipped.
        """
        for card in cards:
            mblog = card.get('mblog')
            if mblog is None:
                continue
            pics = mblog.get('pics')
            if pics is None:
                continue
            for pic in pics:
                yield pic['large']['url']

    # Extract the picture addresses.
    def get_pics_url(self):
        """Yield picture URLs page by page until the API stops reporting ``ok == 1``.

        When the API reports anything other than ``ok == 1`` a termination
        line is written to ``t1``, the download folder is opened if
        ``r1_var`` is 1, and the generator ends.
        """
        page = 1
        while True:
            url = self.start_url + '&page={}'.format(page)
            headers = {'User-Agent': get_ua()}
            r = requests.get(url, headers=headers)
            _json = json.loads(r.text)
            # Check the status flag before touching the payload: a response
            # that is not "ok" may not carry data/cards, and reading it
            # first would raise before the termination branch could run.
            if _json['ok'] != 1:
                t1.insert(END, f'***在第{page}页终止***\n')
                t1.see(END)
                t1.update()
                if r1_var.get() == 1:
                    os.startfile(disk + ':/WeiBo_Pics')
                break
            yield from self._iter_card_pics(_json['data']['cards'])
            page += 1

    # Download one picture.
    def download_pics(self, url, filename):
        """Download *url* into ``<disk>:/WeiBo_Pics/<user_name_selected>/<filename>``.

        :param url: address of the picture.
        :param filename: file name to store the picture under.
        """
        headers = {'User-Agent': get_ua()}
        r = requests.get(url, headers=headers)
        aim_path = os.path.join(disk + ':/WeiBo_Pics', user_name_selected)
        # exist_ok replaces the former bare ``except: pass``: an existing
        # directory is fine, any other failure is reported here.
        os.makedirs(aim_path, exist_ok=True)
        with open(os.path.join(aim_path, filename), 'wb') as f:
            f.write(r.content)
        t1.insert(END, f'{filename}\n')
        # Scroll after inserting so the newest line is the one made visible.
        t1.see(END)
        # Refresh once per downloaded picture to keep the window responsive.
        window.update()
def get_ua():
    """Return a randomised Chrome-style User-Agent string.

    The Chrome major version is drawn from 55..62, the build number from
    0..3200 and the patch number from 0..140; the platform token is picked
    from a fixed set of four.
    """
    major = random.randint(55, 62)
    build = random.randint(0, 3200)
    patch = random.randint(0, 140)
    platforms = (
        '(Windows NT 6.1; WOW64)',
        '(Windows NT 10.0; WOW64)',
        '(X11; Linux x86_64)',
        '(Macintosh; Intel Mac OS X 10_12_6)',
    )
    platform = random.choice(platforms)
    return (
        f'Mozilla/5.0 {platform} AppleWebKit/537.36 '
        f'(KHTML, like Gecko) Chrome/{major}.0.{build}.{patch} Safari/537.36'
    )
def wb_search():
    """Search Weibo users by keyword or by numeric user id and list them.

    Reads the query from ``e1``, fills ``listb1`` with the matching screen
    names and rebinds the global ``user_id_list`` so that its entries line
    up index-for-index with the listbox rows. The outcome is reported in
    the status label ``l3`` and, for failures, in a message box.
    """
    global user_id_list

    def show_status(text, x, background):
        # The label is re-placed on every update because its x offset is
        # chosen per message in the original layout.
        l3.place(x=x, y=42)
        l3_var.set(text)
        l3['background'] = background

    # Clear the listbox first so that only the new results are shown.
    listb1.delete(0, END)
    search_api = 'https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D3%26q%3D{}%26t%3D0'
    headers = {'User-Agent': get_ua()}
    key_word = e1.get()
    user_id_list = []

    # Nothing was typed.
    if not key_word:
        messagebox.showinfo(title='info', message='请输入关键字!')
        show_status('请输入关键字!', 110, 'red')
        return

    # A query made only of digits (10 or more) is treated as a user id and
    # resolved to its screen name. fullmatch is used so that text which
    # merely starts with ten digits is searched as a keyword instead.
    if re.fullmatch(r'\d{10,}', key_word):
        profile_url = f'https://m.weibo.cn/api/container/getIndex?uid={key_word}&containerid=100505{key_word}'
        try:
            response = requests.get(profile_url, headers=headers)
            profile = json.loads(response.text)
            screen_name = profile['data']['userInfo'].get('screen_name')
        except KeyError:
            messagebox.showinfo(title='提示', message='没有检索到相关用户请检查用户id或使用关键字搜索')
            show_status('请检查用户id或使用关键字搜索', 65, 'yellow')
            # Drop the rejected input. The former
            # ``e1.bind('WM_TAKE_FOCUS', e1_clear())`` evaluated e1_clear()
            # on the spot and passed None to bind, so this is what it did.
            e1_clear()
        else:
            # Record the id only once the lookup succeeded, keeping the
            # list aligned with the listbox.
            user_id_list.append(key_word)
            show_status('搜索成功', 120, 'green')
            listb1.insert(END, screen_name)
        return

    # Otherwise search by keyword and list every user found.
    response = requests.get(search_api.format(key_word), headers=headers)
    payload = json.loads(response.text)
    try:
        # A missing second card (IndexError), a missing data/cards key
        # (KeyError) or a card without card_group (len(None) -> TypeError)
        # all mean that no user was found.
        users = payload['data']['cards'][1].get('card_group')
        relevant_num = len(users)
    except (IndexError, KeyError, TypeError):
        messagebox.showinfo(title='提示', message='没有检索到相关用户请更换关键字或使用用户id搜索')
        show_status('请更换关键字或用户id搜索', 85, 'yellow')
        # Drop the input that produced no result (see the note above).
        e1_clear()
        return

    show_status(f'搜索到了 {relevant_num} 个用户', 105, 'green')
    for entry in users:
        user_info = entry.get('user')
        # The uid is stored at the same index as the listbox row so the
        # selection index can be used to look it up later.
        user_id_list.append(user_info.get('id'))
        listb1.insert(END, user_info.get('screen_name'))
def wb_pics_parse():
    """Validate the form, then download every picture of the selected user.

    Checks, in order: a keyword was typed into ``e1``, a one-letter disk is
    selected in ``c1``, that disk exists, and a user is selected in
    ``listb1``. Each failed check shows a warning box and a red status
    label. On success the global ``user_name_selected`` is set and one
    download thread is started per picture URL.
    """
    global user_name_selected

    def warn(message, status_text, x):
        messagebox.showwarning(title='警告', message=message)
        l3.place(x=x, y=42)
        l3_var.set(status_text)
        l3['background'] = 'red'

    key_word = e1.get()
    select_path = c1.get()

    # 1. A keyword must have been entered.
    if not key_word:
        warn('请输入关键字!', '请输入关键字!', 110)
        return
    # 2. A disk (single drive letter) must have been chosen.
    if len(select_path) != 1:
        warn('您未选择磁盘!', '请检查是否选择了磁盘!', 85)
        return
    # 3. The chosen disk must exist. The original tested
    #    ``not os.path.exists(select_path)``, i.e. a relative path named
    #    after the bare drive letter, so the check never looked at the disk.
    if not os.path.exists(select_path + ':/'):
        warn('请检查路径!', '请检查路径!', 80)
        return
    # 4. A user must be selected in the listbox.
    selection = listb1.curselection()
    if not selection:
        warn('请选择一个用户!', '请选择一个用户!', 105)
        return

    user_name_index = selection[0]
    user_name_selected = listb1.get(user_name_index)
    # user_id_list is filled by the search in listbox order.
    user_id = user_id_list[user_name_index]
    container_id = '107603' + str(user_id)
    start_url = f'https://m.weibo.cn/api/container/getIndex?containerid={container_id}'
    spider = WeiBo_pics_Spider(start_url)

    t1.config(state='normal')  # make the Text widget writable
    l3.place(x=120, y=42)
    l3_var.set('正在运行......')
    l3['background'] = 'green'

    for pic_url in spider.get_pics_url():
        # Keep the last path segment and drop its first 10 characters.
        filename = pic_url.split('/')[-1][10:]
        thread_it(spider.download_pics, pic_url, filename)
def open_disk():
    """Open ``<disk>:/WeiBo_Pics`` in the file manager, creating it if needed.

    The disk letter is read from ``c1``. A warning box and a red status
    label are shown when no disk is selected or the folder cannot be
    created or opened.
    """
    selected = c1.get()
    if len(selected) != 1:
        messagebox.showwarning(title='警告', message='您未选中磁盘!')
        l3.place(x=115, y=42)
        l3_var.set('您未选中磁盘!')
        l3['background'] = 'red'
        return

    big_dir = selected + ':/WeiBo_Pics'
    try:
        os.makedirs(big_dir, exist_ok=True)
        os.startfile(big_dir)
    except OSError:
        # Only filesystem failures (e.g. a drive that does not exist) are
        # reported as a missing disk; the former bare ``except`` hid
        # every other kind of error as well.
        messagebox.showwarning(title='警告', message='选中的磁盘不存在!')
        l3.place(x=110, y=42)
        l3_var.set('选中的磁盘不存在!')
        l3['background'] = 'red'
def window_quit():
    """Ask for confirmation; on "yes" destroy the main window and quit."""
    confirmed = messagebox.askyesno(title='提示', message='是否要退出?')
    if not confirmed:
        return
    window.destroy()
    window.quit()
def e1_clear():
    """Delete everything currently typed into the entry ``e1``."""
    e1.delete(0, END)
def print_path(event):
    """Record the disk chosen in ``c1`` and tell the user where files will go.

    Stores the selection in the global ``disk``. The full ``<letter>:/``
    path is used for the existence check.

    :param event: the Tk event that triggered the callback (unused).
    """
    global disk
    disk = c1.get()

    if len(disk) != 1:
        messagebox.showwarning(title='警告', message='请先选定磁盘!')
        status_text, x_offset = '请先选定磁盘!', 120
    elif os.path.exists(disk + ':/'):
        messagebox.showinfo(title='提示', message=f'文件将存储到:{disk}:/WeiBo_Pics目录下')
        return
    else:
        messagebox.showerror(title='错误', message='选定磁盘不存在!')
        status_text, x_offset = '选中的磁盘不存在!', 100

    # Both failure paths end with a red status label.
    l3.place(x=x_offset, y=42)
    l3_var.set(status_text)
    l3['background'] = 'red'
def switch():
    """Toggle ``r1_var``: 0 becomes 1, any other value becomes 0."""
    r1_var.set(1 if r1_var.get() == 0 else 0)
def escape(event):
    """Key-binding callback: run the quit confirmation.

    :param event: the Tk event that triggered the callback (unused).
    """
    window_quit()
def enter(event):
    """Key-binding callback: start a user search.

    :param event: the Tk event that triggered the callback (unused).
    """
    wb_search()
def thread_it(func, *args):
    """Run ``func(*args)`` on a new daemon thread and return immediately.

    Moving long-running work off the calling thread keeps the UI from
    freezing. The thread is not joined, because joining would block the
    caller again.

    :param func: callable to execute.
    :param args: positional arguments passed to *func*.
    :return: None
    """
    # daemon=True replaces the deprecated Thread.setDaemon(True) call.
    worker = threading.Thread(target=func, args=args, daemon=True)
    worker.start()
if __name__ == '__main__':
    # Build the fixed-size main window and centre it on the screen.
    window = Tk()
    width = 310
    height = 395
    screenWidth = window.winfo_screenwidth()  # width of the display area
    screenHeight = window.winfo_screenheight()  # height of the display area
    left = (screenWidth - width) / 2
    top = (screenHeight - height) / 2
    window.geometry("%dx%d+%d+%d" % (width, height, left, top))
    window.resizable(0, 0)
    window.title('微博图片简单采集软件')

    # Window icon. The path is machine-specific, so a missing or
    # unreadable icon must not prevent the program from starting.
    ico_path = r'D:/Python Programs/PythonSpider/WeiboSpider/rely/icon.ico'
    try:
        window.iconbitmap(ico_path)
    except TclError:
        pass  # keep the default icon

    # Logo shown in a Label at the top; skipped when the file is unavailable.
    try:
        photo = Image.open("D:/Python Programs/PythonSpider/WeiboSpider/rely/w_b.png")
        photo = photo.resize((150, 40))  # fix the displayed size
        img0 = ImageTk.PhotoImage(photo)
    except OSError:
        img0 = None
    l1 = ttk.Label(window, justify='center')
    if img0 is not None:
        l1.configure(image=img0)
    l1.pack()

    # Status label, updated by the callbacks.
    l3_var = StringVar()
    l3 = ttk.Label(window, background='yellow', textvariable=l3_var)
    l3.place(x=120, y=42)
    l3_var.set('还没搜索')

    # Keyword / user id input.
    l1 = ttk.Label(window, text='关键字或\n用户id')
    l1.place(x=13, y=60)
    e1 = ttk.Entry(window, justify='center')
    e1.place(x=80, y=65)

    # Disk selection; nothing is selected until the user picks a letter.
    l4 = ttk.Label(window, text='磁盘:')
    l4.place(x=13, y=100)
    disk_list = ['C', 'D', 'E', 'F', 'G', 'H', 'I']
    c1 = ttk.Combobox(window, justify='center', state='readonly', width=17, values=disk_list)
    c1.bind('<<ComboboxSelected>>', print_path)
    c1.place(x=80, y=100)

    # "Open folder when finished" option, on by default. The checkbox is
    # bound directly to r1_var so that the tick mark always reflects the
    # value; previously the box was drawn unticked while r1_var was 1 and
    # a command callback flipped the value on every click.
    r1_var = IntVar()
    r1_var.set(1)
    check1 = Checkbutton(window, text='下载完\n打开文件夹', variable=r1_var)
    check1.place(x=223, y=90)

    b1 = ttk.Button(window, text='搜索', command=lambda: thread_it(wb_search), width=7)
    b1.place(x=230, y=63)

    # List of users found by the search.
    l5 = ttk.Label(window, text='用户列表:')
    l5.place(x=13, y=150)
    lb1_var = StringVar()
    listb1 = Listbox(window, justify='center', listvariable=lb1_var, width=20, height=4)
    listb1.place(x=80, y=135)

    b2 = ttk.Button(window, text='开始爬取', command=lambda: thread_it(wb_pics_parse), width=7)
    b2.place(x=230, y=160)

    # Download log; kept read-only until a crawl starts.
    l6 = ttk.Label(window, text='状态:')
    l6.place(x=13, y=280)
    t1 = Text(window, width=23, font=('Times New Roman', 10), state='disable')
    t1.place(x=80, y=230, height=140)

    b3 = ttk.Button(window, text=' 打开\n文件夹', width=7, command=open_disk)
    b3.place(x=230, y=230)
    b3 = ttk.Button(window, text='退出', width=7, command=window_quit)
    b3.place(x=230, y=315)

    f1 = ttk.LabelFrame(window)
    f1.place(x=65, y=350)
    l6 = ttk.Label(f1, text='感谢您的使用!', foreground='red')
    l6.pack(anchor="w", fill=X)

    # Esc asks to quit.
    window.bind('<Escape>', escape)
    # Return inside the entry starts a search.
    e1.bind('<Return>', enter)
    # Closing the window goes through the same confirmation.
    window.protocol('WM_DELETE_WINDOW', window_quit)
    window.mainloop()