You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

158 lines
5.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import selenium
import time
import os
from bs4 import BeautifulSoup
from selenium import webdriver
def login(login_qq, password, business_qq):
'''
登陆
:param login_qq: 登陆用的QQ
:param password: 登陆的QQ密码
:param business_qq: 业务QQ
:return: driver
'''
driver = webdriver.Chrome()
driver.get('https://user.qzone.qq.com/{}/311'.format(business_qq)) # URL
driver.implicitly_wait(10) # 隐示等待,为了等待充分加载好网址
driver.find_element_by_id('login_div')
driver.switch_to.frame('login_frame') # 切到输入账号密码的frame
driver.find_element_by_id('switcher_plogin').click() ##点击‘账号密码登录’
driver.find_element_by_id('u').clear() ##清空账号栏
driver.find_element_by_id('u').send_keys(login_qq) # 输入账号
driver.find_element_by_id('p').clear() # 清空密码栏
driver.find_element_by_id('p').send_keys(password) # 输入密码
driver.find_element_by_id('login_button').click() # 点击‘登录’
driver.switch_to.default_content()
driver.implicitly_wait(10)
time.sleep(5)
try:
driver.find_element_by_id('QM_OwnerInfo_Icon')
return driver
except:
print('不能访问' + business_qq)
return None
def get_shuoshuo(driver):
root = "C://Users//86138//Desktop//个人信息//qq空间//" # 需要存储的根目录
path = root + "动态.txt" # 需要存储的路径以及文件名,若要自定义文件名则只需将改为path=root+"文件名.jpg
if not os.path.exists(root): # 判断根目录是否存在,不存在就创建
os.mkdir(root)
with open(path,'w') as f:
f.write('')
page = 1
while True:
# 下拉滚动条
for j in range(1, 5):
driver.execute_script("window.scrollBy(0,5000)")
time.sleep(2)
# 切换 frame
driver.switch_to.frame('app_canvas_frame')
# 构建 BeautifulSoup 对象
bs = BeautifulSoup(driver.page_source.encode('GBK', 'ignore').decode('gbk'))
# 找到页面上的所有说说
pres = bs.find_all('pre', class_='content')
for pre in pres:
shuoshuo = pre.text
tx = pre.parent.parent.find('a', class_="c_tx c_tx3 goDetail")['title']
with open(path,'a') as fp:
fp.write(tx + ":" + shuoshuo + '\n')
# 页数判断
page = page + 1
maxPage = bs.find('a', title='末页').text
if int(maxPage) < page:
break
driver.find_element_by_link_text(u'下一页').click()
# 回到主文档
driver.switch_to.default_content()
# 等待页面加载
time.sleep(3)
def get_photo(driver):
# 照片下载路径
photo_path = "C://Users//86138//Desktop//pothon//{}.jpg"
# 相册索引
photoIndex = 1
while True:
# 回到主文档
driver.switch_to.default_content()
# driver.switch_to.parent_frame()
# 点击头部的相册按钮
driver.find_element_by_xpath('//*[@id="menuContainer"]/div/ul[3]/a').click()
# 等待加载
driver.implicitly_wait(10)
time.sleep(3)
# 切换 frame
driver.switch_to.frame('app_canvas_frame')
# 各个相册的超链接
a = driver.find_elements_by_class_name('album-cover')
# 单个相册
a[photoIndex].click()
driver.implicitly_wait(10)
time.sleep(3)
# 相册的第一张图
p = driver.find_elements_by_class_name('item-cover')[0]
p.click()
time.sleep(3)
# 相册大图在父frame切换到父frame
driver.switch_to.parent_frame()
# 循环相册中的照片
while True:
# 照片url地址和名称
img = driver.find_element_by_id('js-img-disp')
src = img.get_attribute('src').replace('&t=5', '')
name = driver.find_element_by_id("js-photo-name").text
# 下载
urlretrieve(src, photo_path.format(qq, name))
# 取下面的 当前照片张数/总照片数量
counts = driver.find_element_by_xpath('//*[@id="js-ctn-infoBar"]/div/div[1]/span').text
counts = counts.split('/')
# 最后一张的时候退出照片浏览
if int(counts[0]) == int(counts[1]):
# 右上角的 X 按钮
driver.find_element_by_xpath('//*[@id="js-viewer-main"]/div[1]/a').click()
break
# 点击 下一张网页加载慢所以10次加载
for i in (1, 10):
if driver.find_element_by_id('js-btn-nextPhoto'):
n = driver.find_element_by_id('js-btn-nextPhoto')
ActionChains(driver).click(n).perform()
break
else:
time.sleep(5)
# 相册数量比较,是否下载了全部的相册
photoIndex = photoIndex + 1
if len(a) <= photoIndex:
break
def qq():
print('输入你的QQ账号、密码及你想访问的目标QQ')
login_qq = input()
password = input()
buiness_qq = input()
driver = login(login_qq, password, buiness_qq)
get_shuoshuo(driver)
get_photo(driver)
if __name__ == '__main__':
qq()