|
|
import selenium
|
|
|
import time
|
|
|
import os
|
|
|
from bs4 import BeautifulSoup
|
|
|
from selenium import webdriver
|
|
|
|
|
|
def login(login_qq, password, business_qq):
|
|
|
'''
|
|
|
登陆
|
|
|
:param login_qq: 登陆用的QQ
|
|
|
:param password: 登陆的QQ密码
|
|
|
:param business_qq: 业务QQ
|
|
|
:return: driver
|
|
|
'''
|
|
|
driver = webdriver.Chrome()
|
|
|
|
|
|
driver.get('https://user.qzone.qq.com/{}/311'.format(business_qq)) # URL
|
|
|
driver.implicitly_wait(10) # 隐示等待,为了等待充分加载好网址
|
|
|
driver.find_element_by_id('login_div')
|
|
|
driver.switch_to.frame('login_frame') # 切到输入账号密码的frame
|
|
|
driver.find_element_by_id('switcher_plogin').click() ##点击‘账号密码登录’
|
|
|
driver.find_element_by_id('u').clear() ##清空账号栏
|
|
|
driver.find_element_by_id('u').send_keys(login_qq) # 输入账号
|
|
|
driver.find_element_by_id('p').clear() # 清空密码栏
|
|
|
driver.find_element_by_id('p').send_keys(password) # 输入密码
|
|
|
driver.find_element_by_id('login_button').click() # 点击‘登录’
|
|
|
driver.switch_to.default_content()
|
|
|
|
|
|
driver.implicitly_wait(10)
|
|
|
time.sleep(5)
|
|
|
|
|
|
try:
|
|
|
driver.find_element_by_id('QM_OwnerInfo_Icon')
|
|
|
return driver
|
|
|
except:
|
|
|
print('不能访问' + business_qq)
|
|
|
return None
|
|
|
|
|
|
|
|
|
def get_shuoshuo(driver):
|
|
|
root = "C://Users//86138//Desktop//个人信息//qq空间//" # 需要存储的根目录
|
|
|
path = root + "动态.txt" # 需要存储的路径以及文件名,若要自定义文件名则只需将改为path=root+"文件名.jpg
|
|
|
|
|
|
if not os.path.exists(root): # 判断根目录是否存在,不存在就创建
|
|
|
os.mkdir(root)
|
|
|
|
|
|
with open(path,'w') as f:
|
|
|
f.write('')
|
|
|
page = 1
|
|
|
while True:
|
|
|
# 下拉滚动条
|
|
|
for j in range(1, 5):
|
|
|
driver.execute_script("window.scrollBy(0,5000)")
|
|
|
time.sleep(2)
|
|
|
|
|
|
# 切换 frame
|
|
|
driver.switch_to.frame('app_canvas_frame')
|
|
|
# 构建 BeautifulSoup 对象
|
|
|
bs = BeautifulSoup(driver.page_source.encode('GBK', 'ignore').decode('gbk'))
|
|
|
# 找到页面上的所有说说
|
|
|
pres = bs.find_all('pre', class_='content')
|
|
|
|
|
|
for pre in pres:
|
|
|
shuoshuo = pre.text
|
|
|
tx = pre.parent.parent.find('a', class_="c_tx c_tx3 goDetail")['title']
|
|
|
with open(path,'a') as fp:
|
|
|
fp.write(tx + ":" + shuoshuo + '\n')
|
|
|
|
|
|
# 页数判断
|
|
|
page = page + 1
|
|
|
maxPage = bs.find('a', title='末页').text
|
|
|
|
|
|
if int(maxPage) < page:
|
|
|
break
|
|
|
|
|
|
driver.find_element_by_link_text(u'下一页').click()
|
|
|
# 回到主文档
|
|
|
driver.switch_to.default_content()
|
|
|
# 等待页面加载
|
|
|
time.sleep(3)
|
|
|
|
|
|
|
|
|
def get_photo(driver):
|
|
|
# 照片下载路径
|
|
|
photo_path = "C://Users//86138//Desktop//pothon//{}.jpg"
|
|
|
# 相册索引
|
|
|
photoIndex = 1
|
|
|
|
|
|
while True:
|
|
|
# 回到主文档
|
|
|
driver.switch_to.default_content()
|
|
|
# driver.switch_to.parent_frame()
|
|
|
# 点击头部的相册按钮
|
|
|
driver.find_element_by_xpath('//*[@id="menuContainer"]/div/ul[3]/a').click()
|
|
|
# 等待加载
|
|
|
driver.implicitly_wait(10)
|
|
|
time.sleep(3)
|
|
|
# 切换 frame
|
|
|
driver.switch_to.frame('app_canvas_frame')
|
|
|
# 各个相册的超链接
|
|
|
a = driver.find_elements_by_class_name('album-cover')
|
|
|
# 单个相册
|
|
|
a[photoIndex].click()
|
|
|
|
|
|
driver.implicitly_wait(10)
|
|
|
time.sleep(3)
|
|
|
# 相册的第一张图
|
|
|
p = driver.find_elements_by_class_name('item-cover')[0]
|
|
|
p.click()
|
|
|
time.sleep(3)
|
|
|
|
|
|
# 相册大图在父frame,切换到父frame
|
|
|
driver.switch_to.parent_frame()
|
|
|
# 循环相册中的照片
|
|
|
while True:
|
|
|
# 照片url地址和名称
|
|
|
img = driver.find_element_by_id('js-img-disp')
|
|
|
src = img.get_attribute('src').replace('&t=5', '')
|
|
|
name = driver.find_element_by_id("js-photo-name").text
|
|
|
|
|
|
# 下载
|
|
|
urlretrieve(src, photo_path.format(qq, name))
|
|
|
|
|
|
# 取下面的 当前照片张数/总照片数量
|
|
|
counts = driver.find_element_by_xpath('//*[@id="js-ctn-infoBar"]/div/div[1]/span').text
|
|
|
|
|
|
counts = counts.split('/')
|
|
|
# 最后一张的时候退出照片浏览
|
|
|
if int(counts[0]) == int(counts[1]):
|
|
|
# 右上角的 X 按钮
|
|
|
driver.find_element_by_xpath('//*[@id="js-viewer-main"]/div[1]/a').click()
|
|
|
break
|
|
|
# 点击 下一张,网页加载慢,所以10次加载
|
|
|
for i in (1, 10):
|
|
|
if driver.find_element_by_id('js-btn-nextPhoto'):
|
|
|
n = driver.find_element_by_id('js-btn-nextPhoto')
|
|
|
ActionChains(driver).click(n).perform()
|
|
|
break
|
|
|
else:
|
|
|
time.sleep(5)
|
|
|
|
|
|
# 相册数量比较,是否下载了全部的相册
|
|
|
photoIndex = photoIndex + 1
|
|
|
if len(a) <= photoIndex:
|
|
|
break
|
|
|
|
|
|
def qq():
|
|
|
print('输入你的QQ账号、密码及你想访问的目标QQ:')
|
|
|
login_qq = input()
|
|
|
password = input()
|
|
|
buiness_qq = input()
|
|
|
|
|
|
driver = login(login_qq, password, buiness_qq)
|
|
|
get_shuoshuo(driver)
|
|
|
get_photo(driver)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
qq() |