parent
168c618f7c
commit
189a373a28
@ -0,0 +1,24 @@
|
||||
import requests
|
||||
import chardet
|
||||
from lxml import etree
|
||||
# Fetch a Zhihu question page and print the text of every <p> element.

# Target page.
url = 'https://www.zhihu.com/question/267125034'

# Request headers: a desktop-browser user agent plus a captured session
# cookie.
# NOTE(review): the cookie below is a hard-coded logged-in session credential
# committed to source control. It should be revoked and loaded from the
# environment or an untracked config file instead.
# The adjacent string literals concatenate to the exact original header value
# (one "name=value; " pair per line).
headvalves = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
    'cookie': (
        '__snaker__id=ShjacOmBomgAzzPW; '
        'SESSIONID=tJ45QDftauwLXBAw7aHYmK9bTQnWpOgxpGDNEWUe6le; '
        'JOID=VF4UBEujpzYeWoBGXqJhrLZsJRVK5NIFbjjSIAyQ8lx9LMMjO5QYOntch0dfF7zfDYc8q0EfxOJp1hVXEMn30gw=; '
        'osd=UV0QAEympDIaXYVFWqZmqbVoIRJP59YBaT3RJAiX9195KMQmOJAcPX5fg0NYEr_bCYA5qEUbw-dq0hFQFcrz1gs=; '
        '_zap=68ca005e-8ba3-4be0-82ee-4081edb8b899; '
        'd_c0="AvBdEzakVROPTmGo7wPcx9wgCcvYJSi5J8U=|1624944814"; '
        '_9755xjdesxxd_=32; '
        'YD00517437729195%3AWM_TID=cW8xh7FHWExFVUFAFFc%2BmCMtIBwOXrDo; '
        '__snaker__id=KRnrpckFODiTJxRb; '
        '_xsrf=f799f643-affd-44e3-8b3e-7411d5d2aaaa; '
        'Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1678257960,1678288208,1678538695,1678621837; '
        'captcha_session_v2=2|1:0|10:1678622107|18:captcha_session_v2|88:bXBWYlcwOEJ1QjMybUZCZURlSGJQUE9HK0IyMENuVGhyZkNhTkgzMjAzVW5uM2ZSSTE4TG01Z1pmMUl5MlFDWg==|03724471302ce08fa49d9f8ffea6249bf5fab43bb040593d97105fa56a44ea49; '
        'SESSIONID=Gr5wdRaT5UmeqEebQ1hEAkIC2jF2rAfzHlFHeL7BKnH; '
        'JOID=VVoSB0hHSOo_EKX6ZkSJd5AjAq51BT_cT3DynjF2GIJZZuSaAdzz41gRpfxii647V7__hbTZ5wIz1zY2mrWmHks=; '
        'osd=VVEXAElHQ-84EaXxY0OId5smBa91DjrbTnD5mzZ3GIlcYeWaCtn04lgaoPtji6U-UL7_jrHe5gI40jE3mr6jGUo=; '
        'gdxidpyhxdE=lRB359XGAEGOUhuWQPV7Up1uaa%2B92IGgGc4YvXAArvEm6op%2Btaz0SzRqbyeeg09OhxSi%2FPJD5yP3NdlqVlM4c1wXmOXy9gJ0vE%2FDfxu%2FZrCyaHfEow5a0rShiOPmI4vGY9HS3zxwZuPUzd8zgHC9gvGC%2BPECJJ1ORcR3ZvZrsQWGKHSd%3A1678623009157; '
        'YD00517437729195%3AWM_NI=ayVHonAPb4TERQSinj4ltVKbHWS%2BdhqEhQxiuOn4XIqGkBZliA4Fph8SSvsxlabPur%2FMUersLNNbv%2FbbWCaIsntVCdlkJlLQQzxlqMehm0B5ymfgYbvnDEsUs8TdXem5TTI%3D; '
        'YD00517437729195%3AWM_NIKE=9ca17ae2e6ffcda170e2e6eed6b86e88938283b64782b88fb2c15b829b9eb0d56aa392a68be549a1ec8ba6ee2af0fea7c3b92aa3a79aa9ee668390aed5d74f928f9d8aed62f8b4bcb7ca6a8c899dcce764b39ba8b2fb63b791a2d5db43b399a5a3d968ab9ffca7f364fc8d9d97f274b0bfaeb1f87aed939b86f17990928283f525a2be99a4e443b1b2bcb7c270b0b5868dcf5b93a6b88df447a7b8a3d1c13fbbb3aa88dc6ab6b28ca6f25fb1e89c85db6fafeb9cb5cc37e2a3; '
        'o_act=login; '
        'ref_source=other_https://www.zhihu.com/signin?next=/; '
        'expire_in=15552000; '
        'q_c1=2c648a7fa7cc4877b19de6c428d2a3d5|1678622150000|1678622150000; '
        'tst=r; '
        'z_c0=2|1:0|10:1678622152|4:z_c0|92:Mi4xTXhsT0RRQUFBQUFDOEYwVE5xUlZFeGNBQUFCZ0FsVk54Z3Y3WkFCZlRES2k1b1FtQ3VkMjFjcUI3OEhxbG04c2lR|915f4a5f6688419016b76008bbf4844791844d17b2ed6e8f31ac21dbf0c1a9ef; '
        'Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1678622256; '
        'KLBRSID=37f2e85292ebb2c2ef70f1d8e39c2b34|1678623872|1678621836'
    ),
}

# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(url, headers=headvalves, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page as content.
r.raise_for_status()

# Detect the body's encoding and apply it before decoding. GB2312 is widened
# to GBK, its superset, so characters outside GB2312 still decode.
code_type = r.apparent_encoding
if code_type == 'GB2312':
    code_type = 'GBK'
if code_type:
    # Without this assignment the detected encoding never influenced r.text.
    r.encoding = code_type

html = etree.HTML(r.text)

# Main body: the text nodes that are direct children of <p> elements.
# etree.HTML returns None when it cannot build a document (e.g. empty body).
contents = html.xpath('//p/text()') if html is not None else []
print(contents)

for paragraph in contents:
    content = paragraph.strip()
    print(content)
|
Loading…
Reference in new issue