新浪爬虫

zhihu、xinlang
p2u3zombq 2 years ago
parent 189a373a28
commit eed4fabe5e

@ -0,0 +1,26 @@
import requests
import chardet
from lxml import etree
# Base URL of the sailboat detail pages; the model identifier is appended below.
url = 'https://sailboatdata.com/sailboat/'
# Request headers that make the script look like a regular browser:
# User-Agent is a desktop Chrome/Edge identification string, and Cookie
# replays cookie values that were presumably copied from a browser session.
# NOTE(review): the Cookie embeds an XSRF token and a session id. These are
# credentials, will likely expire, and should not live in source control --
# consider loading them from the environment instead.
headvalues = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62',
    'Cookie': '_ga=GA1.2.463000159.1680249637; _gid=GA1.2.206316371.1680249637; __qca=P0-2064736007-1680249637796; laravel_cookie_consent=1; XSRF-TOKEN=eyJpdiI6IkVSZk1kWlBBV2psRmp5bHRKQnFaRVE9PSIsInZhbHVlIjoiV0VwTWxOOWt0aVwvdGdFMk5sRWlqcHpzTk1ZWUNJVEtwMU1oSXAyMkFZVTB1MmVzQWhLRkpTb0piZUlmZkkwZEQiLCJtYWMiOiI4NTk1OTUwYmMxMWIyOWVkN2IwN2IxYWZiYzlhYmI5NGYyMTkwMmEwZTEyZmM3ZjY2OTEwYTg2YmJiOTg3ODAxIn0%3D; sailboatdatacom_session=eyJpdiI6Im95UXI2ZjJZOUYzMzNRWWMyTFcxMnc9PSIsInZhbHVlIjoiRjhrM3Q0TlZsMk1BNzQ0ZTdJUXlob3dOdG0rRndZbDFZZWZNSGFLeHJtXC83RHlDR2xSN1d6bVRTMmVxcjAwRGciLCJtYWMiOiI0NDA3YTQzMTcyYjE1YjBhODNlMDAyYmMyMjhjMDkzMDEyNDg5YzA0ZDk3OTY3NDgyMTg1MDI4ZDc2MzQxNjBiIn0%3D',
}
# Identifier appended to the base URL to select which page is fetched.
variety = 'J122'
url = url + variety
# requests applies no timeout by default, so an unresponsive server would
# hang the script forever; bound the wait (in seconds) instead.
r = requests.get(url, headers=headvalues, timeout=10)
print(r.text)
# Disabled draft of the next step: detect the response encoding and parse
# the page with lxml.
# NOTE(review): code_type is computed below but never applied to the
# response (e.g. via r.encoding) -- confirm the intent before enabling.
# # Detect the encoding and adjust it
# code_type = r.apparent_encoding
# if code_type == 'GB2312':
#     code_type = 'GBK'
# html = etree.HTML(r.text)
# # Main content
# contents = html.xpath('//text()')
# print(contents)
Loading…
Cancel
Save