|
|
|
@ -0,0 +1,26 @@
|
|
|
|
|
import requests
|
|
|
|
|
import chardet
|
|
|
|
|
from lxml import etree
|
|
|
|
|
|
|
|
|
|
# Fetch the sailboatdata.com detail page for a single boat model and print
# the raw HTML to stdout.

# Base URL of the per-boat detail pages; the model slug is appended below.
url = 'https://sailboatdata.com/sailboat/'

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10

# Request headers that make the request look like it comes from a desktop
# browser: User-Agent is the browser identification string (Edge/Chrome on
# Windows), and Cookie carries cookies that appear to have been copied from a
# browser session.
# NOTE(review): the Cookie value embeds XSRF-TOKEN and sailboatdatacom_session
# tokens. Hard-coding session credentials in source is a leak risk and they
# will expire; consider loading them from the environment or dropping them if
# the page is reachable without them.
headvalues = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62',
    'Cookie': '_ga=GA1.2.463000159.1680249637; _gid=GA1.2.206316371.1680249637; __qca=P0-2064736007-1680249637796; laravel_cookie_consent=1; XSRF-TOKEN=eyJpdiI6IkVSZk1kWlBBV2psRmp5bHRKQnFaRVE9PSIsInZhbHVlIjoiV0VwTWxOOWt0aVwvdGdFMk5sRWlqcHpzTk1ZWUNJVEtwMU1oSXAyMkFZVTB1MmVzQWhLRkpTb0piZUlmZkkwZEQiLCJtYWMiOiI4NTk1OTUwYmMxMWIyOWVkN2IwN2IxYWZiYzlhYmI5NGYyMTkwMmEwZTEyZmM3ZjY2OTEwYTg2YmJiOTg3ODAxIn0%3D; sailboatdatacom_session=eyJpdiI6Im95UXI2ZjJZOUYzMzNRWWMyTFcxMnc9PSIsInZhbHVlIjoiRjhrM3Q0TlZsMk1BNzQ0ZTdJUXlob3dOdG0rRndZbDFZZWZNSGFLeHJtXC83RHlDR2xSN1d6bVRTMmVxcjAwRGciLCJtYWMiOiI0NDA3YTQzMTcyYjE1YjBhODNlMDAyYmMyMjhjMDkzMDEyNDg5YzA0ZDk3OTY3NDgyMTg1MDI4ZDc2MzQxNjBiIn0%3D',
}

# Boat model slug to look up; appended to the base URL to form the page URL.
variety = 'J122'
url = url + variety

r = requests.get(url, headers=headvalues, timeout=REQUEST_TIMEOUT_SECONDS)
# Fail loudly on 4xx/5xx instead of printing an error page as if it were the
# requested sailboat page.
r.raise_for_status()
print(r.text)
|
|
|
|
|
|
|
|
|
|
# # Detect the response encoding and adjust it
|
|
|
|
|
# code_type = r.apparent_encoding
|
|
|
|
|
# if code_type == 'GB2312':
|
|
|
|
|
# code_type = 'GBK'
|
|
|
|
|
# html = etree.HTML(r.text)
|
|
|
|
|
# # Main body
|
|
|
|
|
# contents = html.xpath('//text()')
|
|
|
|
|
# print(contents)
|
|
|
|
|
|