更新useragent(好像没什么用)

master
wkyuu 3 years ago
parent f0915223b0
commit e0f470fa65

@ -109,6 +109,7 @@ pip show selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
# chrome_options.add_argument('lang=zh_CN.UTF-8') # 设置中文
chrome_options.add_argument('--headless') # 无界面
chrome_options.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在报错问题
chrome_options.add_argument('--disable-gpu') # 禁用GPU硬件加速。如果软件渲染器没有就位则GPU进程将不会启动。
@ -149,9 +150,17 @@ html1 = etree.parse('test.html',etree.HTMLParser(encoding='utf-8'))
```
### ChromeDriver
请求头cookie等
下载 [ChromeDriver](https://chromedriver.chromium.org/home) 放到 python 根目录就行
```python
# 访问 https://httpbin.org/get?show_env=1 可以返回当前浏览器的请求信息
options.add_argument('lang=zh_CN.UTF-8')
```
ChromeDriver
下载 [ChromeDriver](https://chromedriver.chromium.org/home) 放到当前目录就行(如果放在 python 根目录,则实例化 selenium 时可以不用指定 chromedriver 路径)
### Redis
@ -199,4 +208,4 @@ redisconn = redis.Redis(host = '127.0.0.1', port = '6379', password = 'x', db =
11[如何理解Python装饰器- 知乎](https://www.zhihu.com/question/26930016/answer/360300235)
12
12[【自动化】selenium设置请求头](https://www.jianshu.com/p/419eb4e00963)

@ -3,23 +3,25 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from lxml import etree
import random
import settings
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36'
'User-Agent': random.choice(settings.USER_AGENT)
}
def getsource(url):
init = Options()
initChrome = Options()
init.add_argument('--no-sandbox')
init.add_argument('--headless')
init.add_argument('--disable-gpu')
init.add_argument("disable-cache")
init.add_argument('disable-infobars')
init.add_argument('log-level=3') # INFO = 0 WARNING = 1 LOG_ERROR = 2 LOG_FATAL = 3 default is 0
init.add_experimental_option("excludeSwitches",['enable-automation','enable-logging'])
initChrome.add_argument('--no-sandbox')
initChrome.add_argument('--headless')
initChrome.add_argument('--disable-gpu')
initChrome.add_argument("disable-cache")
initChrome.add_argument('disable-infobars')
initChrome.add_argument('log-level=3') # INFO = 0 WARNING = 1 LOG_ERROR = 2 LOG_FATAL = 3 default is 0
initChrome.add_experimental_option("excludeSwitches",['enable-automation','enable-logging'])
driver = webdriver.Chrome(chrome_options = init)
driver = webdriver.Chrome(chrome_options = initChrome, executable_path = './chromedriver.exe')
driver.implicitly_wait(10)
driver.get(url)

@ -28,4 +28,12 @@ FILEPATH = {
REDIS_HOST = 'tencentCloud'
REDIS_PORT = '6379'
REDIS_PASSWORD = 'root'
REDIS_LISTNAME = "urlList"
REDIS_LISTNAME = "urlList"
# Downloader-related configuration.
# Candidate User-Agent header strings (Firefox / Chrome / Edge on Windows 10).
USER_AGENT = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/20100101 Firefox/97.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0'
]
Loading…
Cancel
Save