You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
psrGROUP/env/Lib/site-packages/ua_parser/user_agent_parser.py

529 lines
15 KiB

import os
import re
import warnings
from typing import Any, Dict, Optional, Tuple
class UserAgentParser(object):
    """Match a user agent string against one regex and extract browser info.

    The first capture group is the family; groups 2-4 are the major, minor
    and patch version components. Optional replacements override the family,
    major and minor values.
    """

    def __init__(
        self, pattern, family_replacement=None, v1_replacement=None, v2_replacement=None
    ):
        """Initialize UserAgentParser.

        Args:
          pattern: a regular expression string
          family_replacement: a string to override the matched family
            (optional); a ``$1`` placeholder is replaced by capture group 1
          v1_replacement: a string to override the matched v1 (optional)
          v2_replacement: a string to override the matched v2 (optional)
        """
        self.user_agent_re = re.compile(pattern)
        self.family_replacement = family_replacement
        self.v1_replacement = v1_replacement
        self.v2_replacement = v2_replacement

    def Parse(
        self, user_agent_string: str
    ) -> Tuple[
        Optional[str],
        Optional[str],
        Optional[str],
        Optional[str],
    ]:
        """Search *user_agent_string* with this parser's regex.

        Args:
          user_agent_string: the full user agent string

        Returns:
          A ``(family, v1, v2, v3)`` tuple. Every element is ``None`` when
          the regex does not match; version elements are ``None`` when the
          corresponding group is absent or empty.
        """
        family, v1, v2, v3 = None, None, None, None
        match = self.user_agent_re.search(user_agent_string)
        if match:
            if self.family_replacement:
                if "$1" in self.family_replacement:
                    # Literal substitution: the captured text comes from the
                    # user agent and must not be interpreted as a regex
                    # replacement template (backslashes would otherwise be
                    # treated as escapes or raise re.error).
                    family = self.family_replacement.replace("$1", match[1])
                else:
                    family = self.family_replacement
            else:
                family = match[1]

            # match.lastindex is None when no group participated in the match.
            if self.v1_replacement:
                v1 = self.v1_replacement
            elif match.lastindex and match.lastindex >= 2:
                v1 = match[2] or None

            if self.v2_replacement:
                v2 = self.v2_replacement
            elif match.lastindex and match.lastindex >= 3:
                v2 = match[3] or None

            if match.lastindex and match.lastindex >= 4:
                v3 = match[4] or None

        return family, v1, v2, v3
class OSParser(object):
    """Match a user agent string against one regex and extract OS info."""

    def __init__(
        self,
        pattern,
        os_replacement=None,
        os_v1_replacement=None,
        os_v2_replacement=None,
        os_v3_replacement=None,
        os_v4_replacement=None,
    ):
        """Initialize OSParser.

        Args:
          pattern: a regular expression string
          os_replacement: a string to override the matched os (optional)
          os_v1_replacement: a string to override the matched v1 (optional)
          os_v2_replacement: a string to override the matched v2 (optional)
          os_v3_replacement: a string to override the matched v3 (optional)
          os_v4_replacement: a string to override the matched v4 (optional)
        """
        self.user_agent_re = re.compile(pattern)
        self.os_replacement = os_replacement
        self.os_v1_replacement = os_v1_replacement
        self.os_v2_replacement = os_v2_replacement
        self.os_v3_replacement = os_v3_replacement
        self.os_v4_replacement = os_v4_replacement

    def Parse(
        self, user_agent_string: str
    ) -> Tuple[
        Optional[str],
        Optional[str],
        Optional[str],
        Optional[str],
        Optional[str],
    ]:
        """Search *user_agent_string* with this parser's regex.

        Returns:
          An ``(os, os_v1, os_v2, os_v3, os_v4)`` tuple; every element is
          ``None`` when the regex does not match. Each slot is filled from
          its replacement template when one is configured, otherwise from
          the capture group at the same position when that group index does
          not exceed ``match.lastindex``.
        """
        match = self.user_agent_re.search(user_agent_string)
        if not match:
            return None, None, None, None, None

        # lastindex is None when no capture group took part in the match.
        highest_group = match.lastindex or 0
        templates = (
            self.os_replacement,
            self.os_v1_replacement,
            self.os_v2_replacement,
            self.os_v3_replacement,
            self.os_v4_replacement,
        )

        slots = []
        for position, template in enumerate(templates, start=1):
            if template:
                slots.append(MultiReplace(template, match))
            elif highest_group >= position:
                slots.append(match[position])
            else:
                slots.append(None)

        family, major, minor, patch, patch_minor = slots
        return family, major, minor, patch, patch_minor
def MultiReplace(string, match):
    """Expand ``$N`` placeholders in *string* using capture groups of *match*.

    Args:
      string: a replacement template; each ``$`` followed by a single digit
        refers to the capture group with that 1-based number
      match: the regex match object supplying the groups

    Returns:
      The expanded template with surrounding whitespace stripped, or
      ``None`` when the result is empty. Placeholders that refer to a group
      that does not exist (including ``$0``) or that did not participate in
      the match expand to the empty string.
    """
    # Loop-invariant: fetch the groups once rather than per placeholder.
    groups = match.groups()

    def _repl(m):
        index = int(m[1]) - 1
        # The lower bound keeps "$0" (index -1) from wrapping around to the
        # last group through negative indexing.
        if 0 <= index < len(groups):
            # A group that did not participate is None; substitute "".
            return groups[index] or ""
        return ""

    _string = re.sub(r"\$(\d)", _repl, string).strip()
    return _string or None
class DeviceParser(object):
    """Match a user agent string against one regex and extract device info."""

    def __init__(
        self,
        pattern,
        regex_flag=None,
        device_replacement=None,
        brand_replacement=None,
        model_replacement=None,
    ):
        """Initialize DeviceParser.

        Args:
          pattern: a regular expression string
          regex_flag: ``"i"`` compiles the pattern case-insensitively; any
            other value compiles it with no flags (optional)
          device_replacement: a string to override the matched device (optional)
          brand_replacement: a template producing the brand (optional)
          model_replacement: a string to override the matched model (optional)
        """
        flags = re.IGNORECASE if regex_flag == "i" else 0
        self.user_agent_re = re.compile(pattern, flags)
        self.device_replacement = device_replacement
        self.brand_replacement = brand_replacement
        self.model_replacement = model_replacement

    def Parse(
        self, user_agent_string: str
    ) -> Tuple[
        Optional[str],
        Optional[str],
        Optional[str],
    ]:
        """Search *user_agent_string* with this parser's regex.

        Returns:
          A ``(device, brand, model)`` tuple; every element is ``None`` when
          the regex does not match. The brand is only ever produced from
          ``brand_replacement``; device and model fall back to capture
          group 1 when no replacement is configured.
        """
        match = self.user_agent_re.search(user_agent_string)
        if not match:
            return None, None, None

        if self.device_replacement:
            device = MultiReplace(self.device_replacement, match)
        else:
            device = match[1]

        brand = None
        if self.brand_replacement:
            brand = MultiReplace(self.brand_replacement, match)

        if self.model_replacement:
            model = MultiReplace(self.model_replacement, match)
        elif match.groups():
            # At least one capture group exists, so group 1 is addressable.
            model = match[1]
        else:
            model = None

        return device, brand, model
MAX_CACHE_SIZE = 200
_PARSE_CACHE: Dict[str, Dict[str, Any]] = {}
def _lookup(ua):
if not isinstance(ua, str):
raise TypeError(f"Expected user agent to be a string, got {ua!r}")
entry = _PARSE_CACHE.get(ua)
if entry is not None:
return entry
if len(_PARSE_CACHE) >= MAX_CACHE_SIZE:
_PARSE_CACHE.clear()
v = _PARSE_CACHE[ua] = {"string": ua}
return v
def _cached(ua, key, fn):
entry = _lookup(ua)
r = entry.get(key)
if not r:
r = entry[key] = fn(ua)
return r
def Parse(user_agent_string, **_jsParseBits):
    """Parse the user agent, OS and device sections of a user agent string.

    Args:
      user_agent_string: the full user agent string
      **_jsParseBits: ignored; passing any triggers a DeprecationWarning

    Returns:
      The cache entry dict for the string, holding the keys "string",
      "user_agent", "os" and "device".
    """
    if _jsParseBits:
        warnings.warn(
            "javascript overrides are not used anymore",
            DeprecationWarning,
            stacklevel=2,
        )

    entry = _lookup(user_agent_string)
    # Four keys ("string" plus the three sections) means nothing is missing.
    if len(entry) != 4:
        sections = (
            ("user_agent", _ParseUserAgent),
            ("os", _ParseOS),
            ("device", _ParseDevice),
        )
        for section, parse_section in sections:
            if section not in entry:
                entry[section] = parse_section(user_agent_string)
    return entry
def ParseUserAgent(user_agent_string, **_jsParseBits):
    """Parse the user agent (browser) section of a user agent string.

    Args:
      user_agent_string: The full user-agent string.
      **_jsParseBits: ignored; passing any triggers a DeprecationWarning.

    Returns:
      A dictionary containing parsed bits.
    """
    overrides_given = bool(_jsParseBits)
    if overrides_given:
        warnings.warn(
            "javascript overrides are not used anymore",
            DeprecationWarning,
            stacklevel=2,
        )
    parsed = _cached(user_agent_string, "user_agent", _ParseUserAgent)
    return parsed
def _ParseUserAgent(user_agent_string):
    """Run the user agent parsers in order and return the first hit.

    Args:
      user_agent_string: the full user agent string

    Returns:
      A dict with the keys "family", "major", "minor" and "patch". The
      family is "Other" when no parser yields one; empty version parts are
      normalised to ``None``.
    """
    # Pre-seed the results so an empty parser list falls through to the
    # "Other" default instead of hitting unbound locals.
    family = v1 = v2 = v3 = None
    for uaParser in USER_AGENT_PARSERS:
        family, v1, v2, v3 = uaParser.Parse(user_agent_string)
        if family:
            break

    family = family or "Other"
    return {
        "family": family,
        "major": v1 or None,
        "minor": v2 or None,
        "patch": v3 or None,
    }
def ParseOS(user_agent_string, **_jsParseBits):
    """Parse the operating system section of a user agent string.

    Args:
      user_agent_string: The full user-agent string.
      **_jsParseBits: ignored; passing any triggers a DeprecationWarning.

    Returns:
      A dictionary containing parsed bits.
    """
    overrides_given = bool(_jsParseBits)
    if overrides_given:
        warnings.warn(
            "javascript overrides are not used anymore",
            DeprecationWarning,
            stacklevel=2,
        )
    parsed = _cached(user_agent_string, "os", _ParseOS)
    return parsed
def _ParseOS(user_agent_string):
    """Run the OS parsers in order and return the first hit.

    Args:
      user_agent_string: the full user agent string

    Returns:
      A dict with the keys "family", "major", "minor", "patch" and
      "patch_minor". The family is "Other" when no parser yields one.
    """
    # Pre-seed the results so an empty parser list falls through to the
    # "Other" default instead of hitting unbound locals. The local is named
    # "family" rather than "os" to avoid shadowing the imported os module.
    family = os_v1 = os_v2 = os_v3 = os_v4 = None
    for osParser in OS_PARSERS:
        family, os_v1, os_v2, os_v3, os_v4 = osParser.Parse(user_agent_string)
        if family:
            break

    family = family or "Other"
    return {
        "family": family,
        "major": os_v1,
        "minor": os_v2,
        "patch": os_v3,
        "patch_minor": os_v4,
    }
def ParseDevice(user_agent_string, **_jsParseBits):
    """Parse the device section of a user agent string.

    Args:
      user_agent_string: The full user-agent string.
      **_jsParseBits: ignored; passing any triggers a DeprecationWarning.

    Returns:
      A dictionary containing parsed bits.
    """
    overrides_given = bool(_jsParseBits)
    if overrides_given:
        warnings.warn(
            "javascript overrides are not used anymore",
            DeprecationWarning,
            stacklevel=2,
        )
    parsed = _cached(user_agent_string, "device", _ParseDevice)
    return parsed
def _ParseDevice(user_agent_string):
    """Run the device parsers in order and return the first hit.

    Args:
      user_agent_string: the full user agent string

    Returns:
      A dict with the keys "family", "brand" and "model". The family is
      "Other" when the device is ``None`` after the loop.
    """
    # Pre-seed the results so an empty parser list falls through to the
    # "Other" default instead of hitting unbound locals.
    device = brand = model = None
    for deviceParser in DEVICE_PARSERS:
        device, brand, model = deviceParser.Parse(user_agent_string)
        if device:
            break

    if device is None:
        device = "Other"
    return {"family": device, "brand": brand, "model": model}
def PrettyUserAgent(family, v1=None, v2=None, v3=None):
    """Format a family and its version parts as a display string.

    The most specific truthy version part decides the layout. A patch part
    that starts with a digit is joined with a dot; otherwise it is appended
    directly to the minor part (e.g. "3.5b2").
    """
    if not v3:
        if v2:
            return f"{family} {v1}.{v2}"
        if v1:
            return f"{family} {v1}"
        return family

    joiner = "." if v3[0].isdigit() else ""
    return f"{family} {v1}.{v2}{joiner}{v3}"
def PrettyOS(os, os_v1=None, os_v2=None, os_v3=None, os_v4=None):
    """Format an OS family and its version parts as a display string.

    The most specific truthy version part decides the layout. A third part
    that starts with a digit is joined with a dot; otherwise it is appended
    directly to the second part.
    """
    if os_v4:
        return f"{os} {os_v1}.{os_v2}.{os_v3}.{os_v4}"

    if os_v3:
        joiner = "." if os_v3[0].isdigit() else ""
        version = f"{os_v1}.{os_v2}{joiner}{os_v3}"
    elif os_v2:
        version = f"{os_v1}.{os_v2}"
    elif os_v1:
        version = f"{os_v1}"
    else:
        return os

    return f"{os} {version}"
def ParseWithJSOverrides(
    user_agent_string,
    js_user_agent_string=None,
    js_user_agent_family=None,
    js_user_agent_v1=None,
    js_user_agent_v2=None,
    js_user_agent_v3=None,
):
    """Parse a user agent, letting JavaScript-reported values take priority.

    Deprecated: always emits a DeprecationWarning pointing to ``Parse``.

    Args:
      user_agent_string: the full user agent string
      js_user_agent_string: a user agent string reported by JavaScript;
        only used for the Chrome Frame override (optional)
      js_user_agent_family: when not ``None`` and not empty, used as the
        family instead of running the parsers (optional)
      js_user_agent_v1: major version used with the family override (optional)
      js_user_agent_v2: minor version used with the family override (optional)
      js_user_agent_v3: patch version used with the family override (optional)

    Returns:
      A ``(family, v1, v2, v3)`` tuple; the family is "Other" when none
      was determined.
    """
    warnings.warn(
        "Use Parse (or a specialised parser)", DeprecationWarning, stacklevel=2
    )

    # Override via JS properties.
    if js_user_agent_family is not None and js_user_agent_family != "":
        family = js_user_agent_family
        v1 = js_user_agent_v1
        v2 = js_user_agent_v2
        v3 = js_user_agent_v3
    else:
        # Pre-seed the results so an empty parser list falls through to the
        # "Other" default instead of hitting unbound locals.
        family = v1 = v2 = v3 = None
        for parser in USER_AGENT_PARSERS:
            family, v1, v2, v3 = parser.Parse(user_agent_string)
            if family:
                break

    # Override for Chrome Frame IFF Chrome is enabled.
    if (
        js_user_agent_string
        and "Chrome/" in js_user_agent_string
        and "chromeframe" in user_agent_string
    ):
        # The family/major found so far are folded into the family label;
        # the version numbers then come from the JS-reported string.
        family = f"Chrome Frame ({family} {v1})"
        ua_dict = ParseUserAgent(js_user_agent_string)
        v1 = ua_dict["major"]
        v2 = ua_dict["minor"]
        v3 = ua_dict["patch"]

    return family or "Other", v1, v2, v3
def Pretty(family, v1=None, v2=None, v3=None):
    """Deprecated formatter with the same output layout as PrettyUserAgent.

    Always emits a DeprecationWarning before formatting.
    """
    warnings.warn("Use PrettyUserAgent", DeprecationWarning, stacklevel=2)

    if not v3:
        if v2:
            return f"{family} {v1}.{v2}"
        if v1:
            return f"{family} {v1}"
        return family

    # A patch part starting with a digit gets a dot separator; anything
    # else (e.g. "b2") is appended straight after the minor part.
    joiner = "." if v3[0].isdigit() else ""
    return f"{family} {v1}.{v2}{joiner}{v3}"
def GetFilters(
    user_agent_string,
    js_user_agent_string=None,
    js_user_agent_family=None,
    js_user_agent_v1=None,
    js_user_agent_v2=None,
    js_user_agent_v3=None,
):
    """Collect the JavaScript override arguments that were actually supplied.

    Deprecated: always emits a DeprecationWarning. ``user_agent_string`` is
    accepted but not used.

    Returns:
      A dict keyed by argument name, holding every ``js_*`` argument whose
      value is neither ``None`` nor the empty string.
    """
    warnings.warn("No use case anymore", DeprecationWarning, stacklevel=2)

    candidates = (
        ("js_user_agent_string", js_user_agent_string),
        ("js_user_agent_family", js_user_agent_family),
        ("js_user_agent_v1", js_user_agent_v1),
        ("js_user_agent_v2", js_user_agent_v2),
        ("js_user_agent_v3", js_user_agent_v3),
    )
    return {
        name: supplied
        for name, supplied in candidates
        if supplied is not None and supplied != ""
    }
# Build the parser lists at import time. When the UA_PARSER_YAML environment
# variable is set, it is used as the path of a YAML file from which the
# user agent, OS and device parsers are built; otherwise the lists are
# imported from ua_parser_builtins.regexes.
UA_PARSER_YAML = os.environ.get("UA_PARSER_YAML")
if UA_PARSER_YAML:
    # This will raise an ImportError if missing, obviously since it's no
    # longer a requirement
    import yaml

    try:
        # Try and use libyaml bindings if available since faster,
        # pyyaml doesn't do it by default (yaml/pyyaml#436)
        from yaml import CSafeLoader as SafeLoader
    except ImportError:
        from yaml import SafeLoader  # type: ignore

    with open(UA_PARSER_YAML, "rb") as fp:
        regexes = yaml.load(fp, Loader=SafeLoader)

    # Each entry must provide "regex"; the replacement keys are optional and
    # default to None via dict.get.
    USER_AGENT_PARSERS = []
    for _ua_parser in regexes["user_agent_parsers"]:
        _regex = _ua_parser["regex"]

        _family_replacement = _ua_parser.get("family_replacement")
        _v1_replacement = _ua_parser.get("v1_replacement")
        _v2_replacement = _ua_parser.get("v2_replacement")

        USER_AGENT_PARSERS.append(
            UserAgentParser(
                _regex, _family_replacement, _v1_replacement, _v2_replacement
            )
        )

    OS_PARSERS = []
    for _os_parser in regexes["os_parsers"]:
        _regex = _os_parser["regex"]

        _os_replacement = _os_parser.get("os_replacement")
        _os_v1_replacement = _os_parser.get("os_v1_replacement")
        _os_v2_replacement = _os_parser.get("os_v2_replacement")
        _os_v3_replacement = _os_parser.get("os_v3_replacement")
        _os_v4_replacement = _os_parser.get("os_v4_replacement")

        OS_PARSERS.append(
            OSParser(
                _regex,
                _os_replacement,
                _os_v1_replacement,
                _os_v2_replacement,
                _os_v3_replacement,
                _os_v4_replacement,
            )
        )

    DEVICE_PARSERS = []
    for _device_parser in regexes["device_parsers"]:
        _regex = _device_parser["regex"]

        # "regex_flag" is handed to DeviceParser as its regex_flag argument.
        _regex_flag = _device_parser.get("regex_flag")
        _device_replacement = _device_parser.get("device_replacement")
        _brand_replacement = _device_parser.get("brand_replacement")
        _model_replacement = _device_parser.get("model_replacement")

        DEVICE_PARSERS.append(
            DeviceParser(
                _regex,
                _regex_flag,
                _device_replacement,
                _brand_replacement,
                _model_replacement,
            )
        )

    # Clean up our temporary vars explicitly
    # so they can't be reused or imported
    # NOTE(review): only regexes, yaml and SafeLoader are deleted; the
    # underscore-prefixed loop temporaries above remain module attributes.
    del regexes
    del yaml
    del SafeLoader
else:
    # Just load our pre-compiled versions
    from ua_parser_builtins.regexes import (
        DEVICE_PARSERS,
        OS_PARSERS,
        USER_AGENT_PARSERS,
    )