add basic.py

1 year ago · 574649a2f0
parent efcacba71e
commit 574649a2f0
2 changed files with 461 additions and 46 deletions
--- a/src/sqlmap-master/lib/request/basic.py
+++ b/src/sqlmap-master/lib/request/basic.py
@@ -0,0 +1,446 @@
+#!/usr/bin/env python
+
+"""
+Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
+See the file 'LICENSE' for copying permission
+"""
+
+import codecs
+import gzip
+import io
+import logging
+import re
+import struct
+import zlib
+
+from lib.core.common import Backend
+from lib.core.common import extractErrorMessage
+from lib.core.common import extractRegexResult
+from lib.core.common import filterNone
+from lib.core.common import getPublicTypeMembers
+from lib.core.common import getSafeExString
+from lib.core.common import isListLike
+from lib.core.common import randomStr
+from lib.core.common import readInput
+from lib.core.common import resetCookieJar
+from lib.core.common import singleTimeLogMessage
+from lib.core.common import singleTimeWarnMessage
+from lib.core.common import unArrayizeValue
+from lib.core.convert import decodeHex
+from lib.core.convert import getBytes
+from lib.core.convert import getText
+from lib.core.convert import getUnicode
+from lib.core.data import conf
+from lib.core.data import kb
+from lib.core.data import logger
+from lib.core.decorators import cachedmethod
+from lib.core.decorators import lockedmethod
+from lib.core.dicts import HTML_ENTITIES
+from lib.core.enums import DBMS
+from lib.core.enums import HTTP_HEADER
+from lib.core.enums import PLACE
+from lib.core.exception import SqlmapCompressionException
+from lib.core.settings import BLOCKED_IP_REGEX
+from lib.core.settings import DEFAULT_COOKIE_DELIMITER
+from lib.core.settings import EVENTVALIDATION_REGEX
+from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD
+from lib.core.settings import IDENTYWAF_PARSE_LIMIT
+from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
+from lib.core.settings import META_CHARSET_REGEX
+from lib.core.settings import PARSE_HEADERS_LIMIT
+from lib.core.settings import PRINTABLE_BYTES
+from lib.core.settings import SELECT_FROM_TABLE_REGEX
+from lib.core.settings import UNICODE_ENCODING
+from lib.core.settings import VIEWSTATE_REGEX
+from lib.parse.headers import headersParser
+from lib.parse.html import htmlParser
+from thirdparty import six
+from thirdparty.chardet import detect
+from thirdparty.identywaf import identYwaf
+from thirdparty.odict import OrderedDict
+from thirdparty.six import unichr as _unichr
+from thirdparty.six.moves import http_client as _http_client
+
+@lockedmethod
+def forgeHeaders(items=None, base=None):
+    """
+    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
+    the HTTP requests
+
+    @param items: header name -> value mapping applied on top of the base
+    headers; entries holding None are removed from it (in place).
+    @param base: headers to start from; conf.httpHeaders is used when None.
+    @return: OrderedDict with the resulting headers.
+    """
+
+    items = items or {}
+
+    # iterate over a snapshot of the keys as entries get deleted along the way
+    for _ in list(items.keys()):
+        if items[_] is None:
+            del items[_]
+
+    headers = OrderedDict(conf.httpHeaders if base is None else base)
+    headers.update(items.items())
+
+    # str subclass whose capitalize()/title() return the value unchanged, so a
+    # header name wrapped in it keeps its letter case when those get called
+    class _str(str):
+        def capitalize(self):
+            return _str(self)
+
+        def title(self):
+            return _str(self)
+
+    # rebuild the headers entry by entry so that names differing only in
+    # letter case collapse into a single entry (the later one wins)
+    _ = headers
+    headers = OrderedDict()
+    for key, value in _.items():
+        success = False
+
+        # drop an already stored header carrying the same case-insensitive name
+        for _ in headers:
+            if _.upper() == key.upper():
+                del headers[_]
+                break
+
+        # names not found among the HTTP_HEADER members (presumably the well
+        # known header names - verify against getPublicTypeMembers) are stored as given
+        if key.upper() not in (_.upper() for _ in getPublicTypeMembers(HTTP_HEADER, True)):
+            try:
+                headers[_str(key)] = value  # dirty hack for http://bugs.python.org/issue12455
+            except UnicodeEncodeError:      # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
+                pass
+            else:
+                success = True
+        # otherwise (or when the hack failed) the name is normalized to Dash-Separated-Capitalized form
+        if not success:
+            key = '-'.join(_.capitalize() for _ in key.split('-'))
+            headers[key] = value
+
+    if conf.cj:
+        if HTTP_HEADER.COOKIE in headers:
+            for cookie in conf.cj:
+                # skip cookies explicitly bound to a domain that conf.hostname does not end with
+                if cookie is None or cookie.domain_specified and not (conf.hostname or "").endswith(cookie.domain):
+                    continue
+
+                # cookie from the jar is also present inside the Cookie header value
+                if ("%s=" % getUnicode(cookie.name)) in getUnicode(headers[HTTP_HEADER.COOKIE]):
+                    if conf.loadCookies:
+                        # remove the Cookie entries from the configured headers
+                        conf.httpHeaders = filterNone((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders)
+                    elif kb.mergeCookies is None:
+                        # the answer is kept in kb.mergeCookies, hence asked only while it is still None
+                        message = "you provided a HTTP %s header value, while " % HTTP_HEADER.COOKIE
+                        message += "target URL provides its own cookies within "
+                        message += "HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
+                        message += "Do you want to merge them in further requests? [Y/n] "
+
+                        kb.mergeCookies = readInput(message, default='Y', boolean=True)
+
+                    if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
+                        # replaces the value of this cookie inside the given string with the
+                        # one from the jar (backslashes are doubled so that re.sub() takes
+                        # them literally in the replacement)
+                        def _(value):
+                            return re.sub(r"(?i)\b%s=[^%s]+" % (re.escape(getUnicode(cookie.name)), conf.cookieDel or DEFAULT_COOKIE_DELIMITER), ("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value))).replace('\\', r'\\'), value)
+
+                        headers[HTTP_HEADER.COOKIE] = _(headers[HTTP_HEADER.COOKIE])
+
+                        if PLACE.COOKIE in conf.parameters:
+                            conf.parameters[PLACE.COOKIE] = _(conf.parameters[PLACE.COOKIE])
+
+                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _(item[1])) for item in conf.httpHeaders]
+
+                # cookie known only to the jar is appended to the header (outside of the test mode)
+                elif not kb.testMode:
+                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, getUnicode(cookie.name), getUnicode(cookie.value))
+
+        # in test mode the jar is reset unless a CSRF token or a safe URL is configured
+        if kb.testMode and not any((conf.csrfToken, conf.safeUrl)):
+            resetCookieJar(conf.cj)
+
+    return headers
+
+def parseResponse(page, headers, status=None):
+    """
+    Passes the response headers and the page content to their parsers
+
+    @param page: the page to parse to feed the knowledge base htmlFp
+    (back-end DBMS fingerprint based upon DBMS error messages return
+    through the web application) list and absFilePaths (absolute file
+    paths) set.
+    @param headers: response headers handed over to headersParser()
+    (skipped when empty).
+    @param status: when given, it is put in front of the page content
+    (separated with an empty line) before the content gets parsed.
+    """
+
+    if headers:
+        headersParser(headers)
+
+    # nothing else to do without the page content
+    if not page:
+        return
+
+    content = "%s\n\n%s" % (status, page) if status else page
+    htmlParser(content)
+
+@cachedmethod
+def checkCharEncoding(encoding, warn=True):
+    """
+    Checks encoding name, repairs common misspellings and adjusts to
+    proper namings used in codecs module
+
+    @param encoding: charset name (text, bytes or a list-like value holding
+    it); an empty value is returned without any further checks.
+    @param warn: when True, a single-time warning is logged for a charset
+    which is known to the codecs module but fails to decode sample data.
+    @return: usable charset name or None when it can't be resolved.
+
+    >>> checkCharEncoding('iso-8858', False)
+    'iso8859-1'
+    >>> checkCharEncoding('en_us', False)
+    'utf8'
+    """
+
+    if isinstance(encoding, six.binary_type):
+        encoding = getUnicode(encoding)
+
+    if isListLike(encoding):
+        encoding = unArrayizeValue(encoding)
+
+    if not encoding:
+        return encoding
+
+    encoding = encoding.lower()
+
+    # Reference: http://www.destructor.de/charsets/index.htm
+    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}
+
+    # keep only the part found in front of the trailing parameters/remarks
+    for delimiter in (';', ',', '('):
+        if delimiter in encoding:
+            encoding = encoding[:encoding.find(delimiter)].strip()
+
+    encoding = encoding.replace("&quot", "")
+
+    # popular typos/errors (only the first matching rule gets applied)
+    typos = (
+        ("8858", "8859"),  # iso-8858 -> iso-8859
+        ("8559", "8859"),  # iso-8559 -> iso-8859
+        ("8895", "8859"),  # iso-8895 -> iso-8859
+        ("5889", "8859"),  # iso-5889 -> iso-8859
+        ("5589", "8859"),  # iso-5589 -> iso-8859
+        ("2313", "2312"),  # gb2313 -> gb2312
+    )
+
+    for typo, fix in typos:
+        if typo in encoding:
+            encoding = encoding.replace(typo, fix)
+            break
+    else:
+        # prefix rules are tried only when no digit typo has been found
+        if encoding.startswith("x-"):
+            encoding = encoding[len("x-"):]              # x-euc-kr -> euc-kr  /  x-mac-turkish -> mac-turkish
+        elif "windows-cp" in encoding:
+            encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254
+
+    # name adjustment for compatibility
+    if encoding.startswith("8859"):
+        encoding = "iso-%s" % encoding
+    elif encoding.startswith("cp-"):
+        encoding = "cp%s" % encoding[3:]
+    elif encoding.startswith("euc-"):
+        encoding = "euc_%s" % encoding[4:]
+    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
+        encoding = "windows-%s" % encoding[7:]
+    elif encoding.find("iso-88") > 0:
+        encoding = encoding[encoding.find("iso-88"):]
+    elif encoding.startswith("is0-"):
+        encoding = "iso%s" % encoding[4:]
+    elif encoding.find("ascii") > 0:
+        encoding = "ascii"
+    elif encoding.find("utf8") > 0:
+        encoding = "utf8"
+    elif encoding.find("utf-8") > 0:
+        encoding = "utf-8"
+
+    # Reference: http://philip.html5.org/data/charsets-2.html
+    if encoding in translate:
+        encoding = translate[encoding]
+    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
+        return None
+
+    # Reference: http://www.iana.org/assignments/character-sets
+    # Reference: http://docs.python.org/library/codecs.html
+    try:
+        codecs.lookup(encoding)
+    except Exception:  # Note: not a bare except, so that KeyboardInterrupt/SystemExit can propagate
+        encoding = None
+
+    if encoding:
+        # codec is known, but it also has to be able to decode sample data
+        try:
+            six.text_type(getBytes(randomStr()), encoding)
+        except Exception:  # Note: not a bare except, so that KeyboardInterrupt/SystemExit can propagate
+            if warn:
+                warnMsg = "invalid web page charset '%s'" % encoding
+                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
+            encoding = None
+
+    return encoding
+
+def getHeuristicCharEncoding(page):
+    """
+    Guesses the charset of the given page content by usage of heuristics
+    (guesses are kept in kb.cache.encoding under the hash of the content)
+
+    Reference: https://chardet.readthedocs.io/en/latest/usage.html
+
+    >>> getHeuristicCharEncoding(b"<html></html>")
+    'ascii'
+    """
+
+    def _canonical(name):
+        return name.lower().replace('-', "")
+
+    pageHash = hash(page)
+
+    if pageHash in kb.cache.encoding:
+        charset = kb.cache.encoding[pageHash]
+    else:
+        charset = detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]
+
+    # stored on every call (no matter if it came from the cache)
+    kb.cache.encoding[pageHash] = charset
+
+    if charset and _canonical(charset) == _canonical(UNICODE_ENCODING):
+        singleTimeLogMessage("heuristics detected web page charset '%s'" % charset, logging.INFO, charset)
+
+    return charset
+
+def decodePage(page, contentEncoding, contentType, percentDecode=True):
+    """
+    Decode compressed/charset HTTP response
+
+    @param page: response body.
+    @param contentEncoding: declared content encoding (values without a
+    lower() method are treated as an empty string).
+    @param contentType: declared content type (values without a lower()
+    method are treated as an empty string).
+    @param percentDecode: when True, %XX sequences found inside of a
+    textual page are replaced with the bytes they stand for.
+    @return: decoded page; None when the page is declared as compressed
+    while kb.pageCompress is turned off.
+    @raise SqlmapCompressionException: when decompression fails for content
+    not containing "<html" (kb.pageCompress gets turned off beforehand).
+
+    >>> getText(decodePage(b"<html>foo&amp;bar</html>", None, "text/html; charset=utf-8"))
+    '<html>foo&bar</html>'
+    >>> getText(decodePage(b"&#x9;", None, "text/html; charset=utf-8"))
+    '\\t'
+    """
+
+    # nothing to decode
+    if not page or (conf.nullConnection and len(page) < 2):
+        return getUnicode(page)
+
+    contentEncoding = contentEncoding.lower() if hasattr(contentEncoding, "lower") else ""
+    contentType = contentType.lower() if hasattr(contentType, "lower") else ""
+
+    if contentEncoding in ("gzip", "x-gzip", "deflate"):
+        if not kb.pageCompress:
+            return None
+
+        try:
+            if contentEncoding == "deflate":
+                # negative wbits value makes zlib expect a raw stream (no header and trailer)
+                data = io.BytesIO(zlib.decompress(page, -15))  # Reference: http://stackoverflow.com/questions/1089662/python-inflate-and-deflate-implementations
+            else:
+                data = gzip.GzipFile("", "rb", 9, io.BytesIO(page))
+                # last four bytes are read as a little-endian integer and taken as the
+                # declared size of the uncompressed content (checked before reading it)
+                size = struct.unpack("<l", page[-4:])[0]  # Reference: http://pydoc.org/get.cgi/usr/local/lib/python2.5/gzip.py
+                if size > MAX_CONNECTION_TOTAL_SIZE:
+                    raise Exception("size too large")
+
+            page = data.read()
+        except Exception as ex:
+            if b"<html" not in page:  # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
+                errMsg = "detected invalid data for declared content "
+                errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex))
+                singleTimeLogMessage(errMsg, logging.ERROR)
+
+                warnMsg = "turning off page compression"
+                singleTimeWarnMessage(warnMsg)
+
+                kb.pageCompress = False
+                raise SqlmapCompressionException
+
+    # charset is taken from the response only when not forced through conf.encoding
+    if not conf.encoding:
+        httpCharset, metaCharset = None, None
+
+        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
+        if contentType.find("charset=") != -1:
+            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])
+
+        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))
+
+        # declared charset is accepted when: only one of the two is declared, or
+        # both are declared and the page is made solely of PRINTABLE_BYTES, or
+        # both are declared and they are the same
+        if (any((httpCharset, metaCharset)) and (not all((httpCharset, metaCharset)) or isinstance(page, six.binary_type) and all(_ in PRINTABLE_BYTES for _ in page))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
+            kb.pageEncoding = httpCharset or metaCharset  # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
+            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
+            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
+        else:
+            kb.pageEncoding = None
+    else:
+        kb.pageEncoding = conf.encoding
+
+    # can't do for all responses because we need to support binary files too
+    if isinstance(page, six.binary_type) and "text/" in contentType:
+        if not kb.disableHtmlDecoding:
+            # first pass is done on raw bytes, hence limited to values fitting a single byte
+
+            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
+            if b"&#" in page:
+                # hexadecimal form (a lone hex digit is left-padded with 0 before decoding)
+                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else b"0%s" % _.group(1)), page)
+                # decimal form (values of 256 and above are left as they are)
+                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)
+
+            # e.g. %20%28%29
+            if percentDecode:
+                if b"%" in page:
+                    # two passes: lower-case hex digits first, upper-case ones second (mixed-case pairs are not matched)
+                    page = re.sub(b"%([0-9a-f]{2})", lambda _: decodeHex(_.group(1)), page)
+                    page = re.sub(b"%([0-9A-F]{2})", lambda _: decodeHex(_.group(1)), page)     # Note: %DeepSee_SQL in CACHE
+
+            # e.g. &amp;
+            # (only named entities whose HTML_ENTITIES value is below 256)
+            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)
+
+            # heuristics are used only when no charset has been settled by now
+            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
+
+            if (kb.pageEncoding or "").lower() == "utf-8-sig":
+                kb.pageEncoding = "utf-8"
+                if page and page.startswith(b"\xef\xbb\xbf"):  # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
+                    page = page[3:]
+
+            page = getUnicode(page, kb.pageEncoding)
+
+            # second pass is done on the decoded text, covering the values left out above
+
+            # e.g. &#8217;&#8230;&#8482;
+            if "&#" in page:
+                def _(match):
+                    # entity is left as it is when its value can't be turned into a character
+                    retVal = match.group(0)
+                    try:
+                        retVal = _unichr(int(match.group(1)))
+                    except (ValueError, OverflowError):
+                        pass
+                    return retVal
+                page = re.sub(r"&#(\d+);", _, page)
+
+            # e.g. &zeta;
+            # (only named entities whose HTML_ENTITIES value is above 255)
+            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
+        else:
+            # HTML decoding is disabled, hence only the charset decoding is done
+            page = getUnicode(page, kb.pageEncoding)
+
+    return page
+
+def processResponse(page, responseHeaders, code=None, status=None):
+    """
+    Inspects a received HTTP response, updating the knowledge base and
+    warning the user about detected protection mechanisms
+
+    @param page: response body (a false value is handled as an empty string).
+    @param responseHeaders: response headers; when given, its "headers"
+    attribute is used for building of the raw response.
+    @param code: HTTP status code used in the raw response.
+    @param status: HTTP status message used in the raw response and passed
+    to parseResponse().
+    """
+
+    kb.processResponseCounter += 1
+    page = page or ""
+
+    # headers are parsed only while the counter is below PARSE_HEADERS_LIMIT
+    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None, status)
+
+    # NOTE(review): else branch also clears an already found kb.tableFrom value - confirm this is intended
+    if not kb.tableFrom and Backend.getIdentifiedDbms() in (DBMS.ACCESS,):
+        kb.tableFrom = extractRegexResult(SELECT_FROM_TABLE_REGEX, page)
+    else:
+        kb.tableFrom = None
+
+    if conf.parseErrors:
+        msg = extractErrorMessage(page)
+
+        if msg:
+            logger.warning("parsed DBMS error message: '%s'" % msg.rstrip('.'))
+
+    # WAF/IPS identification is done only while the counter is below IDENTYWAF_PARSE_LIMIT
+    if not conf.skipWaf and kb.processResponseCounter < IDENTYWAF_PARSE_LIMIT:
+        # raw response made of the status line, headers and the leading part of the page
+        rawResponse = "%s %s %s\n%s\n%s" % (_http_client.HTTPConnection._http_vsn_str, code or "", status or "", "".join(getUnicode(responseHeaders.headers if responseHeaders else [])), page[:HEURISTIC_PAGE_SIZE_THRESHOLD])
+
+        # identYwaf.non_blind is cleared and read back while holding the lock
+        with kb.locks.identYwaf:
+            identYwaf.non_blind.clear()
+            if identYwaf.non_blind_check(rawResponse, silent=True):
+                for waf in set(identYwaf.non_blind):
+                    if waf not in kb.identifiedWafs:
+                        kb.identifiedWafs.add(waf)
+                        errMsg = "WAF/IPS identified as '%s'" % identYwaf.format_name(waf)
+                        singleTimeLogMessage(errMsg, logging.CRITICAL)
+
+    # done only while the original page has not been set yet
+    if kb.originalPage is None:
+        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
+            match = re.search(regex, page)
+            if match and PLACE.POST in conf.parameters:
+                name, value = match.groups()
+                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
+                    # provided value is still found inside of the page, hence nothing to adjust
+                    if conf.paramDict[PLACE.POST][name] in page:
+                        continue
+                    else:
+                        msg = "do you want to automatically adjust the value of '%s'? [y/N]" % name
+
+                        if not readInput(msg, default='N', boolean=True):
+                            continue
+
+                        conf.paramDict[PLACE.POST][name] = value
+                # value inside of the POST data is replaced with the one found in the page
+                # (backslashes are doubled so that re.sub() takes them literally)
+                conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])
+
+    if not kb.browserVerification and re.search(r"(?i)browser.?verification", page or ""):
+        kb.browserVerification = True
+        warnMsg = "potential browser verification protection mechanism detected"
+        if re.search(r"(?i)CloudFlare", page):
+            warnMsg += " (CloudFlare)"
+        singleTimeWarnMessage(warnMsg)
+
+    # word "captcha" alone is not enough - it has to appear inside of a form
+    # or inside of a meta tag also holding the word "refresh"
+    if not kb.captchaDetected and re.search(r"(?i)captcha", page or ""):
+        for match in re.finditer(r"(?si)<form.+?</form>", page):
+            if re.search(r"(?i)captcha", match.group(0)):
+                kb.captchaDetected = True
+                break
+
+        if re.search(r"<meta[^>]+\brefresh\b[^>]+\bcaptcha\b", page):
+            kb.captchaDetected = True
+
+        if kb.captchaDetected:
+            warnMsg = "potential CAPTCHA protection mechanism detected"
+            if re.search(r"(?i)<title>[^<]*CloudFlare", page):
+                warnMsg += " (CloudFlare)"
+            singleTimeWarnMessage(warnMsg)
+
+    if re.search(BLOCKED_IP_REGEX, page):
+        warnMsg = "it appears that you have been blocked by the target server"
+        singleTimeWarnMessage(warnMsg)
--- a/src/sqlmap-master/lib/request/connect.py
+++ b/src/sqlmap-master/lib/request/connect.py
@@ -5,7 +5,6 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
 See the file 'LICENSE' for copying permission
 """

-# 导入所需的标准库
 import binascii
 import inspect
 import logging
@@ -19,7 +18,6 @@ import sys
 import time
 import traceback

-# 尝试导入websocket库,如果不存在则定义一个简单的异常类
 try:
    import websocket
    from websocket import WebSocketException
@@ -27,7 +25,6 @@ except ImportError:
    class WebSocketException(Exception):
        pass

-# 导入sqlmap自定义的库和工具函数
 from lib.core.agent import agent
 from lib.core.common import asciifyUrl
 from lib.core.common import calculateDeltaSeconds
@@ -149,18 +146,13 @@ from thirdparty.socks.socks import ProxyError

 class Connect(object):
    """
-    这个类定义了用于执行HTTP请求的方法
+    This class defines methods used to perform HTTP requests
    """

    @staticmethod
    def _getPageProxy(**kwargs):
-        """
-        代理方法,用于处理页面请求
-        检查递归深度并调用getPage方法
-        """
        try:
-            # 检查调用栈深度是否超过限制
-            if (len(inspect.stack()) > sys.getrecursionlimit() // 2):   
+            if (len(inspect.stack()) > sys.getrecursionlimit() // 2):   # Note: https://github.com/sqlmapproject/sqlmap/issues/4525
                warnMsg = "unable to connect to the target URL"
                raise SqlmapConnectionException(warnMsg)
        except (TypeError, UnicodeError):
@@ -173,15 +165,9 @@ class Connect(object):

    @staticmethod
    def _retryProxy(**kwargs):
-        """
-        重试代理方法
-        处理请求失败时的重试逻辑
-        """
-        # 获取当前线程数据
        threadData = getCurrentThreadData()
        threadData.retriesCount += 1

-        # 如果配置了代理列表且重试次数达到上限,则更换代理
        if conf.proxyList and threadData.retriesCount >= conf.retries and not kb.locks.handlers.locked():
            warnMsg = "changing proxy"
            logger.warning(warnMsg)
@@ -191,8 +177,9 @@ class Connect(object):

            setHTTPHandlers()

-        # 处理基于时间的测试模式
        if kb.testMode and kb.previousMethod == PAYLOAD.METHOD.TIME:
+            # time-based payloads can make the web server unresponsive
+            # if the injectable piece of code is some kind of JOIN-like query
            warnMsg = "most likely web server instance hasn't recovered yet "
            warnMsg += "from previous timed based payload. If the problem "
            warnMsg += "persists please wait for a few minutes and rerun "
@@ -201,7 +188,6 @@ class Connect(object):
            warnMsg += "lower the value of option '--time-sec' (e.g. '--time-sec=2')"
            singleTimeWarnMessage(warnMsg)

-        # 处理原始页面为空的情况
        elif kb.originalPage is None:
            if conf.tor:
                warnMsg = "please make sure that you have "
@@ -228,28 +214,20 @@ class Connect(object):

            singleTimeWarnMessage(warnMsg)

-        # 处理多线程情况
        elif conf.threads > 1:
            warnMsg = "if the problem persists please try to lower "
            warnMsg += "the number of used threads (option '--threads')"
            singleTimeWarnMessage(warnMsg)

-        # 重试请求
        kwargs['retrying'] = True
        return Connect._getPageProxy(**kwargs)

    @staticmethod
    def _connReadProxy(conn):
-        """
-        读取连接响应的代理方法
-        处理压缩和大响应的情况
-        """
        retVal = b""

-        # 如果不是DNS模式且连接存在
        if not kb.dnsMode and conn:
            headers = conn.info()
-            # 处理压缩响应
            if kb.pageCompress and headers and hasattr(headers, "getheader") and (headers.getheader(HTTP_HEADER.CONTENT_ENCODING, "").lower() in ("gzip", "deflate") or "text" not in headers.getheader(HTTP_HEADER.CONTENT_TYPE, "").lower()):
                retVal = conn.read(MAX_CONNECTION_TOTAL_SIZE)
                if len(retVal) == MAX_CONNECTION_TOTAL_SIZE:
@@ -258,7 +236,6 @@ class Connect(object):
                    kb.pageCompress = False
                    raise SqlmapCompressionException
            else:
-                # 分块读取大响应
                while True:
                    if not conn:
                        break
@@ -277,13 +254,11 @@ class Connect(object):
                        retVal += part
                        break

-                    # 检查总响应大小是否超过限制
                    if len(retVal) > MAX_CONNECTION_TOTAL_SIZE:
                        warnMsg = "too large response detected. Automatically trimming it"
                        singleTimeWarnMessage(warnMsg)
                        break

-        # 处理特殊的响应放大因子
        if conf.yuge:
            retVal = YUGE_FACTOR * retVal

@@ -292,14 +267,13 @@ class Connect(object):
    @staticmethod
    def getPage(**kwargs):
        """
-        这个方法连接到目标URL或代理并返回目标URL页面内容
+        This method connects to the target URL or proxy and returns
+        the target URL page content
        """

-        # 如果是离线模式直接返回
        if conf.offline:
            return None, None, None

-        # 获取请求参数
        url = kwargs.get("url", None) or conf.url
        get = kwargs.get("get", None)
        post = kwargs.get("post", None)
@@ -323,19 +297,16 @@ class Connect(object):
        finalCode = kwargs.get("finalCode", False)
        chunked = kwargs.get("chunked", False) or conf.chunked

-        # 处理请求延迟
        if isinstance(conf.delay, (int, float)) and conf.delay > 0:
            time.sleep(conf.delay)

        start = time.time()

-        # 获取当前线程数据
        threadData = getCurrentThreadData()
        with kb.locks.request:
            kb.requestCounter += 1
            threadData.lastRequestUID = kb.requestCounter

-            # 处理代理频率
            if conf.proxyFreq:
                if kb.requestCounter % conf.proxyFreq == 0:
                    conf.proxy = None
@@ -345,7 +316,6 @@ class Connect(object):

                    setHTTPHandlers()

-        # 处理测试模式
        if conf.dummy or conf.murphyRate and randomInt() % conf.murphyRate == 0:
            if conf.murphyRate:
                time.sleep(randomInt() % (MAX_MURPHY_SLEEP_TIME + 1))
@@ -357,7 +327,6 @@ class Connect(object):

            return page, headers, code

-        # 处理cookie
        if conf.liveCookies:
            with kb.locks.liveCookies:
                if not checkFile(conf.liveCookies, raiseOnError=False) or os.path.getsize(conf.liveCookies) == 0:
@@ -382,7 +351,6 @@ class Connect(object):
                cookie = openFile(conf.liveCookies).read().strip()
                cookie = re.sub(r"(?i)\ACookie:\s*", "", cookie)

-        # 处理multipart请求
        if multipart:
            post = multipart
        else:
@@ -393,20 +361,20 @@ class Connect(object):
                post = _urllib.parse.unquote(post)
                post = chunkSplitPostData(post)

-        # 处理WebSocket请求
        webSocket = url.lower().startswith("ws")

        if not _urllib.parse.urlsplit(url).netloc:
            url = _urllib.parse.urljoin(conf.url, url)

-        # 检查是否是相同的目标主机
+        # flag to know if we are dealing with the same target host
        target = checkSameHost(url, conf.url)

        if not retrying:
-            # 重置连接重试次数
+            # Reset the number of connection retries
            threadData.retriesCount = 0

-        # 修复URL中的空格
+        # fix for a known issue where urllib2 drops the part of the provided
+        # URL that follows a space character when URL-encoding it in a later phase
        url = url.replace(" ", "%20")

        if "://" not in url:
@@ -428,7 +396,8 @@ class Connect(object):

        raise404 = raise404 and not kb.ignoreNotFound

-        # 支持非拉丁字符的URL
+        # support for non-latin (e.g. cyrillic) URLs as urllib/urllib2 doesn't
+        # support those by default
        url = asciifyUrl(url)

        try:
@@ -471,7 +440,7 @@ class Connect(object):

            requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str

-            # 准备HTTP头
+            # Prepare HTTP headers
            headers = forgeHeaders({HTTP_HEADER.COOKIE: cookie, HTTP_HEADER.USER_AGENT: ua, HTTP_HEADER.REFERER: referer, HTTP_HEADER.HOST: getHeader(dict(conf.httpHeaders), HTTP_HEADER.HOST) or getHostHeader(url)}, base=None if target else {})

            if HTTP_HEADER.COOKIE in headers:
@@ -655,11 +624,11 @@ class Connect(object):
                if not kb.proxyAuthHeader and getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION):
                    kb.proxyAuthHeader = getRequestHeader(req, HTTP_HEADER.PROXY_AUTHORIZATION)

-                # 返回响应对象
+                # Return response object
                if response:
                    return conn, None, None

-                # 获取HTTP响应
+                # Get HTTP response
                if hasattr(conn, "redurl"):
                    page = (threadData.lastRedirectMsg[1] if kb.choices.redirect == REDIRECTION.NO else Connect._connReadProxy(conn)) if not skipRead else None
                    skipLogTraffic = kb.choices.redirect == REDIRECTION.NO