add comments to parse

pull/3/head
wang 2 months ago
parent 6869c9f61b
commit 129d83dfba

@ -5,37 +5,31 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission
"""
import re
from xml.sax.handler import ContentHandler
from lib.core.common import Backend
from lib.core.common import parseXmlFile
from lib.core.common import sanitizeStr
from lib.core.data import kb
from lib.core.data import paths
from lib.core.enums import DBMS
from lib.parse.handler import FingerprintHandler
# ... 导入相关模块 ...
class MSSQLBannerHandler(ContentHandler):
"""
This class defines methods to parse and extract information from the
given Microsoft SQL Server banner based upon the data in XML file
该类用于解析和提取Microsoft SQL Server banner信息
基于XML文件中的数据进行匹配和处理
"""
def __init__(self, banner, info):
    """
    Prepare the parsing state for a single banner.

    :param banner: banner text to inspect; a falsy value is treated as ""
    :param info: mapping that receives the extracted values
    """
    ContentHandler.__init__(self)

    # Banner text after passing through sanitizeStr
    self._banner = sanitizeStr(banner or "")

    # True while the parser is inside a <version> / <servicepack> element
    self._inVersion = False
    self._inServicePack = False

    # "release" attribute taken from the <signatures> element
    self._release = None

    # Text collected for the signature currently being read
    self._version = ""
    self._servicePack = ""

    # Alternative spelling of the version ("<major>.0.<build>.0"), set later
    self._versionAlt = None

    # Caller-supplied result mapping
    self._info = info
def _feedInfo(self, key, value):
"""
将解析到的信息存入结果字典
"""
value = sanitizeStr(value)
if value in (None, "None"):
@ -44,30 +38,40 @@ class MSSQLBannerHandler(ContentHandler):
self._info[key] = value
def startElement(self, name, attrs):
"""
处理XML元素开始标签
"""
if name == "signatures":
self._release = sanitizeStr(attrs.get("release"))
elif name == "version":
self._inVersion = True
elif name == "servicepack":
self._inServicePack = True
def characters(self, content):
"""
处理XML元素的文本内容
"""
if self._inVersion:
self._version += sanitizeStr(content)
elif self._inServicePack:
self._servicePack += sanitizeStr(content)
def endElement(self, name):
"""
处理XML元素结束标签
"""
if name == "signature":
# 检查banner中是否包含匹配的版本信息
for version in (self._version, self._versionAlt):
if version and self._banner and re.search(r" %s[\.\ ]+" % re.escape(version), self._banner):
# 找到匹配后,保存相关信息
self._feedInfo("dbmsRelease", self._release)
self._feedInfo("dbmsVersion", self._version)
self._feedInfo("dbmsServicePack", self._servicePack)
break
# 重置临时变量
self._version = ""
self._versionAlt = None
self._servicePack = ""
@ -76,6 +80,7 @@ class MSSQLBannerHandler(ContentHandler):
self._inVersion = False
self._version = self._version.replace(" ", "")
# 处理特殊的版本号格式
match = re.search(r"\A(?P<major>\d+)\.00\.(?P<build>\d+)\Z", self._version)
self._versionAlt = "%s.0.%s.0" % (match.group('major'), match.group('build')) if match else None
@ -85,12 +90,11 @@ class MSSQLBannerHandler(ContentHandler):
def bannerParser(banner):
"""
This function calls a class to extract information from the given
DBMS banner based upon the data in XML file
根据不同的数据库类型调用相应的处理器来解析banner信息
"""
xmlfile = None
# 根据数据库类型选择对应的XML配置文件
if Backend.isDbms(DBMS.MSSQL):
xmlfile = paths.MSSQL_XML
elif Backend.isDbms(DBMS.MYSQL):
@ -103,6 +107,7 @@ def bannerParser(banner):
if not xmlfile:
return
# 针对MSSQL使用专门的处理器其他数据库使用通用处理器
if Backend.isDbms(DBMS.MSSQL):
handler = MSSQLBannerHandler(banner, kb.bannerFp)
parseXmlFile(xmlfile, handler)
@ -112,4 +117,4 @@ def bannerParser(banner):
else:
handler = FingerprintHandler(banner, kb.bannerFp)
parseXmlFile(xmlfile, handler)
parseXmlFile(paths.GENERIC_XML, handler)
parseXmlFile(paths.GENERIC_XML, handler)

@ -13,40 +13,51 @@ import shlex
import sys
try:
# 从optparse模块中导入OptionError、OptionGroup、OptionParser和SUPPRESS_HELP
from optparse import OptionError as ArgumentError
from optparse import OptionGroup
from optparse import OptionParser as ArgumentParser
from optparse import SUPPRESS_HELP as SUPPRESS
# 将ArgumentParser的add_argument方法替换为add_option方法
ArgumentParser.add_argument = ArgumentParser.add_option
# 定义一个_add_argument_group方法用于添加参数组
def _add_argument_group(self, *args, **kwargs):
return self.add_option_group(OptionGroup(self, *args, **kwargs))
# 将ArgumentParser的add_argument_group方法替换为_add_argument_group方法
ArgumentParser.add_argument_group = _add_argument_group
# 定义一个_add_argument方法用于添加参数
def _add_argument(self, *args, **kwargs):
return self.add_option(*args, **kwargs)
# 将OptionGroup的add_argument方法替换为_add_argument方法
OptionGroup.add_argument = _add_argument
except ImportError:
# 如果导入optparse模块失败则从argparse模块中导入ArgumentParser、ArgumentError和SUPPRESS
from argparse import ArgumentParser
from argparse import ArgumentError
from argparse import SUPPRESS
finally:
# 定义一个get_actions方法用于获取所有参数
def get_actions(instance):
    """
    Return the option/action container of a parser or group object.

    The attributes "option_list", "_group_actions" and "_actions" are
    probed in that order and the value of the first one present is
    returned. None is returned when the object has none of them.
    """
    candidates = ("option_list", "_group_actions", "_actions")
    present = next((attr for attr in candidates if hasattr(instance, attr)), None)

    if present is None:
        return None

    return getattr(instance, present)
# 定义一个get_groups方法用于获取所有参数组
def get_groups(parser):
    """
    Return the groups registered on a parser.

    A truthy "option_groups" attribute wins; otherwise the parser's
    "_action_groups" attribute is returned (AttributeError is raised
    when that attribute is missing as well).
    """
    groups = getattr(parser, "option_groups", None)

    if groups:
        return groups

    return getattr(parser, "_action_groups")
# 定义一个get_all_options方法用于获取所有参数和参数组中的参数
def get_all_options(parser):
retVal = set()
# 遍历所有参数
for option in get_actions(parser):
if hasattr(option, "option_strings"):
retVal.update(option.option_strings)
@ -54,6 +65,7 @@ finally:
retVal.update(option._long_opts)
retVal.update(option._short_opts)
# 遍历所有参数组中的参数
for group in get_groups(parser):
for option in get_actions(group):
if hasattr(option, "option_strings"):

@ -5,48 +5,58 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission
"""
from lib.core.common import checkFile
from lib.core.common import getSafeExString
from lib.core.common import openFile
from lib.core.common import unArrayizeValue
from lib.core.common import UnicodeRawConfigParser
from lib.core.convert import getUnicode
from lib.core.data import cmdLineOptions
from lib.core.data import conf
from lib.core.data import logger
from lib.core.enums import OPTION_TYPE
from lib.core.exception import SqlmapMissingMandatoryOptionException
from lib.core.exception import SqlmapSyntaxException
from lib.core.optiondict import optDict
# 导入所需的模块和函数
from lib.core.common import checkFile # 检查文件是否存在和可访问
from lib.core.common import getSafeExString # 安全地获取异常的字符串表示
from lib.core.common import openFile # 打开文件的工具函数
from lib.core.common import unArrayizeValue # 将数组值转换为单个值
from lib.core.common import UnicodeRawConfigParser # 处理Unicode的配置文件解析器
from lib.core.convert import getUnicode # 将输入转换为Unicode字符串
from lib.core.data import cmdLineOptions # 命令行选项存储
from lib.core.data import conf # 全局配置字典
from lib.core.data import logger # 日志记录器
from lib.core.enums import OPTION_TYPE # 选项类型枚举
from lib.core.exception import SqlmapMissingMandatoryOptionException # 缺少必需选项异常
from lib.core.exception import SqlmapSyntaxException # 语法错误异常
from lib.core.optiondict import optDict # 选项字典
# 全局配置解析器对象
config = None
def configFileProxy(section, option, datatype):
"""
Parse configuration file and save settings into the configuration
advanced dictionary.
解析配置文件并将设置保存到高级配置字典中
参数:
section: 配置文件中的节名
option: 选项名
datatype: 数据类型
"""
if config.has_option(section, option):
if config.has_option(section, option): # 检查配置中是否存在该选项
try:
if datatype == OPTION_TYPE.BOOLEAN:
# 根据数据类型获取相应的值
if datatype == OPTION_TYPE.BOOLEAN: # 布尔类型
value = config.getboolean(section, option) if config.get(section, option) else False
elif datatype == OPTION_TYPE.INTEGER:
elif datatype == OPTION_TYPE.INTEGER: # 整数类型
value = config.getint(section, option) if config.get(section, option) else 0
elif datatype == OPTION_TYPE.FLOAT:
elif datatype == OPTION_TYPE.FLOAT: # 浮点数类型
value = config.getfloat(section, option) if config.get(section, option) else 0.0
else:
else: # 字符串类型
value = config.get(section, option)
except ValueError as ex:
# 如果值转换失败,抛出语法错误异常
errMsg = "error occurred while processing the option "
errMsg += "'%s' in provided configuration file ('%s')" % (option, getUnicode(ex))
raise SqlmapSyntaxException(errMsg)
# 将值存储到全局配置字典中
if value:
conf[option] = value
else:
conf[option] = None
else:
# 如果选项不存在,记录调试信息
debugMsg = "missing requested option '%s' (section " % option
debugMsg += "'%s') into the configuration file, " % section
debugMsg += "ignoring. Skipping to next."
@ -54,44 +64,55 @@ def configFileProxy(section, option, datatype):
def configFileParser(configFile):
"""
Parse configuration file and save settings into the configuration
advanced dictionary.
解析配置文件的主函数
参数:
configFile: 配置文件路径
"""
global config
# 记录开始解析的调试信息
debugMsg = "parsing configuration file"
logger.debug(debugMsg)
# 检查配置文件是否存在和可访问
checkFile(configFile)
configFP = openFile(configFile, "rb")
try:
# 创建配置解析器实例并读取配置文件
config = UnicodeRawConfigParser()
if hasattr(config, "read_file"):
if hasattr(config, "read_file"): # Python 3
config.read_file(configFP)
else:
else: # Python 2
config.readfp(configFP)
except Exception as ex:
# 如果解析失败,抛出语法错误异常
errMsg = "you have provided an invalid and/or unreadable configuration file ('%s')" % getSafeExString(ex)
raise SqlmapSyntaxException(errMsg)
# 检查是否存在必需的Target节
if not config.has_section("Target"):
errMsg = "missing a mandatory section 'Target' in the configuration file"
raise SqlmapMissingMandatoryOptionException(errMsg)
# 检查必需选项
mandatory = False
# 检查Target节中是否至少存在一个必需的选项
for option in ("direct", "url", "logFile", "bulkFile", "googleDork", "requestFile", "wizard"):
if config.has_option("Target", option) and config.get("Target", option) or cmdLineOptions.get(option):
mandatory = True
break
# 如果没有找到任何必需选项,抛出异常
if not mandatory:
errMsg = "missing a mandatory option in the configuration file "
errMsg += "(direct, url, logFile, bulkFile, googleDork, requestFile or wizard)"
raise SqlmapMissingMandatoryOptionException(errMsg)
# 遍历所有选项并解析它们
for family, optionData in optDict.items():
for option, datatype in optionData.items():
datatype = unArrayizeValue(datatype)

@ -13,66 +13,95 @@ from lib.core.common import sanitizeStr
class FingerprintHandler(ContentHandler):
"""
This class defines methods to parse and extract information from
the given DBMS banner based upon the data in XML file
这个类定义了解析和提取数据库管理系统(DBMS)横幅信息的方法
基于XML文件中的数据进行匹配和提取
"""
def __init__(self, banner, info):
    """
    Prepare the parsing state for a single banner.

    :param banner: banner text to match against; a falsy value is treated as ""
    :param info: mapping that receives the extracted values
    """
    ContentHandler.__init__(self)

    # Banner text after passing through sanitizeStr
    self._banner = sanitizeStr(banner or "")

    # Regular expression of the <regexp> element being processed
    self._regexp = None

    # Result of searching the banner with that expression
    self._match = None

    # Values of the "dbms_version" / "tech_version" attributes of <info>
    self._dbmsVersion = None
    self._techVersion = None

    # Caller-supplied result mapping
    self._info = info
def _feedInfo(self, key, value):
    """
    Store one extracted value in the result mapping.

    The value is passed through sanitizeStr first; None, "None" and ""
    are ignored. "dbmsVersion" is stored as a plain value (overwriting
    any previous one), every other key accumulates into a set, with
    "|"-separated values added as individual members.

    :param key: name of the piece of information
    :param value: extracted value
    """
    value = sanitizeStr(value)

    if value in (None, "None", ""):
        return

    if key == "dbmsVersion":
        self._info[key] = value
    else:
        if key not in self._info:
            self._info[key] = set()

        self._info[key].update(value.split("|"))
def startElement(self, name, attrs):
"""
处理XML元素开始标签
:param name: 元素名称
:param attrs: 元素属性字典
"""
# 处理regexp标签,用于匹配横幅信息
if name == "regexp":
self._regexp = sanitizeStr(attrs.get("value"))
_ = re.match(r"\A[A-Za-z0-9]+", self._regexp) # minor trick avoiding compiling of large amount of regexes
# 优化技巧:通过快速检查避免编译大量正则表达式
_ = re.match(r"\A[A-Za-z0-9]+", self._regexp)
# 如果快速检查通过或无法快速检查,则进行完整的正则匹配
if _ and self._banner and _.group(0).lower() in self._banner.lower() or not _:
self._match = re.search(self._regexp, self._banner, re.I | re.M)
else:
self._match = None
# 处理info标签,提取各种版本和技术信息
if name == "info" and self._match:
self._feedInfo("type", attrs.get("type"))
self._feedInfo("distrib", attrs.get("distrib"))
self._feedInfo("release", attrs.get("release"))
self._feedInfo("codename", attrs.get("codename"))
# 提取基本信息
self._feedInfo("type", attrs.get("type")) # 类型信息
self._feedInfo("distrib", attrs.get("distrib")) # 发行版信息
self._feedInfo("release", attrs.get("release")) # 发布信息
self._feedInfo("codename", attrs.get("codename")) # 代号信息
# 获取版本相关信息
self._dbmsVersion = sanitizeStr(attrs.get("dbms_version"))
self._techVersion = sanitizeStr(attrs.get("tech_version"))
self._sp = sanitizeStr(attrs.get("sp"))
# 处理数据库版本信息
if self._dbmsVersion and self._dbmsVersion.isdigit():
self._feedInfo("dbmsVersion", self._match.group(int(self._dbmsVersion)))
# 处理技术版本信息
if self._techVersion and self._techVersion.isdigit():
self._feedInfo("technology", "%s %s" % (attrs.get("technology"), self._match.group(int(self._techVersion))))
else:
self._feedInfo("technology", attrs.get("technology"))
# 处理Service Pack信息
if self._sp.isdigit():
self._feedInfo("sp", "Service Pack %s" % int(self._sp))
# 重置所有临时变量
self._regexp = None
self._match = None
self._dbmsVersion = None

@ -5,33 +5,53 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission
"""
# 导入操作系统相关的功能模块
import os
# 从lib.core.common导入XML文件解析函数
from lib.core.common import parseXmlFile
# 从lib.core.data导入全局变量存储对象kb和路径配置对象paths
from lib.core.data import kb
from lib.core.data import paths
# 导入指纹识别处理器类
from lib.parse.handler import FingerprintHandler
def headersParser(headers):
    """
    This function calls a class that parses the input HTTP headers to
    fingerprint the back-end database management system operating system
    and the web application technology

    :param headers: mapping of HTTP header names to values
    """

    # Build the header name -> XML rule file table only once
    if not kb.headerPaths:
        ruleFiles = (
            ("microsoftsharepointteamservices", "sharepoint.xml"),
            ("server", "server.xml"),
            ("servlet-engine", "servlet-engine.xml"),
            ("set-cookie", "set-cookie.xml"),
            ("x-aspnet-version", "x-aspnet-version.xml"),
            ("x-powered-by", "x-powered-by.xml"),
        )
        kb.headerPaths = dict((name, os.path.join(paths.SQLMAP_XML_BANNER_PATH, filename)) for name, filename in ruleFiles)

    for original in headers:
        header = original.lower()

        # Only headers with a dedicated rule file are of interest
        if header not in kb.headerPaths:
            continue

        handler = FingerprintHandler(headers[header], kb.headersFp)

        # Header-specific rules first, generic rules afterwards
        parseXmlFile(kb.headerPaths[header], handler)
        parseXmlFile(paths.GENERIC_XML, handler)

@ -5,64 +5,74 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission
"""
import re
# 导入所需的模块
import re # 用于正则表达式处理
from xml.sax.handler import ContentHandler
from xml.sax.handler import ContentHandler # XML内容处理器
from lib.core.common import urldecode
from lib.core.common import parseXmlFile
from lib.core.data import kb
from lib.core.data import paths
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD
from lib.core.threads import getCurrentThreadData
from lib.core.common import urldecode # URL解码函数
from lib.core.common import parseXmlFile # XML文件解析函数
from lib.core.data import kb # 知识库,存储全局变量
from lib.core.data import paths # 路径相关配置
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD # 页面大小阈值设置
from lib.core.threads import getCurrentThreadData # 获取当前线程数据
class HTMLHandler(ContentHandler):
    """
    This class defines methods to parse the input HTML page to
    fingerprint the back-end database management system
    """

    def __init__(self, page):
        """
        :param page: page content to inspect; a falsy value is treated as ""
        """
        ContentHandler.__init__(self)

        # Value of the <dbms> element currently being processed
        self._dbms = None
        self._page = (page or "")

        try:
            self._lower_page = self._page.lower()
        except SystemError:  # https://bugs.python.org/issue18183
            self._lower_page = None

        self._urldecoded_page = urldecode(self._page)

        # DBMS recognised from the page (stays None until an error pattern matches)
        self.dbms = None

    def _markAsErrorPage(self):
        """
        Record the current page as the last error page of the current thread
        """
        threadData = getCurrentThreadData()
        threadData.lastErrorPage = (threadData.lastRequestUID, self._page)

    def startElement(self, name, attrs):
        """
        Handle an opening XML tag of the error pattern file.

        :param name: element name
        :param attrs: element attributes ("value" for <dbms>; "regexp" and
                      "fork" for <error>)
        """
        # Nothing more to do once a DBMS has been recognised
        if self.dbms:
            return

        if name == "dbms":
            self._dbms = attrs.get("value")

        elif name == "error":
            regexp = attrs.get("regexp")

            if regexp not in kb.cache.regex:
                # Cache the longest word of the pattern (escape sequences
                # blanked out first) for a cheap substring pre-check
                keywords = re.findall(r"\w+", re.sub(r"\\.", " ", regexp))
                keywords = sorted(keywords, key=len)
                kb.cache.regex[regexp] = keywords[-1].lower()

            # The pre-check is bypassed for patterns with alternation and
            # when the lowercased page is not available (falsy)
            if ('|' in regexp or kb.cache.regex[regexp] in (self._lower_page or kb.cache.regex[regexp])) and re.search(regexp, self._urldecoded_page, re.I):
                self.dbms = self._dbms
                self._markAsErrorPage()
                kb.forkNote = kb.forkNote or attrs.get("fork")
def htmlParser(page):
"""
This function calls a class that parses the input HTML page to
fingerprint the back-end database management system
解析HTML页面以识别后端数据库类型的主函数
page: 要解析的HTML页面内容
示例:
>>> from lib.core.enums import DBMS
>>> htmlParser("Warning: mysql_fetch_array() expects parameter 1 to be resource") == DBMS.MYSQL
True
@ -70,30 +80,32 @@ def htmlParser(page):
>>> threadData.lastErrorPage = None
"""
page = page[:HEURISTIC_PAGE_SIZE_THRESHOLD]
page = page[:HEURISTIC_PAGE_SIZE_THRESHOLD] # 截取页面内容到阈值大小
xmlfile = paths.ERRORS_XML
handler = HTMLHandler(page)
key = hash(page)
xmlfile = paths.ERRORS_XML # 错误模式的XML配置文件
handler = HTMLHandler(page) # 创建处理器实例
key = hash(page) # 计算页面内容的哈希值
# generic SQL warning/error messages
# 检查通用SQL警告/错误信息
if re.search(r"SQL (warning|error|syntax)", page, re.I):
handler._markAsErrorPage()
# 如果页面已经解析过,直接返回缓存的结果
if key in kb.cache.parsedDbms:
retVal = kb.cache.parsedDbms[key]
if retVal:
handler._markAsErrorPage()
return retVal
parseXmlFile(xmlfile, handler)
parseXmlFile(xmlfile, handler) # 解析XML配置文件
# 更新识别状态
if handler.dbms and handler.dbms not in kb.htmlFp:
kb.lastParserStatus = handler.dbms
kb.htmlFp.append(handler.dbms)
else:
kb.lastParserStatus = None
kb.cache.parsedDbms[key] = handler.dbms
kb.cache.parsedDbms[key] = handler.dbms # 缓存解析结果
return handler.dbms
return handler.dbms # 返回识别出的数据库类型

@ -5,11 +5,13 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission
"""
# 导入所需的标准库
import os
import re
from xml.etree import ElementTree as et
from xml.etree import ElementTree as et # 导入XML解析库
# 导入自定义模块和函数
from lib.core.common import getSafeExString
from lib.core.compat import xrange
from lib.core.data import conf
@ -19,31 +21,48 @@ from lib.core.exception import SqlmapInstallationException
from lib.core.settings import PAYLOAD_XML_FILES
def cleanupVals(text, tag):
    """
    Normalise a value read from a payload/boundary XML element.

    - for tag "clause", ranges such as "1-3" are expanded to "1,2,3"
    - for tags "clause" and "where" the text is split on ',' into a list
    - digit-only strings become integers (also inside lists, which are
      updated in place)
    - a one-element list is unwrapped unless the tag is "clause" or "where"

    :param text: raw element text (or a list of strings)
    :param tag: name of the XML element the text came from
    :return: int, str or list, depending on the input
    """
    if tag == "clause" and '-' in text:
        text = re.sub(r"(\d+)-(\d+)", lambda match: ','.join(str(_) for _ in xrange(int(match.group(1)), int(match.group(2)) + 1)), text)

    if tag in ("clause", "where"):
        text = text.split(',')

    if hasattr(text, "isdigit") and text.isdigit():
        text = int(text)

    elif isinstance(text, list):
        # Only digit-only items need rewriting; the others stay as they are
        for index, item in enumerate(text):
            if item.isdigit():
                text[index] = int(item)

        if len(text) == 1 and tag not in ("clause", "where"):
            text = text[0]

    return text
def parseXmlNode(node):
"""
解析XML节点
:param node: XML节点对象
"""
# 解析boundary(边界)节点
for element in node.findall("boundary"):
boundary = AttribDict()
boundary = AttribDict() # 创建一个属性字典
# 遍历boundary的子节点
for child in element:
if child.text:
values = cleanupVals(child.text, child.tag)
@ -53,20 +72,24 @@ def parseXmlNode(node):
conf.boundaries.append(boundary)
# 解析test(测试)节点
for element in node.findall("test"):
test = AttribDict()
test = AttribDict() # 创建一个属性字典
# 遍历test的子节点
for child in element:
if child.text and child.text.strip():
values = cleanupVals(child.text, child.tag)
test[child.tag] = values
else:
# 处理没有子元素的节点
if len(child.findall("*")) == 0:
test[child.tag] = None
continue
else:
test[child.tag] = AttribDict()
# 处理有子元素的节点
for gchild in child:
if gchild.tag in test[child.tag]:
prevtext = test[child.tag][gchild.tag]
@ -78,17 +101,18 @@ def parseXmlNode(node):
def loadBoundaries():
"""
Loads boundaries from XML
从XML文件加载边界定义
>>> conf.boundaries = []
>>> loadBoundaries()
>>> len(conf.boundaries) > 0
True
"""
try:
# 尝试解析boundaries.xml文件
doc = et.parse(paths.BOUNDARIES_XML)
except Exception as ex:
# 如果解析失败,抛出安装异常
errMsg = "something appears to be wrong with "
errMsg += "the file '%s' ('%s'). Please make " % (paths.BOUNDARIES_XML, getSafeExString(ex))
errMsg += "sure that you haven't made any changes to it"
@ -99,20 +123,22 @@ def loadBoundaries():
def loadPayloads():
"""
Loads payloads/tests from XML
从XML文件加载有效载荷/测试用例
>>> conf.tests = []
>>> loadPayloads()
>>> len(conf.tests) > 0
True
"""
# 遍历所有payload XML文件
for payloadFile in PAYLOAD_XML_FILES:
payloadFilePath = os.path.join(paths.SQLMAP_XML_PAYLOADS_PATH, payloadFile)
try:
# 尝试解析payload XML文件
doc = et.parse(payloadFilePath)
except Exception as ex:
# 如果解析失败,抛出安装异常
errMsg = "something appears to be wrong with "
errMsg += "the file '%s' ('%s'). Please make " % (payloadFilePath, getSafeExString(ex))
errMsg += "sure that you haven't made any changes to it"

@ -5,52 +5,79 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission
"""
# 导入正则表达式模块,用于匹配和提取文本
import re
from lib.core.common import readInput
from lib.core.data import kb
from lib.core.data import logger
from lib.core.datatype import OrderedSet
from lib.core.exception import SqlmapSyntaxException
from lib.request.connect import Connect as Request
from thirdparty.six.moves import http_client as _http_client
# 导入所需的功能模块
from lib.core.common import readInput # 用于读取用户输入的函数
from lib.core.data import kb # 知识库(knowledge base),存储全局变量和配置信息
from lib.core.data import logger # 日志记录器,用于记录程序运行信息
from lib.core.datatype import OrderedSet # 有序集合数据类型,可以保持元素插入顺序
from lib.core.exception import SqlmapSyntaxException # SQL注入工具的语法异常类
from lib.request.connect import Connect as Request # HTTP请求处理类,用于发送网络请求
from thirdparty.six.moves import http_client as _http_client # HTTP客户端,用于处理HTTP连接
# 定义全局中止标志,用于控制程序终止
abortedFlag = None
def parseSitemap(url, retVal=None):
    """
    Collect the URLs listed in a sitemap.

    :param url: address of the sitemap to fetch
    :param retVal: set the found URLs are added to; None on the initial
                   call (a new OrderedSet is created), the shared set on
                   recursive calls
    :return: the set of collected URLs (possibly partial when the user
             interrupted the parsing)
    """
    global abortedFlag

    # A non-None retVal means this is a recursive call for a nested sitemap
    if retVal is not None:
        logger.debug("parsing sitemap '%s'" % url)

    try:
        if retVal is None:
            abortedFlag = False
            retVal = OrderedSet()

        try:
            # No further requests once the user has aborted
            content = Request.getPage(url=url, raise404=True)[0] if not abortedFlag else ""
        except _http_client.InvalidURL:
            errMsg = "invalid URL given for sitemap ('%s')" % url
            raise SqlmapSyntaxException(errMsg)

        for match in re.finditer(r"<loc>\s*([^<]+)", content or ""):
            if abortedFlag:
                break

            url = match.group(1).strip()

            # An .xml location mentioning "sitemap" is treated as a nested sitemap
            if url.endswith(".xml") and "sitemap" in url.lower():
                # Ask the user once and remember the answer
                if kb.followSitemapRecursion is None:
                    message = "sitemap recursion detected. Do you want to follow? [y/N] "
                    kb.followSitemapRecursion = readInput(message, default='N', boolean=True)

                if kb.followSitemapRecursion:
                    parseSitemap(url, retVal)
            else:
                retVal.add(url)

    except KeyboardInterrupt:
        # Flag is module-level so that outer recursion levels stop as well
        abortedFlag = True
        warnMsg = "user aborted during sitemap parsing. sqlmap "
        warnMsg += "will use partial list"
        logger.warning(warnMsg)

    return retVal

Loading…
Cancel
Save