add comments to parse

pull/3/head
wang 4 months ago
parent 6869c9f61b
commit 129d83dfba

@ -5,37 +5,31 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission See the file 'LICENSE' for copying permission
""" """
import re # ... 导入相关模块 ...
from xml.sax.handler import ContentHandler
from lib.core.common import Backend
from lib.core.common import parseXmlFile
from lib.core.common import sanitizeStr
from lib.core.data import kb
from lib.core.data import paths
from lib.core.enums import DBMS
from lib.parse.handler import FingerprintHandler
class MSSQLBannerHandler(ContentHandler): class MSSQLBannerHandler(ContentHandler):
""" """
This class defines methods to parse and extract information from the 该类用于解析和提取Microsoft SQL Server banner信息
given Microsoft SQL Server banner based upon the data in XML file 基于XML文件中的数据进行匹配和处理
""" """
def __init__(self, banner, info):
    """
    Set up the parsing state for one Microsoft SQL Server banner.

    :param banner: banner string to match against; a falsy value is
                   treated as an empty string
    :param info: mapping that receives the parsed results (written to by
                 _feedInfo)
    """

    ContentHandler.__init__(self)

    # Sanitized copy of the banner that signatures are matched against
    self._banner = sanitizeStr(banner or "")

    # Flags telling characters() which element's text is being collected
    self._inVersion = False
    self._inServicePack = False

    # Values gathered for the signature currently being read
    self._release = None
    self._version = ""
    self._versionAlt = None  # alternative spelling of the version, if any
    self._servicePack = ""

    # Destination mapping for the extracted information
    self._info = info
def _feedInfo(self, key, value): def _feedInfo(self, key, value):
"""
将解析到的信息存入结果字典
"""
value = sanitizeStr(value) value = sanitizeStr(value)
if value in (None, "None"): if value in (None, "None"):
@ -44,30 +38,40 @@ class MSSQLBannerHandler(ContentHandler):
self._info[key] = value self._info[key] = value
def startElement(self, name, attrs):
    """
    Handle an opening XML tag.

    :param name: element name
    :param attrs: attributes of the element

    A "signatures" element supplies the release (its "release" attribute);
    "version" and "servicepack" elements switch on the flag that makes
    characters() collect their text.
    """

    if name == "signatures":
        self._release = sanitizeStr(attrs.get("release"))

    elif name == "version":
        self._inVersion = True

    elif name == "servicepack":
        self._inServicePack = True
def characters(self, content):
    """
    Collect the text content of the element currently being read.

    :param content: chunk of character data reported by the parser

    The text is appended rather than assigned, so content delivered in
    several chunks is accumulated. Version text takes precedence over
    service pack text when both flags are set.
    """

    if self._inVersion:
        self._version += sanitizeStr(content)
    elif self._inServicePack:
        self._servicePack += sanitizeStr(content)
def endElement(self, name): def endElement(self, name):
"""
处理XML元素结束标签
"""
if name == "signature": if name == "signature":
# 检查banner中是否包含匹配的版本信息
for version in (self._version, self._versionAlt): for version in (self._version, self._versionAlt):
if version and self._banner and re.search(r" %s[\.\ ]+" % re.escape(version), self._banner): if version and self._banner and re.search(r" %s[\.\ ]+" % re.escape(version), self._banner):
# 找到匹配后,保存相关信息
self._feedInfo("dbmsRelease", self._release) self._feedInfo("dbmsRelease", self._release)
self._feedInfo("dbmsVersion", self._version) self._feedInfo("dbmsVersion", self._version)
self._feedInfo("dbmsServicePack", self._servicePack) self._feedInfo("dbmsServicePack", self._servicePack)
break break
# 重置临时变量
self._version = "" self._version = ""
self._versionAlt = None self._versionAlt = None
self._servicePack = "" self._servicePack = ""
@ -76,6 +80,7 @@ class MSSQLBannerHandler(ContentHandler):
self._inVersion = False self._inVersion = False
self._version = self._version.replace(" ", "") self._version = self._version.replace(" ", "")
# 处理特殊的版本号格式
match = re.search(r"\A(?P<major>\d+)\.00\.(?P<build>\d+)\Z", self._version) match = re.search(r"\A(?P<major>\d+)\.00\.(?P<build>\d+)\Z", self._version)
self._versionAlt = "%s.0.%s.0" % (match.group('major'), match.group('build')) if match else None self._versionAlt = "%s.0.%s.0" % (match.group('major'), match.group('build')) if match else None
@ -85,12 +90,11 @@ class MSSQLBannerHandler(ContentHandler):
def bannerParser(banner): def bannerParser(banner):
""" """
This function calls a class to extract information from the given 根据不同的数据库类型调用相应的处理器来解析banner信息
DBMS banner based upon the data in XML file
""" """
xmlfile = None xmlfile = None
# 根据数据库类型选择对应的XML配置文件
if Backend.isDbms(DBMS.MSSQL): if Backend.isDbms(DBMS.MSSQL):
xmlfile = paths.MSSQL_XML xmlfile = paths.MSSQL_XML
elif Backend.isDbms(DBMS.MYSQL): elif Backend.isDbms(DBMS.MYSQL):
@ -103,6 +107,7 @@ def bannerParser(banner):
if not xmlfile: if not xmlfile:
return return
# 针对MSSQL使用专门的处理器其他数据库使用通用处理器
if Backend.isDbms(DBMS.MSSQL): if Backend.isDbms(DBMS.MSSQL):
handler = MSSQLBannerHandler(banner, kb.bannerFp) handler = MSSQLBannerHandler(banner, kb.bannerFp)
parseXmlFile(xmlfile, handler) parseXmlFile(xmlfile, handler)
@ -112,4 +117,4 @@ def bannerParser(banner):
else: else:
handler = FingerprintHandler(banner, kb.bannerFp) handler = FingerprintHandler(banner, kb.bannerFp)
parseXmlFile(xmlfile, handler) parseXmlFile(xmlfile, handler)
parseXmlFile(paths.GENERIC_XML, handler) parseXmlFile(paths.GENERIC_XML, handler)

@ -13,40 +13,51 @@ import shlex
import sys import sys
try: try:
# 从optparse模块中导入OptionError、OptionGroup、OptionParser和SUPPRESS_HELP
from optparse import OptionError as ArgumentError from optparse import OptionError as ArgumentError
from optparse import OptionGroup from optparse import OptionGroup
from optparse import OptionParser as ArgumentParser from optparse import OptionParser as ArgumentParser
from optparse import SUPPRESS_HELP as SUPPRESS from optparse import SUPPRESS_HELP as SUPPRESS
# 将ArgumentParser的add_argument方法替换为add_option方法
ArgumentParser.add_argument = ArgumentParser.add_option ArgumentParser.add_argument = ArgumentParser.add_option
# 定义一个_add_argument_group方法用于添加参数组
def _add_argument_group(self, *args, **kwargs): def _add_argument_group(self, *args, **kwargs):
return self.add_option_group(OptionGroup(self, *args, **kwargs)) return self.add_option_group(OptionGroup(self, *args, **kwargs))
# 将ArgumentParser的add_argument_group方法替换为_add_argument_group方法
ArgumentParser.add_argument_group = _add_argument_group ArgumentParser.add_argument_group = _add_argument_group
# 定义一个_add_argument方法用于添加参数
def _add_argument(self, *args, **kwargs): def _add_argument(self, *args, **kwargs):
return self.add_option(*args, **kwargs) return self.add_option(*args, **kwargs)
# 将OptionGroup的add_argument方法替换为_add_argument方法
OptionGroup.add_argument = _add_argument OptionGroup.add_argument = _add_argument
except ImportError: except ImportError:
# 如果导入optparse模块失败则从argparse模块中导入ArgumentParser、ArgumentError和SUPPRESS
from argparse import ArgumentParser from argparse import ArgumentParser
from argparse import ArgumentError from argparse import ArgumentError
from argparse import SUPPRESS from argparse import SUPPRESS
finally: finally:
# 定义一个get_actions方法用于获取所有参数
def get_actions(instance): def get_actions(instance):
for attr in ("option_list", "_group_actions", "_actions"): for attr in ("option_list", "_group_actions", "_actions"):
if hasattr(instance, attr): if hasattr(instance, attr):
return getattr(instance, attr) return getattr(instance, attr)
# 定义一个get_groups方法用于获取所有参数组
def get_groups(parser): def get_groups(parser):
return getattr(parser, "option_groups", None) or getattr(parser, "_action_groups") return getattr(parser, "option_groups", None) or getattr(parser, "_action_groups")
# 定义一个get_all_options方法用于获取所有参数和参数组中的参数
def get_all_options(parser): def get_all_options(parser):
retVal = set() retVal = set()
# 遍历所有参数
for option in get_actions(parser): for option in get_actions(parser):
if hasattr(option, "option_strings"): if hasattr(option, "option_strings"):
retVal.update(option.option_strings) retVal.update(option.option_strings)
@ -54,6 +65,7 @@ finally:
retVal.update(option._long_opts) retVal.update(option._long_opts)
retVal.update(option._short_opts) retVal.update(option._short_opts)
# 遍历所有参数组中的参数
for group in get_groups(parser): for group in get_groups(parser):
for option in get_actions(group): for option in get_actions(group):
if hasattr(option, "option_strings"): if hasattr(option, "option_strings"):

@ -5,48 +5,58 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission See the file 'LICENSE' for copying permission
""" """
from lib.core.common import checkFile # 导入所需的模块和函数
from lib.core.common import getSafeExString from lib.core.common import checkFile # 检查文件是否存在和可访问
from lib.core.common import openFile from lib.core.common import getSafeExString # 安全地获取异常的字符串表示
from lib.core.common import unArrayizeValue from lib.core.common import openFile # 打开文件的工具函数
from lib.core.common import UnicodeRawConfigParser from lib.core.common import unArrayizeValue # 将数组值转换为单个值
from lib.core.convert import getUnicode from lib.core.common import UnicodeRawConfigParser # 处理Unicode的配置文件解析器
from lib.core.data import cmdLineOptions from lib.core.convert import getUnicode # 将输入转换为Unicode字符串
from lib.core.data import conf from lib.core.data import cmdLineOptions # 命令行选项存储
from lib.core.data import logger from lib.core.data import conf # 全局配置字典
from lib.core.enums import OPTION_TYPE from lib.core.data import logger # 日志记录器
from lib.core.exception import SqlmapMissingMandatoryOptionException from lib.core.enums import OPTION_TYPE # 选项类型枚举
from lib.core.exception import SqlmapSyntaxException from lib.core.exception import SqlmapMissingMandatoryOptionException # 缺少必需选项异常
from lib.core.optiondict import optDict from lib.core.exception import SqlmapSyntaxException # 语法错误异常
from lib.core.optiondict import optDict # 选项字典
# 全局配置解析器对象
config = None config = None
def configFileProxy(section, option, datatype): def configFileProxy(section, option, datatype):
""" """
Parse configuration file and save settings into the configuration 解析配置文件并将设置保存到高级配置字典中
advanced dictionary.
参数:
section: 配置文件中的节名
option: 选项名
datatype: 数据类型
""" """
if config.has_option(section, option): if config.has_option(section, option): # 检查配置中是否存在该选项
try: try:
if datatype == OPTION_TYPE.BOOLEAN: # 根据数据类型获取相应的值
if datatype == OPTION_TYPE.BOOLEAN: # 布尔类型
value = config.getboolean(section, option) if config.get(section, option) else False value = config.getboolean(section, option) if config.get(section, option) else False
elif datatype == OPTION_TYPE.INTEGER: elif datatype == OPTION_TYPE.INTEGER: # 整数类型
value = config.getint(section, option) if config.get(section, option) else 0 value = config.getint(section, option) if config.get(section, option) else 0
elif datatype == OPTION_TYPE.FLOAT: elif datatype == OPTION_TYPE.FLOAT: # 浮点数类型
value = config.getfloat(section, option) if config.get(section, option) else 0.0 value = config.getfloat(section, option) if config.get(section, option) else 0.0
else: else: # 字符串类型
value = config.get(section, option) value = config.get(section, option)
except ValueError as ex: except ValueError as ex:
# 如果值转换失败,抛出语法错误异常
errMsg = "error occurred while processing the option " errMsg = "error occurred while processing the option "
errMsg += "'%s' in provided configuration file ('%s')" % (option, getUnicode(ex)) errMsg += "'%s' in provided configuration file ('%s')" % (option, getUnicode(ex))
raise SqlmapSyntaxException(errMsg) raise SqlmapSyntaxException(errMsg)
# 将值存储到全局配置字典中
if value: if value:
conf[option] = value conf[option] = value
else: else:
conf[option] = None conf[option] = None
else: else:
# 如果选项不存在,记录调试信息
debugMsg = "missing requested option '%s' (section " % option debugMsg = "missing requested option '%s' (section " % option
debugMsg += "'%s') into the configuration file, " % section debugMsg += "'%s') into the configuration file, " % section
debugMsg += "ignoring. Skipping to next." debugMsg += "ignoring. Skipping to next."
@ -54,44 +64,55 @@ def configFileProxy(section, option, datatype):
def configFileParser(configFile): def configFileParser(configFile):
""" """
Parse configuration file and save settings into the configuration 解析配置文件的主函数
advanced dictionary.
参数:
configFile: 配置文件路径
""" """
global config global config
# 记录开始解析的调试信息
debugMsg = "parsing configuration file" debugMsg = "parsing configuration file"
logger.debug(debugMsg) logger.debug(debugMsg)
# 检查配置文件是否存在和可访问
checkFile(configFile) checkFile(configFile)
configFP = openFile(configFile, "rb") configFP = openFile(configFile, "rb")
try: try:
# 创建配置解析器实例并读取配置文件
config = UnicodeRawConfigParser() config = UnicodeRawConfigParser()
if hasattr(config, "read_file"): if hasattr(config, "read_file"): # Python 3
config.read_file(configFP) config.read_file(configFP)
else: else: # Python 2
config.readfp(configFP) config.readfp(configFP)
except Exception as ex: except Exception as ex:
# 如果解析失败,抛出语法错误异常
errMsg = "you have provided an invalid and/or unreadable configuration file ('%s')" % getSafeExString(ex) errMsg = "you have provided an invalid and/or unreadable configuration file ('%s')" % getSafeExString(ex)
raise SqlmapSyntaxException(errMsg) raise SqlmapSyntaxException(errMsg)
# 检查是否存在必需的Target节
if not config.has_section("Target"): if not config.has_section("Target"):
errMsg = "missing a mandatory section 'Target' in the configuration file" errMsg = "missing a mandatory section 'Target' in the configuration file"
raise SqlmapMissingMandatoryOptionException(errMsg) raise SqlmapMissingMandatoryOptionException(errMsg)
# 检查必需选项
mandatory = False mandatory = False
# 检查Target节中是否至少存在一个必需的选项
for option in ("direct", "url", "logFile", "bulkFile", "googleDork", "requestFile", "wizard"): for option in ("direct", "url", "logFile", "bulkFile", "googleDork", "requestFile", "wizard"):
if config.has_option("Target", option) and config.get("Target", option) or cmdLineOptions.get(option): if config.has_option("Target", option) and config.get("Target", option) or cmdLineOptions.get(option):
mandatory = True mandatory = True
break break
# 如果没有找到任何必需选项,抛出异常
if not mandatory: if not mandatory:
errMsg = "missing a mandatory option in the configuration file " errMsg = "missing a mandatory option in the configuration file "
errMsg += "(direct, url, logFile, bulkFile, googleDork, requestFile or wizard)" errMsg += "(direct, url, logFile, bulkFile, googleDork, requestFile or wizard)"
raise SqlmapMissingMandatoryOptionException(errMsg) raise SqlmapMissingMandatoryOptionException(errMsg)
# 遍历所有选项并解析它们
for family, optionData in optDict.items(): for family, optionData in optDict.items():
for option, datatype in optionData.items(): for option, datatype in optionData.items():
datatype = unArrayizeValue(datatype) datatype = unArrayizeValue(datatype)

@ -13,66 +13,95 @@ from lib.core.common import sanitizeStr
class FingerprintHandler(ContentHandler): class FingerprintHandler(ContentHandler):
""" """
This class defines methods to parse and extract information from 这个类定义了解析和提取数据库管理系统(DBMS)横幅信息的方法
the given DBMS banner based upon the data in XML file 基于XML文件中的数据进行匹配和提取
""" """
def __init__(self, banner, info):
    """
    Set up the parsing state for one banner.

    :param banner: banner string to fingerprint; a falsy value is treated
                   as an empty string
    :param info: mapping that receives the extracted information
    """

    ContentHandler.__init__(self)

    # Sanitized copy of the banner that regular expressions are run against
    self._banner = sanitizeStr(banner or "")

    # State of the <regexp> element currently being processed
    self._regexp = None
    self._match = None

    # Attribute values read from the current <info> element
    self._dbmsVersion = None
    self._techVersion = None
    self._sp = None  # assigned in startElement(); declared here so it always exists

    # Destination mapping for the extracted information
    self._info = info
def _feedInfo(self, key, value):
    """
    Store one piece of extracted information in the result mapping.

    :param key: name of the information item
    :param value: raw value; it is sanitized first and ignored when the
                  result is None, "None" or an empty string

    "dbmsVersion" is stored as a single string (a later value overwrites an
    earlier one). Every other key holds a set, and a value containing '|'
    contributes each of its '|'-separated parts.
    """

    value = sanitizeStr(value)

    # Nothing usable to record
    if value in (None, "None", ""):
        return

    if key == "dbmsVersion":
        self._info[key] = value
        return

    if key not in self._info:
        self._info[key] = set()

    self._info[key].update(value.split("|"))
def startElement(self, name, attrs): def startElement(self, name, attrs):
"""
处理XML元素开始标签
:param name: 元素名称
:param attrs: 元素属性字典
"""
# 处理regexp标签,用于匹配横幅信息
if name == "regexp": if name == "regexp":
self._regexp = sanitizeStr(attrs.get("value")) self._regexp = sanitizeStr(attrs.get("value"))
_ = re.match(r"\A[A-Za-z0-9]+", self._regexp) # minor trick avoiding compiling of large amount of regexes # 优化技巧:通过快速检查避免编译大量正则表达式
_ = re.match(r"\A[A-Za-z0-9]+", self._regexp)
# 如果快速检查通过或无法快速检查,则进行完整的正则匹配
if _ and self._banner and _.group(0).lower() in self._banner.lower() or not _: if _ and self._banner and _.group(0).lower() in self._banner.lower() or not _:
self._match = re.search(self._regexp, self._banner, re.I | re.M) self._match = re.search(self._regexp, self._banner, re.I | re.M)
else: else:
self._match = None self._match = None
# 处理info标签,提取各种版本和技术信息
if name == "info" and self._match: if name == "info" and self._match:
self._feedInfo("type", attrs.get("type")) # 提取基本信息
self._feedInfo("distrib", attrs.get("distrib")) self._feedInfo("type", attrs.get("type")) # 类型信息
self._feedInfo("release", attrs.get("release")) self._feedInfo("distrib", attrs.get("distrib")) # 发行版信息
self._feedInfo("codename", attrs.get("codename")) self._feedInfo("release", attrs.get("release")) # 发布信息
self._feedInfo("codename", attrs.get("codename")) # 代号信息
# 获取版本相关信息
self._dbmsVersion = sanitizeStr(attrs.get("dbms_version")) self._dbmsVersion = sanitizeStr(attrs.get("dbms_version"))
self._techVersion = sanitizeStr(attrs.get("tech_version")) self._techVersion = sanitizeStr(attrs.get("tech_version"))
self._sp = sanitizeStr(attrs.get("sp")) self._sp = sanitizeStr(attrs.get("sp"))
# 处理数据库版本信息
if self._dbmsVersion and self._dbmsVersion.isdigit(): if self._dbmsVersion and self._dbmsVersion.isdigit():
self._feedInfo("dbmsVersion", self._match.group(int(self._dbmsVersion))) self._feedInfo("dbmsVersion", self._match.group(int(self._dbmsVersion)))
# 处理技术版本信息
if self._techVersion and self._techVersion.isdigit(): if self._techVersion and self._techVersion.isdigit():
self._feedInfo("technology", "%s %s" % (attrs.get("technology"), self._match.group(int(self._techVersion)))) self._feedInfo("technology", "%s %s" % (attrs.get("technology"), self._match.group(int(self._techVersion))))
else: else:
self._feedInfo("technology", attrs.get("technology")) self._feedInfo("technology", attrs.get("technology"))
# 处理Service Pack信息
if self._sp.isdigit(): if self._sp.isdigit():
self._feedInfo("sp", "Service Pack %s" % int(self._sp)) self._feedInfo("sp", "Service Pack %s" % int(self._sp))
# 重置所有临时变量
self._regexp = None self._regexp = None
self._match = None self._match = None
self._dbmsVersion = None self._dbmsVersion = None

@ -5,33 +5,53 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission See the file 'LICENSE' for copying permission
""" """
# 导入操作系统相关的功能模块
import os import os
# 从lib.core.common导入XML文件解析函数
from lib.core.common import parseXmlFile from lib.core.common import parseXmlFile
# 从lib.core.data导入全局变量存储对象kb和路径配置对象paths
from lib.core.data import kb from lib.core.data import kb
from lib.core.data import paths from lib.core.data import paths
# 导入指纹识别处理器类
from lib.parse.handler import FingerprintHandler from lib.parse.handler import FingerprintHandler
def headersParser(headers):
    """
    Fingerprint the back-end database management system, operating system
    and web application technology from the given HTTP headers.

    :param headers: mapping of HTTP header names to values

    For each header whose lower-cased name has a dedicated XML signature
    file, the header value is run through that file and then through the
    generic signature file. Results accumulate in kb.headersFp.
    """

    # Built once and cached on kb: lower-cased header name -> signature file
    if not kb.headerPaths:
        kb.headerPaths = {
            "microsoftsharepointteamservices": os.path.join(paths.SQLMAP_XML_BANNER_PATH, "sharepoint.xml"),
            "server": os.path.join(paths.SQLMAP_XML_BANNER_PATH, "server.xml"),
            "servlet-engine": os.path.join(paths.SQLMAP_XML_BANNER_PATH, "servlet-engine.xml"),
            "set-cookie": os.path.join(paths.SQLMAP_XML_BANNER_PATH, "set-cookie.xml"),
            "x-aspnet-version": os.path.join(paths.SQLMAP_XML_BANNER_PATH, "x-aspnet-version.xml"),
            "x-powered-by": os.path.join(paths.SQLMAP_XML_BANNER_PATH, "x-powered-by.xml"),
        }

    for name in headers:
        header = name.lower()

        if header not in kb.headerPaths:
            continue

        # Look the value up under the name exactly as the mapping yielded it,
        # so that case-sensitive mappings (e.g. a plain dict) work as well
        value = headers[name]
        xmlfile = kb.headerPaths[header]

        handler = FingerprintHandler(value, kb.headersFp)
        parseXmlFile(xmlfile, handler)
        parseXmlFile(paths.GENERIC_XML, handler)

@ -5,64 +5,74 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission See the file 'LICENSE' for copying permission
""" """
import re # 导入所需的模块
import re # 用于正则表达式处理
from xml.sax.handler import ContentHandler from xml.sax.handler import ContentHandler # XML内容处理器
from lib.core.common import urldecode from lib.core.common import urldecode # URL解码函数
from lib.core.common import parseXmlFile from lib.core.common import parseXmlFile # XML文件解析函数
from lib.core.data import kb from lib.core.data import kb # 知识库,存储全局变量
from lib.core.data import paths from lib.core.data import paths # 路径相关配置
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD # 页面大小阈值设置
from lib.core.threads import getCurrentThreadData from lib.core.threads import getCurrentThreadData # 获取当前线程数据
class HTMLHandler(ContentHandler):
    """
    This class defines methods to parse the input HTML page to
    fingerprint the back-end database management system
    """

    def __init__(self, page):
        """
        :param page: page content to inspect; a falsy value is treated as
                     an empty string
        """

        ContentHandler.__init__(self)

        # DBMS named by the most recent <dbms> element
        self._dbms = None
        self._page = (page or "")

        try:
            self._lower_page = self._page.lower()
        except SystemError:  # https://bugs.python.org/issue18183
            # No lower-cased copy available; startElement() then skips
            # its keyword pre-filter
            self._lower_page = None

        self._urldecoded_page = urldecode(self._page)

        # DBMS identified from the page (None until an error pattern matches)
        self.dbms = None

    def _markAsErrorPage(self):
        """
        Record the current page as the last error page of this thread
        """

        threadData = getCurrentThreadData()
        threadData.lastErrorPage = (threadData.lastRequestUID, self._page)

    def startElement(self, name, attrs):
        """
        Handle an opening XML tag.

        :param name: element name
        :param attrs: attributes of the element

        A "dbms" element names the DBMS the following "error" elements
        belong to; an "error" element carries a regular expression that is
        tried against the URL-decoded page. Nothing is done once a DBMS has
        been identified.
        """

        if self.dbms:
            return

        if name == "dbms":
            self._dbms = attrs.get("value")

        elif name == "error":
            regexp = attrs.get("regexp")

            if regexp not in kb.cache.regex:
                # Cache the longest word found in the expression (escaped
                # characters blanked out first) as a cheap substring
                # pre-filter; among equally long words the last one wins
                keywords = re.findall(r"\w+", re.sub(r"\\.", " ", regexp))
                keywords = sorted(keywords, key=len)
                kb.cache.regex[regexp] = keywords[-1].lower()

            keyword = kb.cache.regex[regexp]

            # The pre-filter is bypassed for expressions with alternation
            # and passes trivially when the lower-cased page is falsy
            if ('|' in regexp or keyword in (self._lower_page or keyword)) and re.search(regexp, self._urldecoded_page, re.I):
                self.dbms = self._dbms
                self._markAsErrorPage()
                kb.forkNote = kb.forkNote or attrs.get("fork")
def htmlParser(page): def htmlParser(page):
""" """
This function calls a class that parses the input HTML page to 解析HTML页面以识别后端数据库类型的主函数
fingerprint the back-end database management system page: 要解析的HTML页面内容
示例:
>>> from lib.core.enums import DBMS >>> from lib.core.enums import DBMS
>>> htmlParser("Warning: mysql_fetch_array() expects parameter 1 to be resource") == DBMS.MYSQL >>> htmlParser("Warning: mysql_fetch_array() expects parameter 1 to be resource") == DBMS.MYSQL
True True
@ -70,30 +80,32 @@ def htmlParser(page):
>>> threadData.lastErrorPage = None >>> threadData.lastErrorPage = None
""" """
page = page[:HEURISTIC_PAGE_SIZE_THRESHOLD] page = page[:HEURISTIC_PAGE_SIZE_THRESHOLD] # 截取页面内容到阈值大小
xmlfile = paths.ERRORS_XML xmlfile = paths.ERRORS_XML # 错误模式的XML配置文件
handler = HTMLHandler(page) handler = HTMLHandler(page) # 创建处理器实例
key = hash(page) key = hash(page) # 计算页面内容的哈希值
# generic SQL warning/error messages # 检查通用SQL警告/错误信息
if re.search(r"SQL (warning|error|syntax)", page, re.I): if re.search(r"SQL (warning|error|syntax)", page, re.I):
handler._markAsErrorPage() handler._markAsErrorPage()
# 如果页面已经解析过,直接返回缓存的结果
if key in kb.cache.parsedDbms: if key in kb.cache.parsedDbms:
retVal = kb.cache.parsedDbms[key] retVal = kb.cache.parsedDbms[key]
if retVal: if retVal:
handler._markAsErrorPage() handler._markAsErrorPage()
return retVal return retVal
parseXmlFile(xmlfile, handler) parseXmlFile(xmlfile, handler) # 解析XML配置文件
# 更新识别状态
if handler.dbms and handler.dbms not in kb.htmlFp: if handler.dbms and handler.dbms not in kb.htmlFp:
kb.lastParserStatus = handler.dbms kb.lastParserStatus = handler.dbms
kb.htmlFp.append(handler.dbms) kb.htmlFp.append(handler.dbms)
else: else:
kb.lastParserStatus = None kb.lastParserStatus = None
kb.cache.parsedDbms[key] = handler.dbms kb.cache.parsedDbms[key] = handler.dbms # 缓存解析结果
return handler.dbms return handler.dbms # 返回识别出的数据库类型

@ -5,11 +5,13 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission See the file 'LICENSE' for copying permission
""" """
# 导入所需的标准库
import os import os
import re import re
from xml.etree import ElementTree as et from xml.etree import ElementTree as et # 导入XML解析库
# 导入自定义模块和函数
from lib.core.common import getSafeExString from lib.core.common import getSafeExString
from lib.core.compat import xrange from lib.core.compat import xrange
from lib.core.data import conf from lib.core.data import conf
@ -19,31 +21,48 @@ from lib.core.exception import SqlmapInstallationException
from lib.core.settings import PAYLOAD_XML_FILES from lib.core.settings import PAYLOAD_XML_FILES
def cleanupVals(text, tag):
    """
    Normalize the text of an XML element into a Python value.

    :param text: element text
    :param tag: name of the XML element the text belongs to
    :return: an int when the text consists of digits only; for the
             "clause" and "where" tags a list of the comma-separated items,
             with digit-only items converted to int; otherwise the text
             unchanged

    >>> cleanupVals("1-3", "clause")
    [1, 2, 3]
    >>> cleanupVals("1", "where")
    [1]
    >>> cleanupVals("5", "level")
    5
    """

    # Expand inclusive ranges inside a clause (e.g. "1-3" becomes "1,2,3")
    if tag == "clause" and '-' in text:
        text = re.sub(r"(\d+)-(\d+)", lambda match: ','.join(str(_) for _ in range(int(match.group(1)), int(match.group(2)) + 1)), text)

    if tag in ("clause", "where"):
        text = text.split(',')

    if hasattr(text, "isdigit") and text.isdigit():
        text = int(text)

    elif isinstance(text, list):
        # Converted in place so that a list handed in by the caller is
        # updated as well
        text[:] = [int(_) if _.isdigit() else _ for _ in text]

        # A single-item list collapses to its item, except for the tags
        # that are always returned as lists
        if len(text) == 1 and tag not in ("clause", "where"):
            text = text[0]

    return text
def parseXmlNode(node): def parseXmlNode(node):
"""
解析XML节点
:param node: XML节点对象
"""
# 解析boundary(边界)节点
for element in node.findall("boundary"): for element in node.findall("boundary"):
boundary = AttribDict() boundary = AttribDict() # 创建一个属性字典
# 遍历boundary的子节点
for child in element: for child in element:
if child.text: if child.text:
values = cleanupVals(child.text, child.tag) values = cleanupVals(child.text, child.tag)
@ -53,20 +72,24 @@ def parseXmlNode(node):
conf.boundaries.append(boundary) conf.boundaries.append(boundary)
# 解析test(测试)节点
for element in node.findall("test"): for element in node.findall("test"):
test = AttribDict() test = AttribDict() # 创建一个属性字典
# 遍历test的子节点
for child in element: for child in element:
if child.text and child.text.strip(): if child.text and child.text.strip():
values = cleanupVals(child.text, child.tag) values = cleanupVals(child.text, child.tag)
test[child.tag] = values test[child.tag] = values
else: else:
# 处理没有子元素的节点
if len(child.findall("*")) == 0: if len(child.findall("*")) == 0:
test[child.tag] = None test[child.tag] = None
continue continue
else: else:
test[child.tag] = AttribDict() test[child.tag] = AttribDict()
# 处理有子元素的节点
for gchild in child: for gchild in child:
if gchild.tag in test[child.tag]: if gchild.tag in test[child.tag]:
prevtext = test[child.tag][gchild.tag] prevtext = test[child.tag][gchild.tag]
@ -78,17 +101,18 @@ def parseXmlNode(node):
def loadBoundaries(): def loadBoundaries():
""" """
Loads boundaries from XML 从XML文件加载边界定义
>>> conf.boundaries = [] >>> conf.boundaries = []
>>> loadBoundaries() >>> loadBoundaries()
>>> len(conf.boundaries) > 0 >>> len(conf.boundaries) > 0
True True
""" """
try: try:
# 尝试解析boundaries.xml文件
doc = et.parse(paths.BOUNDARIES_XML) doc = et.parse(paths.BOUNDARIES_XML)
except Exception as ex: except Exception as ex:
# 如果解析失败,抛出安装异常
errMsg = "something appears to be wrong with " errMsg = "something appears to be wrong with "
errMsg += "the file '%s' ('%s'). Please make " % (paths.BOUNDARIES_XML, getSafeExString(ex)) errMsg += "the file '%s' ('%s'). Please make " % (paths.BOUNDARIES_XML, getSafeExString(ex))
errMsg += "sure that you haven't made any changes to it" errMsg += "sure that you haven't made any changes to it"
@ -99,20 +123,22 @@ def loadBoundaries():
def loadPayloads(): def loadPayloads():
""" """
Loads payloads/tests from XML 从XML文件加载有效载荷/测试用例
>>> conf.tests = [] >>> conf.tests = []
>>> loadPayloads() >>> loadPayloads()
>>> len(conf.tests) > 0 >>> len(conf.tests) > 0
True True
""" """
# 遍历所有payload XML文件
for payloadFile in PAYLOAD_XML_FILES: for payloadFile in PAYLOAD_XML_FILES:
payloadFilePath = os.path.join(paths.SQLMAP_XML_PAYLOADS_PATH, payloadFile) payloadFilePath = os.path.join(paths.SQLMAP_XML_PAYLOADS_PATH, payloadFile)
try: try:
# 尝试解析payload XML文件
doc = et.parse(payloadFilePath) doc = et.parse(payloadFilePath)
except Exception as ex: except Exception as ex:
# 如果解析失败,抛出安装异常
errMsg = "something appears to be wrong with " errMsg = "something appears to be wrong with "
errMsg += "the file '%s' ('%s'). Please make " % (payloadFilePath, getSafeExString(ex)) errMsg += "the file '%s' ('%s'). Please make " % (payloadFilePath, getSafeExString(ex))
errMsg += "sure that you haven't made any changes to it" errMsg += "sure that you haven't made any changes to it"

@ -5,52 +5,79 @@ Copyright (c) 2006-2024 sqlmap developers (https://sqlmap.org/)
See the file 'LICENSE' for copying permission See the file 'LICENSE' for copying permission
""" """
# 导入正则表达式模块,用于匹配和提取文本
import re import re
from lib.core.common import readInput # 导入所需的功能模块
from lib.core.data import kb from lib.core.common import readInput # 用于读取用户输入的函数
from lib.core.data import logger from lib.core.data import kb # 知识库(knowledge base),存储全局变量和配置信息
from lib.core.datatype import OrderedSet from lib.core.data import logger # 日志记录器,用于记录程序运行信息
from lib.core.exception import SqlmapSyntaxException from lib.core.datatype import OrderedSet # 有序集合数据类型,可以保持元素插入顺序
from lib.request.connect import Connect as Request from lib.core.exception import SqlmapSyntaxException # SQL注入工具的语法异常类
from thirdparty.six.moves import http_client as _http_client from lib.request.connect import Connect as Request # HTTP请求处理类,用于发送网络请求
from thirdparty.six.moves import http_client as _http_client # HTTP客户端,用于处理HTTP连接
# 定义全局中止标志,用于控制程序终止
abortedFlag = None abortedFlag = None
def parseSitemap(url, retVal=None):
    """
    Collect the locations listed in a sitemap.

    :param url: address of the sitemap to fetch
    :param retVal: set that found locations are added to; None on the
                   initial call (a new OrderedSet is created), the shared
                   set on recursive calls
    :return: the set of collected locations (partial if the user aborted)
    :raises SqlmapSyntaxException: when the given URL is invalid

    A location ending with ".xml" and containing "sitemap" is treated as a
    nested sitemap: the user is asked once whether such sitemaps should be
    followed; if so it is parsed recursively, otherwise it is left out of
    the result.
    """

    global abortedFlag

    # A non-empty retVal means this is a recursive call for a nested sitemap
    if retVal is not None:
        logger.debug("parsing sitemap '%s'" % url)

    try:
        if retVal is None:
            # Initial call: start from a clean state
            abortedFlag = False
            retVal = OrderedSet()

        try:
            # Nothing is fetched any more once the user has aborted
            content = Request.getPage(url=url, raise404=True)[0] if not abortedFlag else ""
        except _http_client.InvalidURL:
            errMsg = "invalid URL given for sitemap ('%s')" % url
            raise SqlmapSyntaxException(errMsg)

        for match in re.finditer(r"<loc>\s*([^<]+)", content or ""):
            if abortedFlag:
                break

            location = match.group(1).strip()

            if location.endswith(".xml") and "sitemap" in location.lower():
                # Ask only once; the answer is remembered in kb
                if kb.followSitemapRecursion is None:
                    message = "sitemap recursion detected. Do you want to follow? [y/N] "
                    kb.followSitemapRecursion = readInput(message, default='N', boolean=True)

                if kb.followSitemapRecursion:
                    parseSitemap(location, retVal)
            else:
                retVal.add(location)

    except KeyboardInterrupt:
        # Remember the abort so that enclosing (recursive) calls stop too
        abortedFlag = True
        warnMsg = "user aborted during sitemap parsing. sqlmap "
        warnMsg += "will use partial list"
        logger.warning(warnMsg)

    return retVal

Loading…
Cancel
Save