You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
AFLplusplus/src/AFLplusplus-stable/custom_mutators/examples/XmlMutatorMin.py

349 lines
10 KiB

#!/usr/bin/python
""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """
from __future__ import print_function
from copy import deepcopy
from lxml import etree as ET
import random, re, io
###########################
# The XmlMutatorMin class #
###########################
class XmlMutatorMin:
"""
Optionals parameters:
seed Seed used by the PRNG (default: "RANDOM")
verbose Verbosity (default: False)
"""
def __init__(self, seed="RANDOM", verbose=False):
""" Initialize seed, database and mutators """
# Verbosity
self.verbose = verbose
# Initialize PRNG
self.seed = str(seed)
if self.seed == "RANDOM":
random.seed()
else:
if self.verbose:
print("Static seed '%s'" % self.seed)
random.seed(self.seed)
# Initialize input and output documents
self.input_tree = None
self.tree = None
# High-level mutators (no database needed)
hl_mutators_delete = [
"del_node_and_children",
"del_node_but_children",
"del_attribute",
"del_content",
] # Delete items
hl_mutators_fuzz = ["fuzz_attribute"] # Randomly change attribute values
# Exposed mutators
self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete
def __parse_xml(self, xml):
""" Parse an XML string. Basic wrapper around lxml.parse() """
try:
# Function parse() takes care of comments / DTD / processing instructions / ...
tree = ET.parse(io.BytesIO(xml))
except ET.ParseError:
raise RuntimeError("XML isn't well-formed!")
except LookupError as e:
raise RuntimeError(e)
# Return a document wrapper
return tree
def __exec_among(self, module, functions, min_times, max_times):
""" Randomly execute $functions between $min and $max times """
for i in xrange(random.randint(min_times, max_times)):
# Function names are mangled because they are "private"
getattr(module, "_XmlMutatorMin__" + random.choice(functions))()
def __serialize_xml(self, tree):
""" Serialize a XML document. Basic wrapper around lxml.tostring() """
return ET.tostring(
tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding
)
def __ver(self, version):
""" Helper for displaying lxml version numbers """
return ".".join(map(str, version))
def reset(self):
""" Reset the mutator """
self.tree = deepcopy(self.input_tree)
def init_from_string(self, input_string):
""" Initialize the mutator from a XML string """
# Get a pointer to the top-element
self.input_tree = self.__parse_xml(input_string)
# Get a working copy
self.tree = deepcopy(self.input_tree)
def save_to_string(self):
""" Return the current XML document as UTF-8 string """
# Return a text version of the tree
return self.__serialize_xml(self.tree)
def __pick_element(self, exclude_root_node=False):
""" Pick a random element from the current document """
# Get a list of all elements, but nodes like PI and comments
elems = list(self.tree.getroot().iter(tag=ET.Element))
# Is the root node excluded?
if exclude_root_node:
start = 1
else:
start = 0
# Pick a random element
try:
elem_id = random.randint(start, len(elems) - 1)
elem = elems[elem_id]
except ValueError:
# Should only occurs if "exclude_root_node = True"
return (None, None)
return (elem_id, elem)
def __fuzz_attribute(self):
""" Fuzz (part of) an attribute value """
# Select a node to modify
(rand_elem_id, rand_elem) = self.__pick_element()
# Get all the attributes
attribs = rand_elem.keys()
# Is there attributes?
if len(attribs) < 1:
if self.verbose:
print("No attribute: can't replace!")
return
# Pick a random attribute
rand_attrib_id = random.randint(0, len(attribs) - 1)
rand_attrib = attribs[rand_attrib_id]
# We have the attribute to modify
# Get its value
attrib_value = rand_elem.get(rand_attrib)
# print("- Value: " + attrib_value)
# Should we work on the whole value?
func_call = "(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)"
p = re.compile(func_call)
l = p.findall(attrib_value)
if random.choice((True, False)) and l:
# Randomly pick one the function calls
(func, args) = random.choice(l)
# Split by "," and randomly pick one of the arguments
value = random.choice(args.split(","))
# Remove superfluous characters
unclean_value = value
value = value.strip(" ").strip("'")
# print("Selected argument: [%s]" % value)
else:
value = attrib_value
# For each type, define some possible replacement values
choices_number = (
"0",
"11111",
"-128",
"2",
"-1",
"1/3",
"42/0",
"1094861636 idiv 1.0",
"-1123329771506872 idiv 3.8",
"17=$numericRTF",
str(3 + random.randrange(0, 100)),
)
choices_letter = (
"P" * (25 * random.randrange(1, 100)),
"%s%s%s%s%s%s",
"foobar",
)
choices_alnum = (
"Abc123",
"020F0302020204030204",
"020F0302020204030204" * (random.randrange(5, 20)),
)
# Fuzz the value
if random.choice((True, False)) and value == "":
# Empty
new_value = value
elif random.choice((True, False)) and value.isdigit():
# Numbers
new_value = random.choice(choices_number)
elif random.choice((True, False)) and value.isalpha():
# Letters
new_value = random.choice(choices_letter)
elif random.choice((True, False)) and value.isalnum():
# Alphanumeric
new_value = random.choice(choices_alnum)
else:
# Default type
new_value = random.choice(choices_alnum + choices_letter + choices_number)
# If we worked on a substring, apply changes to the whole string
if value != attrib_value:
# No ' around empty values
if new_value != "" and value != "":
new_value = "'" + new_value + "'"
# Apply changes
new_value = attrib_value.replace(unclean_value, new_value)
# Log something
if self.verbose:
print(
"Fuzzing attribute #%i '%s' of tag #%i '%s'"
% (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
)
# Modify the attribute
rand_elem.set(rand_attrib, new_value.decode("utf-8"))
def __del_node_and_children(self):
"""High-level minimizing mutator
Delete a random node and its children (i.e. delete a random tree)"""
self.__del_node(True)
def __del_node_but_children(self):
"""High-level minimizing mutator
Delete a random node but its children (i.e. link them to the parent of the deleted node)"""
self.__del_node(False)
def __del_node(self, delete_children):
""" Called by the __del_node_* mutators """
# Select a node to modify (but the root one)
(rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True)
# If the document includes only a top-level element
# Then we can't pick a element (given that "exclude_root_node = True")
# Is the document deep enough?
if rand_elem is None:
if self.verbose:
print("Can't delete a node: document not deep enough!")
return
# Log something
if self.verbose:
but_or_and = "and" if delete_children else "but"
print(
"Deleting tag #%i '%s' %s its children"
% (rand_elem_id, rand_elem.tag, but_or_and)
)
if delete_children is False:
# Link children of the random (soon to be deleted) node to its parent
for child in rand_elem:
rand_elem.getparent().append(child)
# Remove the node
rand_elem.getparent().remove(rand_elem)
def __del_content(self):
"""High-level minimizing mutator
Delete the attributes and children of a random node"""
# Select a node to modify
(rand_elem_id, rand_elem) = self.__pick_element()
# Log something
if self.verbose:
print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag))
# Reset the node
rand_elem.clear()
def __del_attribute(self):
"""High-level minimizing mutator
Delete a random attribute from a random node"""
# Select a node to modify
(rand_elem_id, rand_elem) = self.__pick_element()
# Get all the attributes
attribs = rand_elem.keys()
# Is there attributes?
if len(attribs) < 1:
if self.verbose:
print("No attribute: can't delete!")
return
# Pick a random attribute
rand_attrib_id = random.randint(0, len(attribs) - 1)
rand_attrib = attribs[rand_attrib_id]
# Log something
if self.verbose:
print(
"Deleting attribute #%i '%s' of tag #%i '%s'"
% (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
)
# Delete the attribute
rand_elem.attrib.pop(rand_attrib)
def mutate(self, min=1, max=5):
""" Execute some high-level mutators between $min and $max times, then some medium-level ones """
# High-level mutation
self.__exec_among(self, self.hl_mutators_all, min, max)