#!/usr/bin/python

""" Mutation of XML documents, should be called from one of its wrappers (CLI, AFL, ...) """

from __future__ import print_function
from copy import deepcopy
from lxml import etree as ET
import random, re, io


###########################
# The XmlMutatorMin class #
###########################


class XmlMutatorMin:

    """
    Optionals parameters:
        seed        Seed used by the PRNG (default: "RANDOM")
        verbose     Verbosity (default: False)
    """

    def __init__(self, seed="RANDOM", verbose=False):

        """ Initialize seed, database and mutators """

        # Verbosity
        self.verbose = verbose

        # Initialize PRNG
        self.seed = str(seed)
        if self.seed == "RANDOM":
            random.seed()
        else:
            if self.verbose:
                print("Static seed '%s'" % self.seed)
            random.seed(self.seed)

        # Initialize input and output documents
        self.input_tree = None
        self.tree = None

        # High-level mutators (no database needed)
        hl_mutators_delete = [
            "del_node_and_children",
            "del_node_but_children",
            "del_attribute",
            "del_content",
        ]  # Delete items
        hl_mutators_fuzz = ["fuzz_attribute"]  # Randomly change attribute values

        # Exposed mutators
        self.hl_mutators_all = hl_mutators_fuzz + hl_mutators_delete

    def __parse_xml(self, xml):

        """ Parse an XML string. Basic wrapper around lxml.parse() """

        try:
            # Function parse() takes care of comments / DTD / processing instructions / ...
            tree = ET.parse(io.BytesIO(xml))
        except ET.ParseError:
            raise RuntimeError("XML isn't well-formed!")
        except LookupError as e:
            raise RuntimeError(e)

        # Return a document wrapper
        return tree

    def __exec_among(self, module, functions, min_times, max_times):

        """ Randomly execute $functions between $min and $max times """

        for i in xrange(random.randint(min_times, max_times)):
            # Function names are mangled because they are "private"
            getattr(module, "_XmlMutatorMin__" + random.choice(functions))()

    def __serialize_xml(self, tree):

        """ Serialize a XML document. Basic wrapper around lxml.tostring() """

        return ET.tostring(
            tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding
        )

    def __ver(self, version):

        """ Helper for displaying lxml version numbers """

        return ".".join(map(str, version))

    def reset(self):

        """ Reset the mutator """

        self.tree = deepcopy(self.input_tree)

    def init_from_string(self, input_string):

        """ Initialize the mutator from a XML string """

        # Get a pointer to the top-element
        self.input_tree = self.__parse_xml(input_string)

        # Get a working copy
        self.tree = deepcopy(self.input_tree)

    def save_to_string(self):

        """ Return the current XML document as UTF-8 string """

        # Return a text version of the tree
        return self.__serialize_xml(self.tree)

    def __pick_element(self, exclude_root_node=False):

        """ Pick a random element from the current document """

        # Get a list of all elements, but nodes like PI and comments
        elems = list(self.tree.getroot().iter(tag=ET.Element))

        # Is the root node excluded?
        if exclude_root_node:
            start = 1
        else:
            start = 0

        # Pick a random element
        try:
            elem_id = random.randint(start, len(elems) - 1)
            elem = elems[elem_id]
        except ValueError:
            # Should only occurs if "exclude_root_node = True"
            return (None, None)

        return (elem_id, elem)

    def __fuzz_attribute(self):

        """ Fuzz (part of) an attribute value """

        # Select a node to modify
        (rand_elem_id, rand_elem) = self.__pick_element()

        # Get all the attributes
        attribs = rand_elem.keys()

        # Is there attributes?
        if len(attribs) < 1:
            if self.verbose:
                print("No attribute: can't replace!")
            return

        # Pick a random attribute
        rand_attrib_id = random.randint(0, len(attribs) - 1)
        rand_attrib = attribs[rand_attrib_id]

        # We have the attribute to modify
        # Get its value
        attrib_value = rand_elem.get(rand_attrib)
        # print("- Value: " + attrib_value)

        # Should we work on the whole value?
        func_call = "(?P<func>[a-zA-Z:\-]+)\((?P<args>.*?)\)"
        p = re.compile(func_call)
        l = p.findall(attrib_value)
        if random.choice((True, False)) and l:
            # Randomly pick one the function calls
            (func, args) = random.choice(l)
            # Split by "," and randomly pick one of the arguments
            value = random.choice(args.split(","))
            # Remove superfluous characters
            unclean_value = value
            value = value.strip(" ").strip("'")
            # print("Selected argument: [%s]" % value)
        else:
            value = attrib_value

        # For each type, define some possible replacement values
        choices_number = (
            "0",
            "11111",
            "-128",
            "2",
            "-1",
            "1/3",
            "42/0",
            "1094861636 idiv 1.0",
            "-1123329771506872 idiv 3.8",
            "17=$numericRTF",
            str(3 + random.randrange(0, 100)),
        )

        choices_letter = (
            "P" * (25 * random.randrange(1, 100)),
            "%s%s%s%s%s%s",
            "foobar",
        )

        choices_alnum = (
            "Abc123",
            "020F0302020204030204",
            "020F0302020204030204" * (random.randrange(5, 20)),
        )

        # Fuzz the value
        if random.choice((True, False)) and value == "":

            # Empty
            new_value = value

        elif random.choice((True, False)) and value.isdigit():

            # Numbers
            new_value = random.choice(choices_number)

        elif random.choice((True, False)) and value.isalpha():

            # Letters
            new_value = random.choice(choices_letter)

        elif random.choice((True, False)) and value.isalnum():

            # Alphanumeric
            new_value = random.choice(choices_alnum)

        else:

            # Default type
            new_value = random.choice(choices_alnum + choices_letter + choices_number)

        # If we worked on a substring, apply changes to the whole string
        if value != attrib_value:
            # No ' around empty values
            if new_value != "" and value != "":
                new_value = "'" + new_value + "'"
            # Apply changes
            new_value = attrib_value.replace(unclean_value, new_value)

        # Log something
        if self.verbose:
            print(
                "Fuzzing attribute #%i '%s' of tag #%i '%s'"
                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
            )

        # Modify the attribute
        rand_elem.set(rand_attrib, new_value.decode("utf-8"))

    def __del_node_and_children(self):

        """High-level minimizing mutator
        Delete a random node and its children (i.e. delete a random tree)"""

        self.__del_node(True)

    def __del_node_but_children(self):

        """High-level minimizing mutator
        Delete a random node but its children (i.e. link them to the parent of the deleted node)"""

        self.__del_node(False)

    def __del_node(self, delete_children):

        """ Called by the __del_node_* mutators """

        # Select a node to modify (but the root one)
        (rand_elem_id, rand_elem) = self.__pick_element(exclude_root_node=True)

        # If the document includes only a top-level element
        # Then we can't pick a element (given that "exclude_root_node = True")

        # Is the document deep enough?
        if rand_elem is None:
            if self.verbose:
                print("Can't delete a node: document not deep enough!")
            return

        # Log something
        if self.verbose:
            but_or_and = "and" if delete_children else "but"
            print(
                "Deleting tag #%i '%s' %s its children"
                % (rand_elem_id, rand_elem.tag, but_or_and)
            )

        if delete_children is False:
            # Link children of the random (soon to be deleted) node to its parent
            for child in rand_elem:
                rand_elem.getparent().append(child)

        # Remove the node
        rand_elem.getparent().remove(rand_elem)

    def __del_content(self):

        """High-level minimizing mutator
        Delete the attributes and children of a random node"""

        # Select a node to modify
        (rand_elem_id, rand_elem) = self.__pick_element()

        # Log something
        if self.verbose:
            print("Reseting tag #%i '%s'" % (rand_elem_id, rand_elem.tag))

        # Reset the node
        rand_elem.clear()

    def __del_attribute(self):

        """High-level minimizing mutator
        Delete a random attribute from a random node"""

        # Select a node to modify
        (rand_elem_id, rand_elem) = self.__pick_element()

        # Get all the attributes
        attribs = rand_elem.keys()

        # Is there attributes?
        if len(attribs) < 1:
            if self.verbose:
                print("No attribute: can't delete!")
            return

        # Pick a random attribute
        rand_attrib_id = random.randint(0, len(attribs) - 1)
        rand_attrib = attribs[rand_attrib_id]

        # Log something
        if self.verbose:
            print(
                "Deleting attribute #%i '%s' of tag #%i '%s'"
                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
            )

        # Delete the attribute
        rand_elem.attrib.pop(rand_attrib)

    def mutate(self, min=1, max=5):

        """ Execute some high-level mutators between $min and $max times, then some medium-level ones """

        # High-level mutation
        self.__exec_among(self, self.hl_mutators_all, min, max)