# psrGROUP/env/Lib/site-packages/haystack/cli.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import print_function

import argparse
import logging
import os
import sys
import time

import pkg_resources

try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse

from haystack import argparse_utils
from haystack import basicmodel
from haystack import constraints
from haystack.search import api

# Module logger. It is registered under the fixed name 'cli', not under the
# module's dotted name.
log = logging.getLogger('cli')

# Command descriptions. The entry points of this module (search, show,
# live_watch, volatility_dump, rekall_dump) pass them to argparse as the
# program description.
SEARCH_DESC = 'Search for instance of a record_type in the allocated memory of a process. '
SHOW_DESC = 'Cast the bytes at this address into a record_type. '
WATCH_DESC = 'Cast the bytes at this address into a record_type and refresh regularly. '
DUMP_DESC = 'Extract the process dump from the OS memory dump in haystack format. '

# Dump type identifiers. The entry points store one of them in ``opts.dumptype``
# and dump_process() compares against them.
DUMPTYPE_BASE = 'haystack'
DUMPTYPE_VOLATILITY = 'volatility'
DUMPTYPE_REKALL = 'rekall'
DUMPTYPE_LIVE = 'live'
DUMPTYPE_MINIDUMP = 'minidump'
DUMPTYPE_FRIDA = 'frida'

# Reminder of what urlparse() returns:
# >>> from urlparse import urlparse
# >>> o = urlparse('http://www.cwi.nl:80/%7Eguido/Python.html')
# ParseResult(scheme='http', netloc='www.cwi.nl:80', path='/%7Eguido/Python.html', ...)
#
# Disabled static scheme table; the code uses SUPPORTED_DUMP_URI instead.
# URL_SCHEMES = {'dir': DUMPTYPE_BASE,
#                'volatility': DUMPTYPE_VOLATILITY,
#                'rekall': DUMPTYPE_REKALL,
#                'live': DUMPTYPE_LIVE,
#                'dmp': DUMPTYPE_MINIDUMP,
#                'frida': DUMPTYPE_FRIDA}

# Registry of supported target URL schemes. Each key is the name of an entry
# point in the "haystack.mappings_loader" group and each value is the object
# that entry point resolves to. make_memory_handler() calls that object with
# the parsed options, then calls make_memory_handler() on what it returns.
SUPPORTED_DUMP_URI = {}
# Populated at import time (module level), so the registry is filled as soon as
# this module is imported, without going through url().
for entry_point in pkg_resources.iter_entry_points("haystack.mappings_loader"):
    SUPPORTED_DUMP_URI[entry_point.name] = entry_point.resolve()


def url(u):
    """Parse *u* as a target URL and check that it can be handled.

    Meant to be used as an argparse ``type=`` callable.

    :param u: the URL string, e.g. ``dir:///path/to/dump``.
    :return: the ``urlparse`` result. When the network location starts with
        ``~`` or ``.`` the URL is re-parsed from the expanded/absolute path
        first.
    :raises argparse.ArgumentTypeError: when the scheme is not a key of
        SUPPORTED_DUMP_URI, or when a file based scheme (dir, volatility,
        rekall, dmp) points to a path that does not exist.
    """
    # SUPPORTED_DUMP_URI must not be populated here, otherwise users of the API
    # (who never call this function) would not have it.
    parsed = urlparse(u)
    scheme = parsed.scheme.lower()
    if scheme not in SUPPORTED_DUMP_URI:
        raise argparse.ArgumentTypeError("Target type {s}:// not supported".format(s=scheme))
    path = parsed.path
    # be nice with relative paths: 'scheme://~/x' and 'scheme://./x' put the
    # first path component in netloc, so it is glued back before resolving.
    for prefix, resolve in (('~', os.path.expanduser), ('.', os.path.abspath)):
        if parsed.netloc.startswith(prefix):
            path = resolve(os.path.sep.join([parsed.netloc, path]))
            parsed = urlparse("%s://%s" % (scheme, path))
    if scheme in ('volatility', 'rekall'):
        # only the part before the first ':' is checked on disk
        path = parsed.path.split(':')[0]
    if scheme in ('dir', 'volatility', 'rekall', 'dmp') and not os.path.exists(path):
        raise argparse.ArgumentTypeError("Target {p} does not exists".format(p=path))
    # see url.netloc for host name, frida ? live ?
    return parsed


# Dump type descriptions. live_watch(), volatility_dump() and rekall_dump()
# append the LIVE, VOL and REKALL ones to the command description; the BASE
# and MINIDUMP ones are not referenced by the code visible in this module.
DUMPTYPE_BASE_DESC = 'The process dump is a folder produced by a haystack-dump script.'
DUMPTYPE_VOL_DESC = 'The process dump is a volatility OS dump. The PID is the targeted process.'
DUMPTYPE_REKALL_DESC = 'The process dump is a rekall OS dump. The PID is the targeted process.'
DUMPTYPE_LIVE_DESC = 'The PID must be a running process.'
DUMPTYPE_MINIDUMP_DESC = 'The process dump is a Minidump (MDMP) process dump.'


class HaystackError(Exception):
    """Haystack-specific exception type; it adds nothing to ``Exception``."""


def make_memory_handler(opts):
    """Build the memory handler matching the scheme of ``opts.target``.

    :param opts: parsed options; ``opts.target`` must expose a ``scheme``
        attribute (as the value returned by url() does). The whole ``opts``
        object is handed to the loader registered for that scheme.
    :return: whatever the loader's ``make_memory_handler()`` returns.
    :raises TypeError: when the scheme is not a key of SUPPORTED_DUMP_URI.
    """
    scheme = opts.target.scheme.lower()
    if scheme in SUPPORTED_DUMP_URI:
        return SUPPORTED_DUMP_URI[scheme](opts).make_memory_handler()
    raise TypeError('dump type has no case support. %s' % scheme)


def get_output(memory_handler, results, rtype):
    """Serialise *results* with the ``haystack.search.api`` function for *rtype*.

    :param memory_handler: passed through to the serialiser.
    :param results: passed through to the serialiser.
    :param rtype: one of 'string', 'python', 'json' or 'pickled'.
    :return: the serialiser's return value.
    :raises ValueError: when *rtype* is none of the known formats.
    """
    # (format name, name of the api function producing it), tried in order
    serializers = (
        ('string', 'output_to_string'),
        ('python', 'output_to_python'),  # useful in interactive mode
        ('json', 'output_to_json'),
        ('pickled', 'output_to_pickle'),
    )
    for fmt, func_name in serializers:
        if rtype == fmt:
            return getattr(api, func_name)(memory_handler, results)
    raise ValueError('unknown output format')


def dump_process(opts):
    """ Extract the process dump from the OS memory dump in haystack format.

    Only rekall images are handled. For a volatility image, or any other value
    of ``opts.dumptype``, nothing is extracted and an error is logged instead
    of returning silently.

    :param opts: parsed options. ``dumptype`` is always read; the rekall branch
        also reads ``dump_filename``, ``pid`` and ``output_folder_name``.
    :return: None
    """
    if opts.dumptype == DUMPTYPE_REKALL:
        # imported here rather than at module level, so the rekall mapping
        # code is only loaded when this branch runs
        from haystack.mappings import rek
        rek.rekall_dump_to_haystack(opts.dump_filename, opts.pid, opts.output_folder_name)
    elif opts.dumptype == DUMPTYPE_VOLATILITY:
        # there is no volatility dumper yet: tell the user nothing was written
        log.error('dumping a process out of a volatility image is not implemented')
    else:
        log.error('dump type %s is not supported by the dumper', opts.dumptype)
    return


def search_cmdline(args):
    """ Search for instance of a record_type in the allocated memory of a process.

    :param args: parsed command line. This function reads ``constraints_file``,
        ``record_type_name``, ``extended``, ``output`` and ``interactive``;
        ``args`` is also handed to make_memory_handler().
    :return: None. The formatted results are printed on stdout.
    :raises ImportError: when the module part of ``record_type_name`` cannot be
        imported (``sys.path`` is logged first).

    Errors raised while formatting or printing the results are logged and not
    propagated, so that the interactive console can still be opened.

    NOTE: the local variable names are part of the behaviour, because
    ``locals()`` is the namespace of the interactive console.
    NOTE: the ``--hint`` option declared by search_argparser() is not read here.
    """
    # get the memory handler adequate for the type requested
    memory_handler = make_memory_handler(args)
    # try to load constraints
    my_constraints = None
    if args.constraints_file:
        handler = constraints.ConstraintsConfigHandler()
        # the constraints are read again by file name, not from the open handle
        my_constraints = handler.read(args.constraints_file.name)
    # get the python record type
    modulename, sep, classname = args.record_type_name.rpartition('.')
    _module = None
    try:
        _module = memory_handler.get_model().import_module(modulename)
    except ImportError:
        log.error('sys.path is %s', sys.path)
        # bare raise keeps the original traceback
        raise
    record_type = getattr(_module, classname)
    # do the search
    results = api.search_record(memory_handler, record_type, my_constraints, extended_search=args.extended)
    # output handling
    # bound before the try block, as show_cmdline() does, so that 'ret' exists
    # in the interactive namespace even when the formatting fails
    ret = None
    try:
        ret = get_output(memory_handler, results, args.output)
        # print output on stdout
        print(ret)
    except Exception as e:
        log.error(e)
    finally:
        if args.interactive:
            print('results are local variable "results"')
            import code
            code.interact(local=locals())
    return


def show_cmdline(args):
    """Cast the bytes at this address into a record_type.

    :param args: parsed command line. This function reads ``address``,
        ``record_type_name``, ``constraints_file``, ``output`` and
        ``interactive``; ``args`` is also handed to make_memory_handler().
    :return: None. The formatted record is printed on stdout, followed by a
        'Validated' line when a constraints file was given.
    :raises ValueError: when the memory handler reports the address as invalid.
    :raises ImportError: when the module part of ``record_type_name`` cannot be
        imported (``sys.path`` is logged first).

    Errors raised while formatting or printing are logged and not propagated.

    NOTE: the local variable names are part of the behaviour, because
    ``locals()`` is the namespace of the interactive console.
    """
    # we need an int
    memory_address = args.address
    # get the memory handler adequate for the type requested
    memory_handler = make_memory_handler(args)
    # check the validity of the address
    heap = memory_handler.is_valid_address_value(memory_address)
    if not heap:
        log.error("the address is not accessible in the memoryMap")
        raise ValueError("the address is not accessible in the memoryMap")
    # get the structure name: everything before the last '.' is the module,
    # the rest is the attribute looked up in that module
    modulename, sep, classname = args.record_type_name.rpartition('.')
    _module = None
    try:
        _module = memory_handler.get_model().import_module(modulename)
    except ImportError as e:
        # log the search path to help diagnose the failed import, then re-raise
        log.error('sys.path is %s', sys.path)
        raise e
    record_type = getattr(_module, classname)
    # load the record
    result = api.load_record(memory_handler, record_type, memory_address)
    # the output functions are given a list, so wrap the single result
    results = [result]
    # validate if required
    validation = None
    if args.constraints_file:
        handler = constraints.ConstraintsConfigHandler()
        # the constraints are read again by file name, not from the open handle
        my_constraints = handler.read(args.constraints_file.name)
        # presumably result[0] is the loaded record instance - confirm against api.load_record
        validation = api.validate_record(memory_handler, result[0], my_constraints)
    # output handling
    # bound before the try block so that it exists in the interactive namespace
    # even when the formatting fails
    ret = None
    try:
        ret = get_output(memory_handler, results, args.output)
        # print output on stdout
        print(ret)
        if args.constraints_file:
            print('Validated', validation)
    except Exception as e:
        # best effort: report the error but still reach the interactive console
        log.error(e)
    finally:
        if args.interactive:
            print('results are local variable "results"')
            import code
            # every local of this function is visible in the console
            code.interact(local=locals())
    return


def check_varname_for_type(memory_handler, varname, struct_type):
    """Check that the member path *varname* can be followed from *struct_type*.

    :param memory_handler: gives access to the model and to the target ctypes.
    :param varname: iterable of member names, outermost first.
    :param struct_type: the record type the path starts from.
    :return: True when every member exists. Otherwise False, after logging the
        fields of the type on which the lookup failed.
    """
    walked = []
    current_type = struct_type
    model = memory_handler.get_model()
    target_ctypes = memory_handler.get_target_platform().get_target_ctypes()
    for member in varname:
        if hasattr(current_type, member):
            current_type = current_type._get_field_type(member)
            # pointers are accepted: keep walking from the type the model
            # gives for that pointer
            if target_ctypes.is_pointer_type(current_type):
                current_type = model.get_subtype(current_type)
            walked.append(member)
            continue
        # unknown member: show what the type has to offer, then give up
        field_lines = ["%s: %s" % (n, t) for n, t in basicmodel.get_fields(current_type)]
        log.error('(%s.)%s does not exists in type %s\n\t%s', '.'.join(walked), member, current_type, '\n\t'.join(field_lines))
        return False
    return True


def get_varname_value(varname, instance):
    """Follow the attribute path *varname* from *instance* and format the result.

    :param varname: iterable of attribute names, outermost first.
    :param instance: the object the path starts from.
    :return: the string ``'<dotted path> = \\n<value>'``.
    :raises AttributeError: when an attribute of the path is missing.
    """
    path = []
    value = instance
    for attr_name in varname:
        value = getattr(value, attr_name)
        path.append(attr_name)
    dotted = '.'.join(path)
    return '%s = \n%s' % (dotted, value)


def watch(args):
    """Cast the bytes at this address into a record_type and refresh regularly.

    :param args: parsed command line. This function reads ``address`` (or
        ``addr`` when ``address`` is absent), ``refresh_rate``, ``varname`` and
        ``record_type_name``; ``args`` is also handed to make_memory_handler().
    :return: False when ``varname`` is not a valid member path of the record
        type, None otherwise.
    :raises ValueError: when the memory handler reports the address as invalid.
    :raises ImportError: when the module part of ``record_type_name`` cannot be
        imported (``sys.path`` is logged first).

    The record is printed once when ``refresh_rate`` is 0, None or negative.
    Otherwise it is reloaded and printed every ``refresh_rate`` seconds until
    the process is interrupted.
    """
    # watch_argparser() stores the address under 'address'. 'addr' is only kept
    # as a fallback for callers that build the namespace by hand.
    memory_address = args.address if hasattr(args, 'address') else args.addr
    refresh = args.refresh_rate
    varname = args.varname
    # get the memory handler adequate for the type requested
    memory_handler = make_memory_handler(args)
    # check the validity of the address
    if not memory_handler.is_valid_address_value(memory_address):
        log.error("the address is not accessible in the memoryMap")
        raise ValueError("the address is not accessible in the memoryMap")
    # get the structure name
    modulename, _, classname = args.record_type_name.rpartition('.')
    try:
        _module = memory_handler.get_model().import_module(modulename)
    except ImportError:
        log.error('sys.path is %s', sys.path)
        raise
    record_type = getattr(_module, classname)
    # verify that the requested member path exists in the record type
    if varname is not None:
        varname = varname.split('.')
        if not check_varname_for_type(memory_handler, varname, record_type):
            return False
    # print as asked every n secs.
    while True:
        # (re)load the record and convert it to a python object
        result = api.load_record(memory_handler, record_type, memory_address)
        py_obj = api.output_to_python(memory_handler, [result])[0][0]
        # clear terminal
        print(chr(27) + "[2J")
        if varname is None:
            print(py_obj)
        else:
            print(get_varname_value(varname, py_obj))
        # no usable refresh rate means single shot
        if not refresh or refresh < 0:
            break
        time.sleep(refresh)


def base_argparser(program_name, description):
    """ Build the parser holding the options shared by all console scripts.

    :param program_name: value of the parser's ``prog``.
    :param description: value of the parser's ``description``.
    :return: an ``argparse.ArgumentParser`` with the verbosity, interactive,
        mmap, osname and bits options and the ``target`` positional argument,
        which is validated by url().
    """
    parser = argparse.ArgumentParser(prog=program_name, description=description)
    # --debug and --quiet cannot be given together
    level_group = parser.add_mutually_exclusive_group(required=False)
    level_group.add_argument('--debug', dest='debug', action='store_true', help='Set verbosity to DEBUG')
    level_group.add_argument('--quiet', dest='quiet', action='store_true', help='Set verbosity to ERROR only')
    parser.add_argument('--interactive', dest='interactive', action='store_true',
                        help='drop to python command line after action')
    # the flag is stored negated: opts.mmap is True unless --nommap is given
    parser.add_argument('--nommap', dest='mmap', action='store_false', help='disable mmap()-ing')
    parser.add_argument('--osname', '-n', action='store', default=None, choices=['linux', 'winxp', 'win7'], help='Force a specific OS')
    parser.add_argument('--bits', '-b', type=int, action='store', default=None, choices=[32, 64], help='Force a specific word size')
    # list every registered scheme in the help of the target argument
    schemes = sorted(SUPPORTED_DUMP_URI)
    supported = '://, '.join(schemes) + '://'
    parser.add_argument('target', type=url,
                        help='target file or process. Supported URL types: %s' % supported)
    return parser


def search_argparser(search_parser):
    """ Add the options of the search command to *search_parser*.

    Also registers search_cmdline() as the ``func`` default.

    :return: *search_parser* itself.
    """
    add = search_parser.add_argument
    add('record_type_name', type=str,
        help='Python record type name. Module must be in Python path')
    add('--constraints_file', type=argparse.FileType('r'),
        help='Filename that contains Constraints for the record types in the module')
    add('--extended', action='store_true',
        help='Do not restrict the search to allocated chunks')
    add('--hint', type=argparse_utils.int16,
        help='Restrict the search to the memory page containing this hint address')
    search_parser.set_defaults(func=search_cmdline)
    return search_parser


def show_argparser(show_parser):
    """ Add the options of the show command to *show_parser*.

    Also registers show_cmdline() as the ``func`` default.

    :return: *show_parser* itself.
    """
    add = show_parser.add_argument
    add('record_type_name', type=str,
        help='Python record type name. Module must be in Python path')
    add('address', type=argparse_utils.int16, help='Record memory address in hex')
    add('--constraints_file', type=argparse.FileType('r'),
        help='Filename that contains Constraints for the record types in the module. '
             'The validation results will be shown on stdout.')
    show_parser.set_defaults(func=show_cmdline)
    return show_parser


def watch_argparser(watch_parser):
    """ Add the options of the watch command to *watch_parser*.

    ``refresh_rate`` and ``varname`` are optional positional arguments: when
    they are left out, ``refresh_rate`` is 0 and ``varname`` is None. Command
    lines that give both are parsed exactly as before.

    Also registers watch() as the ``func`` default.

    :return: *watch_parser* itself.
    """
    # only useful for live PID. Not rekall/vol.
    watch_parser.add_argument('record_type_name', type=str,
                              help='Python record type name. Module must be in Python path')
    watch_parser.add_argument('address', type=argparse_utils.int16, help='Structure memory address')
    # argparse ignores ``default`` on a positional argument unless it may be
    # omitted, hence nargs='?' on the two trailing arguments
    watch_parser.add_argument('refresh_rate', type=int, nargs='?', default=0,
                              help='Seconds between refresh')
    watch_parser.add_argument('varname', type=str, nargs='?', default=None,
                              help='structure member name (eg. pointername.valuename)')
    watch_parser.set_defaults(func=watch)
    return watch_parser


def dump_argparser(dump_parser):
    """ Add the options of the dump command to *dump_parser*.

    Also registers dump_process() as the ``func`` default.

    :return: *dump_parser* itself.
    """
    # FIXME create the rekall/vol dumpers.
    # only useful for live PID. Not rekall/vol.
    add = dump_parser.add_argument
    add('output_folder_name', type=str, help='Output to this memory dump folder')
    dump_parser.set_defaults(func=dump_process)
    return dump_parser


def output_argparser(rootparser):
    """ Add the mutually exclusive output format flags to *rootparser*.

    Each flag stores its own name in ``output``; the default is 'string'.

    :return: *rootparser* itself.
    """
    formats = rootparser.add_mutually_exclusive_group(required=False)
    # (flag name, which is also the stored value; help text)
    choices = (
        ('string', 'Print results as human readable string'),
        ('json', 'Print results as json readable string'),
        # useful in interactive mode
        ('python', 'Print results as python code'),
        ('pickled', 'Print results as pickled string'),
    )
    for name, help_text in choices:
        formats.add_argument('--' + name, dest='output', action='store_const', const=name,
                             help=help_text)
    formats.set_defaults(output='string')
    return rootparser


def set_logging_level(opts):
    """Configure the logging module from the verbosity options.

    :param opts: parsed options; ``debug`` is read first, ``quiet`` only when
        ``debug`` is false.
    :return: None

    With ``debug`` the root logger is configured at DEBUG level and writes to
    the file 'log' (truncated), and a notice is printed. Otherwise it is
    configured at ERROR level when ``quiet`` is set, else at INFO level.
    Every call also adds a new stdout handler to the 'haystack' logger.
    """
    if opts.debug:
        flog = os.path.normpath('log')
        # FORMAT = '%(relativeCreated)d %(message)s'
        # logging.basicConfig(format=FORMAT, level=level, filename=flog, filemode='w')
        logging.basicConfig(level=logging.DEBUG, filename=flog, filemode='w')
        print('[+] **** COMPLETE debug log to %s' % flog)
    else:
        logging.basicConfig(level=logging.ERROR if opts.quiet else logging.INFO)
    # 2.6, 2.7 compat
    logging.getLogger('haystack').addHandler(logging.StreamHandler(sys.stdout))


def live_watch():
    """Command-line entry point of the watch command on a live process.

    Parses ``sys.argv``, sets ``opts.dumptype`` to DUMPTYPE_LIVE, applies the
    verbosity options and runs the function registered by watch_argparser().
    """
    parser = base_argparser(program_name=os.path.basename(sys.argv[0]),
                            description=WATCH_DESC + DUMPTYPE_LIVE_DESC)
    parser.add_argument('pid', type=int, help='Target PID on the local system')
    watch_argparser(parser)
    output_argparser(parser)
    opts = parser.parse_args(sys.argv[1:])
    opts.dumptype = DUMPTYPE_LIVE
    # apply verbosity
    set_logging_level(opts)
    # execute function
    opts.func(opts)


def volatility_dump():
    """Command-line entry point of the dump command for a volatility image.

    Parses ``sys.argv``, sets ``opts.dumptype`` to DUMPTYPE_VOLATILITY, applies
    the verbosity options and runs the function registered by dump_argparser().
    """
    parser = base_argparser(program_name=os.path.basename(sys.argv[0]),
                            description=DUMP_DESC + DUMPTYPE_VOL_DESC)
    parser.add_argument('dump_filename', type=argparse_utils.readable, help='Use this memory dump file')
    parser.add_argument('pid', type=int, help='Target PID in the OS memory dump')
    dump_argparser(parser)
    opts = parser.parse_args(sys.argv[1:])
    opts.dumptype = DUMPTYPE_VOLATILITY
    # apply verbosity
    set_logging_level(opts)
    # execute function
    opts.func(opts)


def rekall_dump():
    """Command-line entry point of the dump command for a rekall image.

    Parses ``sys.argv``, sets ``opts.dumptype`` to DUMPTYPE_REKALL, applies the
    verbosity options and runs the function registered by dump_argparser().
    """
    parser = base_argparser(program_name=os.path.basename(sys.argv[0]),
                            description=DUMP_DESC + DUMPTYPE_REKALL_DESC)
    parser.add_argument('dump_filename', type=argparse_utils.readable, help='Use this memory dump file')
    parser.add_argument('pid', type=int, help='Target PID in the OS memory dump')
    dump_argparser(parser)
    opts = parser.parse_args(sys.argv[1:])
    opts.dumptype = DUMPTYPE_REKALL
    # apply verbosity
    set_logging_level(opts)
    # execute function
    opts.func(opts)


def search():
    """Command-line entry point of the search command.

    Parses ``sys.argv``, applies the verbosity options and runs the function
    registered by search_argparser().
    """
    parser = base_argparser(program_name=os.path.basename(sys.argv[0]),
                            description=SEARCH_DESC)
    search_argparser(parser)
    output_argparser(parser)
    opts = parser.parse_args(sys.argv[1:])
    # apply verbosity
    set_logging_level(opts)
    # execute function
    opts.func(opts)


def show():
    """Command-line entry point of the show command.

    Parses ``sys.argv``, applies the verbosity options and runs the function
    registered by show_argparser().
    """
    parser = base_argparser(program_name=os.path.basename(sys.argv[0]),
                            description=SHOW_DESC)
    show_argparser(parser)
    output_argparser(parser)
    opts = parser.parse_args(sys.argv[1:])
    # apply verbosity
    set_logging_level(opts)
    # execute function
    opts.func(opts)


# running this module as a script behaves like the search command
if __name__ == '__main__':
    search()