# apt-hunter/src/lib/EvtxHunt.py

import csv
import re
from netaddr import *  # 导入netaddr库的所有内容，用于处理网络地址
import xml.etree.ElementTree as ET  # XML解析器
import pandas as pd  # 数据分析库
from datetime import datetime, timezone  # 日期时间处理
from evtx import PyEvtxParser  # 解析Windows事件日志文件的库
from dateutil.parser import parse, isoparse  # 解析日期时间字符串
from pytz import timezone  # 处理时区
# Length threshold; not referenced anywhere else in the visible part of this module.
minlength = 1000
# Module-level accumulator for matched events: a one-element list holding a
# dict of parallel column lists (one entry per reported event in each list).
Hunting_events = [{'Date and Time': [], 'timestamp': [], 'Channel': [], 'Computer': [], 'Event ID': [], 'Original Event Log': []}]
# Patterns that pull single fields out of an event's XML text.
# Raw strings keep the exact same pattern text while avoiding the invalid
# "\/" escape sequence warning raised for ordinary string literals.
EventID_rex = re.compile(r'<EventID.*>(.*)<\/EventID>', re.IGNORECASE)
Channel_rex = re.compile(r'<Channel.*>(.*)<\/Channel>', re.IGNORECASE)
Computer_rex = re.compile(r'<Computer.*>(.*)<\/Computer>', re.IGNORECASE)
def Evtx_hunt(files, str_regexes, eid, input_timzone, output, timestart, timeend):
    """Search Windows event log (EVTX) files for records matching regexes.

    Every record of every file is checked against an optional time window and
    an optional event ID, then against each pattern in ``str_regexes``
    (case-insensitive). Matching records are appended to the module-level
    ``Hunting_events`` accumulator, one row per pattern that matches, and
    ``hunt_report(output)`` is called once all files have been processed.

    Args:
        files: Iterable of EVTX file paths; each one is converted with ``str``.
        str_regexes: Iterable of regular-expression strings. Patterns that
            fail to compile are reported once and skipped.
        eid: Event ID to restrict the search to, or ``None`` for any ID.
            Compared as a string against the ID extracted from the record.
        input_timzone: ``tzinfo`` the record timestamps are converted to
            before being reported.
        output: Prefix handed to ``hunt_report`` for the CSV file name.
        timestart: Lower bound (exclusive) as a POSIX timestamp, or ``None``.
        timeend: Upper bound (exclusive) as a POSIX timestamp, or ``None``.
            The time filter is applied only when both bounds are given.
    """
    # Compile every pattern once up front rather than once per record; a bad
    # pattern is reported a single time and no longer blocks the other ones.
    compiled_regexes = []
    for str_regex in str_regexes:
        try:
            compiled_regexes.append(re.compile(str_regex, re.IGNORECASE))
        except (re.error, TypeError) as e:
            print("issue compiling regex : " + str(str_regex) + "\n Error : " + str(e))

    # IDs extracted from the XML are strings, so normalise the requested ID.
    wanted_eid = None if eid is None else str(eid)
    filter_by_time = timestart is not None and timeend is not None
    collected = Hunting_events[0]

    for file in files:
        file = str(file)
        print("Analyzing " + file)
        try:
            parser = PyEvtxParser(file)
        except Exception:
            print("Issue analyzing " + file + "\nplease check if its not corrupted")
            continue

        for record in parser.records():
            try:
                data = record['data']

                # Parse the timestamp at most once per record: eagerly when a
                # time window is requested, otherwise only after a match.
                event_time = None
                if filter_by_time:
                    event_time = parse(record["timestamp"]).astimezone(input_timzone)
                    if not (timestart < event_time.timestamp() < timeend):
                        continue  # outside the requested time window

                EventID = EventID_rex.findall(data)
                if not EventID or (wanted_eid is not None and EventID[0] != wanted_eid):
                    continue

                # The record is reported once for each pattern that matches it.
                hits = sum(1 for rex in compiled_regexes if rex.search(data))
                if hits == 0:
                    continue

                if event_time is None:
                    event_time = parse(record["timestamp"]).astimezone(input_timzone)
                Computer = Computer_rex.findall(data)
                Channel = Channel_rex.findall(data)

                # Build the complete row before touching the accumulator so a
                # failure can never leave the column lists with unequal
                # lengths (which would break DataFrame construction later).
                row = {
                    'Date and Time': event_time.isoformat(),
                    'timestamp': event_time.timestamp(),
                    'Channel': Channel[0] if Channel else " ",
                    'Computer': Computer[0] if Computer else " ",
                    'Event ID': EventID[0],
                    'Original Event Log': str(data).replace("\r", " ").replace("\n", " "),
                }
                for _ in range(hits):
                    for column, value in row.items():
                        collected[column].append(value)
            except Exception as e:
                # str() keeps the handler itself from raising on a missing or
                # non-string payload.
                print("issue searching log : " + str(record.get('data', '')) + "\n Error : " + str(e))
    hunt_report(output)
def hunt_report(output):
    """Write the accumulated hunting events to a CSV report.

    Builds a DataFrame from the module-level ``Hunting_events`` accumulator,
    prints how many events were collected, and saves the table without the
    index column.

    Args:
        output: Prefix of the report path; ``"_hunting.csv"`` is appended.
    """
    collected = Hunting_events[0]
    report_frame = pd.DataFrame(collected)
    hit_count = len(collected["timestamp"])
    print(f"Found {hit_count} Events")
    report_path = output + "_hunting.csv"
    report_frame.to_csv(report_path, index=False)