parent ea9b08c550
commit 517dc33918
@@ -0,0 +1,321 @@
import json
import sqlite3
import tempfile
import os
import time
import csv
from pathlib import Path

import pandas as pd
import geoip2.database
import requests
from dateutil import parser, tz

start_time = 0
end_time = 0

# Flag potential password spraying: more than 5 failed logins for a user from
# more than 3 distinct client IPs within the same one-hour window.
password_spray_query = '''
WITH FailedLogins AS (
    SELECT
        UserId,
        ClientIP,
        datetime(CreationTime) AS LoginDate
    FROM
        events
    WHERE
        Operation = 'UserLoginFailed'
)
SELECT
    UserId,
    GROUP_CONCAT(ClientIP, ', ') AS ClientIPs,
    COUNT(DISTINCT ClientIP) AS UniqueIPCount,
    COUNT(*) AS FailedLoginAttempts,
    LoginDate
FROM
    FailedLogins
GROUP BY
    UserId,
    strftime('%Y-%m-%d %H', LoginDate)
HAVING
    COUNT(*) > 5 AND UniqueIPCount > 3
ORDER BY
    FailedLoginAttempts DESC;
'''

# Daily login activity per user: total, successful, and failed attempts.
user_logon_query = '''
SELECT
    UserId,
    date(CreationTime) AS LoginDate,
    COUNT(*) AS TotalLoginAttempts,
    SUM(CASE WHEN Operation = 'UserLoggedIn' THEN 1 ELSE 0 END) AS SuccessfulLogins,
    SUM(CASE WHEN Operation = 'UserLoginFailed' THEN 1 ELSE 0 END) AS FailedLogins
FROM
    events
WHERE
    Operation = 'UserLoggedIn' OR Operation = 'UserLoginFailed'
GROUP BY
    UserId,
    LoginDate
ORDER BY
    LoginDate,
    UserId;
'''

# Distinct operations performed by each user across the whole log.
user_operations_query = '''
SELECT
    UserId,
    COUNT(DISTINCT Operation) AS OperationCount,
    GROUP_CONCAT(Operation, ', ') AS UniqueOperations
FROM
    (SELECT DISTINCT UserId, Operation FROM events)
GROUP BY
    UserId
ORDER BY
    OperationCount DESC;
'''

# Distinct operations performed by each user, broken down by day.
user_operation_by_day_query = '''
SELECT
    UserId,
    DATE(CreationTime) AS OperationDate,
    COUNT(DISTINCT Operation) AS OperationCount,
    GROUP_CONCAT(Operation, ', ') AS UniqueOperations
FROM
    events
GROUP BY
    UserId,
    OperationDate
ORDER BY
    OperationCount DESC;
'''
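
# Note: all four queries above run against a single `events` table that is
# built later by create_sqlite_db_from_dataframe() from the flattened audit
# records, with every column stored as TEXT (Operation, UserId, ClientIP,
# CreationTime, ...).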


def convert_csv(input_file, temp):
    """Extract the AuditData column from the exported CSV and write it out
    as a JSON Lines file inside the temporary directory."""
    json_file = os.path.join(temp, 'audit_data.json')

    with open(input_file, 'r', encoding='utf-8') as csv_file:
        reader = csv.DictReader(csv_file)

        with open(json_file, 'w', encoding='utf-8') as jsonl_file:
            # Each AuditData value is already a JSON-formatted string;
            # parse it and re-serialise it onto a single line.
            for row in reader:
                json_data = json.loads(row['AuditData'])
                jsonl_file.write(json.dumps(json_data) + '\n')

    return json_file
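
# For reference (illustrative only, field values assumed): the input CSV is
# expected to carry an 'AuditData' column whose cells are JSON objects such as
#   {"CreationTime": "2024-01-01T00:00:00", "Operation": "UserLoggedIn",
#    "UserId": "user@example.com", "ClientIP": "203.0.113.10", ...}
# convert_csv() keeps only that column; flatten_json_file() below then reads
# the resulting JSON Lines file one object per line.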


def flatten_json_file(input_file, timezone, chunk_size=10000):
    """Read the JSON Lines file in chunks, convert CreationTime into the
    requested timezone, and flatten the records into a single DataFrame."""
    chunks = []
    with open(input_file, 'r', encoding='utf-8') as file:
        lines = file.readlines()
        for i in range(0, len(lines), chunk_size):
            chunk = [json.loads(line) for line in lines[i:i + chunk_size]]

            # Convert the CreationTime to the desired timezone
            for record in chunk:
                if 'CreationTime' in record:
                    creation_time = parser.parse(record['CreationTime'])

                    # Assume UTC if the timestamp carries no timezone info
                    if creation_time.tzinfo is None:
                        creation_time = creation_time.replace(tzinfo=tz.tzutc())

                    record['CreationTime'] = creation_time.astimezone(timezone).isoformat()

            chunks.append(pd.json_normalize(chunk))

    # Concatenate all chunks into a single DataFrame
    flattened_records = pd.concat(chunks, ignore_index=True)

    return flattened_records


def create_sqlite_db_from_dataframe(dataframe, db_name):
    """Write the flattened DataFrame into a SQLite 'events' table, storing
    every column as TEXT."""
    conn = sqlite3.connect(db_name)

    # Convert all columns to string so the table schema is uniform
    dataframe = dataframe.astype(str)

    # Write the DataFrame to SQLite, treating all fields as text
    dataframe.to_sql('events', conn, if_exists='replace', index=False,
                     dtype={col_name: 'TEXT' for col_name in dataframe.columns})

    conn.close()


def read_detection_rules(rule_file):
    """Load the detection rules (a JSON list of rule objects) from disk."""
    with open(rule_file, 'r') as file:
        rules = json.load(file)
    return rules
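
# Illustrative rule-file layout (the real contents of O365_detection_rules.json
# are not shown here; the keys below are simply the ones this script reads):
# [
#     {
#         "name": "Example rule name",
#         "severity": "Medium",
#         "query": "SELECT * FROM events WHERE Operation = 'UserLoginFailed'"
#     }
# ]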


def apply_detection_logic_sqlite(db_name, rules):
    """Run every rule query against the SQLite database and tag each hit
    with the rule name and severity."""
    conn = sqlite3.connect(db_name)
    all_detected_events = []

    for rule in rules:
        rule_name = rule['name']
        severity = rule['severity']
        query = rule['query']

        detected_events = pd.read_sql_query(query, conn)
        detected_events['RuleName'] = rule_name
        detected_events['Severity'] = severity

        all_detected_events.append(detected_events)

    conn.close()

    if all_detected_events:
        result = pd.concat(all_detected_events, ignore_index=True)
    else:
        result = pd.DataFrame()

    return result


def download_geolite_db(geolite_db_path):
    """Download the GeoLite2 Country database to the given path."""
    url = "https://git.io/GeoLite2-Country.mmdb"
    print(f"Downloading GeoLite2 database from {url}...")
    response = requests.get(url)
    response.raise_for_status()  # Abort if the download failed

    with open(geolite_db_path, 'wb') as file:
        file.write(response.content)
    print(f"GeoLite2 database downloaded and saved to {geolite_db_path}")


def get_country_from_ip(ip, reader):
    """Resolve a client IP to a country name; return 'Unknown' on failure."""
    try:
        response = reader.country(ip)
        return response.country.name
    except Exception as e:
        # print(f"Could not resolve IP {ip}: {e}")
        return 'Unknown'


def analyzeoff365(auditfile, rule_file, output, timezone, include_flattened_data=False,
                  geolite_db_path='GeoLite2-Country.mmdb'):
    """End-to-end analysis of an Office 365 audit log export: flatten the CSV,
    load it into SQLite, apply detection rules, and write an Excel report."""
    start_time = time.time()
    temp_dir = ".temp"
    if output is None or output == "":
        output = os.path.splitext(auditfile)[0]
    try:
        # Create necessary directories
        os.makedirs(output, exist_ok=True)
        os.makedirs(temp_dir, exist_ok=True)

        # Check if the GeoLite2 database exists, and download it if not
        if not os.path.exists(geolite_db_path):
            download_geolite_db(geolite_db_path)

        # Convert the CSV export to JSON Lines
        json_file = convert_csv(auditfile, temp_dir)

        # Input and output file paths
        input_file = json_file
        db_name = os.path.join(temp_dir, 'audit_data.db')

        if rule_file is None:
            rule_file = 'O365_detection_rules.json'
        output_file = f"{output}_o365_report.xlsx"

        # Flatten the JSON file
        flattened_df = flatten_json_file(input_file, timezone)

        # Create SQLite database from the flattened DataFrame
        create_sqlite_db_from_dataframe(flattened_df, db_name)

        # Open the GeoLite2 database and resolve ClientIP to country names
        with geoip2.database.Reader(geolite_db_path) as reader:
            if 'ClientIP' in flattened_df.columns:
                flattened_df['Country'] = flattened_df['ClientIP'].apply(
                    lambda ip: get_country_from_ip(ip, reader))

        # Read detection rules
        rules = read_detection_rules(rule_file)

        # Apply detection logic using SQLite
        detected_events = apply_detection_logic_sqlite(db_name, rules)

        # Reorder columns to make RuleName and Severity the first columns
        if not detected_events.empty:
            columns = ['RuleName', 'Severity'] + [col for col in detected_events.columns
                                                  if col not in ['RuleName', 'Severity']]
            detected_events = detected_events[columns]

        # Run the login-tracking and hunting queries defined at the top of the file
        conn = sqlite3.connect(db_name)
        try:
            user_login_tracker_df = pd.read_sql_query(user_logon_query, conn)
            password_spray_df = pd.read_sql_query(password_spray_query, conn)
            user_operations_df = pd.read_sql_query(user_operations_query, conn)
            user_operation_by_day_df = pd.read_sql_query(user_operation_by_day_query, conn)
        finally:
            conn.close()

        # Create a new workbook with the detection results
        with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
            if include_flattened_data:
                # Split the flattened data into multiple sheets if needed
                max_rows_per_sheet = 65000
                num_sheets = len(flattened_df) // max_rows_per_sheet + 1

                for i in range(num_sheets):
                    start_row = i * max_rows_per_sheet
                    end_row = (i + 1) * max_rows_per_sheet
                    sheet_name = f'Flattened Data {i + 1}'
                    flattened_df.iloc[start_row:end_row].to_excel(writer, sheet_name=sheet_name, index=False)

            # Write detection results, login trackers, and statistics for various fields
            detected_events.to_excel(writer, sheet_name='Detection Results', index=False)
            user_login_tracker_df.to_excel(writer, sheet_name='User Login Tracker', index=False)
            password_spray_df.to_excel(writer, sheet_name='Password Spray Attacks', index=False)
            user_operations_df.to_excel(writer, sheet_name='User Operations', index=False)
            user_operation_by_day_df.to_excel(writer, sheet_name='User Operations by Day', index=False)
            flattened_df['Operation'].value_counts().to_frame().to_excel(writer, sheet_name='Operation Stats')
            flattened_df['ClientIP'].value_counts().to_frame().to_excel(writer, sheet_name='ClientIP Stats')
            flattened_df['Country'].value_counts().to_frame().to_excel(writer, sheet_name='Country Stats')
            flattened_df['UserAgent'].value_counts().to_frame().to_excel(writer, sheet_name='UserAgent Stats')
            flattened_df['UserId'].value_counts().to_frame().to_excel(writer, sheet_name='UserId Stats')
            flattened_df['AuthenticationType'].value_counts().to_frame().to_excel(
                writer, sheet_name='AuthenticationType Stats')

        # Measure the end time
        end_time = time.time()
        print(f"Office365 analysis finished in time: {end_time - start_time:.2f} seconds")

    except Exception as e:
        print(f"An error occurred during the analysis: {e}")

    finally:
        # Clean up the temporary directory
        if os.path.exists(temp_dir):
            for file in Path(temp_dir).glob('*'):
                file.unlink()  # Delete the file
            os.rmdir(temp_dir)  # Remove the directory

    # Calculate and print the total running time, including cleanup
    end_time = time.time()
    running_time = end_time - start_time
    print(f"Office365 hunter finished in time: {running_time:.2f} seconds")
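

# --- Illustrative usage (not part of the original tool; file names are assumed) ---
# A minimal sketch of how analyzeoff365() might be invoked, assuming an audit
# export named 'audit_log.csv' and the default rule file in the working directory.
if __name__ == '__main__':
    analyzeoff365(
        auditfile='audit_log.csv',   # assumed path to the audit log CSV export
        rule_file=None,              # falls back to 'O365_detection_rules.json'
        output=None,                 # defaults to the audit file's base name
        timezone=tz.gettz('UTC'),    # target timezone for CreationTime values
        include_flattened_data=False,
    )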