import json
import sqlite3
import os
import time
import csv
from pathlib import Path

import pandas as pd
import geoip2.database
import requests
from dateutil import parser, tz

password_spray_query = '''
WITH FailedLogins AS (
    SELECT
        UserId,
        ClientIP,
        datetime(CreationTime) AS LoginDate
    FROM
        events
    WHERE
        Operation = 'UserLoginFailed'
)
SELECT
    UserId,
    GROUP_CONCAT(ClientIP, ', ') AS ClientIPs,
    COUNT(DISTINCT ClientIP) AS UniqueIPCount,
    COUNT(*) AS FailedLoginAttempts,
    MIN(LoginDate) AS LoginDate
FROM
    FailedLogins
GROUP BY
    UserId,
    strftime('%Y-%m-%d %H', LoginDate)
HAVING
    COUNT(*) > 5 AND COUNT(DISTINCT ClientIP) > 3
ORDER BY
    FailedLoginAttempts DESC;
'''

user_logon_query = '''
SELECT
    UserId,
    date(CreationTime) AS LoginDate,
    COUNT(*) AS TotalLoginAttempts,
    SUM(CASE WHEN Operation = 'UserLoggedIn' THEN 1 ELSE 0 END) AS SuccessfulLogins,
    SUM(CASE WHEN Operation = 'UserLoginFailed' THEN 1 ELSE 0 END) AS FailedLogins
FROM
    events
WHERE
    Operation = 'UserLoggedIn' OR Operation = 'UserLoginFailed'
GROUP BY
    UserId,
    LoginDate
ORDER BY
    LoginDate,
    UserId;
'''

user_operations_query = '''
SELECT
    UserId,
    COUNT(DISTINCT Operation) AS OperationCount,
    GROUP_CONCAT(Operation, ', ') AS UniqueOperations
FROM
    (SELECT DISTINCT UserId, Operation FROM events)
GROUP BY
    UserId
ORDER BY
    OperationCount DESC;
'''

user_operation_by_day_query = '''
SELECT
    UserId,
    DATE(CreationTime) AS OperationDate,
    COUNT(DISTINCT Operation) AS OperationCount,
    GROUP_CONCAT(Operation, ', ') AS UniqueOperations
FROM
    events
GROUP BY
    UserId,
    OperationDate
ORDER BY
    OperationCount DESC;
'''

def convert_csv(input_file, temp):
    """Extract the AuditData column from an audit CSV and write it out as JSON Lines."""
    json_file = os.path.join(temp, 'audit_data.json')
    with open(input_file, 'r', encoding='utf-8') as csv_file:
        # Create a CSV reader
        reader = csv.DictReader(csv_file)
        with open(json_file, 'w', encoding='utf-8') as jsonl_file:
            # Extract and write the AuditData column to a file as JSON Lines
            for row in reader:
                # Extract the AuditData, which is already a JSON-formatted string
                json_data = json.loads(row['AuditData'])
                # Convert the JSON object back to a string to store in the file
                json_string = json.dumps(json_data)
                # Write the JSON string to the file with a newline
                jsonl_file.write(json_string + '\n')
    return json_file
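
# NOTE (illustrative, not part of the original upload): convert_csv() assumes the input is a
# Microsoft 365 Unified Audit Log CSV export containing an 'AuditData' column with one JSON
# object per row, e.g. a header such as:
#   CreationDate,UserIds,Operations,AuditData
# Column names other than 'AuditData' may vary between exports; only the AuditData payload is kept.
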
def flatten_json_file(input_file, timezone, chunk_size=10000):
    """Read a JSON Lines file in chunks, normalise CreationTime to the given timezone,
    and return a single flattened DataFrame."""
    chunks = []
    with open(input_file, 'r') as file:
        lines = file.readlines()
        for i in range(0, len(lines), chunk_size):
            chunk = [json.loads(line) for line in lines[i:i + chunk_size]]
            # Convert the CreationTime to the desired timezone
            for record in chunk:
                if 'CreationTime' in record:
                    # Parse the CreationTime
                    creation_time = parser.parse(record['CreationTime'])
                    # Assume the original time is in UTC if no timezone info is present
                    if creation_time.tzinfo is None:
                        creation_time = creation_time.replace(tzinfo=tz.tzutc())
                    # Convert the CreationTime to the desired timezone
                    record['CreationTime'] = creation_time.astimezone(timezone).isoformat()
            chunks.append(pd.json_normalize(chunk))
    # Concatenate all chunks into a single DataFrame
    flattened_records = pd.concat(chunks, ignore_index=True)
    return flattened_records
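
# NOTE (illustrative): pd.json_normalize() flattens nested objects into dot-separated column
# names, so a hypothetical record like {"Client": {"OS": "Windows"}} would yield a column
# named 'Client.OS'. The exact columns therefore depend on the fields present in the export,
# which is why the stats sheets below only use columns that actually exist.
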
def create_sqlite_db_from_dataframe(dataframe, db_name):
    """Write the flattened DataFrame into a SQLite database as a single 'events' table."""
    conn = sqlite3.connect(db_name)
    # Convert all columns to string
    dataframe = dataframe.astype(str)
    # Write the DataFrame to SQLite, treating all fields as text
    dataframe.to_sql('events', conn, if_exists='replace', index=False,
                     dtype={col_name: 'TEXT' for col_name in dataframe.columns})
    conn.close()

def read_detection_rules(rule_file):
    """Load the JSON detection rules from disk."""
    with open(rule_file, 'r') as file:
        rules = json.load(file)
    return rules
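
# Expected rule file layout (a sketch inferred from apply_detection_logic_sqlite() below, which
# reads 'name', 'severity' and 'query' from each rule; the example rule itself is hypothetical):
# [
#     {
#         "name": "Suspicious inbox rule created",
#         "severity": "Medium",
#         "query": "SELECT UserId, ClientIP, CreationTime FROM events WHERE Operation = 'New-InboxRule'"
#     }
# ]
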
def apply_detection_logic_sqlite(db_name, rules):
    """Run each detection rule's SQL query against the events table and tag the hits."""
    conn = sqlite3.connect(db_name)
    all_detected_events = []
    for rule in rules:
        rule_name = rule['name']
        severity = rule['severity']
        query = rule['query']
        detected_events = pd.read_sql_query(query, conn)
        detected_events['RuleName'] = rule_name
        detected_events['Severity'] = severity
        all_detected_events.append(detected_events)
    conn.close()
    if all_detected_events:
        result = pd.concat(all_detected_events, ignore_index=True)
    else:
        result = pd.DataFrame()
    return result

def download_geolite_db(geolite_db_path):
    """Download the GeoLite2 country database if it is not already present locally."""
    url = "https://git.io/GeoLite2-Country.mmdb"
    print(f"Downloading GeoLite2 database from {url}...")
    response = requests.get(url, timeout=60)
    response.raise_for_status()  # Check if the download was successful
    with open(geolite_db_path, 'wb') as file:
        file.write(response.content)
    print(f"GeoLite2 database downloaded and saved to {geolite_db_path}")

def get_country_from_ip(ip, reader):
    """Resolve an IP address to a country name, returning 'Unknown' on any lookup failure."""
    try:
        response = reader.country(ip)
        return response.country.name
    except Exception:
        # Private, malformed, or unresolvable addresses all fall through to 'Unknown'
        return 'Unknown'

def analyzeoff365(auditfile, rule_file, output, timezone, include_flattened_data=False,
                  geolite_db_path='GeoLite2-Country.mmdb'):
    """Analyse an Office 365 unified audit log CSV and write the findings to an Excel report."""
    start_time = time.time()
    temp_dir = ".temp"
    if output is None or output == "":
        output = os.path.splitext(auditfile)[0]
    try:
        # Create necessary directories
        os.makedirs(output, exist_ok=True)
        os.makedirs(temp_dir, exist_ok=True)

        # Check if the GeoLite2 database exists, and download it if not
        if not os.path.exists(geolite_db_path):
            download_geolite_db(geolite_db_path)

        # Convert the CSV to JSON Lines
        json_file = convert_csv(auditfile, temp_dir)

        # Input and output file paths
        input_file = json_file
        db_name = os.path.join(temp_dir, 'audit_data.db')
        if rule_file is None:
            rule_file = 'O365_detection_rules.json'
        output_file = f"{output}_o365_report.xlsx"

        # Flatten the JSON file
        flattened_df = flatten_json_file(input_file, timezone)

        # Create SQLite database from the flattened DataFrame
        create_sqlite_db_from_dataframe(flattened_df, db_name)

        # Open the GeoLite2 database and resolve ClientIP to country names
        with geoip2.database.Reader(geolite_db_path) as reader:
            if 'ClientIP' in flattened_df.columns:
                flattened_df['Country'] = flattened_df['ClientIP'].apply(
                    lambda ip: get_country_from_ip(ip, reader))

        # Read detection rules and apply them using SQLite
        rules = read_detection_rules(rule_file)
        detected_events = apply_detection_logic_sqlite(db_name, rules)

        # Reorder columns to make RuleName and Severity the first columns
        if not detected_events.empty:
            columns = ['RuleName', 'Severity'] + [col for col in detected_events.columns
                                                  if col not in ['RuleName', 'Severity']]
            detected_events = detected_events[columns]

        # Run the login-tracking, password-spray, and user-operation queries
        conn = sqlite3.connect(db_name)
        try:
            user_login_tracker_df = pd.read_sql_query(user_logon_query, conn)
            password_spray_df = pd.read_sql_query(password_spray_query, conn)
            user_operations_df = pd.read_sql_query(user_operations_query, conn)
            user_operation_by_day_df = pd.read_sql_query(user_operation_by_day_query, conn)
        finally:
            conn.close()

        # Create a new workbook with the detection results
        with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
            if include_flattened_data:
                # Split the flattened data into multiple sheets if needed
                max_rows_per_sheet = 65000
                num_sheets = (len(flattened_df) + max_rows_per_sheet - 1) // max_rows_per_sheet
                for i in range(num_sheets):
                    start_row = i * max_rows_per_sheet
                    end_row = (i + 1) * max_rows_per_sheet
                    sheet_name = f'Flattened Data {i + 1}'
                    flattened_df.iloc[start_row:end_row].to_excel(writer, sheet_name=sheet_name, index=False)

            # Write the detection and tracking results
            detected_events.to_excel(writer, sheet_name='Detection Results', index=False)
            user_login_tracker_df.to_excel(writer, sheet_name='User Login Tracker', index=False)
            password_spray_df.to_excel(writer, sheet_name='Password Spray Attacks', index=False)
            user_operations_df.to_excel(writer, sheet_name='User Operations', index=False)
            user_operation_by_day_df.to_excel(writer, sheet_name='User Operations by Day', index=False)

            # Write statistics for various fields, skipping columns missing from this export
            stat_columns = ['Operation', 'ClientIP', 'Country', 'UserAgent', 'UserId', 'AuthenticationType']
            for column in stat_columns:
                if column in flattened_df.columns:
                    flattened_df[column].value_counts().to_frame().to_excel(
                        writer, sheet_name=f'{column} Stats')

        # Measure the end time
        end_time = time.time()
        print(f"Office365 analysis finished in time: {end_time - start_time:.2f} seconds")

    except Exception as e:
        print(f"An error occurred during the analysis: {e}")

    finally:
        # Clean up the temporary directory
        if os.path.exists(temp_dir):
            for file in Path(temp_dir).glob('*'):
                file.unlink()  # Delete the file
            os.rmdir(temp_dir)  # Remove the directory

    # Calculate and print the total running time
    end_time = time.time()
    running_time = end_time - start_time
    print(f"Office365 hunter finished in time: {running_time:.2f} seconds")