parent ea9b08c550
commit 517dc33918
@@ -0,0 +1,321 @@
import json
import sqlite3
import tempfile
import os
import time
import csv
from pathlib import Path

import pandas as pd
import geoip2.database
import requests
from dateutil import parser, tz

start_time = 0
end_time = 0

# Flag potential password spraying: more than 5 failed logins for a user from
# more than 3 distinct client IPs within the same one-hour window.
password_spray_query = '''
WITH FailedLogins AS (
    SELECT
        UserId,
        ClientIP,
        datetime(CreationTime) AS LoginDate
    FROM
        events
    WHERE
        Operation = 'UserLoginFailed'
)
SELECT
    UserId,
    GROUP_CONCAT(ClientIP, ', ') AS ClientIPs,
    COUNT(DISTINCT ClientIP) AS UniqueIPCount,
    COUNT(*) AS FailedLoginAttempts,
    LoginDate
FROM
    FailedLogins
GROUP BY
    UserId,
    strftime('%Y-%m-%d %H', LoginDate)
HAVING
    COUNT(*) > 5 AND UniqueIPCount > 3
ORDER BY
    FailedLoginAttempts DESC;
'''

# Daily login activity per user: total, successful, and failed attempts.
user_logon_query = '''
SELECT
    UserId,
    date(CreationTime) AS LoginDate,
    COUNT(*) AS TotalLoginAttempts,
    SUM(CASE WHEN Operation = 'UserLoggedIn' THEN 1 ELSE 0 END) AS SuccessfulLogins,
    SUM(CASE WHEN Operation = 'UserLoginFailed' THEN 1 ELSE 0 END) AS FailedLogins
FROM
    events
WHERE
    Operation = 'UserLoggedIn' OR Operation = 'UserLoginFailed'
GROUP BY
    UserId,
    LoginDate
ORDER BY
    LoginDate,
    UserId;
'''

# Distinct operations performed by each user across the whole log.
user_operations_query = '''
SELECT
    UserId,
    COUNT(DISTINCT Operation) AS OperationCount,
    GROUP_CONCAT(Operation, ', ') AS UniqueOperations
FROM
    (SELECT DISTINCT UserId, Operation FROM events)
GROUP BY
    UserId
ORDER BY
    OperationCount DESC;
'''

# Distinct operations performed by each user, broken down by day.
user_operation_by_day_query = '''
SELECT
    UserId,
    DATE(CreationTime) AS OperationDate,
    COUNT(DISTINCT Operation) AS OperationCount,
    GROUP_CONCAT(Operation, ', ') AS UniqueOperations
FROM
    events
GROUP BY
    UserId,
    OperationDate
ORDER BY
    OperationCount DESC;
'''
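
# Note: all four queries above run against a single `events` table that is
# built later by create_sqlite_db_from_dataframe() from the flattened audit
# records, with every column stored as TEXT (Operation, UserId, ClientIP,
# CreationTime, ...).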


def convert_csv(input_file, temp):
    """Extract the AuditData column from the exported CSV and write it out
    as a JSON Lines file inside the temporary directory."""
    json_file = os.path.join(temp, 'audit_data.json')

    with open(input_file, 'r', encoding='utf-8') as csv_file:
        reader = csv.DictReader(csv_file)

        with open(json_file, 'w', encoding='utf-8') as jsonl_file:
            # Each AuditData value is already a JSON-formatted string;
            # parse it and re-serialise it onto a single line.
            for row in reader:
                json_data = json.loads(row['AuditData'])
                jsonl_file.write(json.dumps(json_data) + '\n')

    return json_file
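
# For reference (illustrative only, field values assumed): the input CSV is
# expected to carry an 'AuditData' column whose cells are JSON objects such as
#   {"CreationTime": "2024-01-01T00:00:00", "Operation": "UserLoggedIn",
#    "UserId": "user@example.com", "ClientIP": "203.0.113.10", ...}
# convert_csv() keeps only that column; flatten_json_file() below then reads
# the resulting JSON Lines file one object per line.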


def flatten_json_file(input_file, timezone, chunk_size=10000):
    """Read the JSON Lines file in chunks, convert CreationTime into the
    requested timezone, and flatten the records into a single DataFrame."""
    chunks = []
    with open(input_file, 'r', encoding='utf-8') as file:
        lines = file.readlines()
        for i in range(0, len(lines), chunk_size):
            chunk = [json.loads(line) for line in lines[i:i + chunk_size]]

            # Convert the CreationTime to the desired timezone
            for record in chunk:
                if 'CreationTime' in record:
                    creation_time = parser.parse(record['CreationTime'])

                    # Assume UTC if the timestamp carries no timezone info
                    if creation_time.tzinfo is None:
                        creation_time = creation_time.replace(tzinfo=tz.tzutc())

                    record['CreationTime'] = creation_time.astimezone(timezone).isoformat()

            chunks.append(pd.json_normalize(chunk))

    # Concatenate all chunks into a single DataFrame
    flattened_records = pd.concat(chunks, ignore_index=True)

    return flattened_records


def create_sqlite_db_from_dataframe(dataframe, db_name):
    """Write the flattened DataFrame into a SQLite 'events' table, storing
    every column as TEXT."""
    conn = sqlite3.connect(db_name)

    # Convert all columns to string so the table schema is uniform
    dataframe = dataframe.astype(str)

    # Write the DataFrame to SQLite, treating all fields as text
    dataframe.to_sql('events', conn, if_exists='replace', index=False,
                     dtype={col_name: 'TEXT' for col_name in dataframe.columns})

    conn.close()


def read_detection_rules(rule_file):
    """Load the detection rules (a JSON list of rule objects) from disk."""
    with open(rule_file, 'r') as file:
        rules = json.load(file)
    return rules
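
# Illustrative rule-file layout (the real contents of O365_detection_rules.json
# are not shown here; the keys below are simply the ones this script reads):
# [
#     {
#         "name": "Example rule name",
#         "severity": "Medium",
#         "query": "SELECT * FROM events WHERE Operation = 'UserLoginFailed'"
#     }
# ]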


def apply_detection_logic_sqlite(db_name, rules):
    """Run every rule query against the SQLite database and tag each hit
    with the rule name and severity."""
    conn = sqlite3.connect(db_name)
    all_detected_events = []

    for rule in rules:
        rule_name = rule['name']
        severity = rule['severity']
        query = rule['query']

        detected_events = pd.read_sql_query(query, conn)
        detected_events['RuleName'] = rule_name
        detected_events['Severity'] = severity

        all_detected_events.append(detected_events)

    conn.close()

    if all_detected_events:
        result = pd.concat(all_detected_events, ignore_index=True)
    else:
        result = pd.DataFrame()

    return result


def download_geolite_db(geolite_db_path):
    """Download the GeoLite2 Country database to the given path."""
    url = "https://git.io/GeoLite2-Country.mmdb"
    print(f"Downloading GeoLite2 database from {url}...")
    response = requests.get(url)
    response.raise_for_status()  # Abort if the download failed

    with open(geolite_db_path, 'wb') as file:
        file.write(response.content)
    print(f"GeoLite2 database downloaded and saved to {geolite_db_path}")


def get_country_from_ip(ip, reader):
    """Resolve a client IP to a country name; return 'Unknown' on failure."""
    try:
        response = reader.country(ip)
        return response.country.name
    except Exception as e:
        # print(f"Could not resolve IP {ip}: {e}")
        return 'Unknown'


def analyzeoff365(auditfile, rule_file, output, timezone, include_flattened_data=False,
                  geolite_db_path='GeoLite2-Country.mmdb'):
    """End-to-end analysis of an Office 365 audit log export: flatten the CSV,
    load it into SQLite, apply detection rules, and write an Excel report."""
    start_time = time.time()
    temp_dir = ".temp"
    if output is None or output == "":
        output = os.path.splitext(auditfile)[0]
    try:
        # Create necessary directories
        os.makedirs(output, exist_ok=True)
        os.makedirs(temp_dir, exist_ok=True)

        # Check if the GeoLite2 database exists, and download it if not
        if not os.path.exists(geolite_db_path):
            download_geolite_db(geolite_db_path)

        # Convert the CSV export to JSON Lines
        json_file = convert_csv(auditfile, temp_dir)

        # Input and output file paths
        input_file = json_file
        db_name = os.path.join(temp_dir, 'audit_data.db')

        if rule_file is None:
            rule_file = 'O365_detection_rules.json'
        output_file = f"{output}_o365_report.xlsx"

        # Flatten the JSON file
        flattened_df = flatten_json_file(input_file, timezone)

        # Create SQLite database from the flattened DataFrame
        create_sqlite_db_from_dataframe(flattened_df, db_name)

        # Open the GeoLite2 database and resolve ClientIP to country names
        with geoip2.database.Reader(geolite_db_path) as reader:
            if 'ClientIP' in flattened_df.columns:
                flattened_df['Country'] = flattened_df['ClientIP'].apply(
                    lambda ip: get_country_from_ip(ip, reader))

        # Read detection rules
        rules = read_detection_rules(rule_file)

        # Apply detection logic using SQLite
        detected_events = apply_detection_logic_sqlite(db_name, rules)

        # Reorder columns to make RuleName and Severity the first columns
        if not detected_events.empty:
            columns = ['RuleName', 'Severity'] + [col for col in detected_events.columns
                                                  if col not in ['RuleName', 'Severity']]
            detected_events = detected_events[columns]

        # Run the login-tracking and hunting queries defined at the top of the file
        conn = sqlite3.connect(db_name)
        try:
            user_login_tracker_df = pd.read_sql_query(user_logon_query, conn)
            password_spray_df = pd.read_sql_query(password_spray_query, conn)
            user_operations_df = pd.read_sql_query(user_operations_query, conn)
            user_operation_by_day_df = pd.read_sql_query(user_operation_by_day_query, conn)
        finally:
            conn.close()

        # Create a new workbook with the detection results
        with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
            if include_flattened_data:
                # Split the flattened data into multiple sheets if needed
                max_rows_per_sheet = 65000
                num_sheets = len(flattened_df) // max_rows_per_sheet + 1

                for i in range(num_sheets):
                    start_row = i * max_rows_per_sheet
                    end_row = (i + 1) * max_rows_per_sheet
                    sheet_name = f'Flattened Data {i + 1}'
                    flattened_df.iloc[start_row:end_row].to_excel(writer, sheet_name=sheet_name, index=False)

            # Write detection results, login trackers, and statistics for various fields
            detected_events.to_excel(writer, sheet_name='Detection Results', index=False)
            user_login_tracker_df.to_excel(writer, sheet_name='User Login Tracker', index=False)
            password_spray_df.to_excel(writer, sheet_name='Password Spray Attacks', index=False)
            user_operations_df.to_excel(writer, sheet_name='User Operations', index=False)
            user_operation_by_day_df.to_excel(writer, sheet_name='User Operations by Day', index=False)
            flattened_df['Operation'].value_counts().to_frame().to_excel(writer, sheet_name='Operation Stats')
            flattened_df['ClientIP'].value_counts().to_frame().to_excel(writer, sheet_name='ClientIP Stats')
            flattened_df['Country'].value_counts().to_frame().to_excel(writer, sheet_name='Country Stats')
            flattened_df['UserAgent'].value_counts().to_frame().to_excel(writer, sheet_name='UserAgent Stats')
            flattened_df['UserId'].value_counts().to_frame().to_excel(writer, sheet_name='UserId Stats')
            flattened_df['AuthenticationType'].value_counts().to_frame().to_excel(
                writer, sheet_name='AuthenticationType Stats')

        # Measure the end time
        end_time = time.time()
        print(f"Office365 analysis finished in time: {end_time - start_time:.2f} seconds")

    except Exception as e:
        print(f"An error occurred during the analysis: {e}")

    finally:
        # Clean up the temporary directory
        if os.path.exists(temp_dir):
            for file in Path(temp_dir).glob('*'):
                file.unlink()  # Delete the file
            os.rmdir(temp_dir)  # Remove the directory

    # Calculate and print the total running time, including cleanup
    end_time = time.time()
    running_time = end_time - start_time
    print(f"Office365 hunter finished in time: {running_time:.2f} seconds")
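

# --- Illustrative usage (not part of the original tool; file names are assumed) ---
# A minimal sketch of how analyzeoff365() might be invoked, assuming an audit
# export named 'audit_log.csv' and the default rule file in the working directory.
if __name__ == '__main__':
    analyzeoff365(
        auditfile='audit_log.csv',   # assumed path to the audit log CSV export
        rule_file=None,              # falls back to 'O365_detection_rules.json'
        output=None,                 # defaults to the audit file's base name
        timezone=tz.gettz('UTC'),    # target timezone for CreationTime values
        include_flattened_data=False,
    )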