import re
import os
import io
import base64
import time
import requests
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from collections import Counter, defaultdict

# ---------------------------------------------------------
# CONFIG
# ---------------------------------------------------------
# Input access log to analyse and the HTML report path.
# NOTE(review): both are absolute, machine-specific Windows paths.
LOGFILE = r"c:\Temp\projectGodaddy\delacruz.belliniseven.ai-Mar-2026.txt"
REPORT_HTML = r"c:\Temp\projectGodaddy\delacruz_report.html"

# Numeric cut-offs. Presumably request-count and error-count limits used to
# flag a client; the code that reads them is not shown here -- confirm units.
HIGH_RATE_THRESHOLD = 50
ERROR_THRESHOLD = 20

# Presumably substrings looked for in request URLs to spot probing of
# sensitive paths -- confirm against the code that consumes this list.
SENSITIVE_KEYWORDS = [
    "admin",
    "login",
    "wp-",
    "cgi-bin",
    "config",
    "backup",
]

# ---------------------------------------------------------
# LOG PARSING
# ---------------------------------------------------------
# Regex for a single access-log line. The field order looks like the
# Common Log Format (NOTE(review): confirm against the real server config).
# Named groups: ip, time, method, url, protocol, status, size.
#   - The second and third whitespace-separated fields are consumed but not
#     captured.
#   - The quoted request must contain a method, a URL and a protocol; a
#     request field without all three parts (e.g. "-") does not match.
#   - There is no end anchor, so anything after the size field is ignored
#     when the pattern is applied with .match().
# NOTE(review): the layout around the timestamp group is ragged (blank lines,
# lost indentation, and the "# timestamp" comment sits on the following \s+
# line). re.VERBOSE ignores that whitespace, so matching is unaffected.
LOG_PATTERN = re.compile(r"""
    (?P<ip>\S+)\s+                # IP address
    \S+\s+\S+\s+                  # ident/user (ignored)


\[(?P<time>.*?)\]

\s+          # timestamp
    "(?P<method>\S+)\s+           # HTTP method
    (?P<url>\S+)\s+               # URL
    (?P<protocol>[^"]+)"\s+       # protocol
    (?P<status>\d{3})\s+          # status code
    (?P<size>\S+)                 # response size
""", re.VERBOSE)

# strptime() format for the captured "time" group, e.g.
# "10/Mar/2026:13:55:36 +0000". The %z directive yields a timezone-aware
# datetime; %b (abbreviated month name) is locale-dependent.
TIME_FORMAT = "%d/%b/%Y:%H:%M:%S %z"


def parse_log_line(line):
    m = LOG_PATTERN.match(line)
    if not m:
        return None

    d = m.groupdict()

    try:
        dt = datetime.strptime(d["time"], TIME_FORMAT)
    except Exception:
        return None

    size = int(d["size"]) if d["size"].isdigit() else 0

    return {