#!/usr/bin/env python3
"""
Error Pattern Detector — Dynamic Bug Pattern Scanner

Scans the codebase for patterns that have caused bugs before.
When a new bug is fixed, add a pattern rule to the PATTERNS list
so the scanner catches similar issues elsewhere.

Stack-agnostic scanner engine with universal starter patterns.
Add project-specific patterns to the PATTERNS list as bugs are discovered.

Usage:
    detect_error_patterns.py                    # Full scan
    detect_error_patterns.py --staged           # Staged files only
    detect_error_patterns.py --severity high    # Filter by severity
    detect_error_patterns.py --json             # JSON output (for CI)
    detect_error_patterns.py --summary          # One-line summary
    detect_error_patterns.py --list-rules       # List all pattern rules
    detect_error_patterns.py --analyze "error"  # Reactive error analysis
    detect_error_patterns.py --analyze-log f.log # Analyze log file for errors
    detect_error_patterns.py --list-knowledge   # List knowledge base entries

Exit codes:
    0 — No high-severity findings
    1 — High-severity findings detected
    2 — Script error
"""
import json
import re
import subprocess
import sys
from dataclasses import dataclass, field
from pathlib import Path

# Absolute directory two levels above the folder that contains this script
# (parents[0] is the script's own folder, parents[2] is its grandparent).
# Presumably the project root — confirm against the actual checkout layout.
ROOT = Path(__file__).resolve().parents[2]


# ============================================================================
# DATA CLASSES
# ============================================================================

@dataclass
class PatternRule:
    """A single error pattern to detect.

    Pure data record: it declares the fields below and contains no matching
    logic and no validation. How a scanner interprets the fields is decided by
    the code that consumes the rule, not by this class.

    Only the first six fields are required; the rest have defaults so a rule
    can be declared with just an id, name, description, severity, glob and
    regex.
    """
    id: str  # rule identifier; the registry in this file uses ids such as "EP-001"
    name: str  # short human-readable title
    description: str  # longer explanation of why the pattern is a problem
    severity: str  # "high", "medium", "low" (stored as a plain str, not checked here)
    file_glob: str  # e.g. "**/*.py" or "src/**/*.ts"
    pattern: str  # regex pattern
    negative_pattern: str = ""  # regex; if present nearby, suppress the finding
    context_lines: int = 0  # extra lines to check for negative_pattern
    fix_hint: str = ""  # suggested remediation text
    incident: str = ""  # reference to the bug that caused this rule
    # NOTE(review): the element type and purpose of `examples` are not shown in
    # this part of the file — confirm before relying on it.
    examples: list = field(default_factory=list)
    # default_factory gives each rule its own list rather than one shared default.
    exclude_paths: list[str] = field(default_factory=list)  # substrings to skip


@dataclass
class Finding:
    """A detected pattern match.

    Plain data record with eight required fields and no defaults. The code
    that creates Finding objects is not visible in this part of the file, so
    the per-field notes below are assumptions based on the field names and on
    the matching PatternRule fields — confirm against the scanner.
    """
    rule_id: str  # presumably PatternRule.id of the rule that matched
    rule_name: str  # presumably PatternRule.name of the rule that matched
    severity: str  # presumably PatternRule.severity ("high", "medium", "low")
    file: str  # presumably the path of the file containing the match
    line: int  # presumably the line number of the match — TODO confirm 0- vs 1-based
    code: str  # presumably the source text of the matching line
    fix_hint: str  # presumably PatternRule.fix_hint
    incident: str  # presumably PatternRule.incident


@dataclass
class ErrorKnowledge:
    """Knowledge-base entry tying an error fingerprint to pattern rules.

    Used for reactive analysis: an error text is described by keyword sets,
    and ``rules`` lists the patterns to scan for when the entry matches.

    Attributes:
        id: Identifier of the entry.
        name: Human-readable name of the entry.
        keywords: Primary keyword set; ALL must match (case-insensitive).
        alt_keywords: Alternative keyword sets (any one set matches).
        category: Grouping label, e.g. "browser", "python", "react",
            "database".
        rules: Patterns to scan when matched; defaults to a new empty list
            for each instance.
    """
    id: str
    name: str
    keywords: list[str]
    alt_keywords: list[list[str]]
    category: str
    rules: list[PatternRule] = field(default_factory=list)

    def __post_init__(self):
        # Any falsy value (None, an empty list, ...) is replaced by a fresh
        # empty list; a non-empty value is kept as the same object.
        self.alt_keywords = self.alt_keywords or []


# ============================================================================
# PATTERN REGISTRY — Universal starter patterns
# Add project-specific patterns below as bugs are discovered.
# ============================================================================

PATTERNS = [
    # --- Universal: Hardcoded secrets ---
    PatternRule(
        id="EP-001",
        name="Potential hardcoded secret",
        description="Hardcoded passwords, API keys, or tokens in source code.",
        severity="high",
        file_glob="**/*",
        pattern=r"""(?:password|passwd|pwd|secret|api_key|apikey|token|auth_token)\s*[:=]\s*["'][^"']{8,}["']""",
        negative_pattern=r"example|placeholder|test|mock|fake|dummy|changeme|your[_-]|CHANGE_ME|TODO",
        context_lines=1,
        fix_hint="Use environment variables or a secrets manager instead of hardcoding credentials",
        exclude_paths=["test_", "_test.", ".test.", "spec/", "mock", "fixture", "node_modules", ".git"],
    ),

    # --- Universal: TODO/FIXME/HACK left in committed code ---
    PatternRule(
        id="EP-002",
        name="TODO/FIXME/HACK marker in code",
        description="Code markers that indicate incomplete or temporary work.",
        severity="low",
        file_glob="**/*",
        pattern=r"(?:#|//|/\*)\s*(?:TODO|FIXME|HACK|XXX|TEMP|WORKAROUND)\b",
        fix_hint="Resolve the TODO or create a backlog item to track it",
        exclude_paths=["node_modules", ".git", "__pycache__", "vendor/"],
    ),

    # --- Python: Bare except ---
    PatternRule(
        id="EP-010",
        name="Bare except clause",
        description="Bare 'except:' catches all exceptions including KeyboardInterrupt and SystemExit.",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"^\s*except\s*:",
        negative_pattern=r"noqa|type:\s*ignore",
        context_lines=0,
        fix_hint="Use 'except Exception:' or a specific exception type",
    ),

    # --- Python: eval/exec usage ---
    PatternRule(
        id="EP-011",
        name="eval() or exec() usage",
        description="Dynamic code execution is a security risk and makes debugging difficult.",
        severity="high",
        file_glob="**/*.py",
        pattern=r"\b(?:eval|exec)\s*\(",
        negative_pattern=r"noqa|# safe|# trusted",
        context_lines=1,
        fix_hint="Use ast.literal_eval() for data parsing, or refactor to avoid dynamic code execution",
        exclude_paths=["test_", "setup.py", "conftest.py"],
    ),

    # --- Python: Mutable default argument ---
    PatternRule(
        id="EP-012",
        name="Mutable default argument",
        description="Using mutable objects (list, dict, set) as default arguments causes shared state bugs.",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"def\s+\w+\s*\([^)]*:\s*(?:list|dict|set|List|Dict|Set)\s*=\s*(?:\[\]|\{\}|set\(\))",
        fix_hint="Use None as default and create the mutable object inside the function body",
        exclude_paths=["test_", "__pycache__"],
    ),

    # --- Python: Sync I/O in async function ---
    PatternRule(
        id="EP-013",
        name="Sync I/O in async function",
        description="Calling sync file/network I/O in an async function blocks the event loop.",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"(?:open\s*\(|\.read_text\s*\(|\.read_bytes\s*\(|\.write_text\s*\(|requests\.)",
        negative_pattern=r"run_in_executor|aiofiles|anyio|aiohttp|httpx",
        context_lines=10,
        fix_hint="Use aiofiles or run_in_executor for file I/O, use httpx/aiohttp for HTTP in async functions",
        exclude_paths=["test_", "conftest.py", "migrations/", "alembic/"],
    ),

    # --- JS/TS: console.log in production code ---
    PatternRule(
        id="EP-020",
        name="console.log in production code",
        description="Console statements left in production code pollute output and may leak data.",
        severity="low",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"\bconsole\.(log|debug|info|warn)\s*\(",
        negative_pattern=r"eslint-disable|logger|// keep",
        context_lines=0,
        fix_hint="Use a proper logging library or remove console statements before committing",
        exclude_paths=["test", "spec", "__tests__", "node_modules", ".next", "dist/"],
    ),

    # --- JS/TS: Unhandled promise ---
    PatternRule(
        id="EP-021",
        name="Unhandled promise (no catch or await)",
        description="Calling an async function without await or .catch() silently swallows errors.",
        severity="medium",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"(?:fetch|axios\.\w+|\.post|\.get|\.put|\.delete|\.patch)\s*\([^)]*\)\s*;",
        negative_pattern=r"await|\.then|\.catch|try\s*\{",
        context_lines=3,
        fix_hint="Add 'await' or '.catch()' to handle promise rejections",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # =====================================================================
    # BUG PATTERNS (EP-070+)
    # Common coding mistakes that cause production bugs.
    # =====================================================================

    # --- JS: Loose equality ---
    PatternRule(
        id="EP-070",
        name="Loose equality (== instead of ===)",
        description="== performs type coercion which causes subtle bugs (e.g., '' == false is true).",
        severity="medium",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"(?<![!=<>])=[=](?!=)\s",
        negative_pattern=r"eslint-disable|# intentional|null\s*==\s|==\s*null",
        context_lines=0,
        fix_hint="Use === for strict equality. Exception: == null to check both null and undefined.",
        exclude_paths=["test", "spec", "__tests__", "node_modules", ".next", "dist/"],
    ),

    # --- Python: open() without context manager ---
    PatternRule(
        id="EP-071",
        name="File open without context manager",
        description="Using open() without 'with' risks leaving file handles unclosed on exceptions.",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"^\s*\w+\s*=\s*open\s*\(",
        negative_pattern=r"# managed|# closed below|contextmanager|closing",
        context_lines=3,
        fix_hint="Use 'with open(...) as f:' to ensure the file is always closed.",
        exclude_paths=["test_", "_test.", "conftest.py", "setup.py"],
    ),

    # --- Python: wildcard import ---
    PatternRule(
        id="EP-072",
        name="Wildcard import",
        description="'from x import *' pollutes the namespace and makes it impossible to trace symbol origins.",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"^\s*from\s+\S+\s+import\s+\*",
        negative_pattern=r"# noqa|__init__|# re-export",
        context_lines=0,
        fix_hint="Import specific names: 'from module import ClassA, func_b'",
        exclude_paths=["test_", "_test.", "__init__.py", "conftest.py"],
    ),

    # --- Python: identity vs equality ---
    PatternRule(
        id="EP-073",
        name="Identity check on non-singleton",
        description="Using 'is' to compare strings, numbers, or lists checks object identity, not value.",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"""\bis\s+(?:["']\w|[0-9]|\[|\()""",
        negative_pattern=r"is\s+None|is\s+True|is\s+False|is\s+not\s+None|# intentional",
        context_lines=0,
        fix_hint="Use == for value comparison. 'is' is only correct for None, True, False, and singletons.",
        exclude_paths=["test_", "_test."],
    ),

    # --- React: missing key prop ---
    PatternRule(
        id="EP-074",
        name="React list without key prop",
        description="Rendering lists without unique key props causes unnecessary re-renders and bugs.",
        severity="medium",
        file_glob="**/*.{tsx,jsx}",
        pattern=r"\.map\s*\([^)]*\)\s*(?:=>|\.)\s*(?:<\w+)(?![^>]*key\s*=)",
        negative_pattern=r"key=|# no key needed",
        context_lines=2,
        fix_hint="Add a unique key prop to each element rendered in a .map(): <Item key={item.id} />",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- React: direct state mutation ---
    PatternRule(
        id="EP-075",
        name="Direct state mutation in React",
        description="Mutating state directly (push, splice, assignment) bypasses React's change detection.",
        severity="high",
        file_glob="**/*.{tsx,jsx,ts,js}",
        pattern=r"(?:state\.\w+\s*=\s|state\.\w+\.push\s*\(|state\.\w+\.splice\s*\(|state\.\w+\.pop\s*\()",
        negative_pattern=r"useState|useReducer|this\.setState|# immutable copy|\.slice\(\)|\.\.\.state",
        context_lines=3,
        fix_hint="Use setState/useState setter with a new object/array: setState(prev => [...prev, item])",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- Database: SELECT * ---
    PatternRule(
        id="EP-076",
        name="SELECT * in production query",
        description="SELECT * fetches unnecessary columns, wastes bandwidth, and breaks on schema changes.",
        severity="low",
        file_glob="**/*",
        pattern=r"""(?:SELECT|select)\s+\*\s+(?:FROM|from)\s""",
        negative_pattern=r"# ok|COUNT\(\*\)|count\(\*\)|test|migration|-- select|EXISTS",
        context_lines=1,
        fix_hint="List specific columns: SELECT id, name, email FROM users",
        exclude_paths=["test_", "_test.", "migration", "alembic", "node_modules"],
    ),

    # --- Database: missing LIMIT ---
    PatternRule(
        id="EP-077",
        name="Unbounded query (no LIMIT)",
        description="Queries without LIMIT can return millions of rows and crash the application.",
        severity="medium",
        file_glob="**/*",
        pattern=r"""(?:SELECT|select)\s+(?!\*\s*FROM.*(?:LIMIT|limit|TOP|top|FETCH|fetch)).*(?:FROM|from)\s+\w+\s*(?:WHERE|where|;|$)""",
        negative_pattern=r"LIMIT|limit|TOP|top|FETCH|fetch|COUNT|count|EXISTS|exists|migration|\.first\(|\.one\(|\.get\(|\.scalar\(",
        context_lines=2,
        fix_hint="Add LIMIT clause to prevent unbounded result sets. ORM: use .limit() or pagination.",
        exclude_paths=["test_", "_test.", "migration", "alembic", "node_modules"],
    ),

    # --- Hardcoded localhost/ports in non-config ---
    PatternRule(
        id="EP-078",
        name="Hardcoded host/port in application code",
        description="Hardcoded localhost or port numbers in non-config files break across environments.",
        severity="low",
        file_glob="**/*.{py,ts,tsx,js,jsx}",
        pattern=r"""["'](?:localhost|127\.0\.0\.1):\d{4,5}["']""",
        negative_pattern=r"# config|# default|os\.environ|getenv|process\.env|\.env|test|fixture|example|fallback",
        context_lines=2,
        fix_hint="Use environment variables or config files for host/port values.",
        exclude_paths=["test_", "_test.", "conftest.py", "node_modules", ".env", "config"],
    ),

    # --- JS: async void / fire-and-forget ---
    PatternRule(
        id="EP-079",
        name="Async function without error handling",
        description="Async arrow functions in event handlers without try/catch silently swallow errors.",
        severity="medium",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"(?:on\w+|addEventListener)\s*(?:=|\()\s*async\s",
        negative_pattern=r"try\s*\{|\.catch|# handled|ErrorBoundary",
        context_lines=5,
        fix_hint="Wrap async event handlers in try/catch or use an error boundary.",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- Regex DoS (catastrophic backtracking) ---
    PatternRule(
        id="EP-080",
        name="Regex denial of service risk",
        description="Nested quantifiers like (a+)+ or (a|a)* cause exponential backtracking on crafted input.",
        severity="high",
        file_glob="**/*",
        pattern=r"""(?:re\.compile|new\s+RegExp|/)\s*\(?["'/].*(?:\([^)]*[+*]\)\s*[+*]|\([^)]*\|[^)]*\)\s*[+*])""",
        negative_pattern=r"# safe|# bounded|# validated input|atomic|possessive",
        context_lines=1,
        fix_hint="Avoid nested quantifiers. Use atomic groups or rewrite the pattern to prevent backtracking.",
        exclude_paths=["test_", "_test.", "node_modules"],
    ),

    # =====================================================================
    # SECURITY PATTERNS (EP-030+)
    # =====================================================================

    # --- SQL Injection ---
    PatternRule(
        id="EP-030",
        name="SQL string concatenation",
        description="Building SQL queries with string concatenation or f-strings enables SQL injection.",
        severity="high",
        file_glob="**/*.py",
        pattern=r"""(?:execute|cursor\.execute|\.raw|\.extra)\s*\(\s*(?:f["']|["'].*%s|["'].*\+|["'].*\.format)""",
        negative_pattern=r"# safe|# parameterized|noqa|sanitize",
        context_lines=1,
        fix_hint="Use parameterized queries: cursor.execute('SELECT * FROM t WHERE id = %s', [user_id])",
        exclude_paths=["test_", "_test.", "migration", "alembic"],
    ),
    PatternRule(
        id="EP-031",
        name="SQL string concatenation (JS/TS)",
        description="Building SQL queries with template literals or concatenation enables SQL injection.",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"""(?:\.query|\.execute|\.raw)\s*\(\s*(?:`[^`]*\$\{|['"].*\+)""",
        negative_pattern=r"parameterized|prepared|placeholder|sanitize",
        context_lines=1,
        fix_hint="Use parameterized queries: db.query('SELECT * FROM t WHERE id = $1', [userId])",
        exclude_paths=["test", "spec", "__tests__", "node_modules", "migration"],
    ),

    # --- XSS ---
    PatternRule(
        id="EP-032",
        name="Dangerous HTML injection",
        description="Using dangerouslySetInnerHTML or innerHTML without sanitization enables XSS attacks.",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"(?:dangerouslySetInnerHTML|\.innerHTML\s*=)",
        negative_pattern=r"sanitize|DOMPurify|purify|escape|# safe|xss",
        context_lines=3,
        fix_hint="Sanitize HTML with DOMPurify before injecting: DOMPurify.sanitize(html)",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- Command Injection ---
    PatternRule(
        id="EP-033",
        name="Shell injection risk",
        description="Using shell=True with user-controlled input enables command injection.",
        severity="high",
        file_glob="**/*.py",
        pattern=r"subprocess\.(?:call|run|Popen|check_output|check_call)\s*\([^)]*shell\s*=\s*True",
        negative_pattern=r"# safe|# trusted|# no user input|shlex\.quote",
        context_lines=2,
        fix_hint="Use shell=False with a list of args, or sanitize with shlex.quote()",
        exclude_paths=["test_", "_test.", "conftest.py"],
    ),
    PatternRule(
        id="EP-034",
        name="os.system usage",
        description="os.system() runs commands in a shell and is vulnerable to injection.",
        severity="high",
        file_glob="**/*.py",
        pattern=r"\bos\.system\s*\(",
        negative_pattern=r"# safe|# trusted",
        context_lines=1,
        fix_hint="Use subprocess.run() with shell=False and a list of arguments",
        exclude_paths=["test_", "_test."],
    ),

    # --- Insecure Deserialization ---
    PatternRule(
        id="EP-035",
        name="Unsafe deserialization",
        description="pickle/shelve/yaml.load can execute arbitrary code from untrusted data.",
        severity="high",
        file_glob="**/*.py",
        pattern=r"(?:pickle\.loads?\s*\(|shelve\.open\s*\(|yaml\.load\s*\([^)]*(?:Loader\s*=\s*yaml\.(?:Unsafe|Full)Loader|(?!Loader)))",
        negative_pattern=r"# trusted|SafeLoader|yaml\.safe_load",
        context_lines=2,
        fix_hint="Use yaml.safe_load(), json, or a safe serialization format for untrusted data",
        exclude_paths=["test_", "_test.", "conftest.py"],
    ),

    # --- Insecure HTTP ---
    PatternRule(
        id="EP-036",
        name="Insecure HTTP URL",
        description="Using http:// instead of https:// transmits data without encryption.",
        severity="medium",
        file_glob="**/*",
        pattern=r"""["']http://(?!localhost|127\.0\.0\.1|0\.0\.0\.0|::1|\[::1\]|example\.com|example\.org)""",
        negative_pattern=r"# http ok|# local|# test|redirect.*https|upgrade.*insecure",
        context_lines=1,
        fix_hint="Use https:// for all external URLs to ensure encrypted transport",
        exclude_paths=["test_", "_test.", "node_modules", ".git", "vendor/", "lock", "CHANGELOG"],
    ),

    # --- Permissive CORS ---
    PatternRule(
        id="EP-037",
        name="Permissive CORS (wildcard origin)",
        description="Allow-Origin: * with credentials exposes the API to any website.",
        severity="medium",
        file_glob="**/*",
        pattern=r"""(?:Access-Control-Allow-Origin|allow_origins|cors_origins|CORS_ORIGIN)\s*[:=]\s*["'*]?\*""",
        negative_pattern=r"# dev only|# local|development|localhost|DEBUG",
        context_lines=3,
        fix_hint="Restrict CORS to specific trusted origins instead of wildcard *",
        exclude_paths=["test_", "node_modules", ".git"],
    ),

    # --- Path Traversal ---
    PatternRule(
        id="EP-038",
        name="Path traversal risk",
        description="Joining user input into file paths without validation enables path traversal attacks.",
        severity="high",
        file_glob="**/*.py",
        pattern=r"""(?:os\.path\.join|Path)\s*\([^)]*(?:request\.|user_|input|param|query|args\[|form\[)""",
        negative_pattern=r"resolve\(\)|realpath|abspath|# validated|# safe|secure_filename",
        context_lines=3,
        fix_hint="Validate paths with Path.resolve() and check they stay within the allowed directory",
        exclude_paths=["test_", "_test.", "conftest.py"],
    ),

    # --- Hardcoded JWT Secret ---
    PatternRule(
        id="EP-039",
        name="Hardcoded JWT/signing secret",
        description="JWT secrets in source code can be extracted to forge authentication tokens.",
        severity="high",
        file_glob="**/*",
        pattern=r"""(?:SECRET_KEY|JWT_SECRET|SIGNING_KEY|jwt_secret)\s*[:=]\s*["'][^"']{8,}["']""",
        negative_pattern=r"example|placeholder|test|mock|change.?me|your[_-]|TODO|os\.environ|getenv|env\(",
        context_lines=1,
        fix_hint="Load signing secrets from environment variables or a secrets manager",
        exclude_paths=["test_", "_test.", "node_modules", ".git", ".env.example"],
    ),

    # --- JS/TS: eval / Function constructor ---
    PatternRule(
        id="EP-040",
        name="Dynamic code execution (JS/TS)",
        description="eval() and new Function() execute arbitrary code and are frequent RCE sinks.",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"\b(?:eval|new\s+Function)\s*\(",
        negative_pattern=r"eslint-disable|# safe|# trusted|webpack|bundler",
        context_lines=1,
        fix_hint="Refactor to avoid dynamic code execution. Use JSON.parse() for data parsing.",
        exclude_paths=["test", "spec", "__tests__", "node_modules", ".next", "dist/"],
    ),

    # --- JS/TS: child_process command injection ---
    PatternRule(
        id="EP-041",
        name="Node.js command injection sink",
        description="child_process.exec() runs commands in a shell and is vulnerable to injection.",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"child_process\.(?:exec|execSync)\s*\(",
        negative_pattern=r"# safe|# trusted|# no user input|execFile",
        context_lines=2,
        fix_hint="Use child_process.execFile() or spawn() with an args array instead of exec()",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- JWT verification bypass ---
    PatternRule(
        id="EP-042",
        name="JWT signature verification bypass",
        description="Decoding JWT tokens without verifying the signature allows token forgery.",
        severity="high",
        file_glob="**/*.py",
        pattern=r"""jwt\.decode\s*\([^)]*(?:verify\s*=\s*False|options\s*=\s*\{[^}]*"verify_signature"\s*:\s*False)""",
        negative_pattern=r"# intentional|# public claims only|test",
        context_lines=2,
        fix_hint="Always verify JWT signatures: jwt.decode(token, key, algorithms=['HS256'])",
        exclude_paths=["test_", "_test.", "conftest.py"],
    ),
    PatternRule(
        id="EP-042b",
        name="JWT 'none' algorithm (JS/TS)",
        description="Allowing the 'none' algorithm in JWT verification permits unsigned tokens.",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"""algorithms\s*:\s*\[[^\]]*['"]none['"]""",
        negative_pattern=r"# intentional|test",
        context_lines=1,
        fix_hint="Never allow the 'none' algorithm in JWT verification. Specify explicit algorithms.",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- TLS verification disabled ---
    PatternRule(
        id="EP-043",
        name="TLS certificate verification disabled",
        description="Disabling TLS verification exposes connections to man-in-the-middle attacks.",
        severity="high",
        file_glob="**/*.py",
        pattern=r"(?:verify\s*=\s*False|CERT_NONE|check_hostname\s*=\s*False)",
        negative_pattern=r"# dev only|# localhost|# self-signed ok|test",
        context_lines=2,
        fix_hint="Never disable TLS verification in production. Use proper certificates.",
        exclude_paths=["test_", "_test.", "conftest.py"],
    ),
    PatternRule(
        id="EP-043b",
        name="TLS verification disabled (JS/TS)",
        description="Disabling TLS verification exposes connections to man-in-the-middle attacks.",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"(?:rejectUnauthorized\s*:\s*false|NODE_TLS_REJECT_UNAUTHORIZED\s*=\s*['\"]?0)",
        negative_pattern=r"# dev only|# localhost|# self-signed ok|test",
        context_lines=2,
        fix_hint="Never disable TLS verification in production. Use proper certificates.",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- Weak cryptographic algorithms ---
    PatternRule(
        id="EP-049",
        name="Weak cryptographic hash (MD5/SHA1)",
        description="MD5 and SHA1 are broken for security purposes (collisions demonstrated).",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"hashlib\.(?:md5|sha1)\s*\(",
        negative_pattern=r"# non-security|# checksum only|# fingerprint|# cache key|# etag",
        context_lines=1,
        fix_hint="Use hashlib.sha256() or hashlib.sha3_256() for security-sensitive hashing",
        exclude_paths=["test_", "_test.", "migration"],
    ),
    PatternRule(
        id="EP-049b",
        name="Weak cryptographic hash (JS/TS)",
        description="MD5 and SHA1 are broken for security purposes (collisions demonstrated).",
        severity="medium",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"""createHash\s*\(\s*['"](?:md5|sha1)['"]""",
        negative_pattern=r"# non-security|# checksum only|# fingerprint|# cache key|# etag",
        context_lines=1,
        fix_hint="Use createHash('sha256') for security-sensitive hashing",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- Insecure randomness for secrets ---
    PatternRule(
        id="EP-050",
        name="Insecure randomness for security use",
        description="Using Math.random() or Python's random module for tokens/secrets is predictable.",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"random\.(?:random|randint|choice|randrange|sample)\s*\(",
        negative_pattern=r"# non-security|# game|# shuffle|# display|# ui|# test|secrets\.",
        context_lines=3,
        fix_hint="Use secrets.token_hex() or secrets.token_urlsafe() for security-sensitive randomness",
        exclude_paths=["test_", "_test.", "conftest.py", "seed", "fixture"],
    ),
    PatternRule(
        id="EP-050b",
        name="Insecure randomness for security use (JS/TS)",
        description="Math.random() is not cryptographically secure and produces predictable values.",
        severity="medium",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"Math\.random\s*\(\s*\)",
        negative_pattern=r"# non-security|# game|# animation|# ui|# display|# test|crypto\.",
        context_lines=3,
        fix_hint="Use crypto.randomUUID() or crypto.getRandomValues() for security-sensitive randomness",
        exclude_paths=["test", "spec", "__tests__", "node_modules", ".next"],
    ),

    # =====================================================================
    # OWASP 2025 PATTERNS (EP-060+)
    # A03:2025 Software Supply Chain, A10:2025 Exception Handling
    # =====================================================================

    # --- A03:2025: Unpinned dependencies ---
    PatternRule(
        id="EP-060",
        name="Unpinned dependency version",
        description="Using '*' or 'latest' for dependency versions enables supply chain attacks via malicious updates.",
        severity="medium",
        file_glob="**/package.json",
        pattern=r"""["']\s*:\s*["'](?:\*|latest|>=)["']""",
        negative_pattern=r"peerDependencies|devDependencies.*optional",
        context_lines=0,
        fix_hint="Pin dependency versions exactly (e.g., '1.2.3') or use a lock file",
        exclude_paths=["node_modules"],
    ),

    # --- A10:2025: Sensitive data in error messages ---
    PatternRule(
        id="EP-061",
        name="Sensitive data in error/log output",
        description="Logging passwords, tokens, or keys in error messages can leak credentials (OWASP A10:2025).",
        severity="high",
        file_glob="**/*.py",
        pattern=r"""(?:print|logger?\.\w+|logging\.\w+)\s*\([^)]*(?:password|passwd|secret|token|api_key|private_key|credential)""",
        negative_pattern=r"# safe|mask|redact|\*\*\*|sanitize|test|debug only",
        context_lines=2,
        fix_hint="Never log sensitive values. Redact or mask credentials before logging.",
        exclude_paths=["test_", "_test.", "conftest.py"],
    ),
    PatternRule(
        id="EP-061b",
        name="Sensitive data in error/log output (JS/TS)",
        description="Logging passwords, tokens, or keys in error messages can leak credentials (OWASP A10:2025).",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"""(?:console\.\w+|logger?\.\w+)\s*\([^)]*(?:password|passwd|secret|token|apiKey|privateKey|credential)""",
        negative_pattern=r"# safe|mask|redact|\*\*\*|sanitize|test|debug only",
        context_lines=2,
        fix_hint="Never log sensitive values. Redact or mask credentials before logging.",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- A10:2025: Swallowed exceptions ---
    PatternRule(
        id="EP-062",
        name="Swallowed exception (except: pass)",
        description="Catching exceptions and doing nothing hides bugs and security issues (OWASP A10:2025).",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"except\s*(?:\w+\s*)?:\s*\n\s*pass\s*$",
        negative_pattern=r"# intentional|# best effort|# optional",
        context_lines=0,
        fix_hint="At minimum log the exception. Silent failures hide security-relevant errors.",
        exclude_paths=["test_", "_test.", "conftest.py"],
    ),

    # --- A10:2025: Stack trace exposure to users ---
    PatternRule(
        id="EP-063",
        name="Debug mode / stack trace exposure",
        description="Running in debug mode in production exposes stack traces, config, and internal paths.",
        severity="medium",
        file_glob="**/*.py",
        pattern=r"""(?:DEBUG\s*=\s*True|app\.run\([^)]*debug\s*=\s*True|FLASK_DEBUG\s*=\s*["']?1)""",
        negative_pattern=r"# dev only|# local|# test|if.*development|os\.environ|getenv",
        context_lines=2,
        fix_hint="Use environment variables for debug flags. Never hardcode DEBUG=True.",
        exclude_paths=["test_", "_test.", ".env.example"],
    ),

    # =====================================================================
    # CI/CD & SUPPLY CHAIN PATTERNS (EP-090+)
    # =====================================================================

    # --- GitHub Actions: unpinned action ---
    PatternRule(
        id="EP-090",
        name="GitHub Action not pinned to commit SHA",
        description="Using tags like @v4 instead of full SHA lets attackers hijack the action via tag mutation.",
        severity="high",
        # Workflow files may use either extension; the brace alternation is the
        # same glob form other rules in this list already use.
        file_glob="**/.github/workflows/*.{yml,yaml}",
        # Flags `uses: <action>@<ref>` unless <ref> is a 40-char lowercase hex SHA.
        pattern=r"uses:\s*[^@\s]+@(?![a-f0-9]{40}\b)",
        # Suppressed for actions/checkout, actions/setup-* and `# pinned` markers.
        negative_pattern=r"actions/checkout@|actions/setup-|# pinned",
        context_lines=0,
        fix_hint="Pin actions to full commit SHA: uses: owner/action@abc123... (40-char hex)",
    ),

    # --- GitHub Actions: pull_request_target + checkout ---
    PatternRule(
        id="EP-091",
        name="Dangerous pull_request_target with PR checkout",
        description="Checking out PR head in pull_request_target runs untrusted code with write permissions.",
        severity="high",
        # Workflow files may use either extension; the brace alternation is the
        # same glob form other rules in this list already use.
        file_glob="**/.github/workflows/*.{yml,yaml}",
        # Any mention of the trigger is flagged; the negative pattern below is
        # what clears workflows that are marked reviewed or pin the base SHA.
        pattern=r"pull_request_target",
        negative_pattern=r"# safe|# reviewed|ref:\s*\$\{\{\s*github\.sha",
        context_lines=10,
        fix_hint="Never checkout PR head code in pull_request_target workflows. Use pull_request event instead.",
    ),

    # --- GitHub Actions: overprivileged token ---
    PatternRule(
        id="EP-092",
        name="Overprivileged GitHub Actions permissions",
        description="write-all grants the GITHUB_TOKEN full write access — violates least privilege.",
        severity="medium",
        # Workflow files may use either extension; the brace alternation is the
        # same glob form other rules in this list already use.
        file_glob="**/.github/workflows/*.{yml,yaml}",
        # Flags the blanket `write-all` shorthand only; per-scope maps are not matched.
        pattern=r"permissions:\s*write-all",
        negative_pattern=r"# required|# release workflow",
        context_lines=0,
        fix_hint="Declare minimal permissions per job: permissions: { contents: read, pull-requests: write }",
    ),

    # --- CI: curl | bash (remote script execution) ---
    PatternRule(
        id="EP-093",
        name="Remote script execution (curl pipe bash)",
        description="Piping curl/wget output to bash executes untrusted remote code.",
        severity="high",
        file_glob="**/*",
        # Download side: any curl invocation, or wget writing to stdout via an
        # `O -` option in any spelling (combined flags, separated flags, spaced dash).
        # Shell side: an optional sudo prefix (with its own flags) before bash/sh/zsh.
        # Both additions are supersets of the previous pattern.
        pattern=r"(?:curl\s+[^|\n]+|wget\s+[^|\n]*-\w*O\s*-[^|\n]*)\s*\|\s*(?:sudo\s+(?:-\S+\s+)*)?(?:bash|sh|zsh)\b",
        negative_pattern=r"# trusted|# official installer|# verified",
        context_lines=0,
        fix_hint="Download the script first, verify its checksum, then execute it.",
        exclude_paths=["node_modules", ".git"],
    ),

    # =====================================================================
    # INFRASTRUCTURE PATTERNS (EP-094+)
    # =====================================================================

    # --- Docker: mutable :latest tag ---
    PatternRule(
        id="EP-094",
        name="Docker FROM uses mutable :latest tag",
        description="Using :latest in Dockerfile means builds are not reproducible and may pull compromised images.",
        severity="medium",
        file_glob="**/Dockerfile*",
        pattern=r"^\s*FROM\s+\S+:latest(?:\s|$)",
        negative_pattern=r"# ok|# dev only|AS\s+builder",
        context_lines=0,
        fix_hint="Pin to a specific version and digest: FROM python:3.12-slim@sha256:abc...",
    ),

    # --- Docker: running as root ---
    PatternRule(
        id="EP-095",
        name="Docker container runs as root",
        description="Running as root inside containers exposes the host to container escape attacks.",
        severity="medium",
        file_glob="**/Dockerfile*",
        # Matches the root user by name or by numeric UID 0 (including `0:<gid>`);
        # `\b` keeps UIDs that merely start with 0 from matching.
        pattern=r"^\s*USER\s+(?:root|0)\b",
        negative_pattern=r"# build stage|# install only|AS\s+builder",
        context_lines=0,
        fix_hint="Add 'USER nonroot' or 'USER 1000' before the final CMD/ENTRYPOINT.",
    ),

    # --- Kubernetes: privileged container ---
    PatternRule(
        id="EP-096",
        name="Kubernetes privileged container",
        description="Privileged containers have full host access — effectively root on the node.",
        severity="high",
        file_glob="**/*.{yml,yaml}",
        pattern=r"^\s*privileged:\s*true\b",
        negative_pattern=r"# required|# debug only|# init container",
        context_lines=0,
        fix_hint="Remove privileged: true. Use specific capabilities instead: capabilities: { add: [NET_ADMIN] }",
        exclude_paths=["node_modules", ".git"],
    ),

    # --- Kubernetes: privilege escalation ---
    PatternRule(
        id="EP-097",
        name="Kubernetes allowPrivilegeEscalation enabled",
        description="Allows processes inside the container to gain more privileges than their parent.",
        severity="high",
        file_glob="**/*.{yml,yaml}",
        pattern=r"^\s*allowPrivilegeEscalation:\s*true\b",
        negative_pattern=r"# required|# init container",
        context_lines=0,
        fix_hint="Set allowPrivilegeEscalation: false in securityContext.",
        exclude_paths=["node_modules", ".git"],
    ),

    # --- Kubernetes: hostPath volume ---
    PatternRule(
        id="EP-098",
        name="Kubernetes hostPath volume mount",
        description="hostPath volumes expose host filesystem to the container — path traversal and data theft risk.",
        severity="high",
        file_glob="**/*.{yml,yaml}",
        pattern=r"^\s*hostPath:",
        negative_pattern=r"# required|# /dev/|# socket|type:\s*Socket",
        context_lines=2,
        fix_hint="Use PersistentVolumeClaim or emptyDir instead of hostPath.",
        exclude_paths=["node_modules", ".git"],
    ),

    # =====================================================================
    # API SECURITY PATTERNS (EP-099+)
    # =====================================================================

    # --- SSRF: user-controlled URL ---
    PatternRule(
        id="EP-099",
        name="Potential SSRF (user-controlled URL in request)",
        description="Passing user input directly as a URL to HTTP clients enables SSRF attacks on internal services.",
        severity="high",
        file_glob="**/*.py",
        # `urllib(?:\.\w+)*` allows submodule paths, so calls made through
        # urllib's `request` submodule are matched; the previous form allowed
        # only one attribute after the package name and could never match them.
        pattern=r"""(?:requests|httpx|urllib(?:\.\w+)*)\.\w+\s*\([^)]*(?:request\.\w+|params\[|args\[|user_|input\()""",
        negative_pattern=r"# validated|# allowlisted|urlparse|validators\.url",
        context_lines=3,
        fix_hint="Validate and allowlist URLs before fetching. Block internal IPs (169.254.x.x, 10.x.x.x, etc.).",
        exclude_paths=["test_", "_test.", "conftest.py"],
    ),
    PatternRule(
        id="EP-099b",
        name="Potential SSRF (JS/TS)",
        description="Passing user input directly as a URL to fetch/axios enables SSRF attacks.",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        pattern=r"""(?:fetch|axios\.\w+)\s*\([^)]*(?:req\.(?:query|body|params)|user_|input)""",
        # JS/TS comments start with `//`, so the opt-out markers accept that
        # prefix as well as the legacy `#` spelling (superset of the old pattern).
        negative_pattern=r"(?:#|//)\s*(?:validated|allowlisted)|new URL|url\.parse",
        context_lines=3,
        fix_hint="Validate and allowlist URLs before fetching. Block internal/private IP ranges.",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- Mass assignment ---
    PatternRule(
        id="EP-100",
        name="Mass assignment (unfiltered request body)",
        description="Passing the full request body to ORM create/update allows attackers to set any field (e.g., isAdmin).",
        severity="high",
        file_glob="**/*.py",
        pattern=r"""(?:\.create|\.update|\.filter)\s*\(\s*\*\*(?:request\.data|request\.json|kwargs)""",
        negative_pattern=r"# validated|serializer|schema|form\.cleaned_data",
        context_lines=2,
        fix_hint="Explicitly list allowed fields. Use serializers/schemas to validate input.",
        exclude_paths=["test_", "_test.", "migration"],
    ),
    PatternRule(
        id="EP-100b",
        name="Mass assignment (JS/TS)",
        description="Passing req.body directly to ORM create/update allows attackers to set any field.",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        # Flags create/update/insert calls whose first argument is the raw body.
        pattern=r"""(?:\.create|\.update|\.insert)\s*\(\s*req\.body\b""",
        # JS/TS comments start with `//`, so the opt-out marker accepts that
        # prefix as well as the legacy `#` spelling (superset of the old pattern).
        negative_pattern=r"(?:#|//)\s*validated|schema|validator|zod|joi|yup",
        context_lines=2,
        fix_hint="Destructure only allowed fields: const { name, email } = req.body",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # =====================================================================
    # MULTI-LANGUAGE DESERIALIZATION (EP-101+)
    # =====================================================================

    # --- C#: BinaryFormatter ---
    PatternRule(
        id="EP-101",
        name="Insecure .NET deserialization (BinaryFormatter)",
        description="BinaryFormatter can execute arbitrary code from untrusted data. Deprecated by Microsoft.",
        severity="high",
        file_glob="**/*.cs",
        pattern=r"\bBinaryFormatter\b",
        # C# comments start with `//`, so markers accept that prefix as well as
        # the legacy `#`; `[Oo]bsolete` also recognises the capitalised
        # `Obsolete` attribute spelling.
        negative_pattern=r"(?:#|//)\s*(?:trusted|legacy ok)|[Oo]bsolete",
        context_lines=1,
        fix_hint="Use System.Text.Json or JsonSerializer. BinaryFormatter is officially deprecated.",
    ),

    # --- Java: ObjectInputStream ---
    PatternRule(
        id="EP-102",
        name="Insecure Java deserialization (ObjectInputStream)",
        description="Java native deserialization can execute arbitrary code via crafted object streams.",
        severity="high",
        file_glob="**/*.java",
        pattern=r"(?:new\s+ObjectInputStream|\.readObject\s*\()",
        # Java comments start with `//`, so the marker accepts that prefix as
        # well as the legacy `#` spelling (superset of the old pattern).
        negative_pattern=r"(?:#|//)\s*trusted|ObjectInputFilter|ValidatingObjectInputStream",
        context_lines=2,
        fix_hint="Use JSON/Protocol Buffers instead, or add ObjectInputFilter for class allowlisting.",
    ),

    # --- PHP: unserialize ---
    PatternRule(
        id="EP-103",
        name="Insecure PHP deserialization",
        description="PHP unserialize() can trigger __wakeup/__destruct for arbitrary code execution.",
        severity="high",
        file_glob="**/*.php",
        pattern=r"\bunserialize\s*\(",
        # The option is an array key, so a closing quote sits between the name
        # and `=>`; `["']?` lets the form recommended in fix_hint suppress the
        # finding. Markers accept both PHP comment prefixes (`#` and `//`).
        negative_pattern=r"""(?:#|//)\s*(?:trusted|safe)|allowed_classes["']?\s*=>""",
        context_lines=1,
        fix_hint="Use json_decode() instead, or pass allowed_classes option: unserialize($data, ['allowed_classes' => false])",
    ),

    # --- Ruby: Marshal/YAML.load ---
    PatternRule(
        id="EP-104",
        name="Insecure Ruby deserialization",
        description="Marshal.load and YAML.load can execute arbitrary Ruby code from untrusted data.",
        severity="high",
        file_glob="**/*.rb",
        pattern=r"\b(?:Marshal\.load|YAML\.load|Psych\.unsafe_load)\b",
        # `(?<!un)` stops `safe_load` from matching inside `unsafe_load`;
        # without it the Psych.unsafe_load finding always suppressed itself.
        negative_pattern=r"# trusted|(?<!un)safe_load|permitted_classes",
        context_lines=1,
        fix_hint="Use YAML.safe_load or JSON.parse for untrusted data.",
    ),

    # =====================================================================
    # AI/LLM SECURITY PATTERNS (EP-105+)
    # =====================================================================

    # --- LLM output to code execution ---
    PatternRule(
        id="EP-105",
        name="LLM output passed to code execution",
        description="Executing LLM/model output as code or shell commands enables prompt injection to RCE.",
        severity="high",
        file_glob="**/*.py",
        # - `subprocess\.\w+` matches real call sites; the bare module name
        #   followed by `(` never occurs, so that branch used to be dead.
        # - `\b` before exec/eval keeps name suffixes (e.g. ast.literal_eval)
        #   from matching.
        # - `(?<!capture_)` keeps the `capture_output` keyword argument from
        #   counting as the `output` indicator now that subprocess calls match.
        pattern=r"""(?:\bsubprocess\.\w+|\bos\.(?:system|popen)|\bexec|\beval)\s*\([^)]*(?<!capture_)(?:response|completion|output|result|message|content|answer|reply)""",
        negative_pattern=r"# sandboxed|# validated|# safe|test|mock",
        context_lines=3,
        fix_hint="Never execute LLM output directly. Validate, sandbox, or use structured tool calling instead.",
        exclude_paths=["test_", "_test.", "conftest.py"],
    ),
    PatternRule(
        id="EP-105b",
        name="LLM output passed to code execution (JS/TS)",
        description="Executing LLM/model output as code or shell commands enables prompt injection to RCE.",
        severity="high",
        file_glob="**/*.{ts,tsx,js,jsx}",
        # `exec(?:Sync)?` also covers the synchronous child_process variant.
        pattern=r"""(?:eval|new\s+Function|child_process\.exec(?:Sync)?)\s*\([^)]*(?:response|completion|output|result|message|content|answer|reply)""",
        # JS/TS comments start with `//`, so the opt-out markers accept that
        # prefix as well as the legacy `#` spelling (superset of the old pattern).
        negative_pattern=r"(?:#|//)\s*(?:sandboxed|validated|safe)|test|mock",
        context_lines=3,
        fix_hint="Never execute LLM output directly. Validate, sandbox, or use structured tool calling instead.",
        exclude_paths=["test", "spec", "__tests__", "node_modules"],
    ),

    # --- User input in system prompt ---
    PatternRule(
        id="EP-106",
        name="Untrusted input in LLM system prompt",
        description="Injecting user input into system prompts enables prompt injection attacks.",
        severity="medium",
        file_glob="**/*",
        pattern=r"""(?:system|instructions?)\s*[:=].*(?:f["']|\.format\s*\(|%s|user_input|request\.|req\.)""",
        negative_pattern=r"# sanitized|# validated|# escaped|xml_escape|template",
        context_lines=3,
        fix_hint="Isolate user input in user messages with XML tags. Never interpolate into system prompts.",
        exclude_paths=["test_", "_test.", "node_modules", "spec"],
    ),

    # --- Go: unsafe package ---
    PatternRule(
        id="EP-110",
        name="Go unsafe package usage",
        description="The unsafe package bypasses Go's type safety and memory safety guarantees.",
        severity="medium",
        file_glob="**/*.go",
        pattern=r"""import\s+["']unsafe["']|unsafe\.Pointer""",
        negative_pattern=r"// cgo|// required|// ffi",
        context_lines=1,
        fix_hint="Avoid unsafe unless interfacing with C code. Use encoding/binary for byte manipulation.",
        exclude_paths=["vendor/", "test"],
    ),

    # --- Go: unvalidated HTTP redirect ---
    PatternRule(
        id="EP-111",
        name="Go open redirect",
        description="Using user input directly in http.Redirect enables open redirect attacks.",
        severity="medium",
        file_glob="**/*.go",
        pattern=r"http\.Redirect\s*\([^)]*r\.(URL|FormValue|Form\.Get)",
        negative_pattern=r"// validated|// allowlisted",
        context_lines=2,
        fix_hint="Validate redirect URLs against an allowlist of trusted hosts.",
    ),

    # --- Python: dict.get() with default bypassed by None value ---
    PatternRule(
        id="EP-120",
        name="dict.get() default bypassed by None value",
        description="dict.get('key', '').strip() crashes when key exists with None value — "
                    ".get() only returns the default when the key is ABSENT, not when it's None.",
        severity="high",
        file_glob="**/*.py",
        pattern=r"""\.get\(\s*["'][^"']+["']\s*,\s*["'][^"']*["']\s*\)\s*\.\s*(?:strip|lower|upper|split|replace|startswith|endswith|format)\s*\(""",
        negative_pattern=r"\bor\s+[\"']|if\s+\w+\s+is\s+not\s+None|noqa",
        context_lines=2,
        fix_hint="Use (d.get('key') or '').strip() instead of d.get('key', '').strip() — "
                 "the 'or' pattern handles both missing keys AND None values",
        incident="dict.get('matrix_article', '').strip() → AttributeError: 'NoneType' has no attribute 'strip'",
    ),

    # --- JS/TS: `|| ''` default before a string method call (counterpart of EP-120) ---
    # NOTE(review): in JavaScript `a || ''` already evaluates to '' when `a` is
    # null or undefined, so the flagged form does not throw on null; `??` only
    # behaves differently for other falsy values (0, false, NaN). Confirm the
    # intended failure mode before relying on this rule's description/fix_hint.
    PatternRule(
        id="EP-120b",
        name="Object property default bypassed by null/undefined",
        description="obj.key || '' does not protect against null when using .trim()/.toLowerCase() etc.",
        severity="medium",
        file_glob="**/*.{ts,tsx,js,jsx}",
        # Matches `<expr>.prop || '<literal>')` immediately followed by a string method call.
        pattern=r"""\.\w+\s*\|\|\s*["'][^"']*["']\s*\)\s*\.\s*(?:trim|toLowerCase|toUpperCase|split|replace|startsWith|endsWith)\s*\(""",
        negative_pattern=r"\?\?|noqa|// safe",
        context_lines=2,
        fix_hint="Use nullish coalescing: (obj.key ?? '').trim() instead of (obj.key || '').trim()",
    ),

    # --- Docker: Container memory limit too low for dev tooling ---
    PatternRule(
        id="EP-121",
        name="Docker container memory limit below 512MB",
        description="Containers with mem_limit under 512MB may OOM-kill dev servers (Next.js Turbopack, "
                    "webpack-dev-server, etc.) silently with exit code 0 and no traceback.",
        severity="medium",
        file_glob="**/docker-compose*.{yml,yaml}",
        # Matches every megabyte value from 1 to 511:
        #   [1-9]\d?    -> 1..99
        #   [1-4]\d{2}  -> 100..499
        #   50\d|51[01] -> 500..511
        # The previous alternation only covered 1-99, 120-128, 192 and 250-256,
        # so limits such as 300m, 384m or 511m were never reported. An optional
        # opening quote and an upper-case unit suffix are accepted as well.
        pattern=r"""mem_limit:\s*["']?(?:[1-9]\d?|[1-4]\d{2}|50\d|51[01])[mM]""",
        negative_pattern=r"# production|# optimized|redis|postgres|minio|nginx",
        context_lines=3,
        fix_hint="Set mem_limit >= 512m for app containers, >= 2g for Node.js dev servers with Turbopack/webpack. "
                 "Silent OOM kills show as exit code 0 with restart loops — check 'docker inspect' for OOMKilled.",
        incident="Next.js 16 Turbopack dev server OOM-killed at 256MB, crash-looped with no error output",
    ),

    # --- Frontend: State variable collected but not sent in API payload ---
    PatternRule(
        id="EP-122",
        name="React state not included in API request payload",
        description="State variable set via onChange/setState but never referenced in the fetch/post payload. "
                    "The UI collects user input but silently discards it on submit.",
        severity="medium",
        file_glob="**/*.{tsx,jsx}",
        # Matches camelCase setter calls (`set` + capitalised name) that receive
        # an updater callback or an object literal. The lookahead skips the
        # timer functions, whose callback form otherwise looks identical, and
        # `\b` / `[A-Z]` skip names that merely contain or start with "set".
        pattern=r"\bset(?!Timeout\b|Interval\b|Immediate\b)[A-Z]\w*\s*\(\s*(?:\([^)]*\)\s*=>|prev\s*=>|\{)",
        negative_pattern=r"noqa|// UI only|// display only",
        context_lines=0,
        fix_hint="Verify every state variable populated by user input is included in the API request body. "
                 "Search for the setter name and confirm its getter appears in the fetch/post call.",
        incident="certidaoAssignments state collected from dropdown but never sent in finalize-multi payload",
    ),

    # --- HTTP: 404 for optional/missing resources instead of 204 ---
    PatternRule(
        id="EP-123",
        name="HTTP 404 for optional resource instead of 204",
        description="Returning 404 for optional resources (thumbnails, avatars, previews) causes console errors "
                    "and error tracking noise. Use 204 No Content for 'valid endpoint, no content available'.",
        severity="low",
        file_glob="**/*.py",
        pattern=r"(?:raise\s+HTTPException|return\s+JSONResponse)\s*\([^)]*(?:404|status_code\s*=\s*404)[^)]*(?:thumbnail|preview|avatar|icon|image)",
        negative_pattern=r"# required|# must exist|noqa",
        context_lines=3,
        fix_hint="Return 204 No Content instead of 404 for optional resources. "
                 "Update frontend to handle: .then(r => r.ok && r.status !== 204 ? r.blob() : null)",
        incident="Thumbnail 404s flooding console after rolled-back transactions left ghost document references",
    ),

    # --- Docker: memswap_limit not matching mem_limit ---
    PatternRule(
        id="EP-124",
        name="Docker memswap_limit not matching mem_limit",
        description="When memswap_limit equals mem_limit, swap is effectively disabled. When memswap_limit "
                    "is less than mem_limit, the container may behave unpredictably.",
        severity="low",
        file_glob="**/docker-compose*.{yml,yaml}",
        pattern=r"memswap_limit:\s*(\S+)",
        negative_pattern=r"# intentional|# no swap ok",
        context_lines=3,
        fix_hint="Set memswap_limit = 2x mem_limit to allow swap headroom, or omit to allow unlimited swap. "
                 "Setting memswap_limit == mem_limit disables swap entirely, making OOM kills more likely.",
    ),

    # =====================================================================
    # RUST CORE BUG PATTERNS (EP-200+) — added as bugs are fixed
    # =====================================================================

    # --- Capacity bound off-by-one (media::decrypt_media, slice 013) ---
    PatternRule(
        id="EP-200",
        name="Capacity bound multiplies count by unit instead of bounding the count (off-by-one)",
        description="A size guard of the form `count * UNIT > MAX_BYTES` rejects the final partial unit: "
                    "an input of exactly MAX_BYTES needs ceil(MAX_BYTES/UNIT) units, so count*UNIT exceeds "
                    "MAX_BYTES and the guard wrongly rejects a legitimate max-size object. Bound the count "
                    "directly against ceil(MAX_BYTES/UNIT). Surfaced by mutation testing in "
                    "media::decrypt_media — the symmetric form is the fix.",
        severity="low",
        file_glob="**/*.rs",
        # Matches `<..count..> as u64 * <unit> as u64 > MAX_...` comparisons.
        pattern=r"\w*count\w*\s+as\s+u64\s*\*\s*\w+\s+as\s+u64\s*>\s*MAX_",
        # Rust comments start with `//`, so the opt-out marker accepts that
        # prefix as well as the legacy `#` spelling (superset of the old pattern).
        negative_pattern=r"div_ceil|(?:#|//)\s*off-by-one ok|bound the count directly",
        context_lines=2,
        fix_hint="Bound the count directly: `count > MAX_BYTES.div_ceil(UNIT)` — keeps encrypt/decrypt "
                 "size limits symmetric so a legitimately-encrypted max-size object still decrypts.",
        exclude_paths=["test_", "_test.", "/tests/"],
    ),

    # --- Ungated temporary debug logging (transport metadata at rest) ---
    PatternRule(
        id="EP-201",
        name="Temporary debug logging left ungated",
        description="A 'TEMPORARY DEBUG' / 'remove after debugging' marker signals diagnostics that "
                    "run unconditionally in shipped builds. The Tor diagnostic log wrote circuit/guard/"
                    "onion metadata to a plaintext file next to the exe by default (fixed in 0.1.35 by "
                    "gating behind PVTCOMS_TOR_DEBUG_LOG). Debug output that can carry transport "
                    "metadata or secrets must be opt-in, never default-on.",
        severity="high",
        file_glob="**/*.rs",
        pattern=r"TEMPORARY DEBUG|[Rr]emove after debugging",
        context_lines=2,
        fix_hint="Gate the diagnostic behind an explicit opt-in env var (off by default, fail closed) "
                 "and drop the TEMPORARY marker, or delete the debug output before committing.",
        exclude_paths=["test_", "_test.", "/tests/"],
    ),

    # --- Temp-file name collision via with_extension (migration/atomic-write) ---
    PatternRule(
        id="EP-202",
        name="with_extension for a temp name collides on shared-stem files",
        description="`Path::with_extension(\"…tmp\")` replaces only the FINAL extension, so files that "
                    "share a stem but differ in final extension (the SQLCipher triplet history.db / "
                    ".db-wal / .db-shm) all map to ONE temp path. A move/atomic-write staging through "
                    "that shared temp can clobber a sibling and lose data (fixed in 0.1.41 — the data-dir "
                    "migration's WAL-clobber). Derive the temp from the FULL file name instead.",
        severity="high",
        file_glob="**/*.rs",
        pattern=r"\.with_extension\(\s*[\"'][^\"']*tmp[\"']\s*\)",
        negative_pattern=r"with_file_name|unique-stem ok|Regression:",
        context_lines=2,
        fix_hint="Use `dst.with_file_name(format!(\"{}.tmp\", full_name))` so each final name gets its "
                 "own staging path; with_extension collapses shared-stem files onto one temp.",
        exclude_paths=["test_", "_test.", "/tests/"],
    ),

    # --- General: Large file (>500 lines) ---
    # This uses a meta-check approach - the scanner won't detect this via regex.
    # Keeping as documentation; actual check is in enforce_coding_standards.py.
]


# ============================================================================
# ERROR KNOWLEDGE BASE — Reactive error analysis
# Maps error fingerprints to code patterns that commonly cause them.
# ============================================================================

KNOWLEDGE: list[ErrorKnowledge] = [
    # --- Python: Import/Module not found ---
    ErrorKnowledge(
        id="EK-001",
        name="Import/Module not found",
        keywords=["modulenotfounderror"],
        alt_keywords=[["importerror"], ["no module named"]],
        category="python",
        rules=[
            PatternRule(
                id="EK-001-R1",
                name="Missing dependency in requirements",
                description="Module referenced in code but may be missing from requirements/pyproject.",
                severity="high",
                file_glob="**/*.py",
                pattern=r"^\s*(?:from|import)\s+(\w+)",
                fix_hint="Ensure the module is installed and listed in requirements.txt or pyproject.toml",
            ),
        ],
    ),

    # --- Python: Attribute/Type error on None ---
    ErrorKnowledge(
        id="EK-002",
        name="Attribute/Type error on None",
        keywords=["attributeerror", "nonetype"],
        alt_keywords=[["typeerror", "none"]],
        category="python",
        rules=[
            PatternRule(
                id="EK-002-R1",
                name="Missing None guard before attribute access",
                description="Accessing attributes on a value that may be None without checking first.",
                severity="medium",
                file_glob="**/*.py",
                pattern=r"(?:\.get\([^)]*\)|= None).*\.\w+\(",
                negative_pattern=r"if\s+\w+\s+is\s+not\s+None|if\s+\w+:",
                context_lines=3,
                fix_hint="Add a None check before accessing attributes: if obj is not None: obj.method()",
            ),
            PatternRule(
                id="EK-002-R2",
                name="Function returning None implicitly",
                description="Functions without explicit return may return None unexpectedly.",
                severity="low",
                file_glob="**/*.py",
                pattern=r"def\s+\w+\s*\([^)]*\)\s*(?:->.*)?:\s*\n(?:(?:\s+.*\n)*?)(?=\ndef\s|\Z)",
                negative_pattern=r"return\s+\S|->.*None",
                context_lines=0,
                fix_hint="Ensure all code paths return a value, or add explicit '-> None' return type hint",
            ),
        ],
    ),

    # --- Python: SQLAlchemy async lazy-load (MissingGreenlet) ---
    ErrorKnowledge(
        id="EK-003",
        name="SQLAlchemy async lazy-load (MissingGreenlet)",
        keywords=["missinggreenlet"],
        alt_keywords=[["lazy load", "async"]],
        category="python",
        rules=[
            PatternRule(
                id="EK-003-R1",
                name="Missing selectinload/joinedload for async relationship",
                description="Accessing lazy-loaded relationships in async SQLAlchemy triggers MissingGreenlet.",
                severity="high",
                file_glob="**/*.py",
                pattern=r"(?:\.query|select)\s*\([^)]*\)(?!.*(?:selectinload|joinedload|subqueryload|lazyload|options))",
                negative_pattern=r"selectinload|joinedload|subqueryload|raiseload|lazy=['\"]selectin",
                context_lines=5,
                fix_hint="Use .options(selectinload(Model.relation)) for eager loading in async sessions",
            ),
        ],
    ),

    # --- Database: Connection refused ---
    ErrorKnowledge(
        id="EK-004",
        name="Database connection refused",
        keywords=["connection refused"],
        alt_keywords=[["connection", "refused", "5432"], ["connection", "refused", "3306"]],
        category="database",
        rules=[
            PatternRule(
                id="EK-004-R1",
                name="Hardcoded database host or port",
                description="Database connection parameters hardcoded instead of using environment variables.",
                severity="medium",
                file_glob="**/*.{py,ts,tsx,js,jsx,yml,yaml}",
                pattern=r"""(?:host|HOST)\s*[:=]\s*["'](?:localhost|127\.0\.0\.1|0\.0\.0\.0)["']""",
                negative_pattern=r"os\.environ|getenv|process\.env|\.env|# default|# fallback|test|example",
                context_lines=2,
                fix_hint="Use environment variables for database host/port to support different environments",
            ),
            PatternRule(
                id="EK-004-R2",
                name="Hardcoded database port",
                description="Database port hardcoded instead of configurable.",
                severity="low",
                file_glob="**/*.{py,ts,tsx,js,jsx}",
                pattern=r"""(?:port|PORT)\s*[:=]\s*(?:5432|3306|27017|6379)\b""",
                negative_pattern=r"os\.environ|getenv|process\.env|\.env|# default|# fallback|test|example",
                context_lines=2,
                fix_hint="Use environment variables for database port configuration",
            ),
        ],
    ),

    # --- JavaScript/Browser: Null/undefined property access ---
    ErrorKnowledge(
        id="EK-005",
        name="Null/undefined property access",
        keywords=["cannot read properties", "null"],
        alt_keywords=[
            ["cannot read properties", "undefined"],
            ["is not a function"],
            ["typeerror", "null"],
        ],
        category="browser",
        rules=[
            PatternRule(
                id="EK-005-R1",
                name="Missing optional chaining",
                description="Property access chains without optional chaining (?.) crash on null/undefined.",
                severity="medium",
                file_glob="**/*.{ts,tsx,js,jsx}",
                pattern=r"\w+\.\w+\.\w+(?!\s*[?])",
                negative_pattern=r"\?\.|&&|!=\s*null|!==\s*null|!==\s*undefined|if\s*\(",
                context_lines=2,
                fix_hint="Use optional chaining: obj?.prop?.nested instead of obj.prop.nested",
            ),
            PatternRule(
                id="EK-005-R2",
                name="Missing null guard before method call",
                description="Calling methods on potentially null values without checking first.",
                severity="medium",
                file_glob="**/*.{ts,tsx,js,jsx}",
                pattern=r"(?:getElementById|querySelector|find|get)\s*\([^)]*\)\s*\.\w+",
                negative_pattern=r"\?\.|if\s*\(|&&|!==?\s*null",
                context_lines=2,
                fix_hint="Add null check or use optional chaining: element?.method() or if (element) { ... }",
            ),
        ],
    ),

    # --- React: Hooks order violation ---
    ErrorKnowledge(
        id="EK-006",
        name="React hooks order violation",
        keywords=["rendered fewer hooks"],
        alt_keywords=[["hooks", "order", "previous render"]],
        category="react",
        rules=[
            PatternRule(
                id="EK-006-R1",
                name="Conditional hook call",
                description="Hooks called inside if/else, loops, or after early returns violate Rules of Hooks.",
                severity="high",
                file_glob="**/*.{tsx,jsx}",
                # Two shapes: a hook name inside an `if (...) { ... }` body, or a
                # `const/let x = use...` declaration right after a `return ...;` line.
                pattern=r"(?:if\s*\([^)]*\)\s*\{[^}]*(?:useState|useEffect|useMemo|useCallback|useRef|useContext)|return\s+[^;]*;\s*\n\s*(?:const|let)\s+\[?\w+[,\]]?\s*=\s*use\w+)",
                # JSX/TSX comments start with `//`, so the opt-out marker accepts
                # that prefix as well as the legacy `#` spelling.
                negative_pattern=r"(?:#|//)\s*conditional ok|eslint-disable",
                context_lines=5,
                fix_hint="Move all hooks to the top level of the component, before any conditional returns",
            ),
        ],
    ),

    # --- React: Hydration mismatch ---
    ErrorKnowledge(
        id="EK-007",
        name="React hydration mismatch",
        keywords=["hydration"],
        alt_keywords=[["server", "client", "mismatch"]],
        category="react",
        rules=[
            PatternRule(
                id="EK-007-R1",
                name="Browser-only API in render path",
                description="Using window/document/localStorage during SSR causes hydration mismatches.",
                severity="medium",
                file_glob="**/*.{tsx,jsx,ts,js}",
                # Browser global access with no guard later on the same line.
                pattern=r"(?:window\.|document\.|localStorage\.|sessionStorage\.|navigator\.)(?!.*(?:useEffect|componentDidMount|typeof\s+window))",
                # The client directive is accepted with either quote style (the
                # old pattern only knew single quotes), and the opt-out marker
                # accepts the `//` comment prefix as well as the legacy `#`.
                negative_pattern=r"""useEffect|componentDidMount|typeof\s+window|["']use client["']|(?:#|//)\s*client only""",
                context_lines=5,
                fix_hint="Wrap browser APIs in useEffect or check typeof window !== 'undefined' before access",
            ),
        ],
    ),

    # --- CORS policy violation ---
    ErrorKnowledge(
        id="EK-008",
        name="CORS policy violation",
        keywords=["cors"],
        alt_keywords=[["access-control-allow-origin"]],
        category="browser",
        rules=[
            PatternRule(
                id="EK-008-R1",
                name="Permissive or missing CORS configuration",
                description="Wildcard CORS or missing CORS headers cause cross-origin request failures.",
                severity="medium",
                file_glob="**/*.{py,ts,tsx,js,jsx}",
                # Superset of the previous pattern. Additionally tolerates:
                #   - a closing quote after the name (header used as a dict/object key),
                #   - `,` as separator (header name and value passed as two arguments),
                #   - an opening `[` before the value (wildcard wrapped in a list).
                pattern=r"""(?:Access-Control-Allow-Origin|allow_origins|cors_origins|CORS_ORIGIN)["']?\s*[:=,]\s*\[?\s*["']?\*""",
                negative_pattern=r"# dev only|# local|development|localhost|DEBUG",
                context_lines=3,
                fix_hint="Configure CORS with specific allowed origins matching your frontend domain",
            ),
        ],
    ),

    # --- Unhandled promise rejection ---
    ErrorKnowledge(
        id="EK-009",
        name="Unhandled promise rejection",
        keywords=["unhandled", "promise", "rejection"],
        alt_keywords=[["unhandledrejection"]],
        category="browser",
        rules=[
            PatternRule(
                id="EK-009-R1",
                name="Async call without catch/await",
                description="Promise-returning calls without .catch() or try/await silently swallow errors.",
                severity="medium",
                file_glob="**/*.{ts,tsx,js,jsx}",
                pattern=r"(?:fetch|axios\.\w+|\.post|\.get|\.put|\.delete|\.patch)\s*\([^)]*\)\s*;",
                negative_pattern=r"await|\.then|\.catch|try\s*\{",
                context_lines=3,
                fix_hint="Add 'await' or '.catch()' to handle promise rejections",
            ),
            PatternRule(
                id="EK-009-R2",
                name="Async event handler without try/catch",
                description="Async functions in event handlers without error handling cause unhandled rejections.",
                severity="medium",
                file_glob="**/*.{ts,tsx,js,jsx}",
                pattern=r"(?:on\w+|addEventListener)\s*(?:=|\()\s*async\s",
                negative_pattern=r"try\s*\{|\.catch|# handled|ErrorBoundary",
                context_lines=5,
                fix_hint="Wrap async event handlers in try/catch blocks",
            ),
        ],
    ),

    # --- Container OOM kill (silent crash) ---
    ErrorKnowledge(
        id="EK-011",
        name="Container OOM kill (silent crash)",
        keywords=["oomkilled"],
        alt_keywords=[
            ["exit code", "137"],
            ["killed", "signal", "9"],
            ["out of memory"],
            ["restart", "loop", "container"],
            ["empty reply from server"],
        ],
        category="docker",
        rules=[
            PatternRule(
                id="EK-011-R1",
                name="Docker container memory limit too low",
                description="Container mem_limit may be too low for the workload, causing silent OOM kills.",
                severity="high",
                file_glob="**/docker-compose*.{yml,yaml}",
                pattern=r"mem_limit:\s*\S+",
                fix_hint="Check 'docker inspect <container> --format {{.State.OOMKilled}}' to confirm. "
                         "Bump mem_limit (2g+ for Node.js dev, 1g+ for Python APIs). "
                         "Also run 'docker stats' to see peak memory usage.",
            ),
            PatternRule(
                id="EK-011-R2",
                name="Node.js heap limit not set",
                description="Node.js may exceed container memory limit without --max-old-space-size.",
                severity="medium",
                file_glob="**/Dockerfile*",
                pattern=r"(?:node|next|npm|pnpm|yarn)\s+(?:dev|start|run)",
                negative_pattern=r"max-old-space-size|NODE_OPTIONS",
                context_lines=3,
                fix_hint="Set NODE_OPTIONS='--max-old-space-size=1536' to cap Node.js heap within container limits",
            ),
        ],
    ),

    # --- Python: dict.get() returns None not default ---
    ErrorKnowledge(
        id="EK-012",
        name="dict.get() None value bypasses default",
        keywords=["attributeerror", "nonetype", "strip"],
        alt_keywords=[
            ["nonetype", "has no attribute"],
            ["nonetype", "lower"],
            ["nonetype", "upper"],
            ["nonetype", "split"],
        ],
        category="python",
        rules=[
            PatternRule(
                id="EK-012-R1",
                name="dict.get() with string default chained to string method",
                description="d.get('key', '').strip() fails when key exists with None value. "
                            "dict.get() only uses the default when key is ABSENT.",
                severity="high",
                file_glob="**/*.py",
                pattern=r"""\.get\(\s*["'][^"']+["']\s*,\s*["'][^"']*["']\s*\)\s*\.\s*(?:strip|lower|upper|split|replace)\s*\(""",
                fix_hint="Use (d.get('key') or '').strip() — the 'or' pattern handles both missing AND None values",
            ),
        ],
    ),

    # --- Permission denied / EACCES ---
    ErrorKnowledge(
        id="EK-010",
        name="Permission denied / EACCES",
        keywords=["eacces"],
        alt_keywords=[["permission denied"], ["errno", "13"]],
        category="system",
        rules=[
            PatternRule(
                id="EK-010-R1",
                name="Hardcoded file path in restricted directory",
                description="Writing to system directories or paths requiring elevated permissions.",
                severity="medium",
                file_glob="**/*.{py,ts,tsx,js,jsx,sh}",
                pattern=r"""(?:open|write|mkdir|chmod)\s*\([^)]*["']/(?:etc|usr|var|opt|root)/""",
                negative_pattern=r"# root ok|# system|sudo|test|example|mock",
                context_lines=2,
                fix_hint="Use user-writable directories (e.g., /tmp, ~/.config, or $HOME) or check permissions first",
            ),
            PatternRule(
                id="EK-010-R2",
                name="Missing directory creation before file write",
                description="Writing files without ensuring parent directories exist causes EACCES/ENOENT.",
                severity="low",
                file_glob="**/*.py",
                pattern=r"(?:open|write_text|write_bytes)\s*\(",
                negative_pattern=r"mkdir|makedirs|exist_ok|Path.*parent.*mkdir|os\.path\.exists|ensure",
                context_lines=5,
                fix_hint="Call os.makedirs(dir, exist_ok=True) or Path.mkdir(parents=True, exist_ok=True) before writing",
            ),
        ],
    ),
]


# ============================================================================
# SCANNER ENGINE (stack-agnostic)
# ============================================================================

def get_staged_files():
    """Return the files currently staged in the git index, as paths under ROOT.

    Runs ``git diff --cached --name-only --diff-filter=ACMR`` with ROOT as the
    working directory and joins every non-blank output line onto ROOT.
    The git exit status is not inspected: when git prints nothing (for
    example because the command failed), the result is an empty list.
    """
    git_cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=ACMR"]
    completed = subprocess.run(git_cmd, capture_output=True, text=True, cwd=ROOT)

    staged = []
    for raw_name in completed.stdout.strip().split("\n"):
        name = raw_name.strip()
        if name:
            staged.append(ROOT / name)
    return staged


def find_files(glob_pattern, base):
    """Recursively collect paths below *base* matching *glob_pattern*, sorted.

    *base* must provide ``rglob`` (e.g. a ``pathlib.Path``); the matches are
    returned as a list in ascending path order.
    """
    matches = list(base.rglob(glob_pattern))
    matches.sort()
    return matches


def matches_glob(filepath, glob_pattern):
    """Tell whether *filepath* (located under ROOT) matches a rule's file glob.

    ``PurePath.match`` has no notion of ``{a,b}`` brace alternatives, yet the
    rule globs in this file rely on them (e.g. ``**/*.{tsx,jsx}``). The
    pattern is therefore expanded into its plain alternatives first and each
    one is tried in turn.

    ``PurePath.match`` also needs as many path components as the pattern has,
    so ``**/Dockerfile*`` would never match a file sitting directly in ROOT
    even though a recursive glob from ROOT finds it. For patterns starting
    with ``**/`` the remainder is tried as well to keep both code paths in
    agreement.

    Args:
        filepath: Path of the candidate file.
        glob_pattern: Glob from ``PatternRule.file_glob``; may contain
            ``{a,b,c}`` groups.

    Returns:
        True when the path relative to ROOT matches any expanded alternative;
        False otherwise, including when *filepath* is not under ROOT.
    """
    try:
        rel = filepath.relative_to(ROOT)
    except ValueError:
        return False

    # Expand every innermost {a,b} group until only brace-free globs remain.
    pending = [glob_pattern]
    plain_patterns = []
    while pending:
        current = pending.pop()
        group = re.search(r"\{([^{}]+)\}", current)
        if group is None:
            plain_patterns.append(current)
            continue
        head, tail = current[:group.start()], current[group.end():]
        pending.extend(head + alt + tail for alt in group.group(1).split(","))

    for candidate in plain_patterns:
        if rel.match(candidate):
            return True
        # "**/x" should also accept "x" at the repository root.
        remainder = candidate[3:] if candidate.startswith("**/") else ""
        if remainder and rel.match(remainder):
            return True
    return False


def scan_file(filepath, rule):
    """Apply one pattern rule to one file, line by line.

    A line is reported when ``rule.pattern`` matches it, unless

    * the file path contains one of ``rule.exclude_paths`` (whole file skipped),
    * the line carries an inline ``noqa: <rule id>`` or ``noqa: all`` marker, or
    * ``rule.negative_pattern`` matches anywhere within ``rule.context_lines``
      lines before or after the hit.

    Unreadable files yield an empty list.

    Returns:
        A list of ``Finding`` objects with 1-based line numbers, the path made
        relative to ROOT when possible, and the stripped source line truncated
        to 120 characters.
    """
    path_text = str(filepath)
    if rule.exclude_paths and any(skip in path_text for skip in rule.exclude_paths):
        return []

    try:
        text = filepath.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    source_lines = text.split("\n")
    matcher = re.compile(rule.pattern)
    suppress_marker = f"noqa: {rule.id}"
    hits = []

    for lineno, source_line in enumerate(source_lines, start=1):
        if not matcher.search(source_line):
            continue

        # Inline suppression: # noqa: EP-XXX
        if suppress_marker in source_line or "noqa: all" in source_line:
            continue

        if rule.negative_pattern:
            # Window of context_lines on either side of the hit (0-based slice).
            window_start = max(0, lineno - 1 - rule.context_lines)
            window_end = min(len(source_lines), lineno + rule.context_lines)
            window = "\n".join(source_lines[window_start:window_end])
            if re.search(rule.negative_pattern, window):
                continue

        try:
            shown_path = str(filepath.relative_to(ROOT))
        except ValueError:
            shown_path = str(filepath)

        hits.append(Finding(
            rule_id=rule.id,
            rule_name=rule.name,
            severity=rule.severity,
            file=shown_path,
            line=lineno,
            code=source_line.strip()[:120],
            fix_hint=rule.fix_hint,
            incident=rule.incident,
        ))
    return hits


def scan_codebase(rules, staged_only=False, severity_filter=None):
    """Run every rule over the codebase (or the staged files) and collect findings.

    Args:
        rules: Iterable of pattern rules to apply.
        staged_only: When True, only files reported by ``get_staged_files()``
            are scanned, filtered per rule with ``matches_glob``. No directory
            exclusions are applied in this mode.
        severity_filter: When set, rules whose severity differs are skipped.

    Returns:
        A flat list with the findings of all rules, in rule order.

    In full-scan mode each rule's ``file_glob`` is brace-expanded (``{a,b}``
    groups, any number of them) and resolved recursively from ROOT. Files
    inside generated or third-party directories are dropped. The exclusion is
    decided on the directory components of the path *relative to ROOT*, so a
    checkout that itself lives below a directory called ``build`` or ``dist``
    is still scanned, and look-alike names such as ``mydist`` are not excluded.
    """
    excluded_dirs = {
        "__pycache__", "node_modules", ".git", ".next", "dist", "build", "vendor",
    }

    def expand_braces(glob_pattern):
        # Turn "a.{x,y}" into ["a.x", "a.y"]; brace-free patterns pass through.
        queue, plain = [glob_pattern], []
        while queue:
            current = queue.pop(0)
            group = re.search(r"\{([^{}]+)\}", current)
            if group is None:
                plain.append(current)
                continue
            head, tail = current[:group.start()], current[group.end():]
            queue.extend(head + alt + tail for alt in group.group(1).split(","))
        return plain

    def in_excluded_dir(path):
        # Only directory components below ROOT count, never the file name.
        try:
            parts = path.relative_to(ROOT).parts
        except ValueError:
            parts = path.parts
        return any(part in excluded_dirs for part in parts[:-1])

    staged_files = get_staged_files() if staged_only else None
    all_findings = []

    for rule in rules:
        if severity_filter and rule.severity != severity_filter:
            continue

        if staged_files is not None:
            rule_files = [f for f in staged_files if matches_glob(f, rule.file_glob)]
        else:
            found = []
            for expanded in expand_braces(rule.file_glob):
                found.extend(find_files(expanded, ROOT))
            # dict.fromkeys drops duplicates while keeping discovery order.
            rule_files = [f for f in dict.fromkeys(found) if not in_excluded_dir(f)]

        for filepath in rule_files:
            all_findings.extend(scan_file(filepath, rule))

    return all_findings


# ============================================================================
# OUTPUT FORMATTERS
# ============================================================================

# Plain-text marker printed in front of each finding, keyed by severity.
SEVERITY_ICONS = {"high": "!!!", "medium": "!!", "low": "!"}
# ANSI escape sequence that starts the colour used for each severity.
SEVERITY_COLORS = {"high": "\033[91m", "medium": "\033[93m", "low": "\033[90m"}
# ANSI escape sequence that resets terminal attributes after a coloured span.
RESET = "\033[0m"


def format_text(findings):
    """Render findings as a human-readable, ANSI-coloured report.

    Findings are grouped by severity and printed in the order high, medium,
    low; empty groups are omitted. Each finding shows its rule id and name,
    ``file:line``, the offending code, the fix hint and, when present, the
    incident reference. With no findings a single fixed sentence is returned.
    """
    if not findings:
        return "No error patterns detected."

    severity_order = ("high", "medium", "low")
    grouped = {level: [] for level in severity_order}
    for item in findings:
        grouped[item.severity].append(item)

    report = [
        f"ERROR PATTERN DETECTOR — {len(findings)} finding(s)",
        "=" * 60,
    ]
    for level in severity_order:
        bucket = grouped[level]
        if bucket:
            marker = SEVERITY_ICONS[level]
            tint = SEVERITY_COLORS[level]
            report.append(f"\n{tint}[{level.upper()}] {len(bucket)} finding(s){RESET}")
            report.append("-" * 40)
            for item in bucket:
                report.extend([
                    f"  {tint}{marker}{RESET} [{item.rule_id}] {item.rule_name}",
                    f"    File: {item.file}:{item.line}",
                    f"    Code: {item.code}",
                    f"    Fix:  {item.fix_hint}",
                ])
                if item.incident:
                    report.append(f"    Ref:  {item.incident}")
                report.append("")
    return "\n".join(report)


def format_json(findings):
    """Serialise findings to a JSON array string (2-space indent) for CI use.

    Every finding becomes one object with the keys ``rule_id``, ``rule_name``,
    ``severity``, ``file``, ``line``, ``code``, ``fix_hint`` and ``incident``,
    in that order.
    """
    exported_fields = (
        "rule_id", "rule_name", "severity", "file",
        "line", "code", "fix_hint", "incident",
    )
    payload = []
    for item in findings:
        payload.append({name: getattr(item, name) for name in exported_fields})
    return json.dumps(payload, indent=2)


def format_summary(findings):
    """Return a one-line tally of findings per severity plus the overall total.

    Findings whose severity is not high/medium/low count only toward the total.
    """
    high_count = medium_count = low_count = 0
    for item in findings:
        if item.severity == "high":
            high_count += 1
        elif item.severity == "medium":
            medium_count += 1
        elif item.severity == "low":
            low_count += 1
    total = len(findings)
    return f"Patterns: {high_count} high, {medium_count} medium, {low_count} low ({total} total)"


# ============================================================================
# REACTIVE ERROR ANALYSIS ENGINE
# ============================================================================

def _iter_pack_modules():
    """Import each plugin module in ``error_knowledge/`` and yield it.

    The directory is looked up next to this file. Every ``*.py`` file whose
    name does not start with an underscore is imported, in sorted order, under
    the module name ``error_knowledge.<stem>`` and yielded as a
    ``(module, py_file)`` tuple. A pack that fails to load is reported on
    stderr and skipped. Nothing is yielded when the directory is absent.

    Shared by the reactive (KNOWLEDGE) and proactive (PROACTIVE_PATTERNS)
    loaders.
    """
    import importlib.util

    packs_dir = Path(__file__).resolve().parent / "error_knowledge"
    if not packs_dir.is_dir():
        return

    pack_files = [
        candidate for candidate in sorted(packs_dir.glob("*.py"))
        if not candidate.name.startswith("_")
    ]
    for py_file in pack_files:
        module_name = f"error_knowledge.{py_file.stem}"
        try:
            spec = importlib.util.spec_from_file_location(module_name, py_file)
            if not (spec and spec.loader):
                continue
            mod = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(mod)
            yield mod, py_file
        except Exception as exc:
            print(f"Warning: Failed to load knowledge pack {py_file.name}: {exc}",
                  file=sys.stderr)


def load_knowledge_packs() -> list[ErrorKnowledge]:
    """Build the reactive knowledge base used by ``--analyze``.

    Starts from a copy of the built-in KNOWLEDGE list and appends the
    ``KNOWLEDGE`` list of every plugin module found in ``error_knowledge/``
    (modules without that attribute contribute nothing). When no plugin
    modules are available the result is just the built-in entries.

    Returns:
        A new list: built-in entries first, then pack entries in load order.
    """
    merged = [*KNOWLEDGE]
    for pack_module, _pack_file in _iter_pack_modules():
        pack_entries = getattr(pack_module, "KNOWLEDGE", [])
        merged += pack_entries
    return merged


# Per-process cache for load_proactive_patterns(); None means "not loaded yet".
_proactive_patterns_cache: list | None = None


def load_proactive_patterns() -> list:
    """Load proactive project-specific PatternRules from error_knowledge/*.py.

    Each plugin module may define a ``PROACTIVE_PATTERNS`` list of PatternRule
    entries, which are scanned over the codebase the same way as the built-in
    PATTERNS list. This lets a project add rules without forking this script.

    A missing or empty ``PROACTIVE_PATTERNS`` (including ``None`` or any other
    falsy value) contributes nothing; a non-empty value that is not a list is
    reported on stderr and skipped.

    The result is cached per process, so plugin modules are imported at most
    once per CLI invocation.

    Returns:
        The combined list of plugin-provided rules, in module load order.
    """
    global _proactive_patterns_cache
    if _proactive_patterns_cache is not None:
        return _proactive_patterns_cache

    patterns = []
    for mod, py_file in _iter_pack_modules():
        pack_patterns = getattr(mod, "PROACTIVE_PATTERNS", [])
        if not pack_patterns:
            # Covers [], None, () ... — nothing to add, and extend(None) would raise.
            continue
        if not isinstance(pack_patterns, list):
            print(f"Warning: PROACTIVE_PATTERNS in {py_file.name} is not a list, skipping",
                  file=sys.stderr)
            continue
        patterns.extend(pack_patterns)

    _proactive_patterns_cache = patterns
    return patterns


def get_all_patterns() -> list:
    """Return a new list: built-in PATTERNS followed by plugin PROACTIVE_PATTERNS."""
    combined = list(PATTERNS)
    combined.extend(load_proactive_patterns())
    return combined


def parse_stack_trace(error_text: str) -> list[tuple[str, int]]:
    """Pull ``(filepath, line_number)`` pairs out of an error message.

    Four formats are tried in this order, and results keep that order with
    duplicates removed:

    1. Python tracebacks: ``File "path/to/file.py", line 42``
    2. Node/browser frames with a function name:
       ``at fn (path/to/file.ts:42:15)`` — a leading ``webpack:///./`` is
       stripped from the path
    3. Node/browser frames without parentheses: ``at /abs/file.ts:42:15``
       (absolute POSIX paths or drive-letter Windows paths)
    4. Generic ``/abs/path/file.ext:42`` with a recognised source extension
    """
    ordered: list[tuple[str, int]] = []
    known: set[tuple[str, int]] = set()

    def remember(path: str, line_text: str) -> None:
        # Record each (path, line) once, preserving first-seen order.
        entry = (path, int(line_text))
        if entry not in known:
            known.add(entry)
            ordered.append(entry)

    # Python: File "path/to/file.py", line 42
    for hit in re.finditer(r'File\s+"([^"]+)",\s+line\s+(\d+)', error_text):
        remember(hit.group(1), hit.group(2))

    # Node/Browser: at functionName (path/to/file.ts:42:15)
    for hit in re.finditer(r'at\s+\S+\s+\(([^)]+):(\d+):\d+\)', error_text):
        bundled_path = hit.group(1)
        remember(re.sub(r'^webpack:///\./', '', bundled_path), hit.group(2))

    # Node/Browser: at path/to/file.ts:42:15 (no parens)
    for hit in re.finditer(r'at\s+(/[^:]+|[a-zA-Z]:\\[^:]+):(\d+):\d+', error_text):
        remember(hit.group(1), hit.group(2))

    # Generic: /path/to/file.ext:42 (must have a recognizable extension)
    generic_frame = r'(?:^|\s)((?:/[\w._-]+)+\.(?:py|js|ts|tsx|jsx|go|rb|java|cs|php)):(\d+)'
    for hit in re.finditer(generic_frame, error_text, re.MULTILINE):
        remember(hit.group(1), hit.group(2))

    return ordered


def _match_keywords(error_lower: str, entry: ErrorKnowledge) -> bool:
    """Check if an error message matches an ErrorKnowledge entry's keywords.

    Args:
        error_lower: The error text, already lower-cased by the caller.
        entry: Knowledge entry providing ``keywords`` (primary set) and
            ``alt_keywords`` (list of alternative sets).

    Returns:
        True when every keyword of the primary set, or every keyword of at
        least one alternative set, occurs as a substring of *error_lower*.

    An empty keyword set never matches: ``all()`` over nothing is vacuously
    true, which would otherwise make an entry with ``keywords=[]`` (or an
    empty alternative set) match every error message.
    """
    candidate_sets = [entry.keywords, *(entry.alt_keywords or [])]
    for keyword_set in candidate_sets:
        if keyword_set and all(kw in error_lower for kw in keyword_set):
            return True
    return False


def analyze_error(error_text: str, root: Path) -> list[Finding]:
    """Reactive error analysis. Feed an error message, get code pattern matches.

    1. Match the error against the knowledge base keywords (built-in + packs).
    2. For every matched entry, run its rules: first on files named in the
       stack trace, then over the whole codebase.
    3. If nothing in the knowledge base matched, fall back to the full pattern
       list: scan the stack-trace files, then the files that mention quoted
       strings or ``*Error``/``*Exception``/``*Warning`` names taken from the
       error text (at most five identifiers, located with ``grep``).
    4. Findings are de-duplicated on (file, line, rule id) and capped at 50.

    Args:
        error_text: Raw error message or stack trace.
        root: Directory that relative stack-trace paths are resolved against
            and that the identifier search walks.

    Returns:
        Up to 50 unique findings, in discovery order.
    """
    max_findings = 50
    all_findings: list[Finding] = []
    seen_keys: set[tuple[str, int, str]] = set()

    def collect(findings) -> None:
        # Append findings not seen before, keyed on (file, line, rule id).
        for finding in findings:
            key = (finding.file, finding.line, finding.rule_id)
            if key not in seen_keys:
                seen_keys.add(key)
                all_findings.append(finding)

    def capped() -> bool:
        return len(all_findings) >= max_findings

    error_lower = error_text.lower()
    matched_entries = [
        entry for entry in load_knowledge_packs()
        if _match_keywords(error_lower, entry)
    ]

    # Resolve stack-trace paths once; relative ones are taken relative to root.
    stack_paths = []
    for filepath_str, _lineno in parse_stack_trace(error_text):
        candidate = Path(filepath_str)
        resolved = candidate if candidate.is_absolute() else root / candidate
        if resolved.exists():
            stack_paths.append(resolved)

    # Knowledge-base hit: stack-trace files first, then the broader codebase.
    for entry in matched_entries:
        for rule in entry.rules:
            for filepath in stack_paths:
                if matches_glob(filepath, rule.file_glob):
                    collect(scan_file(filepath, rule))
            collect(scan_codebase([rule]))

    if not matched_entries:
        identifiers = [m.group(1) for m in re.finditer(r"""['"]([^'"]{3,60})['"]""", error_text)]
        identifiers += [
            m.group(1)
            for m in re.finditer(r'\b([A-Z][a-zA-Z0-9]+(?:Error|Exception|Warning))\b', error_text)
        ]

        # Scan stack trace files with all PATTERNS (+ plugin-provided)
        all_patterns = get_all_patterns()
        for filepath in stack_paths:
            for rule in all_patterns:
                if matches_glob(filepath, rule.file_glob):
                    collect(scan_file(filepath, rule))

        # The identifier comes straight from the error text, so it is passed
        # as a fixed string (-F) via -e: a leading "-" cannot be taken for an
        # option and regex metacharacters cannot alter or break the search.
        grep_base = [
            "grep", "-rlF",
            "--include=*.py", "--include=*.ts", "--include=*.tsx",
            "--include=*.js", "--include=*.jsx",
            "--exclude-dir=node_modules", "--exclude-dir=.git",
            "--exclude-dir=__pycache__", "--exclude-dir=.next",
            "--exclude-dir=dist", "--exclude-dir=build", "--exclude-dir=vendor",
        ]
        for ident in identifiers[:5]:  # Cap to avoid excessive scanning
            if capped():
                break
            try:
                result = subprocess.run(
                    [*grep_base, "-e", ident, "--", str(root)],
                    capture_output=True, text=True, timeout=10,
                )
            except (subprocess.TimeoutExpired, OSError):
                # grep missing or too slow: skip this identifier, keep going.
                continue

            for raw_line in result.stdout.splitlines():
                hit = raw_line.strip()
                if not hit:
                    continue
                filepath = Path(hit)
                if not filepath.exists():
                    continue
                for rule in all_patterns:
                    if matches_glob(filepath, rule.file_glob):
                        collect(scan_file(filepath, rule))
                    if capped():
                        break
                if capped():
                    break

    return all_findings[:max_findings]


def analyze_log_file(log_path: Path, root: Path) -> dict[str, list[Finding]]:
    """Parse a log file for errors, deduplicate, analyze each unique one.

    The log is split into blocks at lines that start with a timestamp
    (``YYYY-MM-DD HH:MM`` / ``YYYY/MM/DDTHH:MM``) or with one of ERROR,
    CRITICAL, FATAL, Exception, Traceback. Text before the first such line is
    ignored; a log without any boundary is treated as a single block. Blocks
    that mention no error-like word are dropped.

    Blocks are de-duplicated on a normalised signature (timestamps, hex
    values, long numbers and ``line N`` references replaced by placeholders,
    first 200 characters). Each remaining block is passed to
    ``analyze_error``.

    Args:
        log_path: Log file to read (decoded as UTF-8, undecodable bytes replaced).
        root: Project root forwarded to ``analyze_error``.

    Returns:
        ``{display_key: findings}`` for every unique error that produced
        findings. The display key is the first line of the block (max 120
        characters); when several distinct errors share that line — e.g. every
        Python ``Traceback (most recent call last):`` — later ones get a
        `` (#N)`` suffix instead of overwriting the earlier entry. An
        unreadable file yields ``{}`` after a message on stderr.
    """
    try:
        content = log_path.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        print(f"Error reading log file: {exc}", file=sys.stderr)
        return {}

    # Match boundaries: lines starting with timestamps, ERROR level, or Traceback
    boundary_re = re.compile(
        r'^(?:\d{4}[-/]\d{2}[-/]\d{2}[\sT]\d{2}:\d{2}|'
        r'(?:ERROR|CRITICAL|FATAL|Exception|Traceback)\b)',
        re.MULTILINE
    )
    starts = [m.start() for m in boundary_re.finditer(content)]

    if not starts:
        # No clear boundaries — treat the whole file as one block
        blocks = [content]
    else:
        ends = starts[1:] + [len(content)]
        stripped = (content[begin:end].strip() for begin, end in zip(starts, ends))
        blocks = [block for block in stripped if block]

    error_keywords = re.compile(
        r'(?:error|exception|traceback|failed|fatal|critical)',
        re.IGNORECASE
    )
    timestamp_re = re.compile(
        r'\d{4}[-/]\d{2}[-/]\d{2}[\sT]\d{2}:\d{2}(?::\d{2}(?:[.,]\d+)?)?'
    )

    unique_blocks: dict[str, str] = {}  # signature -> first block seen with it
    for block in blocks:
        if not error_keywords.search(block):
            continue
        # Normalise the parts that vary between occurrences of the same error.
        # Timestamps go first: they open most blocks and would otherwise make
        # every occurrence look unique.
        sig = timestamp_re.sub('TIMESTAMP', block)
        sig = re.sub(r'0x[0-9a-fA-F]+', '0xHEX', sig)
        sig = re.sub(r'\b\d{4,}\b', 'NUM', sig)
        sig = re.sub(r'line \d+', 'line N', sig)
        unique_blocks.setdefault(sig[:200], block)

    results: dict[str, list[Finding]] = {}
    for block in unique_blocks.values():
        findings = analyze_error(block, root)
        if not findings:
            continue
        # Use first line as display key, disambiguated when already taken.
        first_line = block.split("\n")[0][:120]
        display_key = first_line
        occurrence = 2
        while display_key in results:
            display_key = f"{first_line} (#{occurrence})"
            occurrence += 1
        results[display_key] = findings

    return results


# ============================================================================
# CLI
# ============================================================================

def main():
    """Command-line entry point; returns the process exit code.

    Modes, checked in this order: ``--list-rules``, ``--list-knowledge``,
    ``--analyze``, ``--analyze-log``, and otherwise a pattern scan of the
    codebase (optionally limited by ``--staged`` / ``--severity``).
    ``--json`` and ``--summary`` select the output format of the analysis and
    scan modes.

    Returns:
        0 for the listing modes and when no high-severity finding is reported,
        1 when at least one high-severity finding is reported, 2 when the
        ``--analyze-log`` file does not exist.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Scan codebase for known error patterns")
    parser.add_argument("--staged", action="store_true", help="Only scan staged files")
    parser.add_argument("--severity", choices=["high", "medium", "low"], help="Filter by severity")
    parser.add_argument("--json", action="store_true", help="JSON output")
    parser.add_argument("--summary", action="store_true", help="One-line summary only")
    parser.add_argument("--list-rules", action="store_true", help="List all pattern rules")
    parser.add_argument("--analyze", type=str, metavar="ERROR_MSG",
                        help="Reactive analysis: match an error message against the knowledge base")
    parser.add_argument("--analyze-log", type=str, metavar="LOG_PATH",
                        help="Parse a log file for errors and analyze each unique one")
    parser.add_argument("--list-knowledge", action="store_true",
                        help="List all ErrorKnowledge entries (id, name, category, keywords)")
    args = parser.parse_args()

    def exit_status(items):
        # 1 as soon as any finding is high severity, otherwise 0.
        for item in items:
            if item.severity == "high":
                return 1
        return 0

    # --- Listing modes -------------------------------------------------------
    if args.list_rules:
        all_rules = get_all_patterns()
        proactive = load_proactive_patterns()
        print(f"{'ID':<8} {'Sev':<7} {'Name'}")
        print("-" * 60)
        for rule in all_rules:
            print(f"{rule.id:<8} {rule.severity:<7} {rule.name}")
        print(f"\n{len(all_rules)} pattern rules registered "
              f"({len(PATTERNS)} built-in + {len(proactive)} from error_knowledge/ plugins).")
        return 0

    if args.list_knowledge:
        knowledge = load_knowledge_packs()
        print(f"{'ID':<8} {'Category':<12} {'Name':<45} {'Keywords'}")
        print("-" * 100)
        for entry in knowledge:
            joined_keywords = ", ".join(entry.keywords)
            print(f"{entry.id:<8} {entry.category:<12} {entry.name:<45} {joined_keywords}")
        print(f"\n{len(knowledge)} knowledge entries loaded.")
        return 0

    # --- Reactive analysis of a single error message --------------------------
    if args.analyze:
        error_msg = args.analyze
        findings = analyze_error(error_msg, ROOT)
        if args.json:
            print(format_json(findings))
        elif args.summary:
            print(format_summary(findings))
        elif findings:
            ellipsis = '...' if len(error_msg) > 100 else ''
            print(f"REACTIVE ANALYSIS — {len(findings)} finding(s) for error:")
            print(f'  "{error_msg[:100]}{ellipsis}"')
            print("=" * 60)
            print(format_text(findings))
        else:
            print("No code patterns matched for this error message.")
        return exit_status(findings)

    # --- Reactive analysis of a log file --------------------------------------
    if args.analyze_log:
        log_path = Path(args.analyze_log)
        if not log_path.exists():
            print(f"Error: Log file not found: {log_path}", file=sys.stderr)
            return 2
        results = analyze_log_file(log_path, ROOT)

        if args.json:
            exported_fields = (
                "rule_id", "rule_name", "severity", "file",
                "line", "code", "fix_hint", "incident",
            )
            json_out = {}
            for sig, group in results.items():
                json_out[sig] = [
                    {name: getattr(item, name) for name in exported_fields}
                    for item in group
                ]
            print(json.dumps(json_out, indent=2))
        elif args.summary:
            total = sum(len(group) for group in results.values())
            print(f"Log analysis: {len(results)} unique errors, {total} total findings")
        elif results:
            total = sum(len(group) for group in results.values())
            print(f"LOG ANALYSIS — {len(results)} unique error(s), {total} finding(s)")
            print("=" * 60)
            for sig, group in results.items():
                print(f"\nError: {sig}")
                print("-" * 40)
                print(format_text(group))
        else:
            print("No actionable patterns found in log file.")

        return exit_status(item for group in results.values() for item in group)

    # --- Default: proactive scan ------------------------------------------------
    findings = scan_codebase(
        get_all_patterns(),
        staged_only=args.staged,
        severity_filter=args.severity,
    )
    if args.json:
        rendered = format_json(findings)
    elif args.summary:
        rendered = format_summary(findings)
    else:
        rendered = format_text(findings)
    print(rendered)

    return exit_status(findings)


if __name__ == "__main__":
    sys.exit(main())
