#!/usr/bin/env python3
"""
Verify Pipeline Health — sanity-check that hooks and rules are wired correctly.

Catches the silent-failure mode where a hook exists in settings.json but its
script is missing/non-executable, or a rules file is missing from CLAUDE.md.
Run on session start (or manually) to surface drift early.

What it checks:
  1. All scripts in scripts/docs/ are executable
  2. All hook scripts referenced in .claude/settings.json exist
  3. .claude/rules/ files are referenced from CLAUDE.md (project root)
  4. echo_critical_rules.sh has the critical-rules sentinel markers
  5. Recent activity in .claude/sub-skill-log.jsonl (warns if 0 entries)
  6. BACKLOG.md, CHANGELOG.md, and VERSION exist
  7. context-budget hook is wired

Usage:
    verify_pipeline_health.py              # run all checks
    verify_pipeline_health.py --quiet      # only print failures
    verify_pipeline_health.py --json       # machine-readable

Exit codes:
    0 — all checks passed (or only warnings)
    1 — at least one check failed
    2 — usage error (invalid command-line arguments, reported by argparse)
"""

from __future__ import annotations

import argparse
import json
import os
import re
import sys
from pathlib import Path

# The project root is derived from where this file lives: installed as
# scripts/docs/verify_pipeline_health.py, the root is the third ancestor of
# the resolved script path. If the path is too shallow to have one, the
# current working directory is used instead. Setting the PROJECT_ROOT
# environment variable takes precedence over either default.
try:
    _DEFAULT_ROOT = Path(__file__).resolve().parents[2]
except IndexError:
    _DEFAULT_ROOT = Path(".").resolve()
ROOT = Path(os.getenv("PROJECT_ROOT", _DEFAULT_ROOT)).resolve()


def _ok(name: str, detail: str = "") -> dict:
    """Build a result record with status ``"ok"`` for the check *name*."""
    return dict(check=name, status="ok", detail=detail)


def _warn(name: str, detail: str) -> dict:
    """Build a result record with status ``"warn"`` for the check *name*."""
    return dict(check=name, status="warn", detail=detail)


def _fail(name: str, detail: str) -> dict:
    """Build a result record with status ``"fail"`` for the check *name*."""
    return dict(check=name, status="fail", detail=detail)


def check_scripts_executable() -> list[dict]:
    """Warn about scripts under scripts/docs/ that are not executable.

    Python files are considered only when they sit directly in scripts/docs/
    and are not ``__init__.py``; shell scripts are searched recursively.

    Returns:
        A single ``fail`` record when scripts/docs/ is not a directory.
        Otherwise one ``warn`` record per non-executable script (Python
        files first, then shell scripts, each group sorted by path), or a
        single ``ok`` record when nothing was flagged.
    """
    scripts_dir = ROOT / "scripts" / "docs"
    if not scripts_dir.is_dir():
        return [_fail("scripts_dir", f"missing: {scripts_dir}")]

    # Library modules (__init__.py and anything inside a subpackage such as
    # error_knowledge/) are imported, not exec'd, so only top-level .py files
    # count. A non-recursive glob selects exactly those without walking the
    # whole tree.
    py_scripts = sorted(
        path for path in scripts_dir.glob("*.py") if path.name != "__init__.py"
    )
    sh_scripts = sorted(scripts_dir.rglob("*.sh"))

    # Directory listing order is filesystem-dependent; sorting keeps the
    # report stable between runs and machines.
    results = [
        _warn("script_exec", f"not executable: {path.relative_to(ROOT)}")
        for path in py_scripts + sh_scripts
        if not os.access(path, os.X_OK)
    ]
    if not results:
        results.append(_ok("script_exec", "all CLI scripts executable"))
    return results


def check_hook_scripts_exist() -> list[dict]:
    """Verify that every scripts/docs/ path named in a hook command exists.

    Reads ``.claude/settings.json``, scans the ``command`` string of every
    hook for ``scripts/docs/...`` paths (including paths inside
    subdirectories) and checks each one against the project root.

    Returns:
        A single ``fail`` record when settings.json is missing, is not valid
        JSON, or its top-level value is not an object. Otherwise one ``fail``
        record per referenced path that does not exist (sorted by path), or
        a single ``ok`` record carrying the number of referenced paths.

    Raises:
        AttributeError, TypeError: if the ``hooks`` section does not have the
            expected mapping -> list -> mapping shape.
    """
    settings_path = ROOT / ".claude" / "settings.json"
    if not settings_path.exists():
        return [_fail("settings_json", f"missing: {settings_path}")]
    try:
        data = json.loads(settings_path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as exc:
        return [_fail("settings_json", f"invalid JSON: {exc}")]
    if not isinstance(data, dict):
        return [_fail("settings_json", "top-level JSON value is not an object")]

    # Match the whole relative path, including nested components. Stopping at
    # the first "/" would reduce scripts/docs/pkg/tool.py to the directory
    # scripts/docs/pkg, which exists even when tool.py does not.
    script_ref = re.compile(r"scripts/docs/[\w.-]+(?:/[\w.-]+)*")

    referenced: set[str] = set()
    for stanza in (data.get("hooks") or {}).values():
        for entry in stanza:
            for hook in entry.get("hooks") or []:
                referenced.update(script_ref.findall(hook.get("command") or ""))

    results = [
        _fail("hook_script", f"referenced but missing: {rel}")
        for rel in sorted(referenced)
        if not (ROOT / rel).exists()
    ]
    if not results:
        results.append(_ok("hook_script", f"{len(referenced)} hook scripts present"))
    return results


def check_rules_referenced_from_claude_md() -> list[dict]:
    """Check that each ``.claude/rules/*.md`` file name appears in CLAUDE.md.

    A rule counts as referenced when its file name occurs anywhere in the
    text of the project-root CLAUDE.md (plain substring match).

    Returns:
        A single ``warn`` record when ``.claude/rules/`` is absent, a single
        ``fail`` record when CLAUDE.md is missing, a single ``warn`` record
        listing the unreferenced rule files (sorted by name), or a single
        ``ok`` record when every rule file is mentioned.
    """
    rules_dir = ROOT / ".claude" / "rules"
    claude_md = ROOT / "CLAUDE.md"
    if not rules_dir.is_dir():
        return [_warn("rules_dir", "no .claude/rules/ — skipping")]
    if not claude_md.exists():
        return [_fail("claude_md", f"missing: {claude_md}")]

    content = claude_md.read_text(encoding="utf-8")
    # Directory listing order is filesystem-dependent; sort so the message is
    # stable between runs and machines.
    missing = sorted(
        rule.name for rule in rules_dir.glob("*.md") if rule.name not in content
    )
    if missing:
        return [_warn("rules_reference",
                      f"not referenced from CLAUDE.md: {', '.join(missing)}")]
    return [_ok("rules_reference", "all rules referenced")]


def check_critical_rules_sentinels() -> list[dict]:
    """Confirm echo_critical_rules.sh contains both sentinel markers.

    Returns:
        A single ``fail`` record when the script is missing or when either
        the START or the END marker is absent from its text; a single ``ok``
        record otherwise.
    """
    script = ROOT.joinpath("scripts", "docs", "echo_critical_rules.sh")
    if not script.exists():
        return [_fail("critical_rules", f"missing: {script}")]

    body = script.read_text(encoding="utf-8")
    required_markers = (
        "__PROJECT_CRITICAL_RULES_START__",
        "__PROJECT_CRITICAL_RULES_END__",
    )
    if all(marker in body for marker in required_markers):
        return [_ok("critical_rules", "sentinels present")]
    return [_fail("critical_rules", "sentinel markers missing")]


def check_sub_skill_log_activity() -> list[dict]:
    """Report how many non-blank lines ``.claude/sub-skill-log.jsonl`` holds.

    Returns:
        A single ``warn`` record when the log file is absent or contains no
        non-blank lines; otherwise a single ``ok`` record with the count.
    """
    log_path = ROOT.joinpath(".claude", "sub-skill-log.jsonl")
    if not log_path.exists():
        return [_warn("sub_skill_log", "no entries yet — router may not be in use")]

    # Whitespace-only lines are not counted as entries.
    entry_count = sum(
        1 for row in log_path.read_text(encoding="utf-8").splitlines() if row.strip()
    )
    if entry_count == 0:
        return [_warn("sub_skill_log", "log file exists but is empty")]
    return [_ok("sub_skill_log", f"{entry_count} entries")]


def check_core_files() -> list[dict]:
    """Check that BACKLOG.md, CHANGELOG.md and VERSION exist at the root.

    Returns:
        One ``fail`` record per missing file, in the order listed above, or
        a single ``ok`` record when all three are present.
    """
    required = ("BACKLOG.md", "CHANGELOG.md", "VERSION")
    absent = [name for name in required if not ROOT.joinpath(name).exists()]
    if absent:
        return [_fail("core_file", f"missing: {name}") for name in absent]
    return [_ok("core_file", "BACKLOG.md, CHANGELOG.md, VERSION all present")]


def check_context_budget_hook() -> list[dict]:
    """Check that settings.json mentions ``context_budget_check.py``.

    The test is a plain substring search over the raw text of
    ``.claude/settings.json``; the file is not parsed as JSON here.

    Returns:
        A single ``fail`` record when settings.json is missing, a single
        ``warn`` record when the script name does not appear in it, or a
        single ``ok`` record when it does.
    """
    settings = ROOT.joinpath(".claude", "settings.json")
    if not settings.exists():
        return [_fail("context_budget_hook", "settings.json missing")]

    wired = "context_budget_check.py" in settings.read_text(encoding="utf-8")
    if wired:
        return [_ok("context_budget_hook", "wired")]
    return [_warn("context_budget_hook", "not wired in settings.json")]


# Registry of health checks. main() calls each entry with no arguments, in
# list order, and concatenates the result records they return, so the order
# here is also the order of the report.
CHECKS = [
    check_scripts_executable,
    check_hook_scripts_exist,
    check_rules_referenced_from_claude_md,
    check_critical_rules_sentinels,
    check_sub_skill_log_activity,
    check_core_files,
    check_context_budget_hook,
]


def main() -> int:
    """Run every registered check, print the report, and return the exit code.

    Command-line flags:
        --quiet  omit ``ok`` lines from the text report
        --json   dump all result records as indented JSON instead of text

    Returns:
        1 if any result record has status ``fail``, otherwise 0.
    """
    parser = argparse.ArgumentParser(description="Pipeline health check")
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="print only fail/warn; suppress ok lines",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="machine-readable output",
    )
    opts = parser.parse_args()

    collected: list[dict] = []
    for check in CHECKS:
        # A check that blows up must not abort the run: record it as a
        # failure under the check function's own name and keep going.
        try:
            collected.extend(check())
        except Exception as exc:  # noqa: BLE001
            collected.append(_fail(check.__name__, f"check raised: {exc}"))

    if opts.json:
        print(json.dumps(collected, indent=2))
    else:
        labels = {"ok": "OK", "warn": "WARN", "fail": "FAIL"}
        for record in collected:
            status = record["status"]
            if opts.quiet and status == "ok":
                continue
            print(f"[{labels[status]:<4}] {record['check']:<25} {record['detail']}")

    return int(any(record["status"] == "fail" for record in collected))


# Script entry point: the value returned by main() becomes the process exit
# status.
if __name__ == "__main__":
    raise SystemExit(main())