#!/usr/bin/env python3
"""Documentation freshness checker.

Scans docs/ for YAML frontmatter and reports:
- Stale docs (last_verified older than freshness_days)
- Version mismatches (verified_version != current VERSION)
- Missing frontmatter on critical docs

Usage:
    check_freshness.py [--strict]

Exit codes:
    0 = all docs fresh, no docs/ directory, or problems found without --strict
    1 = stale/mismatched/missing found (--strict mode only)
"""
import re
import sys
from datetime import date, timedelta
from pathlib import Path

# Repository root: two levels above the directory that contains this script.
ROOT = Path(__file__).resolve().parents[2]
DOCS_DIR = ROOT.joinpath("docs")
VERSION_FILE = ROOT.joinpath("VERSION")
DOC_INDEX = DOCS_DIR.joinpath("_meta", "doc_index.yaml")

# Docs (relative to DOCS_DIR) treated as required when the index file yields
# no usable entries.
FALLBACK_REQUIRED = {
    "development/DEPLOYMENT_GUIDE.md",
    "development/TESTING.md",
    "development/ARCHITECTURE.md",
    "README.md",
}


def _index_path_value(raw):
    """Normalise the text after ``path:`` into a bare path string.

    Surrounding single/double quotes are removed; for unquoted values a
    trailing `` # comment`` is dropped, as YAML itself would do.
    """
    value = raw.strip()
    if value[:1] in ("'", '"'):
        closing = value.find(value[0], 1)
        return value[1:closing] if closing != -1 else value
    if value.startswith("#"):
        # The whole value is a comment, i.e. the key has no value.
        return ""
    comment_at = value.find(" #")
    if comment_at != -1:
        value = value[:comment_at].rstrip()
    return value


def load_required(index_path=None, fallback=None):
    """Return the set of docs-relative paths that must carry frontmatter.

    The index file is scanned line by line for ``path: <value>`` entries
    (a minimal scan, not a full YAML parse). Both plain mapping keys and
    list items (``- path: README.md``) are recognised, and quoted values
    and inline comments are cleaned up so the result can be compared
    directly against paths relative to the docs directory.

    Args:
        index_path: Index file to read. Defaults to ``DOC_INDEX``.
        fallback: Paths to use when the index is missing, unreadable, or
            lists no paths. Defaults to ``FALLBACK_REQUIRED``.

    Returns:
        A new ``set`` of path strings; a copy of *fallback* when the index
        provides nothing usable.
    """
    if index_path is None:
        index_path = DOC_INDEX
    if fallback is None:
        fallback = FALLBACK_REQUIRED

    try:
        text = Path(index_path).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable index: best-effort fallback, as before.
        return set(fallback)

    paths = set()
    for line in text.splitlines():
        entry = line.strip()
        if entry.startswith("- "):
            # YAML list item: "- path: foo.md" -> "path: foo.md".
            entry = entry[2:].lstrip()
        if not entry.startswith("path:"):
            continue
        value = _index_path_value(entry[len("path:"):])
        if value:
            # An empty "path:" must not mask the fallback list.
            paths.add(value)
    return paths or set(fallback)


# Resolved once at import time; check() consults this set to decide whether a
# doc without frontmatter should be reported as missing.
REQUIRED = load_required()


def _frontmatter_scalar(raw):
    """Normalise a raw frontmatter value into a bare string.

    Surrounding single/double quotes are removed; for unquoted values a
    trailing `` # comment`` is dropped, as YAML itself would do.
    """
    value = raw.strip()
    if value[:1] in ("'", '"'):
        closing = value.find(value[0], 1)
        return value[1:closing] if closing != -1 else value
    if value.startswith("#"):
        # The whole value is a comment, i.e. the key has no value.
        return ""
    comment_at = value.find(" #")
    if comment_at != -1:
        value = value[:comment_at].rstrip()
    return value


def parse_frontmatter(filepath):
    """Read the leading ``---`` frontmatter block of a file as a flat dict.

    This is a minimal ``key: value`` line scan, not a full YAML parse:
    nested structures are not interpreted and every value is returned as
    a string.

    Args:
        filepath: ``pathlib.Path``-like object exposing ``read_text``.

    Returns:
        ``None`` when the file does not start with a frontmatter block,
        otherwise a dict mapping stripped keys to cleaned string values
        (quotes and inline comments removed).

    Raises:
        OSError, UnicodeDecodeError: propagated from reading the file.
    """
    # utf-8-sig drops a leading BOM, which would otherwise stop the
    # pattern from matching at the start of the text.
    text = filepath.read_text(encoding="utf-8-sig")
    found = re.match(r"^---\s*\n(.*?)\n---", text, re.DOTALL)
    if not found:
        return None

    meta = {}
    for line in found.group(1).strip().splitlines():
        if line.lstrip().startswith("#"):
            # Comment line, not a key.
            continue
        key, sep, value = line.partition(":")
        if sep:
            meta[key.strip()] = _frontmatter_scalar(value)
    return meta


def current_version():
    """Return the stripped contents of VERSION_FILE, or ``"unknown"``.

    ``"unknown"`` is returned when the file does not exist or contains only
    whitespace, so callers comparing against that sentinel never see an
    empty version string.
    """
    try:
        text = VERSION_FILE.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return "unknown"
    # An empty VERSION file carries no usable version either.
    return text.strip() or "unknown"


_DEFAULT_FRESHNESS_DAYS = 30


def _freshness_threshold(meta, rel):
    """Return the doc's freshness window in days, defaulting if absent/invalid."""
    raw = meta.get("freshness_days", "")
    if not raw:
        return _DEFAULT_FRESHNESS_DAYS
    try:
        return int(raw)
    except ValueError:
        # One malformed value must not abort the whole report.
        print(
            f"warning: {rel}: invalid freshness_days {raw!r}; "
            f"using {_DEFAULT_FRESHNESS_DAYS}",
            file=sys.stderr,
        )
        return _DEFAULT_FRESHNESS_DAYS


def check(strict=False):
    """Scan DOCS_DIR for Markdown files and print a freshness report.

    For every ``*.md`` file outside the skipped top-level directories
    (``archive/``, ``_meta/``, ``context/``, ``patterns/``):

    - no frontmatter: reported as missing only if the path is in REQUIRED;
    - ``last_verified`` older than ``freshness_days`` (default 30), or not
      an ISO date: reported as stale;
    - ``verified_version`` whose first two dot-separated components differ
      from the current version: reported as a mismatch (skipped when the
      current version is ``"unknown"``).

    Args:
        strict: When true, return 1 if any problem was found.

    Returns:
        0 when nothing is wrong, when DOCS_DIR does not exist, or when
        *strict* is false; 1 otherwise.
    """
    if not DOCS_DIR.exists():
        print("No docs/ directory found. Skipping freshness check.")
        return 0

    today = date.today()
    ver = current_version()
    stale, mismatch, missing, fresh = [], [], [], []

    for md in sorted(DOCS_DIR.rglob("*.md")):
        if not md.is_file():
            # A directory named "*.md" cannot be read as a document.
            continue
        # POSIX-style separators so the prefix checks and REQUIRED lookups
        # behave the same on every platform.
        rel = md.relative_to(DOCS_DIR).as_posix()
        if rel.startswith(("archive/", "_meta/", "context/", "patterns/")):
            continue
        meta = parse_frontmatter(md)
        if meta is None:
            if rel in REQUIRED:
                missing.append(rel)
            continue

        lv = meta.get("last_verified", "")
        if lv:
            fd = _freshness_threshold(meta, rel)
            try:
                verified_on = date.fromisoformat(lv)
            except ValueError:
                stale.append((rel, f"invalid: {lv}", 0))
            else:
                if today - verified_on > timedelta(days=fd):
                    stale.append((rel, lv, fd))
                else:
                    fresh.append(rel)

        vv = meta.get("verified_version", "")
        if vv and ver != "unknown":
            # Compare major.minor only; patch-level differences are ignored.
            if ".".join(vv.split(".")[:2]) != ".".join(ver.split(".")[:2]):
                mismatch.append((rel, vv, ver))

    print(f"Documentation Freshness Report — {today}")
    print(f"Current version: {ver}")
    # Counts only docs that declared a last_verified value.
    print(f"Docs scanned: {len(fresh) + len(stale)}\n")

    if fresh:
        print(f"FRESH ({len(fresh)}):")
        for name in fresh:
            print(f"  {name}")
        print()
    if stale:
        print(f"STALE ({len(stale)}):")
        for r, lv, fd in stale:
            print(f"  {r} — last_verified: {lv} (threshold: {fd} days)")
        print()
    if mismatch:
        print(f"VERSION MISMATCH ({len(mismatch)}):")
        for r, ov, nv in mismatch:
            print(f"  {r} — verified for {ov}, current is {nv}")
        print()
    if missing:
        print(f"MISSING FRONTMATTER ({len(missing)}):")
        for name in missing:
            print(f"  {name}")
        print()

    if not stale and not mismatch and not missing:
        print("All documentation is fresh and up-to-date.")
        return 0
    return 1 if strict else 0


if __name__ == "__main__":
    # The process exit status is whatever check() returns; strict mode is
    # enabled by the presence of the --strict flag anywhere in argv.
    strict_requested = "--strict" in sys.argv
    sys.exit(check(strict=strict_requested))
