#!/usr/bin/env python3
from __future__ import annotations

import argparse
import csv
import json
import re
import shutil
import sqlite3
import time
from dataclasses import dataclass, field
from datetime import datetime
from io import StringIO
from pathlib import Path

# Repository root: this script is expected to live one directory below it.
ROOT = Path(__file__).resolve().parents[1]
# Directory scanned for voucher markdown files ("All_Scaned" matches the folder name on disk).
SOURCE_DIR = ROOT / "Pulled_Info" / "Vouchers" / "All_Scaned"
# Live index database, and the scratch copy written during --full rebuilds.
DB_PATH = ROOT / "config" / "vouchers.sqlite"
WORK_DB_PATH = DB_PATH.with_suffix(".rebuild.sqlite")

# STYLE_RE: 4-5 digit style codes. NUMBER_RE is looser (any 3+ digit run) and feeds search tokens.
STYLE_RE = re.compile(r"\b\d{4,5}\b")
NUMBER_RE = re.compile(r"\b\d{3,}\b")
# Markdown bullet lines like "- **Program URL**: https://..." (group 1 is the URL).
PROGRAM_URL_RE = re.compile(r"^\s*-\s+\*\*Program URL\*\*:\s*(https?://\S+)\s*$", re.IGNORECASE | re.MULTILINE)
ACCOUNT_URL_RE = re.compile(r"^\s*-\s+\*\*Account URL\*\*:\s*(https?://\S+)\s*$", re.IGNORECASE | re.MULTILINE)
# Fenced ```csv blocks inside the markdown; group 1 is the raw CSV body.
CSV_BLOCK_RE = re.compile(r"```csv\s*(.*?)```", re.IGNORECASE | re.DOTALL)


@dataclass
class VoucherSummary:
    """Accumulates counters and validation warnings for one index rebuild run."""

    mode: str = "incremental"
    scanned_files: int = 0
    changed_files: int = 0
    deleted_files: int = 0
    skipped_files: int = 0
    folder_count: int = 0
    file_count: int = 0
    style_price_count: int = 0
    fts_enabled: bool = False
    warnings: list[dict] = field(default_factory=list)

    def add_warning(self, code: str, rel_path: str, message: str) -> None:
        """Append one warning entry (always severity "warning") to the run log."""
        entry = {
            "severity": "warning",
            "code": code,
            "relPath": rel_path,
            "message": message,
        }
        self.warnings.append(entry)

    def as_payload(self, elapsed_seconds: float | None = None) -> dict:
        """Serialize the summary into the JSON-friendly dict stored in the DB."""
        validation = {
            "warningCount": len(self.warnings),
            # Cap the embedded warning list so the stored payload stays small.
            "warnings": self.warnings[:25],
        }
        payload = {
            "generatedAt": datetime.now().isoformat(timespec="seconds"),
            "mode": self.mode,
            "scannedFiles": self.scanned_files,
            "changedFiles": self.changed_files,
            "deletedFiles": self.deleted_files,
            "skippedFiles": self.skipped_files,
            "folderCount": self.folder_count,
            "fileCount": self.file_count,
            "stylePriceCount": self.style_price_count,
            "ftsEnabled": bool(self.fts_enabled),
            "validation": validation,
        }
        if elapsed_seconds is not None:
            payload["elapsedSeconds"] = round(float(elapsed_seconds), 3)
        return payload


def clean_text(value: object) -> str:
    """Stringify *value* (falsy values become "") and collapse whitespace runs to single spaces."""
    raw = str(value or "")
    collapsed = re.sub(r"\s+", " ", raw)
    return collapsed.strip()


def lower_norm(value: str) -> str:
    """Lower-cased variant of clean_text(), used for case-insensitive columns."""
    normalized = clean_text(value)
    return normalized.lower()


def extract_numbers(text: str) -> list[str]:
    """Return the distinct numeric tokens (3+ digits) in *text*, in first-seen order.

    Replaces the manual seen-set/append loop with the idiomatic
    order-preserving dedupe via ``dict.fromkeys`` — same output, less code.
    """
    return list(dict.fromkeys(NUMBER_RE.findall(text or "")))


def parse_money(value: object) -> float:
    """Parse a currency-ish value (e.g. "$1,299.00") into a float.

    Blank or unparseable input yields 0.0, which the price-derivation logic
    treats as "missing".
    """
    raw = clean_text(value).replace("$", "").replace(",", "")
    if not raw:
        return 0.0
    try:
        return float(raw)
    except ValueError:
        # float() on a str can only raise ValueError; narrowed from the
        # original blanket Exception so unrelated bugs are not swallowed.
        return 0.0


def parse_discount(value: object) -> float:
    """Parse a percentage-ish value (e.g. "35%") into a float.

    Blank or unparseable input yields 0.0 ("no discount known").
    """
    raw = clean_text(value).replace("%", "")
    if not raw:
        return 0.0
    try:
        return float(raw)
    except ValueError:
        # float() on a str can only raise ValueError; narrowed from the
        # original blanket Exception so unrelated bugs are not swallowed.
        return 0.0


def preview_text_for_search(text: str, max_lines: int = 60) -> str:
    """Return the first *max_lines* lines of *text*, right-trimmed, as one stripped string."""
    all_lines = str(text or "").splitlines()
    kept = [line.rstrip() for line in all_lines[:max_lines]]
    return "\n".join(kept).strip()


def parse_style_prices(rel_path: str, text: str, summary: VoucherSummary) -> list[tuple[str, float, float, float]]:
    """Extract pricing rows from every fenced ``csv`` block in *text*.

    Returns (style_code, msrp, discount_pct, program_price) tuples. Rows whose
    style column has no 4-5 digit code are skipped silently; rows with no
    derivable price are skipped with a warning recorded on *summary*.
    """
    rows: list[tuple[str, float, float, float]] = []
    for block_match in CSV_BLOCK_RE.finditer(text or ""):
        block = block_match.group(1).strip()
        if not block:
            continue
        reader = csv.DictReader(StringIO(block))
        for row in reader:
            if not isinstance(row, dict):
                continue
            # Header variants handled: "STYLE" / "Style"; likewise "DISCOUNT %"
            # vs "DISCOUNT%" below — presumably to tolerate inconsistent source
            # files (TODO confirm against actual voucher exports).
            style_code = clean_text(row.get("STYLE") or row.get("Style") or "")
            style_match = STYLE_RE.search(style_code)
            if not style_match:
                continue
            msrp = parse_money(row.get("MSRP"))
            fixed_price = parse_money(row.get("FIXED PRICE"))
            discount_pct = parse_discount(row.get("DISCOUNT %") or row.get("DISCOUNT%"))
            program_price = parse_money(row.get("PROGRAM PRICE"))
            # Derivation chain: explicit program price wins; else fall back to
            # the fixed price, then to MSRP reduced by the discount percentage.
            if program_price <= 0 and fixed_price > 0:
                program_price = fixed_price
            if program_price <= 0 and msrp > 0 and discount_pct > 0:
                program_price = round(msrp * (1 - (discount_pct / 100.0)), 2)
            # If only a program price is known (and no discount), mirror it as
            # MSRP so every emitted row carries a non-zero base price.
            if msrp <= 0 and program_price > 0 and discount_pct <= 0:
                msrp = program_price
            if program_price <= 0 and msrp <= 0:
                summary.add_warning("voucher_price_missing", rel_path, f"Could not parse price row for style {style_match.group(0)}.")
                continue
            rows.append((style_match.group(0), float(msrp or 0.0), float(discount_pct or 0.0), float(program_price or 0.0)))
    return rows


def configure_connection(conn: sqlite3.Connection) -> None:
    """Apply the pragma profile shared by every connection this script opens."""
    for pragma in (
        "pragma journal_mode = wal",
        "pragma synchronous = normal",
        "pragma temp_store = memory",
        "pragma busy_timeout = 10000",
        "pragma foreign_keys = off",
    ):
        conn.execute(pragma)


def table_exists(conn: sqlite3.Connection, name: str) -> bool:
    """Return True when a table called *name* exists in the connected database."""
    query = "select 1 from sqlite_master where type = 'table' and name = ? limit 1"
    return conn.execute(query, (name,)).fetchone() is not None


def ensure_schema(conn: sqlite3.Connection) -> None:
    """Create every table and index the voucher index needs, if missing.

    The FTS5 virtual table is best-effort: an existing table with a stale
    column layout is dropped and recreated, and when this SQLite build lacks
    FTS5 the DatabaseError is swallowed so the rest of the index still works.
    """
    # Per-file (mtime_ns, size) signatures used for incremental change detection.
    conn.execute(
        """
        create table if not exists voucher_filesig (
            rel_path text primary key,
            mtime_ns integer not null,
            size_bytes integer not null
        )
        """
    )
    # One row per distinct folder, with pre-lowercased columns for matching.
    conn.execute(
        """
        create table if not exists voucher_folders (
            folder text primary key,
            name text not null,
            folder_norm text not null,
            name_norm text not null
        )
        """
    )
    # One row per indexed item; this script only ever writes file rows (is_dir = 0).
    conn.execute(
        """
        create table if not exists voucher_items (
            rel_path text primary key,
            name text not null,
            folder text not null,
            is_dir integer not null,
            numbers_json text not null,
            preview_text text not null,
            preview_blob blob
        )
        """
    )
    # Parsed pricing rows; one row per (file, style code) pair.
    conn.execute(
        """
        create table if not exists voucher_style_prices (
            rel_path text not null,
            style_code text not null,
            normal_price real not null,
            discount_pct real not null,
            discounted_price real not null,
            primary key (rel_path, style_code)
        )
        """
    )
    # Small key/value store; currently holds the JSON run summary.
    conn.execute(
        """
        create table if not exists voucher_index_meta (
            key text primary key,
            value_json text not null
        )
        """
    )
    conn.execute("create index if not exists idx_vf_folder_norm on voucher_folders(folder_norm)")
    conn.execute("create index if not exists idx_vf_name on voucher_folders(name)")
    conn.execute("create index if not exists idx_vf_name_norm on voucher_folders(name_norm)")
    conn.execute("create index if not exists idx_voucher_items_folder on voucher_items(folder)")
    conn.execute("create index if not exists idx_voucher_items_type_name on voucher_items(is_dir, name)")
    conn.execute("create index if not exists idx_vsp_style on voucher_style_prices(style_code)")
    try:
        # Drop and recreate the FTS table if its column layout has drifted
        # from the expected five-column shape.
        expected_fts_columns = ["rel_path", "folder", "name", "preview_text", "numbers_text"]
        existing_fts_columns = []
        if table_exists(conn, "voucher_search_fts"):
            existing_fts_columns = [str(row[1] or "").strip() for row in conn.execute("pragma table_info(voucher_search_fts)").fetchall()]
        if existing_fts_columns and existing_fts_columns != expected_fts_columns:
            conn.execute("drop table voucher_search_fts")
        conn.execute(
            """
            create virtual table if not exists voucher_search_fts using fts5(
                rel_path UNINDEXED,
                folder,
                name,
                preview_text,
                numbers_text
            )
            """
        )
    except sqlite3.DatabaseError:
        # FTS5 unavailable in this SQLite build: continue without full-text search.
        return


def load_existing_filesigs(conn: sqlite3.Connection) -> dict[str, tuple[int, int]]:
    """Map rel_path -> (mtime_ns, size_bytes) from voucher_filesig; {} when absent."""
    if not table_exists(conn, "voucher_filesig"):
        return {}
    sigs: dict[str, tuple[int, int]] = {}
    for rel_path, mtime_ns, size_bytes in conn.execute(
        "select rel_path, mtime_ns, size_bytes from voucher_filesig"
    ).fetchall():
        sigs[str(rel_path)] = (int(mtime_ns or 0), int(size_bytes or 0))
    return sigs


def load_existing_indexed_relpaths(conn: sqlite3.Connection) -> set[str]:
    """Return the rel_path of every file row already present in voucher_items."""
    if not table_exists(conn, "voucher_items"):
        return set()
    cursor = conn.execute("select rel_path from voucher_items where is_dir = 0")
    return {str(record[0]) for record in cursor.fetchall()}


def iter_voucher_files() -> list[tuple[str, Path, int, int]]:
    """List every *.md file under SOURCE_DIR as (rel_path, path, mtime_ns, size).

    Results are sorted case-insensitively by rel_path for stable processing order.
    """
    entries: list[tuple[str, Path, int, int]] = []
    for candidate in SOURCE_DIR.rglob("*.md"):
        if not candidate.is_file():
            continue
        info = candidate.stat()
        rel = candidate.relative_to(SOURCE_DIR).as_posix()
        entries.append((rel, candidate, int(info.st_mtime_ns), int(info.st_size)))
    return sorted(entries, key=lambda entry: entry[0].lower())


def parse_voucher_file(
    rel_path: str,
    path: Path,
    summary: VoucherSummary,
) -> tuple[
    tuple[str, str, str, int, str, str] | None,
    tuple[str, int, int] | None,
    list[tuple[str, str, float, float, float]],
    tuple[str, str, str, str, str] | None,
]:
    """Parse one voucher markdown file into DB-ready rows.

    Returns (item_row, sig_row, style_rows, fts_row). On a read failure the
    first, second and fourth elements are None and style_rows is empty.
    Validation problems are recorded as warnings on *summary*, never raised.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        summary.add_warning("voucher_read_failed", rel_path, "Could not read voucher markdown file.")
        return None, None, [], None
    stat = path.stat()
    folder = path.parent.relative_to(SOURCE_DIR).as_posix()
    folder = "" if folder == "." else folder
    # Numeric search tokens come from rel_path plus only the head of the file
    # (first 1200 chars) — presumably to keep extraction cheap; TODO confirm.
    numbers = extract_numbers(f"{rel_path}\n{text[:1200]}")
    item_row = (
        rel_path,
        path.name,
        folder,
        0,  # is_dir: this parser only emits file rows
        json.dumps(numbers),
        preview_text_for_search(text),
    )
    sig_row = (rel_path, int(stat.st_mtime_ns), int(stat.st_size))
    # A voucher should link to at least one portal; flag files missing both.
    if not PROGRAM_URL_RE.search(text or "") and not ACCOUNT_URL_RE.search(text or ""):
        summary.add_warning("voucher_missing_portal_url", rel_path, "Voucher file is missing Program URL and Account URL.")
    prices = parse_style_prices(rel_path, text, summary)
    if not prices:
        summary.add_warning("voucher_no_style_rows", rel_path, "Voucher file did not produce any style pricing rows.")
    style_rows = [(rel_path, style_code, msrp, discount_pct, program_price) for style_code, msrp, discount_pct, program_price in prices]
    # The FTS row carries a longer preview (120 lines) than the item row (60).
    fts_row = (
        rel_path,
        folder,
        path.name,
        preview_text_for_search(text, max_lines=120),
        " ".join(numbers),
    )
    return item_row, sig_row, style_rows, fts_row


def chunked(values: list[str], size: int = 500):
    """Yield successive slices of *values*, each at most *size* items long."""
    start = 0
    while start < len(values):
        yield values[start : start + size]
        start += size


def delete_rel_paths(conn: sqlite3.Connection, rel_paths: list[str]) -> None:
    """Delete every row tied to *rel_paths* from the per-file tables, in chunks."""
    # The FTS table is optional; check once — it cannot appear mid-loop.
    fts_present = table_exists(conn, "voucher_search_fts")
    targets = ["voucher_style_prices", "voucher_items", "voucher_filesig"]
    if fts_present:
        targets.append("voucher_search_fts")
    for batch in chunked(rel_paths):
        marks = ", ".join("?" for _ in batch)
        for table in targets:
            conn.execute(f"delete from {table} where rel_path in ({marks})", batch)


def rebuild_folder_rows(conn: sqlite3.Connection) -> None:
    """Recompute voucher_folders from the distinct folders of indexed files."""
    distinct = conn.execute(
        """
        select distinct folder
        from voucher_items
        where is_dir = 0
        order by folder collate nocase
        """
    ).fetchall()
    records = []
    for record in distinct:
        folder = str(record[0] or "").strip()
        leaf = Path(folder).name if folder else ""
        records.append((folder, leaf, lower_norm(folder), lower_norm(leaf)))
    # Full refresh: wipe and re-insert rather than diffing.
    conn.execute("delete from voucher_folders")
    if records:
        conn.executemany(
            "insert into voucher_folders(folder, name, folder_norm, name_norm) values (?, ?, ?, ?)",
            records,
        )


def refresh_summary_from_db(conn: sqlite3.Connection, summary: VoucherSummary) -> None:
    """Fill the count fields on *summary* from the freshly written tables."""

    def scalar(sql: str) -> int:
        # All three queries return a single count() cell.
        return int(conn.execute(sql).fetchone()[0])

    summary.folder_count = scalar("select count(*) from voucher_folders")
    summary.file_count = scalar("select count(*) from voucher_items where is_dir = 0")
    summary.style_price_count = scalar("select count(*) from voucher_style_prices")
    summary.fts_enabled = table_exists(conn, "voucher_search_fts")


def remove_work_db_artifacts() -> None:
    """Best-effort delete of the work DB plus its -wal/-shm sidecar files."""
    candidates = [
        WORK_DB_PATH,
        WORK_DB_PATH.with_name(f"{WORK_DB_PATH.name}-wal"),
        WORK_DB_PATH.with_name(f"{WORK_DB_PATH.name}-shm"),
    ]
    for candidate in candidates:
        try:
            candidate.unlink(missing_ok=True)
        except Exception:
            # Deliberately best-effort: a locked file must not abort cleanup.
            continue


def checkpoint_database(conn: sqlite3.Connection) -> None:
    """Best-effort WAL checkpoint that also truncates the -wal file."""
    try:
        conn.execute("pragma wal_checkpoint(truncate)")
    except sqlite3.Error:
        # Narrowed from bare Exception: only SQLite-level failures (e.g. a
        # busy database) should be ignored here, not arbitrary bugs.
        return


def bootstrap_work_db(full: bool) -> sqlite3.Connection:
    """Create the work database, seeding it from the live DB for non-full runs.

    Returns an open, configured connection to WORK_DB_PATH.

    Fix: ``with sqlite3.connect(...)`` only manages the transaction — it does
    NOT close the connection — so the previous version leaked both seeding
    connections and kept the live DB file open. They are now closed explicitly.
    """
    remove_work_db_artifacts()
    WORK_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    if not full and DB_PATH.is_file():
        source_conn = sqlite3.connect(DB_PATH, timeout=30)
        temp_conn = sqlite3.connect(WORK_DB_PATH, timeout=30)
        try:
            configure_connection(source_conn)
            configure_connection(temp_conn)
            source_conn.backup(temp_conn)
            temp_conn.commit()
        finally:
            source_conn.close()
            temp_conn.close()
    conn = sqlite3.connect(WORK_DB_PATH, timeout=30)
    configure_connection(conn)
    return conn


def publish_work_db() -> None:
    """Copy the finished work DB over the live DB, retrying on lock errors.

    Raises FileNotFoundError when the work DB is missing, and re-raises the
    last sqlite3.OperationalError after five failed attempts.

    Fix: ``with sqlite3.connect(...)`` does not close connections, so the
    previous version leaked a pair of handles per attempt and then called
    remove_work_db_artifacts() while WORK_DB_PATH was still open (which fails
    on Windows). Connections are now closed before the cleanup/unlink.
    """
    if not WORK_DB_PATH.is_file():
        raise FileNotFoundError(f"Work database not found: {WORK_DB_PATH}")
    if not DB_PATH.is_file():
        # No live DB yet: a plain atomic rename is enough.
        DB_PATH.parent.mkdir(parents=True, exist_ok=True)
        WORK_DB_PATH.replace(DB_PATH)
        remove_work_db_artifacts()
        return
    last_error: Exception | None = None
    for _ in range(5):
        source_conn = sqlite3.connect(WORK_DB_PATH, timeout=30)
        target_conn = sqlite3.connect(DB_PATH, timeout=30)
        try:
            configure_connection(source_conn)
            configure_connection(target_conn)
            source_conn.backup(target_conn)
            target_conn.commit()
        except sqlite3.OperationalError as exc:
            last_error = exc
            time.sleep(2)
            continue
        finally:
            source_conn.close()
            target_conn.close()
        remove_work_db_artifacts()
        return
    if last_error is not None:
        raise last_error


def bootstrap_fts_from_indexed_rows(conn: sqlite3.Connection, current_relpaths: set[str]) -> int:
    """Backfill voucher_search_fts rows for indexed files that lack one.

    Run on incremental updates so that, after the FTS table is (re)created,
    unchanged files still become searchable without re-parsing them. Returns
    the number of rows inserted (0 when FTS is absent or nothing is missing).
    """
    if not table_exists(conn, "voucher_search_fts") or not current_relpaths:
        return 0
    existing_rows = conn.execute("select rel_path from voucher_search_fts").fetchall()
    existing_relpaths = {str(row[0] or "").strip() for row in existing_rows if row and row[0]}
    missing_relpaths = sorted(current_relpaths - existing_relpaths)
    if not missing_relpaths:
        return 0
    inserted = 0
    for chunk in chunked(missing_relpaths, size=500):
        placeholders = ", ".join("?" for _ in chunk)
        rows = conn.execute(
            f"""
            select rel_path, folder, name, preview_text, numbers_json
            from voucher_items
            where is_dir = 0 and rel_path in ({placeholders})
            """,
            chunk,
        ).fetchall()
        fts_rows: list[tuple[str, str, str, str, str]] = []
        for rel_path, folder, name, preview_text, numbers_json in rows:
            # numbers_json is normally a JSON list of numeric tokens; if it
            # does not decode, fall back to indexing it as plain text.
            numbers_text = ""
            try:
                parsed_numbers = json.loads(str(numbers_json or "[]"))
                if isinstance(parsed_numbers, list):
                    numbers_text = " ".join(str(value).strip() for value in parsed_numbers if str(value).strip())
            except Exception:
                numbers_text = clean_text(numbers_json)
            fts_rows.append(
                (
                    str(rel_path or "").strip(),
                    str(folder or "").strip(),
                    str(name or "").strip(),
                    str(preview_text or "").strip(),
                    numbers_text,
                )
            )
        if fts_rows:
            conn.executemany(
                """
                insert into voucher_search_fts(rel_path, folder, name, preview_text, numbers_text)
                values (?, ?, ?, ?, ?)
                """,
                fts_rows,
            )
            # Commit per chunk so an interrupted backfill keeps earlier progress.
            conn.commit()
            inserted += len(fts_rows)
    return inserted


def open_incremental_db() -> sqlite3.Connection:
    """Open the live database in place for an incremental update."""
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(DB_PATH, timeout=30)
    configure_connection(connection)
    return connection


def backup_live_db_for_full_rebuild() -> None:
    """Snapshot the live DB next to itself before a destructive full rebuild."""
    if not DB_PATH.is_file():
        return
    # e.g. vouchers.pre_full_20240101_120000.sqlite
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = DB_PATH.with_name(f"{DB_PATH.stem}.pre_full_{timestamp}{DB_PATH.suffix}")
    shutil.copy2(DB_PATH, destination)


def rebuild_index(*, full: bool = False) -> dict:
    """Rebuild or incrementally refresh the voucher SQLite index.

    With ``full=False`` (default) the live DB is updated in place: only files
    whose (mtime_ns, size) signature changed are re-parsed, and rows for files
    deleted from disk are removed. With ``full=True`` the live DB is backed
    up, the index is rebuilt from scratch in a work DB, and the result is
    published over the live DB.

    Returns the JSON-friendly summary payload, which is also persisted into
    voucher_index_meta under the 'summary' key.
    """
    started_at = time.perf_counter()
    summary = VoucherSummary(mode="full" if full else "incremental")
    SOURCE_DIR.mkdir(parents=True, exist_ok=True)
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)

    if full:
        backup_live_db_for_full_rebuild()
        conn_cm = bootstrap_work_db(full)
    else:
        conn_cm = open_incremental_db()

    # NOTE(review): sqlite3's connection context manager commits/rolls back a
    # transaction but does NOT close the connection, so `conn` stays open after
    # this block — confirm that is intended before the full-path publish below.
    with conn_cm as conn:
        configure_connection(conn)
        ensure_schema(conn)
        if full:
            # Full rebuild: wipe every table and start from empty caches.
            conn.execute("delete from voucher_style_prices")
            conn.execute("delete from voucher_items")
            conn.execute("delete from voucher_filesig")
            conn.execute("delete from voucher_folders")
            if table_exists(conn, "voucher_search_fts"):
                conn.execute("delete from voucher_search_fts")
            conn.execute("delete from voucher_index_meta")
            existing_sigs: dict[str, tuple[int, int]] = {}
            indexed_relpaths: set[str] = set()
        else:
            existing_sigs = load_existing_filesigs(conn)
            indexed_relpaths = load_existing_indexed_relpaths(conn)

        current_files = iter_voucher_files()
        current_relpaths = {rel_path for rel_path, *_ in current_files}
        if not full:
            # Ensure unchanged files are searchable if the FTS table was (re)created.
            bootstrap_fts_from_indexed_rows(conn, current_relpaths)
        parsed_items: list[tuple[str, str, str, int, str, str]] = []
        parsed_sigs: list[tuple[str, int, int]] = []
        parsed_style_rows: list[tuple[str, str, float, float, float]] = []
        parsed_fts_rows: list[tuple[str, str, str, str, str]] = []
        parsed_relpaths: list[str] = []

        for rel_path, path, mtime_ns, size_bytes in current_files:
            summary.scanned_files += 1
            existing_sig = existing_sigs.get(rel_path)
            # Re-parse when forced (full), the signature changed, or a signature
            # exists without an indexed row (e.g. an interrupted earlier run).
            is_changed = full or existing_sig != (mtime_ns, size_bytes) or rel_path not in indexed_relpaths
            if not is_changed:
                summary.skipped_files += 1
                continue
            summary.changed_files += 1
            item_row, sig_row, style_rows, fts_row = parse_voucher_file(rel_path, path, summary)
            if item_row is None or sig_row is None:
                continue
            parsed_items.append(item_row)
            parsed_sigs.append(sig_row)
            parsed_style_rows.extend(style_rows)
            if fts_row is not None:
                parsed_fts_rows.append(fts_row)
            parsed_relpaths.append(rel_path)

        # Files that have signatures but no longer exist on disk.
        deleted_relpaths = sorted(set(existing_sigs) - current_relpaths)
        summary.deleted_files = len(deleted_relpaths)
        if deleted_relpaths:
            delete_rel_paths(conn, deleted_relpaths)

        if parsed_relpaths:
            # Style/FTS rows have no upsert path, so clear them first; items
            # and filesig rows use "on conflict ... do update" below instead.
            for chunk in chunked(parsed_relpaths):
                placeholders = ", ".join("?" for _ in chunk)
                conn.execute(f"delete from voucher_style_prices where rel_path in ({placeholders})", chunk)
                if table_exists(conn, "voucher_search_fts"):
                    conn.execute(f"delete from voucher_search_fts where rel_path in ({placeholders})", chunk)
            conn.executemany(
                """
                insert into voucher_items(rel_path, name, folder, is_dir, numbers_json, preview_text, preview_blob)
                values (?, ?, ?, ?, ?, ?, null)
                on conflict(rel_path) do update set
                    name = excluded.name,
                    folder = excluded.folder,
                    is_dir = excluded.is_dir,
                    numbers_json = excluded.numbers_json,
                    preview_text = excluded.preview_text,
                    preview_blob = null
                """,
                parsed_items,
            )
            conn.executemany(
                """
                insert into voucher_filesig(rel_path, mtime_ns, size_bytes)
                values (?, ?, ?)
                on conflict(rel_path) do update set
                    mtime_ns = excluded.mtime_ns,
                    size_bytes = excluded.size_bytes
                """,
                parsed_sigs,
            )
            if parsed_style_rows:
                conn.executemany(
                    """
                    insert into voucher_style_prices(rel_path, style_code, normal_price, discount_pct, discounted_price)
                    values (?, ?, ?, ?, ?)
                    """,
                    parsed_style_rows,
                )
            if parsed_fts_rows and table_exists(conn, "voucher_search_fts"):
                conn.executemany(
                    """
                    insert into voucher_search_fts(rel_path, folder, name, preview_text, numbers_text)
                    values (?, ?, ?, ?, ?)
                    """,
                    parsed_fts_rows,
                )
            conn.commit()

        rebuild_folder_rows(conn)
        refresh_summary_from_db(conn, summary)
        summary_payload = summary.as_payload(elapsed_seconds=time.perf_counter() - started_at)
        # Persist the run summary so other tools can read it straight from the DB.
        conn.execute("delete from voucher_index_meta where key = 'summary'")
        conn.execute(
            "insert into voucher_index_meta(key, value_json) values (?, ?)",
            ("summary", json.dumps(summary_payload, indent=2, sort_keys=True)),
        )
        conn.commit()
        checkpoint_database(conn)
    if full:
        # Swap the completed work DB into place, then checkpoint the live copy.
        publish_work_db()
        with sqlite3.connect(DB_PATH, timeout=30) as conn:
            configure_connection(conn)
            checkpoint_database(conn)

    return summary_payload


def parse_args() -> argparse.Namespace:
    """Command-line interface: a single --full flag selecting the rebuild mode."""
    parser = argparse.ArgumentParser(description="Incrementally rebuild the voucher SQLite index.")
    parser.add_argument(
        "--full",
        action="store_true",
        help="Drop current indexed rows and rebuild the voucher index from scratch.",
    )
    return parser.parse_args()


def main() -> None:
    """CLI entry point: rebuild the index and print the JSON summary to stdout."""
    args = parse_args()
    payload = rebuild_index(full=bool(args.full))
    print(json.dumps(payload, indent=2))


if __name__ == "__main__":
    main()
