#!/usr/bin/env bash
# continuous-improve.sh — Entity-by-entity continuous improvement loop
#
# Iterates through every factbase entity, one at a time. Does mechanical
# cleanup in bash (fast), then only invokes an agent for entities that
# actually need review resolution or enrichment.
#
# Usage: .automate/continuous-improve.sh [options]
#   --priority reviews|stale|random   Queue ordering (default: reviews)
#   --cycle-delay N                   Seconds between entities (default: 5)
#   --model MODEL                     LLM model (default: claude-sonnet-4.6)
#   --start N                         Skip first N entities in queue (resume)
#   --no-skip                         Don't skip clean entities (force agent on all)

set -euo pipefail

# ─── Parse arguments ───
# Defaults; each can be overridden by the matching command-line option.
PRIORITY="reviews"
CYCLE_DELAY=5
MODEL="claude-sonnet-4.6"
START_AT=0
SKIP_CLEAN=true

# Print an optional error message plus the usage line to stderr, then exit 1.
usage() {
    if [[ $# -gt 0 ]]; then
        echo "Error: $*" >&2
    fi
    echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--no-skip]" >&2
    exit 1
}

# Parse and validate command-line options into the globals above.
# Arguments: the script's own arguments ("$@").
# Exits 1 (via usage) on an unknown option, a missing value, or an invalid value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --priority)
                # ${2-} keeps a missing value from tripping `set -u` with an
                # opaque "unbound variable" message.
                case "${2-}" in
                    reviews|stale|random) PRIORITY="$2" ;;
                    *) usage "--priority must be one of: reviews, stale, random" ;;
                esac
                shift 2
                ;;
            --cycle-delay)
                [[ "${2-}" =~ ^[0-9]+([.][0-9]+)?$ ]] \
                    || usage "--cycle-delay requires a non-negative number of seconds"
                CYCLE_DELAY="$2"
                shift 2
                ;;
            --model)
                [[ -n "${2-}" ]] || usage "--model requires a value"
                MODEL="$2"
                shift 2
                ;;
            --start)
                [[ "${2-}" =~ ^[0-9]+$ ]] \
                    || usage "--start requires a non-negative integer"
                # Force base 10 so a value such as "08" is not read as invalid octal
                # when it is later compared arithmetically.
                START_AT=$((10#$2))
                shift 2
                ;;
            --no-skip)
                SKIP_CLEAN=false
                shift
                ;;
            *)
                usage "unknown option '$1'"
                ;;
        esac
    done
}

parse_args "$@"

# ─── Paths ───
# Every path is anchored on the directory that holds this script; the docs
# tree is its parent directory.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
DOCS_DIR="${SCRIPT_DIR}/.."
STATE_FILE="${SCRIPT_DIR}/.improve-state.tsv"
ACTION_LOG="${SCRIPT_DIR}/improve-history.log"

# From here on, stdout and stderr both go to the terminal and are appended to
# continuous-improve.log next to the script.
exec > >(tee -a "${SCRIPT_DIR}/continuous-improve.log") 2>&1

# Write the arguments to stdout as one line, prefixed with a local
# "[YYYY-MM-DD HH:MM:SS]" timestamp.
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$*"
}

# Stage everything under DOCS_DIR, commit it, and push (up to three attempts,
# rebasing onto the remote between attempts).
# Globals:   DOCS_DIR (read)
# Arguments: $1 - commit message
# Returns:   0 if there was nothing to commit or the push succeeded;
#            1 if the commit failed or every push attempt failed.
do_commit() {
    local msg="$1"
    local attempt

    cd "$DOCS_DIR" || { log "❌ Cannot cd to $DOCS_DIR"; return 1; }
    git add -A

    # `git diff --cached --quiet` exits 0 when nothing is staged.
    if git diff --cached --quiet; then
        return 0
    fi

    if ! git commit -m "$msg"; then
        log "❌ Commit failed: $msg"
        return 1
    fi

    for attempt in 1 2 3; do
        if git push 2>/dev/null; then
            log "✅ Committed: $msg"
            return 0
        fi
        log "⚠️  Push attempt $attempt failed, rebasing..."
        if ! git pull --rebase; then
            # A conflicted rebase would leave the work tree mid-rebase and break
            # every later commit, so back out of it before retrying.
            git rebase --abort 2>/dev/null || true
        fi
    done

    log "❌ Push failed after 3 attempts"
    return 1
}

# Scan DOCS_DIR for entity markdown files and print the work queue to stdout.
# Each row is tab-separated:
#   fb_id, file, open review count, mtime (epoch), last processed (epoch),
#   garbage footnote count, incomplete-name flag (0/1)
# Files without a `factbase:<6 hex>` id are skipped.
# Globals:   DOCS_DIR, STATE_FILE, PRIORITY (all read)
# Outputs:   queue rows on stdout, ordered according to PRIORITY
# Returns:   0, or 1 if DOCS_DIR cannot be entered
build_queue() {
    cd "$DOCS_DIR" || { echo "build_queue: cannot cd to $DOCS_DIR" >&2; return 1; }

    local file fb_id review_count mtime last_processed garbage_count
    local incomplete_name parent_dir doc_title

    # Rows are streamed straight into the sorter, so no temp file can be left
    # behind if the script is interrupted.
    find . -name '*.md' \
        -not -path './.git/*' \
        -not -path './.automate/*' \
        -not -path './.kiro/*' \
        -not -path './.factbase/*' \
        -not -path './_orphans.md' \
        -print0 | while IFS= read -r -d '' file; do

        fb_id=$(grep -oP '(?<=factbase:)[a-f0-9]{6}' "$file" 2>/dev/null | head -1) || true
        [[ -z "$fb_id" ]] && continue

        # grep -c exits 1 when the count is 0; that is not an error here.
        review_count=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true

        mtime=$(stat -c %Y "$file" 2>/dev/null) || mtime=0

        # Latest timestamp recorded for this id in the append-only state file.
        last_processed=0
        if [[ -f "$STATE_FILE" ]]; then
            last_processed=$(awk -F'\t' -v id="$fb_id" \
                '$1 == id { last = $2 } END { print last }' "$STATE_FILE" 2>/dev/null) || true
            [[ "$last_processed" =~ ^[0-9]+$ ]] || last_processed=0
        fi

        garbage_count=$(grep -ciP '^\[\^.*\b(not a conflict|sequential|boundary overlap|not simultaneous|malformed tag|garbled|artifact|remove)\b' "$file" 2>/dev/null) || true

        # Flag person docs with incomplete names (single word, alias, no space).
        # Only files whose immediate parent directory is "rulers" are checked.
        incomplete_name=0
        parent_dir=${file%/*}
        parent_dir=${parent_dir##*/}
        if [[ "$parent_dir" == "rulers" ]]; then
            doc_title=$(grep '^# ' "$file" 2>/dev/null | head -1 \
                | sed 's/^# //;s/ @t\[.*//;s/ \[\^.*//') || true
            if [[ -n "$doc_title" && "$doc_title" != *' '* ]]; then
                incomplete_name=1
            fi
        fi

        # printf '%s' emits the path verbatim; `echo -e` would expand backslash
        # sequences inside file names and corrupt the row.
        printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
            "$fb_id" "$file" "$review_count" "$mtime" \
            "$last_processed" "$garbage_count" "$incomplete_name"
    done | case "$PRIORITY" in
        reviews) sort -t$'\t' -k7,7rn -k3,3rn -k6,6rn -k5,5n ;;
        stale)   sort -t$'\t' -k7,7rn -k5,5n -k3,3rn ;;
        random)  shuf ;;
        *)
            # An unrecognised priority used to yield an empty queue silently.
            echo "build_queue: unknown priority '$PRIORITY', using 'reviews' ordering" >&2
            sort -t$'\t' -k7,7rn -k3,3rn -k6,6rn -k5,5n
            ;;
    esac || true
}

# Append "<entity id><TAB><current epoch seconds>" to the state file.
# Globals:   STATE_FILE (appended to)
# Arguments: $1 - factbase entity id
mark_processed() {
    local entity_id="$1"
    local now
    now=$(date +%s)
    echo -e "${entity_id}\t${now}" >> "$STATE_FILE"
}

# ─── Bash-based mechanical cleanup (no agent needed) ───
# Apply in-place mechanical fixes to one entity file:
#   1. strip @t[...] / [^N] suffixes from "# " title lines
#   2. delete garbage footnotes (review answers dumped as source citations)
#   3. delete "- Artifact from previous review application" body lines
#   4. drop the Review Queue heading/marker when no open questions remain,
#      then trim trailing blank lines
# Arguments: $1 - path to the markdown file (edited in place)
# Returns:   0 if any fix was applied, 1 if the file was left untouched
bash_cleanup() {
    local file="$1"
    local changed=false
    local garbage_re='^\[\^\d+\]:.*\b(not a conflict|sequential role|boundary overlap|not simultaneous|malformed tag|garbled|artifact from previous|remove garbled)\b'
    local garbage_lines delete_script ln has_open

    # Fix corrupted title: strip @t[...] and [^N] suffixes.
    # sed's ERE has no lazy quantifier (".+?" is just an optional greedy group),
    # so one substitution removes only the LAST marker. Loop until none is left
    # so "# Name @t[...] [^1]" is reduced to "# Name" in a single run.
    if grep -qP '^# .+(\s+@t\[|\s+\[\^)' "$file" 2>/dev/null; then
        sed -i -E \
            -e ':strip' \
            -e 's/^(# .*[^[:space:]])[[:space:]]+(@t\[.*|\[\^.*)$/\1/' \
            -e 't strip' "$file"
        changed=true
    fi

    # Delete garbage footnotes (review answers dumped as source citations).
    # Line numbers come from grep -P (sed has no PCRE); all deletions are then
    # done in one sed pass, which addresses lines by their original numbers.
    garbage_lines=$(grep -niP "$garbage_re" "$file" 2>/dev/null | cut -d: -f1) || true
    if [[ -n "$garbage_lines" ]]; then
        delete_script=""
        while IFS= read -r ln; do
            [[ -n "$ln" ]] && delete_script+="${ln}d;"
        done <<< "$garbage_lines"
        sed -i -e "$delete_script" "$file"
        changed=true
    fi

    # Remove review answer artifact lines in body
    if grep -qP '^- Artifact from previous review application' "$file" 2>/dev/null; then
        sed -i '/^- Artifact from previous review application/d' "$file"
        changed=true
    fi

    # Remove empty Review Queue sections and factbase:review markers
    if grep -qP '^## Review Queue|<!-- factbase:review -->' "$file" 2>/dev/null; then
        # Only remove if the review queue has no actual unanswered questions
        has_open=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
        if [[ "$has_open" -eq 0 ]]; then
            sed -i '/^## Review Queue$/d; /^<!-- factbase:review -->$/d' "$file"
            # Trim trailing blank lines at end of file.
            # NOTE(review): a stray "---" separator left above the removed
            # section is NOT deleted by this expression.
            sed -i -e :a -e '/^\n*$/{$d;N;ba' -e '}' "$file"
            changed=true
        fi
    fi

    [[ "$changed" == true ]]
}

# Collect short excerpts from other documents that mention this entity's title.
# Globals:   DOCS_DIR (read; becomes the working directory)
# Arguments: $1 - entity file path, relative to the current directory
# Outputs:   for up to five matching files (the entity itself is skipped), a
#            "--- From: <title> (<file>) ---" header followed by at most three
#            matching lines; nothing when the entity has no title
get_related_context() {
    local file="$1"
    local title related="" match_file match_title excerpt

    title=$(grep '^# ' "$file" | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//') || true
    [[ -z "$title" ]] && return

    cd "$DOCS_DIR" || return 0

    while IFS= read -r match_file; do
        [[ "$match_file" == "$file" ]] && continue
        match_title=$(grep '^# ' "$match_file" 2>/dev/null | head -1 | sed 's/^# //') || true
        # -F: the title is literal text, not a regex (titles may contain
        # ".", "*", "[" ...); "--" protects titles that start with "-".
        excerpt=$(grep -iF -- "$title" "$match_file" 2>/dev/null | head -3) || true
        if [[ -n "$excerpt" ]]; then
            related+="--- From: ${match_title} (${match_file}) ---
${excerpt}

"
        fi
    # Tool directories are excluded by exact directory name. The previous
    # unanchored filter ('.git\|.kiro\|...') treated "." as a wildcard and
    # dropped unrelated paths such as ./digital/...
    done < <(grep -rlF --include='*.md' \
        --exclude-dir=.git --exclude-dir=.automate \
        --exclude-dir=.kiro --exclude-dir=.factbase \
        --exclude='_orphans.md' \
        -- "$title" . \
        | head -5)

    echo "$related"
}

# ─── Agent-based processing (review questions + enrichment) ───
# Build the improvement prompt for one entity and run it through kiro-cli.
# Globals:   DOCS_DIR, MODEL (read); VIKUNJA_TOKEN (read, optional override
#            for the API token embedded in the prompt)
# Arguments: $1 - factbase entity id
#            $2 - entity file path relative to DOCS_DIR
#            $3 - entity title
# Outputs:   the agent's raw output, followed by a final
#            <action_summary>…</action_summary> line. That line is always
#            present and tagged, including the UNKNOWN and ERROR cases, so
#            callers can extract it the same way in every case.
# Returns:   0 when kiro-cli succeeded, 1 when the file could not be read or
#            kiro-cli exited non-zero
process_entity_agent() {
    local fb_id="$1"
    local file="$2"
    local title="$3"

    # NOTE(security): the fallback token is a credential committed to source.
    # Supply VIKUNJA_TOKEN from the environment and rotate/remove the literal.
    local vikunja_token="${VIKUNJA_TOKEN:-tk_ff251f3d3512775c71913bc2f8ec0dabbf5016a8}"

    cd "$DOCS_DIR" || return 1

    local content
    if ! content=$(cat "$file"); then
        log "❌ Cannot read $file, continuing..."
        echo "<action_summary>status: ERROR | $title | cannot read $file</action_summary>"
        return 1
    fi

    local related
    related=$(get_related_context "$file") || true

    local prompt
    prompt="You are continuously improving a factbase knowledge base, one entity at a time.
Use factbase MCP tools — call get_authoring_guide if you need formatting rules.

ENTITY FILE: $file
ENTITY ID: $fb_id

=== CURRENT DOCUMENT CONTENT ===
$content
=== END DOCUMENT CONTENT ==="

    if [[ -n "$related" ]]; then
        prompt+="

=== MENTIONS IN OTHER DOCUMENTS ===
$related
=== END MENTIONS ==="
    fi

    # The remainder is single-quoted so the prompt text (backslashes, double
    # quotes) stays literal; '"'"' yields a literal single quote and
    # '"$var"' splices a variable in.
    prompt+='

STEPS — work through in order, skip any that do not apply:

1. RESOLVE REVIEW QUESTIONS:
   Call get_review_queue(doc_id='"'"''"$fb_id"''"'"') — if there are open questions, answer them.

   Patterns learned from resolving thousands of these:
   - CONFLICT (chronological overlaps): Boundary-year overlaps in sequential reigns or periods
     are NOT conflicts (date granularity artifact). Concurrent roles (e.g., ruler + military
     commander, pharaoh + high priest) are both true simultaneously. Approximate dates that
     overlap by a few years reflect scholarly uncertainty, not contradiction.
   - AMBIGUOUS (terms): Expand the term AND create or update a definitions/ file so the
     term is not flagged again. Do NOT just answer inline — the definitions file is what
     prevents recurrence. Check existing definitions files first with search_content.
     Common: BCE, CE, polis, satrapy, pharaoh, consul, tribune, cuneiform, stele, ziggurat.
   - TEMPORAL/STALE: BCE dates are written in text, not temporal tags. CE date ranges
     (@t[...]) that are closed are historical, not stale. For open @t[~...] tags, search
     for newer scholarship.
   - MISSING: Search with search_knowledge and search_content. If not found, defer.

   After answering, call apply_review_answers(doc_id='"'"''"$fb_id"''"'"'). Then re-read the
   file with get_entity and verify apply did not corrupt it (garbage footnotes, mangled title).
   If it did, fix with update_document.

2. IDENTITY & ORGANIZATION:
   For ruler documents: if the title is a single name, alias, or epithet (not a full name),
   prioritize finding their full or commonly known name. Search local sources, check mentions
   in other documents, and cross-reference with civilization docs.
   If you find the full name, update the document title with update_document.

   For any document: if the title or file location could be improved (e.g. a ruler doc is in
   the wrong civilization folder, or the filename does not match the title), use the organize MCP
   tool to rename/move it. Use organize(action='"'"'move'"'"', doc_id=..., to=...) to relocate
   or update_document(id=..., title=...) to fix the title.

3. ENRICH:
   Search ALL your available tools for new information about this entity — factbase search,
   web search, whatever you have. Use the entity name, aliases,
   and known associations as search terms. Add any new facts not already present, following
   factbase authoring conventions.

4. IMPROVEMENT IDEAS:
   If you notice friction or gaps in factbase tools, file a Vikunja feature request:
   curl -s -X PUT "https://vikunja.home.everyonce.com/api/v1/projects/2/tasks" \
     -H "Authorization: Bearer '"$vikunja_token"'" \
     -H "Content-Type: application/json" \
     -d '"'"'{"title":"[factbase][feature] <summary>","description":"<details>","priority":2}'"'"'
   Only file genuinely useful improvements, not duplicates.

RULES:
- Use update_document to edit — be surgical, change only what needs changing
- If nothing needs changing, say so and move on

IMPORTANT: When finished, output exactly one line:
<action_summary>status: STATUS | '"$title"' | changes: DESCRIPTION</action_summary>
Status values: UPDATED (made changes), NO_CHANGE (nothing to do), ERROR (something failed)'

    local output
    local rc=0
    output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
        "$prompt" 2>&1) || rc=$?

    # Always surface what the CLI printed; on failure it is the only diagnostic.
    echo "$output"

    if [[ "$rc" -ne 0 ]]; then
        log "❌ kiro-cli error for $title, continuing..."
        echo "<action_summary>status: ERROR | $title | kiro-cli exited with status $rc</action_summary>"
        return 1
    fi

    local summary
    summary=$(echo "$output" | grep -oP '(?<=<action_summary>).*(?=</action_summary>)' | tail -1) || true
    [[ -z "$summary" ]] && summary="status: UNKNOWN | $title | no summary returned"
    echo "<action_summary>$summary</action_summary>"
}

# ─── Main entity processing dispatcher ───
# Process one queue row: run the bash cleanup, decide whether the agent is
# needed, run it, commit when something changed, and record the outcome.
# Globals:   DOCS_DIR, SKIP_CLEAN, ACTION_LOG (read / appended to)
# Arguments: $1 fb_id  $2 file  $3 review_count  $4 mtime  $5 last_processed
#            $6 garbage_count  $7 incomplete_name   (the build_queue columns)
# Returns:   0 if the entity was updated (by cleanup or agent), 1 otherwise
process_entity() {
    local fb_id="$1"
    local file="$2"
    local review_count="$3"
    local mtime="$4"
    local last_processed="$5"
    local garbage_count="$6"
    local incomplete_name="$7"

    cd "$DOCS_DIR" || return 1

    local title
    title=$(grep '^# ' "$file" | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//') || true

    log "━━━ [$title] ($fb_id) reviews=$review_count garbage=$garbage_count ━━━"

    local start_time
    start_time=$(date +%s)
    local status="NO_CHANGE"
    local summary=""

    # Phase 1: Bash cleanup (milliseconds, no agent)
    if bash_cleanup "$file"; then
        log "  🧹 Bash cleanup applied"
        status="UPDATED"
        # Recount after cleanup
        review_count=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    fi

    # Phase 2: Decide if agent is needed
    local needs_agent=false
    if [[ "${incomplete_name:-0}" -eq 1 ]]; then
        needs_agent=true
        log "  👤 Incomplete name (ruler doc) → agent needed to resolve identity"
    elif [[ "$review_count" -gt 0 ]]; then
        needs_agent=true
        log "  📋 $review_count review questions → agent needed"
    elif [[ "$SKIP_CLEAN" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
        log "  ⏭️  No questions, not modified since last pass → skipping agent"
    else
        needs_agent=true
        log "  🔍 Enrichment pass → agent needed"
    fi

    local agent_failed=false
    if [[ "$needs_agent" == true ]]; then
        local agent_output
        local agent_rc=0
        agent_output=$(process_entity_agent "$fb_id" "$file" "$title") || agent_rc=$?
        echo "$agent_output"

        summary=$(echo "$agent_output" | grep -oP '(?<=<action_summary>).*(?=</action_summary>)' | tail -1) || true
        local agent_status
        agent_status=$(echo "$summary" | grep -oP '^status: \K[A-Z_]+' || echo "UNKNOWN")

        if [[ "$agent_rc" -ne 0 ]]; then
            agent_failed=true
            [[ -z "$summary" ]] && summary="status: ERROR | $title | agent invocation failed (exit $agent_rc)"
        elif [[ "$agent_status" == "ERROR" ]]; then
            agent_failed=true
        elif [[ "$agent_status" == "UPDATED" ]]; then
            status="UPDATED"
        fi

        # The agent ran but produced no parsable summary; record that as such
        # instead of letting it fall through to the "bash-only pass" label.
        [[ -z "$summary" ]] && summary="status: UNKNOWN | $title | no summary returned"
    fi

    local end_time
    end_time=$(date +%s)
    local duration=$((end_time - start_time))

    if [[ "$status" == "UPDATED" ]]; then
        do_commit "improve: $title"
    fi

    [[ -z "$summary" ]] && summary="status: $status | $title | bash-only pass"

    {
        echo "[$(date -Iseconds)] $fb_id | $title"
        echo "  $summary"
        echo "  duration: ${duration}s"
    } >> "$ACTION_LOG"

    # A failed agent run must not stamp the entity as processed: combined with
    # the "not modified since last pass" shortcut it would never be retried.
    if [[ "$agent_failed" == true ]]; then
        log "  ⚠️  Agent failed — not marking as processed, will retry next pass"
    else
        mark_processed "$fb_id"
    fi
    log "  Done (${duration}s) — $status"

    [[ "$status" == "UPDATED" ]]
}

# ═══════════════════════════════════════════
# MAIN LOOP
# ═══════════════════════════════════════════
# Runs until interrupted. Each pass rebuilds the queue, walks it one entity at
# a time, then starts over. START_AT (from --start) applies to the first pass
# only.
log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_clean=$SKIP_CLEAN)"
log "Docs dir: $DOCS_DIR"
log "State file: $STATE_FILE"
log "Press Ctrl+C to stop"

PASS=0
while true; do
    PASS=$((PASS + 1))
    log ""
    log "═══════════════════════════════════════════"
    log "  PASS $PASS — $(TZ='America/Chicago' date '+%Y-%m-%d %r') — priority=$PRIORITY"
    log "═══════════════════════════════════════════"

    QUEUE=$(build_queue)
    # grep -c already prints "0" when nothing matches (and exits 1); appending
    # `|| echo 0` would produce the two-line value "0\n0".
    TOTAL=$(grep -c . <<< "$QUEUE" || true)
    log "Queue: $TOTAL entities (starting at $((START_AT + 1)))"
    if [[ "$TOTAL" -eq 0 ]]; then
        log "⚠️  Queue is empty — nothing to process this pass"
    fi

    PROCESSED=0
    UPDATED=0
    POSITION=0

    # The queue is read on fd 3 so that nothing run inside the loop (agent CLI,
    # git) can swallow the remaining rows from stdin.
    while IFS=$'\t' read -r -u 3 fb_id file review_count mtime last_processed garbage_count incomplete_name; do
        [[ -z "$fb_id" ]] && continue
        POSITION=$((POSITION + 1))

        if [[ $POSITION -le $START_AT ]]; then
            continue
        fi

        PROCESSED=$((PROCESSED + 1))
        log "[$POSITION/$TOTAL] Next up..."

        if process_entity "$fb_id" "$file" "$review_count" "$mtime" "$last_processed" "$garbage_count" "$incomplete_name" </dev/null; then
            UPDATED=$((UPDATED + 1))
        fi

        sleep "$CYCLE_DELAY"
    done 3<<< "$QUEUE"

    log ""
    log "═══ Pass $PASS complete: $PROCESSED processed, $UPDATED updated ═══"

    START_AT=0
    log "Looping back to start..."
    sleep "$CYCLE_DELAY"
done