#!/usr/bin/env bash
# continuous-improve.sh — Entity-by-entity continuous improvement loop
#
# Iterates through every factbase entity, one at a time. Does mechanical
# cleanup in bash (fast), then only invokes an agent for entities that
# actually need review resolution or enrichment.
#
# Usage: .automate/continuous-improve.sh [options]
#   --priority reviews|stale|random   Queue ordering (default: reviews)
#   --cycle-delay N                   Seconds between entities (default: 5)
#   --model MODEL                     LLM model (default: claude-sonnet-4.6)
#   --start N                         Skip first N entities in queue (resume)
#   --no-skip                         Don't skip clean entities (force agent on all)
#
# Environment:
#   VIKUNJA_TOKEN   Optional override for the API token embedded in the agent
#                   prompt (used by the agent to file feature requests).
#
# Requires GNU grep (-P), GNU sed (-i, -E), GNU stat (-c), shuf, git, kiro-cli.

set -euo pipefail

# ─── Parse arguments ───
PRIORITY="reviews"
CYCLE_DELAY=5
MODEL="claude-sonnet-4.6"
START_AT=0
SKIP_CLEAN=true

# Print the usage line to stderr and exit 1.
usage() {
  echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--no-skip]" >&2
  exit 1
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --priority | --cycle-delay | --model | --start)
      # Under `set -u` a missing value would abort with "unbound variable";
      # show the usage text instead.
      [[ $# -ge 2 ]] || usage
      case "$1" in
        --priority) PRIORITY="$2" ;;
        --cycle-delay) CYCLE_DELAY="$2" ;;
        --model) MODEL="$2" ;;
        --start) START_AT="$2" ;;
      esac
      shift 2
      ;;
    --no-skip)
      SKIP_CLEAN=false
      shift
      ;;
    *) usage ;;
  esac
done

# Validate early: an unknown priority would otherwise yield an empty queue on
# every pass, and a non-numeric --start would break the integer comparisons.
case "$PRIORITY" in
  reviews | stale | random) ;;
  *) usage ;;
esac
delay_re='^[0-9]+([.][0-9]+)?$'
count_re='^[0-9]+$'
[[ "$CYCLE_DELAY" =~ $delay_re ]] || usage
[[ "$START_AT" =~ $count_re ]] || usage
START_AT=$((10#$START_AT)) # force base 10 so "08" is not parsed as octal

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
DOCS_DIR="$SCRIPT_DIR/.."
STATE_FILE="$SCRIPT_DIR/.improve-state.tsv"
ACTION_LOG="$SCRIPT_DIR/improve-history.log"

# NOTE(review): this token was hardcoded in the agent prompt. It is kept as the
# default so existing deployments keep working, but it should be rotated and
# supplied only through the environment.
VIKUNJA_TOKEN="${VIKUNJA_TOKEN:-tk_ff251f3d3512775c71913bc2f8ec0dabbf5016a8}"

# Mirror all stdout/stderr into the run log.
exec > >(tee -a "$SCRIPT_DIR/continuous-improve.log") 2>&1

# Print a timestamped message to stdout.
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"; }

# Print the first H1 title of markdown file $1 with any trailing " @t[...]"
# or " [^N]" suffix removed. Prints nothing if the file has no H1 line.
doc_title() {
  sed -n -e '/^# /{s/^# //;s/ @t\[.*//;s/ \[\^.*//;p;q;}' -- "$1" 2>/dev/null
}

# Print the number of open review questions ("- [ ] `@q[") in file $1.
# Always prints a number, 0 when grep finds nothing or cannot read the file.
count_open_reviews() {
  local n
  n=$(grep -c '^\- \[ \] `@q\[' -- "$1" 2>/dev/null) || true
  echo "${n:-0}"
}

# Print the last "status: STATUS | ..." line found in text $1 (empty if none).
extract_summary() {
  grep -oE 'status: [A-Z_]+ \|.*' <<< "$1" | tail -1 || true
}

# Stage everything under DOCS_DIR, commit with message $1 if anything is
# staged, and push (up to 3 attempts, rebasing between attempts).
# Returns 0 when there was nothing to commit or the push succeeded,
# 1 when staging, committing or pushing failed.
do_commit() {
  local msg="$1"
  local attempt
  cd "$DOCS_DIR" || return 1
  git add -A || return 1
  if git diff --cached --quiet; then
    return 0
  fi
  if ! git commit -m "$msg"; then
    log "❌ Commit failed: $msg"
    return 1
  fi
  for attempt in 1 2 3; do
    if git push 2>/dev/null; then
      log "✅ Committed: $msg"
      return 0
    fi
    log "⚠️ Push attempt $attempt failed, rebasing..."
    if ! git pull --rebase; then
      # Do not leave the working tree stuck in a half-finished rebase.
      git rebase --abort 2>/dev/null || true
    fi
  done
  log "❌ Push failed after 3 attempts"
  return 1
}

# Emit one TSV row per entity document under DOCS_DIR:
#   fb_id, file, review_count, mtime, last_processed, garbage_count, incomplete_name
# Documents without a "factbase:<6 hex>" id are skipped.
scan_entities() {
  cd "$DOCS_DIR" || return 1

  # Load the state file once; later rows for the same id overwrite earlier
  # ones, so the newest timestamp wins. The file is tab-separated (see
  # mark_processed).
  local -A last_seen=()
  local id ts
  if [[ -f "$STATE_FILE" ]]; then
    while IFS=$'\t' read -r id ts _; do
      if [[ -n "$id" && "$ts" =~ ^[0-9]+$ ]]; then
        last_seen["$id"]="$ts"
      fi
    done < "$STATE_FILE"
  fi

  local file fb_id review_count mtime last_processed garbage_count
  local incomplete_name dir title
  while IFS= read -r -d '' file; do
    fb_id=$(grep -oP -m1 '(?<=factbase:)[a-f0-9]{6}' -- "$file" 2>/dev/null) || true
    fb_id=${fb_id%%$'\n'*} # keep only the first id if the line held several
    [[ -n "$fb_id" ]] || continue

    review_count=$(count_open_reviews "$file")
    mtime=$(stat -c %Y -- "$file" 2>/dev/null) || mtime=0
    last_processed=${last_seen["$fb_id"]:-0}

    garbage_count=$(grep -ciP '^\[\^.*\b(not a conflict|sequential|boundary overlap|not simultaneous|malformed tag|garbled|artifact|remove)\b' -- "$file" 2>/dev/null) || true
    garbage_count=${garbage_count:-0}

    # Flag person docs with incomplete names (single word, alias, no space)
    incomplete_name=0
    dir=${file%/*}
    if [[ "${dir##*/}" == "rulers" ]]; then
      title=$(doc_title "$file") || true
      if [[ -n "$title" && "$title" != *" "* ]]; then
        incomplete_name=1
      fi
    fi

    # No field may be empty: tab is IFS whitespace, so the consumer's `read`
    # would collapse adjacent tabs and shift the columns.
    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
      "$fb_id" "$file" "$review_count" "$mtime" \
      "$last_processed" "$garbage_count" "$incomplete_name"
  done < <(
    find . -name '*.md' \
      -not -path './.git/*' \
      -not -path './.automate/*' \
      -not -path './.kiro/*' \
      -not -path './.factbase/*' \
      -not -path './_orphans.md' \
      -print0
  )
}

# Order the TSV rows on stdin according to $PRIORITY.
order_queue() {
  case "$PRIORITY" in
    reviews) sort -t$'\t' -k7,7rn -k3,3rn -k6,6rn -k5,5n ;;
    stale) sort -t$'\t' -k7,7rn -k5,5n -k3,3rn ;;
    random) shuf ;;
    *) cat ;; # unreachable after argument validation
  esac
}

# Print the ordered work queue (TSV rows, see scan_entities) to stdout.
build_queue() {
  scan_entities | order_queue
}

# Append "<fb_id>\t<epoch seconds>" to the state file.
mark_processed() {
  printf '%s\t%s\n' "$1" "$(date +%s)" >> "$STATE_FILE"
}

# ─── Bash-based mechanical cleanup (no agent needed) ───
# Apply in-place mechanical fixes to file $1.
# Returns 0 if at least one fix was applied, 1 otherwise.
bash_cleanup() {
  local file="$1"
  local changed=false
  local title_suffix_re='[[:space:]]+(@t\[|\[\^)'
  local garbage_re='^\[\^[0-9]+\]:.*\b(not a conflict|sequential role|boundary overlap|not simultaneous|malformed tag|garbled|artifact from previous|remove garbled)\b'
  # NOTE(review): the "factbase:review" marker is assumed to be an HTML
  # comment on its own line — confirm against what factbase actually writes.
  local review_marker_re='^(## Review Queue|<!--[[:space:]]*factbase:review[[:space:]]*-->)[[:space:]]*$'

  # Fix corrupted title: strip @t[...] and [^N] suffixes.
  # sed's ERE has no lazy quantifier, so cut from the FIRST suffix marker
  # instead of capturing a "shortest" title prefix.
  if grep -qE "^# .+${title_suffix_re}" -- "$file" 2>/dev/null; then
    sed -i -E "/^# .+${title_suffix_re}/ s/${title_suffix_re}.*\$//" -- "$file"
    changed=true
  fi

  # Delete garbage footnotes (review answers dumped as source citations)
  if grep -qiE "$garbage_re" -- "$file" 2>/dev/null; then
    sed -i -E "/${garbage_re}/Id" -- "$file"
    changed=true
  fi

  # Remove review answer artifact lines in body
  if grep -q '^- Artifact from previous review application' -- "$file" 2>/dev/null; then
    sed -i '/^- Artifact from previous review application/d' -- "$file"
    changed=true
  fi

  # Remove empty Review Queue headings and factbase:review markers, but only
  # when the document has no unanswered questions left.
  if grep -qE "$review_marker_re" -- "$file" 2>/dev/null; then
    if [[ "$(count_open_reviews "$file")" -eq 0 ]]; then
      sed -i -E "/${review_marker_re}/d" -- "$file"
      # Trim trailing blank lines at end of file (interior blank lines stay).
      sed -i -e :a -e '/^\n*$/{$d;N;ba' -e '}' -- "$file"
      changed=true
    fi
  fi

  [[ "$changed" == true ]]
}

# Print up to three lines mentioning the title of document $1 from each of up
# to five other markdown documents, for use as agent context.
# Prints an empty line when the document has no title or no mentions.
get_related_context() {
  local file="$1"
  local title
  title=$(doc_title "$file") || true
  [[ -n "$title" ]] || return 0

  cd "$DOCS_DIR" || return 1
  local related=""
  local match_file match_title excerpt
  while IFS= read -r match_file; do
    [[ "$match_file" == "$file" ]] && continue
    match_title=$(sed -n -e '/^# /{s/^# //;p;q;}' -- "$match_file" 2>/dev/null) || true
    # -F: the title is literal text, not a regular expression.
    excerpt=$(grep -iF -m3 -- "$title" "$match_file" 2>/dev/null) || true
    if [[ -n "$excerpt" ]]; then
      related+="--- From: ${match_title} (${match_file}) ---
${excerpt}

"
    fi
  done < <(
    # Exclude tool directories by name rather than by an unanchored regex on
    # the path, which also dropped unrelated paths such as "./digital.md".
    grep -rlF --include='*.md' \
      --exclude-dir=.git --exclude-dir=.automate \
      --exclude-dir=.kiro --exclude-dir=.factbase \
      --exclude='_orphans.md' \
      -- "$title" . 2>/dev/null \
      | head -5
  )
  printf '%s\n' "$related"
}

# ─── Agent-based processing (review questions + enrichment) ───
# Run the agent on one entity.
# Arguments: $1 fb_id, $2 file (relative to DOCS_DIR), $3 title
# Outputs:   the agent's full output followed by one "status: ..." summary line
# Returns:   1 if kiro-cli exits non-zero, 0 otherwise
process_entity_agent() {
  local fb_id="$1"
  local file="$2"
  local title="$3"

  cd "$DOCS_DIR" || return 1

  local content
  content=$(cat -- "$file")

  local related
  related=$(get_related_context "$file") || true

  local prompt
  prompt="You are continuously improving a factbase knowledge base, one entity at a time.
Use factbase MCP tools — call get_authoring_guide if you need formatting rules.

ENTITY FILE: $file
ENTITY ID: $fb_id

=== CURRENT DOCUMENT CONTENT ===
$content
=== END DOCUMENT CONTENT ==="

  if [[ -n "$related" ]]; then
    prompt+="

=== MENTIONS IN OTHER DOCUMENTS ===
$related
=== END MENTIONS ==="
  fi

  # Unquoted here-doc: $fb_id, $VIKUNJA_TOKEN and $title are expanded; "\\"
  # yields a literal backslash. `read -d ''` returns non-zero at end of input,
  # hence the `|| true`.
  local steps
  IFS= read -r -d '' steps << EOF || true
STEPS — work through in order, skip any that do not apply:

1. RESOLVE REVIEW QUESTIONS: Call get_review_queue(doc_id='$fb_id') — if there are open questions, answer them.
   Patterns learned from resolving thousands of these:
   - CONFLICT (chronological overlaps): Boundary-year overlaps in sequential reigns or periods are NOT conflicts (date granularity artifact). Concurrent roles (e.g., ruler + military commander, pharaoh + high priest) are both true simultaneously. Approximate dates that overlap by a few years reflect scholarly uncertainty, not contradiction.
   - AMBIGUOUS (terms): Expand the term AND create or update a definitions/ file so the term is not flagged again. Do NOT just answer inline — the definitions file is what prevents recurrence. Check existing definitions files first with search_content. Common: BCE, CE, polis, satrapy, pharaoh, consul, tribune, cuneiform, stele, ziggurat.
   - TEMPORAL/STALE: BCE dates are written in text, not temporal tags. CE date ranges (@t[...]) that are closed are historical, not stale. For open @t[~...] tags, search for newer scholarship.
   - MISSING: Search with search_knowledge and search_content. If not found, defer.
   After answering, call apply_review_answers(doc_id='$fb_id'). Then re-read the file with get_entity and verify apply did not corrupt it (garbage footnotes, mangled title). If it did, fix with update_document.

2. IDENTITY & ORGANIZATION: For ruler documents: if the title is a single name, alias, or epithet (not a full name), prioritize finding their full or commonly known name. Search local sources, check mentions in other documents, and cross-reference with civilization docs. If you find the full name, update the document title with update_document.
   For any document: if the title or file location could be improved (e.g. a ruler doc is in the wrong civilization folder, or the filename does not match the title), use the organize MCP tool to rename/move it. Use organize(action='move', doc_id=..., to=...) to relocate or update_document(id=..., title=...) to fix the title.

3. ENRICH: Search ALL your available tools for new information about this entity — factbase search, web search, whatever you have. Use the entity name, aliases, and known associations as search terms. Add any new facts not already present, following factbase authoring conventions.

4. IMPROVEMENT IDEAS: If you notice friction or gaps in factbase tools, file a Vikunja feature request:
   curl -s -X PUT "https://vikunja.home.everyonce.com/api/v1/projects/2/tasks" \\
     -H "Authorization: Bearer ${VIKUNJA_TOKEN}" \\
     -H "Content-Type: application/json" \\
     -d '{"title":"[factbase][feature] ","description":"","priority":2}'
   Only file genuinely useful improvements, not duplicates.

RULES:
- Use update_document to edit — be surgical, change only what needs changing
- If nothing needs changing, say so and move on

IMPORTANT: When finished, output exactly one line:
status: STATUS | $title | changes: DESCRIPTION
Status values: UPDATED (made changes), NO_CHANGE (nothing to do), ERROR (something failed)
EOF
  prompt+=$'\n\n'"${steps%$'\n'}"

  # stdin is /dev/null so the agent can never consume input intended for the
  # caller's read loop.
  local output
  if ! output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
    "$prompt" 2>&1 < /dev/null); then
    if [[ -n "$output" ]]; then
      printf '%s\n' "$output"
    fi
    log "❌ kiro-cli error for $title, continuing..."
    return 1
  fi

  printf '%s\n' "$output"

  local summary
  summary=$(extract_summary "$output")
  [[ -n "$summary" ]] || summary="status: UNKNOWN | $title | no summary returned"
  printf '%s\n' "$summary"
}

# ─── Main entity processing dispatcher ───
# Process one queue row: bash cleanup, then the agent when needed, then
# commit, append to the action log and record the entity as processed.
# Arguments: the seven TSV fields produced by build_queue, in order.
# Returns:   0 if the entity was updated, 1 otherwise.
process_entity() {
  local fb_id="$1"
  local file="$2"
  local review_count="$3"
  local mtime="$4"
  local last_processed="$5"
  local garbage_count="$6"
  local incomplete_name="$7"

  cd "$DOCS_DIR" || return 1

  local title
  title=$(doc_title "$file") || true

  log "━━━ [$title] ($fb_id) reviews=$review_count garbage=$garbage_count ━━━"

  local start_time
  start_time=$(date +%s)
  local status="NO_CHANGE"
  local summary=""

  # Phase 1: Bash cleanup (milliseconds, no agent)
  if bash_cleanup "$file"; then
    log "  🧹 Bash cleanup applied"
    status="UPDATED"
    # Recount after cleanup
    review_count=$(count_open_reviews "$file")
    garbage_count=0
  fi

  # Phase 2: Decide if agent is needed
  local needs_agent=false
  if [[ "${incomplete_name:-0}" -eq 1 ]]; then
    needs_agent=true
    log "  👤 Incomplete name (ruler doc) → agent needed to resolve identity"
  elif [[ "$review_count" -gt 0 ]]; then
    needs_agent=true
    log "  📋 $review_count review questions → agent needed"
  elif [[ "$SKIP_CLEAN" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
    log "  ⏭️ No questions, not modified since last pass → skipping agent"
  else
    needs_agent=true
    log "  🔍 Enrichment pass → agent needed"
  fi

  if [[ "$needs_agent" == true ]]; then
    local agent_output agent_status
    if agent_output=$(process_entity_agent "$fb_id" "$file" "$title"); then
      printf '%s\n' "$agent_output"
      summary=$(extract_summary "$agent_output")
      agent_status=$(grep -oP '^status: \K[A-Z_]+' <<< "$summary" || echo "UNKNOWN")
      if [[ "$agent_status" == "UPDATED" ]]; then
        status="UPDATED"
      fi
    else
      printf '%s\n' "$agent_output"
      summary="status: ERROR | $title | agent invocation failed"
    fi
  fi

  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))

  if [[ "$status" == "UPDATED" ]]; then
    # do_commit logs its own failures; a failed push must not stop the loop.
    do_commit "improve: $title" || true
  fi

  [[ -n "$summary" ]] || summary="status: $status | $title | bash-only pass"
  {
    echo "[$(date -Iseconds)] $fb_id | $title"
    echo "  $summary"
    echo "  duration: ${duration}s"
  } >> "$ACTION_LOG"

  mark_processed "$fb_id"
  log "  Done (${duration}s) — $status"

  [[ "$status" == "UPDATED" ]]
}

# ═══════════════════════════════════════════
# MAIN LOOP
# ═══════════════════════════════════════════
# Build the queue and process every entity, forever. --start applies to the
# first pass only.
main() {
  log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_clean=$SKIP_CLEAN)"
  log "Docs dir: $DOCS_DIR"
  log "State file: $STATE_FILE"
  log "Press Ctrl+C to stop"

  local pass=0
  local queue total processed updated position
  local fb_id file review_count mtime last_processed garbage_count incomplete_name

  while true; do
    pass=$((pass + 1))
    log ""
    log "═══════════════════════════════════════════"
    log " PASS $pass — $(TZ='America/Chicago' date '+%Y-%m-%d %r') — priority=$PRIORITY"
    log "═══════════════════════════════════════════"

    if ! queue=$(build_queue); then
      log "❌ Failed to build queue, retrying after delay..."
      sleep "$CYCLE_DELAY"
      continue
    fi
    # grep -c already prints 0 when nothing matches; it only exits non-zero.
    total=$(grep -c . <<< "$queue" || true)
    log "Queue: $total entities (starting at $((START_AT + 1)))"

    processed=0
    updated=0
    position=0

    # The queue is read on fd 3 so that nothing run inside the loop can
    # consume queue rows from stdin.
    while IFS=$'\t' read -r -u 3 fb_id file review_count mtime last_processed garbage_count incomplete_name; do
      [[ -n "$fb_id" ]] || continue
      position=$((position + 1))

      if [[ $position -le $START_AT ]]; then
        continue
      fi

      processed=$((processed + 1))
      log "[$position/$total] Next up..."

      if process_entity "$fb_id" "$file" "$review_count" "$mtime" "$last_processed" "$garbage_count" "$incomplete_name"; then
        updated=$((updated + 1))
      fi

      sleep "$CYCLE_DELAY"
    done 3<<< "$queue"

    log ""
    log "═══ Pass $pass complete: $processed processed, $updated updated ═══"
    START_AT=0
    log "Looping back to start..."
    sleep "$CYCLE_DELAY"
  done
}

main