#!/usr/bin/env bash
# continuous-improve.sh — Continuous enrichment and quality loop
#
# Each cycle: processes every entity (resolve reviews, enrich from outside
# sources), then runs a deep cross-document validation scan.
#
# Usage: .automate/continuous-improve.sh [options]
#   --priority reviews|stale|random   Queue ordering (default: reviews)
#   --cycle-delay N                   Seconds between entities (default: 5)
#   --model MODEL                     LLM model (default: claude-sonnet-4.6)
#   --start N                         Skip first N entities in queue (resume)
#   --skip-unchanged                  Skip entities unchanged since last pass
#
# Environment:
#   VIKUNJA_TOKEN   Bearer token quoted in the agent prompt for filing feature
#                   requests (falls back to the value embedded below).
#
# Requires GNU grep (-P), GNU sed (-i, \s, \b), GNU stat (-c) and GNU date (-I).

set -euo pipefail

# ─── Parse arguments ───
PRIORITY="reviews"
CYCLE_DELAY=5
MODEL="claude-sonnet-4.6"
START_AT=0
SKIP_UNCHANGED=false

usage() {
  echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--skip-unchanged]"
}

# Print an optional error plus the usage text on stderr, then exit 1.
die_usage() {
  if [[ $# -gt 0 ]]; then
    echo "Error: $*" >&2
  fi
  usage >&2
  exit 1
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --priority | --cycle-delay | --model | --start)
      # Without this check a missing value aborts with an opaque
      # "unbound variable" error from `set -u`.
      [[ $# -ge 2 ]] || die_usage "$1 requires a value"
      case "$1" in
        --priority) PRIORITY="$2" ;;
        --cycle-delay) CYCLE_DELAY="$2" ;;
        --model) MODEL="$2" ;;
        --start) START_AT="$2" ;;
      esac
      shift 2
      ;;
    --skip-unchanged)
      SKIP_UNCHANGED=true
      shift
      ;;
    *)
      die_usage
      ;;
  esac
done

# An unknown priority would otherwise yield an empty queue on every pass.
case "$PRIORITY" in
  reviews | stale | random) ;;
  *) die_usage "invalid --priority '$PRIORITY'" ;;
esac

[[ "$START_AT" =~ ^[0-9]+$ ]] || die_usage "--start must be a non-negative integer"
START_AT=$((10#$START_AT)) # force base 10 so "010" is not read as octal

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
DOCS_DIR="$SCRIPT_DIR/.."
STATE_FILE="$SCRIPT_DIR/.improve-state.tsv"
ACTION_LOG="$SCRIPT_DIR/improve-history.log"

# SECURITY(review): this bearer token is committed in plain text and is sent to
# the agent inside the prompt. Rotate it and supply VIKUNJA_TOKEN from the
# environment; the literal fallback only keeps existing deployments working.
VIKUNJA_TOKEN="${VIKUNJA_TOKEN:-tk_ff251f3d3512775c71913bc2f8ec0dabbf5016a8}"

# Unanswered review question line, e.g. "- [ ] `@q[...".
readonly OPEN_QUESTION_RE='^\- \[ \] `@q\['
# Footnote definitions that are really review answers (used by bash_cleanup).
readonly GARBAGE_FOOTNOTE_RE='^\[\^\d+\]:.*\b(not a conflict|sequential role|boundary overlap|not simultaneous|malformed tag|garbled|artifact from previous|remove garbled)\b'

# Mirror everything printed from here on (stdout and stderr) into the run log.
exec > >(tee -a "$SCRIPT_DIR/continuous-improve.log") 2>&1

# Print a timestamped message on stdout.
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"; }

# Print the first "# " heading of document $1 with the "# " prefix and any
# trailing " @t[...]" / " [^N]" annotation removed. Prints nothing when the
# file is unreadable or has no heading.
doc_title() {
  sed -n '/^# /{s/^# //;s/ @t\[.*//;s/ \[\^.*//;p;q;}' "$1" 2>/dev/null || true
}

# Read agent output on stdin and print the last "status: XXX | ..." line found
# (from "status:" onward). Prints nothing when no such line exists.
extract_summary() {
  grep -oE 'status: [A-Z_]+ \|.*' | tail -1 || true
}

# Stage everything under DOCS_DIR, commit with message $1 and push, retrying
# the push up to three times with a rebase in between.
# Returns 0 when there was nothing to commit or the push succeeded, 1 otherwise.
do_commit() {
  local msg="$1"
  local attempt

  cd "$DOCS_DIR" || return 1
  git add -A || return 1

  # Nothing staged → nothing to do.
  if git diff --cached --quiet; then
    return 0
  fi

  if ! git commit -m "$msg"; then
    log "❌ Commit failed: $msg"
    return 1
  fi

  for attempt in 1 2 3; do
    if git push 2>/dev/null; then
      log "✅ Committed: $msg"
      return 0
    fi
    log "⚠️ Push attempt $attempt failed, rebasing..."
    if ! git pull --rebase; then
      log "⚠️ git pull --rebase failed"
      # Do not leave the work tree stuck mid-rebase for the next entity.
      git rebase --abort 2>/dev/null || true
    fi
  done

  log "❌ Push failed after 3 attempts"
  return 1
}

# Print the work queue on stdout, one entity per line, tab-separated:
#   fb_id, file, review_count, mtime, last_processed, garbage_count, incomplete_name
# ordered according to $PRIORITY. Only *.md files carrying a "factbase:<6 hex>"
# id are listed.
build_queue() {
  cd "$DOCS_DIR" || return 1

  local tmpfile
  tmpfile=$(mktemp) || return 1

  local file fb_id review_count mtime last_processed garbage_count
  local incomplete_name parent_dir title

  while IFS= read -r -d '' file; do
    fb_id=$(grep -oP '(?<=factbase:)[a-f0-9]{6}' "$file" 2>/dev/null | head -1) || true
    [[ -z "$fb_id" ]] && continue

    # grep -c prints the count even when it exits 1 on zero matches.
    review_count=$(grep -c "$OPEN_QUESTION_RE" "$file" 2>/dev/null) || true
    mtime=$(stat -c %Y "$file" 2>/dev/null) || mtime=0

    # The state file is tab-delimited (see mark_processed); take the newest
    # entry for this id. The value is printed as a string so awk never
    # reformats a large epoch number.
    last_processed=0
    if [[ -f "$STATE_FILE" ]]; then
      last_processed=$(awk -F'\t' -v id="$fb_id" \
        '$1 == id { ts = $2 } END { if (ts != "") print ts }' \
        "$STATE_FILE" 2>/dev/null) || true
      [[ -z "$last_processed" ]] && last_processed=0
    fi

    garbage_count=$(grep -ciP '^\[\^.*\b(not a conflict|sequential|boundary overlap|not simultaneous|malformed tag|garbled|artifact|remove)\b' "$file" 2>/dev/null) || true

    # Flag ruler docs with incomplete names (single word, alias, no space)
    incomplete_name=0
    parent_dir=${file%/*}
    parent_dir=${parent_dir##*/}
    if [[ "$parent_dir" == "rulers" ]]; then
      title=$(doc_title "$file")
      if [[ -n "$title" && "$title" != *' '* ]]; then
        incomplete_name=1
      fi
    fi

    # Every numeric field gets a default: an empty field would make the
    # tab-separated `read` in the main loop shift columns.
    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
      "$fb_id" "$file" "${review_count:-0}" "${mtime:-0}" \
      "${last_processed:-0}" "${garbage_count:-0}" "$incomplete_name"
  done < <(find . -name '*.md' \
    -not -path './.git/*' \
    -not -path './.automate/*' \
    -not -path './.kiro/*' \
    -not -path './.factbase/*' \
    -not -path './_orphans.md' \
    -print0) > "$tmpfile"

  case "$PRIORITY" in
    reviews)
      sort -t$'\t' -k7,7rn -k3,3rn -k6,6rn -k5,5n "$tmpfile"
      ;;
    stale)
      sort -t$'\t' -k7,7rn -k5,5n -k3,3rn "$tmpfile"
      ;;
    random)
      shuf "$tmpfile"
      ;;
  esac

  rm -f "$tmpfile"
}

# Append "<fb_id><TAB><epoch seconds>" for entity $1 to the state file.
mark_processed() {
  printf '%s\t%s\n' "$1" "$(date +%s)" >> "$STATE_FILE"
}

# ─── Bash-based mechanical cleanup (no agent needed) ───
# Edits file $1 in place. Returns 0 when anything was changed, 1 otherwise.
bash_cleanup() {
  local file="$1"
  local changed=false

  # Fix corrupted title: strip @t[...] and [^N] suffixes.
  # POSIX ERE has no lazy quantifier, so a single substitution only removes
  # the LAST annotation; loop (t) until none is left.
  if grep -qP '^# .+(\s+@t\[|\s+\[\^)' "$file" 2>/dev/null; then
    sed -i -E \
      -e ':strip' \
      -e 's/^(# .+)\s+(@t\[.*|\[\^.*)$/\1/' \
      -e 't strip' \
      "$file"
    changed=true
  fi

  # Delete garbage footnotes (review answers dumped as source citations).
  # Line numbers refer to the input, so one sed pass can drop them all.
  local -a garbage_lines=()
  mapfile -t garbage_lines < <(grep -niP "$GARBAGE_FOOTNOTE_RE" "$file" 2>/dev/null | cut -d: -f1)
  if [[ ${#garbage_lines[@]} -gt 0 ]]; then
    sed -i "$(printf '%sd;' "${garbage_lines[@]}")" "$file"
    changed=true
  fi

  # Remove review answer artifact lines in body
  if grep -qP '^- Artifact from previous review application' "$file" 2>/dev/null; then
    sed -i '/^- Artifact from previous review application/d' "$file"
    changed=true
  fi

  # Remove empty Review Queue sections and factbase:review markers.
  # NOTE(review): the marker pattern in the previous revision was empty, which
  # made the check match every file and the sed delete every blank line. The
  # HTML-comment form below is an assumption based on the "factbase:" id
  # prefix parsed in build_queue — confirm against the factbase docs.
  local review_marker_re='^<!--[[:space:]]*factbase:review[[:space:]]*-->[[:space:]]*$'
  if grep -qE -e '^## Review Queue$' -e "$review_marker_re" "$file" 2>/dev/null; then
    # Only remove if the review queue has no actual unanswered questions
    local has_open
    has_open=$(grep -c "$OPEN_QUESTION_RE" "$file" 2>/dev/null) || true
    if [[ "${has_open:-0}" -eq 0 ]]; then
      sed -i -E -e '/^## Review Queue$/d' -e "/${review_marker_re}/d" "$file"
      # Clean up trailing blank lines at end of file
      sed -i -e :a -e '/^\n*$/{$d;N;ba' -e '}' "$file"
      changed=true
    fi
  fi

  [[ "$changed" == true ]]
}

# Print up to three matching lines from each of (at most) five other documents
# that mention the title of document $1. Prints an empty line when the title
# is found nowhere else; prints nothing when the document has no title.
get_related_context() {
  local file="$1"
  local title
  title=$(doc_title "$file")
  [[ -z "$title" ]] && return

  cd "$DOCS_DIR" || return

  local related="" match_file match_title excerpt
  while IFS= read -r match_file; do
    [[ "$match_file" == "$file" ]] && continue
    match_title=$(grep '^# ' "$match_file" 2>/dev/null | head -1 | sed 's/^# //') || true
    # -F: the title is literal text, not a regular expression.
    excerpt=$(grep -iF -e "$title" "$match_file" 2>/dev/null | head -3) || true
    if [[ -n "$excerpt" ]]; then
      related+="--- From: ${match_title} (${match_file}) ---
${excerpt}

"
    fi
  done < <(grep -rlF --include='*.md' -e "$title" . \
    | grep -vE '/(\.git|\.automate|\.kiro|\.factbase)/|_orphans' \
    | head -5)

  echo "$related"
}

# Print the agent prompt for one entity.
# Arguments: $1 fb_id, $2 file, $3 title, $4 document content,
#            $5 related-mentions text (may be empty).
# The here-document is unquoted so the variables expand; "\\" yields the
# literal backslash of the curl example.
build_agent_prompt() {
  local fb_id="$1" file="$2" title="$3" content="$4" related="$5"

  local mentions=""
  if [[ -n "$related" ]]; then
    mentions="

=== MENTIONS IN OTHER DOCUMENTS ===
$related
=== END MENTIONS ==="
  fi

  cat <<EOF
You are continuously improving a factbase knowledge base, one entity at a time.
Use factbase MCP tools — call get_authoring_guide if you need formatting rules.

ENTITY FILE: $file
ENTITY ID: $fb_id

=== CURRENT DOCUMENT CONTENT ===
$content
=== END DOCUMENT CONTENT ===${mentions}

STEPS — work through in order, skip any that do not apply:

1. RESOLVE REVIEW QUESTIONS: Call get_review_queue(doc_id='$fb_id') — if there are open questions, answer them.
   Patterns learned from resolving thousands of these:
   - CONFLICT (chronological overlaps): Boundary-year overlaps in sequential reigns or periods are NOT conflicts (date granularity artifact). Concurrent roles (e.g., ruler + military commander, pharaoh + high priest) are both true simultaneously. Approximate dates that overlap by a few years reflect scholarly uncertainty, not contradiction.
   - AMBIGUOUS (terms): Expand the term AND create or update a definitions/ file so the term is not flagged again. Do NOT just answer inline — the definitions file is what prevents recurrence. Check existing definitions files first with search_content. Common: BCE, CE, polis, satrapy, pharaoh, consul, tribune, cuneiform, stele, ziggurat.
   - TEMPORAL/STALE: BCE dates are written in text, not temporal tags. CE date ranges (@t[...]) that are closed are historical, not stale. For open @t[~...] tags, search for newer scholarship.
   - MISSING: Search with search_knowledge and search_content. If not found, defer.
   After answering, call apply_review_answers(doc_id='$fb_id'). Then re-read the file with get_entity and verify apply did not corrupt it (garbage footnotes, mangled title). If it did, fix with update_document.

2. IDENTITY & ORGANIZATION: For ruler documents: if the title is a single name, alias, or epithet (not a full name), prioritize finding their full or commonly known name. Search local sources, check mentions in other documents, and cross-reference with civilization docs. If you find the full name, update the document title with update_document.
   For any document: if the title or file location could be improved (e.g. a ruler doc is in the wrong civilization folder, or the filename does not match the title), use the organize MCP tool to rename/move it. Use organize(action='move', doc_id=..., to=...) to relocate or update_document(id=..., title=...) to fix the title.

3. ENRICH FROM OUTSIDE SOURCES: This is the most important step. Use web_search to find high-quality information about this entity from scholarly and encyclopedic sources. Search for:
   - The entity name + "archaeology" or "ancient history"
   - Key events, dates, or relationships mentioned in the document
   - Recent archaeological discoveries or revised scholarly consensus
   Prefer peer-reviewed sources, university publications, museum databases, and established encyclopedias. Add any new facts not already present, with source citations, following factbase authoring conventions. Do NOT add speculative or poorly-sourced claims.

4. IMPROVEMENT IDEAS: If you notice friction or gaps in factbase tools, file a Vikunja feature request:
   curl -s -X PUT "https://vikunja.home.everyonce.com/api/v1/projects/2/tasks" \\
     -H "Authorization: Bearer ${VIKUNJA_TOKEN}" \\
     -H "Content-Type: application/json" \\
     -d '{"title":"[factbase][feature] ","description":" ","priority":2}'
   Only file genuinely useful improvements, not duplicates.

RULES:
- Use update_document to edit — be surgical, change only what needs changing
- If nothing needs changing, say so and move on

IMPORTANT: When finished, output exactly one line:
status: STATUS | $title | changes: DESCRIPTION
Status values: UPDATED (made changes), NO_CHANGE (nothing to do), ERROR (something failed)
EOF
}

# ─── Agent-based processing (review questions + enrichment) ───
# Arguments: $1 fb_id, $2 file, $3 title.
# Prints the raw agent output followed by one summary line ("status: ...").
# Returns 1 when the document cannot be read or kiro-cli exits non-zero.
process_entity_agent() {
  local fb_id="$1"
  local file="$2"
  local title="$3"

  cd "$DOCS_DIR" || return 1

  local content related prompt output summary
  content=$(cat "$file") || {
    log "❌ cannot read $file, continuing..."
    return 1
  }
  related=$(get_related_context "$file") || true
  prompt=$(build_agent_prompt "$fb_id" "$file" "$title" "$content" "$related")

  output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
    "$prompt" 2>&1) || {
    log "❌ kiro-cli error for $title, continuing..."
    return 1
  }
  echo "$output"

  summary=$(extract_summary <<< "$output")
  [[ -z "$summary" ]] && summary="status: UNKNOWN | $title | no summary returned"
  echo "$summary"
}

# ─── Main entity processing dispatcher ───
# Arguments: the seven queue columns produced by build_queue.
# Runs the mechanical cleanup, optionally the agent, commits when something
# changed, and records the result in ACTION_LOG and STATE_FILE.
# Returns 0 when the entity was updated, 1 otherwise.
process_entity() {
  local fb_id="$1"
  local file="$2"
  local review_count="$3"
  local mtime="$4"
  local last_processed="$5"
  local garbage_count="$6"
  local incomplete_name="$7"

  cd "$DOCS_DIR" || return 1

  local title
  title=$(doc_title "$file")

  log "━━━ [$title] ($fb_id) reviews=$review_count garbage=$garbage_count ━━━"

  local start_time
  start_time=$(date +%s)
  local status="NO_CHANGE"
  local summary=""

  # Phase 1: Bash cleanup (milliseconds, no agent)
  if bash_cleanup "$file"; then
    log "  🧹 Bash cleanup applied"
    status="UPDATED"
    # Recount after cleanup
    review_count=$(grep -c "$OPEN_QUESTION_RE" "$file" 2>/dev/null) || true
    garbage_count=0
  fi

  # Phase 2: Decide if agent is needed
  local needs_agent=true
  if [[ "${incomplete_name:-0}" -eq 1 ]]; then
    log "  👤 Incomplete name (ruler doc) → agent needed to resolve identity"
  elif [[ "$review_count" -gt 0 ]]; then
    log "  📋 $review_count review questions → agent needed"
  elif [[ "$SKIP_UNCHANGED" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
    needs_agent=false
    log "  ⏭️ No questions, not modified since last pass → skipping (--skip-unchanged)"
  else
    log "  🔍 Enrichment + review pass"
  fi

  if [[ "$needs_agent" == true ]]; then
    local agent_output agent_status="UNKNOWN"
    if agent_output=$(process_entity_agent "$fb_id" "$file" "$title"); then
      echo "$agent_output"
      summary=$(extract_summary <<< "$agent_output")
      if [[ "$summary" =~ ^status:\ ([A-Z_]+) ]]; then
        agent_status="${BASH_REMATCH[1]}"
      fi
      if [[ "$agent_status" == "UPDATED" ]]; then
        status="UPDATED"
      fi
    else
      # Record the failure instead of logging it as a "bash-only pass".
      echo "$agent_output"
      summary="status: ERROR | $title | agent invocation failed"
    fi
  fi

  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))

  if [[ "$status" == "UPDATED" ]]; then
    # do_commit logs its own failures; a failed push must not stop the pass.
    do_commit "improve: $title" || true
  fi

  [[ -z "$summary" ]] && summary="status: $status | $title | bash-only pass"

  {
    echo "[$(date -Iseconds)] $fb_id | $title"
    echo "  $summary"
    echo "  duration: ${duration}s"
  } >> "$ACTION_LOG"

  mark_processed "$fb_id"
  log "  Done (${duration}s) — $status"

  [[ "$status" == "UPDATED" ]]
}

# ═══════════════════════════════════════════
# DEEP CROSS-DOCUMENT SCAN (once per cycle)
# ═══════════════════════════════════════════
# Returns 1 when the agent invocation fails, 0 otherwise.
run_deep_scan() {
  log "🔬 Running deep cross-document validation scan..."

  local output
  output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
    "Run check_repository with deep_check=true. Review any new issues found — answer what you can, defer what you cannot. Then commit." 2>&1) || {
    log "❌ Deep scan agent failed, continuing..."
    return 1
  }
  echo "$output"

  # do_commit logs its own failures.
  do_commit "deep scan: cross-document validation" || true
  log "✅ Deep scan complete"
}

# ═══════════════════════════════════════════
# MAIN LOOP
# ═══════════════════════════════════════════
log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_unchanged=$SKIP_UNCHANGED)"
log "Docs dir: $DOCS_DIR"
log "State file: $STATE_FILE"
log "Press Ctrl+C to stop"

PASS=0
while true; do
  PASS=$((PASS + 1))
  log ""
  log "═══════════════════════════════════════════"
  log "  PASS $PASS — $(TZ='America/Chicago' date '+%Y-%m-%d %r') — priority=$PRIORITY"
  log "═══════════════════════════════════════════"

  QUEUE=$(build_queue) || {
    log "❌ Failed to build queue"
    QUEUE=""
  }
  # grep -c prints 0 AND exits 1 on an empty queue; "|| echo 0" would append a
  # second "0" to the captured value.
  TOTAL=$(grep -c . <<< "$QUEUE" || true)
  log "Queue: $TOTAL entities (starting at $((START_AT + 1)))"

  PROCESSED=0
  UPDATED=0
  POSITION=0

  # The queue is fed on fd 3 so that git / kiro-cli, which inherit stdin,
  # cannot consume the remaining entries.
  while IFS=$'\t' read -r -u 3 fb_id file review_count mtime last_processed garbage_count incomplete_name; do
    [[ -z "$fb_id" ]] && continue
    POSITION=$((POSITION + 1))

    if [[ $POSITION -le $START_AT ]]; then
      continue
    fi

    PROCESSED=$((PROCESSED + 1))
    log "[$POSITION/$TOTAL] Next up..."

    if process_entity "$fb_id" "$file" "$review_count" "$mtime" "$last_processed" "$garbage_count" "$incomplete_name"; then
      UPDATED=$((UPDATED + 1))
    fi

    sleep "$CYCLE_DELAY"
  done 3<<< "$QUEUE"

  log ""
  log "═══ Pass $PASS complete: $PROCESSED processed, $UPDATED updated ═══"

  # A failed scan is already logged; under `set -e` an unguarded non-zero
  # return here would terminate the whole loop.
  run_deep_scan || true

  START_AT=0
  log "Looping back to start..."
  sleep "$CYCLE_DELAY"
done