#!/usr/bin/env bash
# continuous-improve.sh — Continuous enrichment and quality loop
#
# Each cycle: processes every entity (resolve reviews, enrich from outside
# sources), then runs a deep cross-document validation scan.
#
# Usage: .automate/continuous-improve.sh [options]
#   --priority reviews|stale|random   Queue ordering (default: reviews)
#   --cycle-delay N                   Seconds between entities (default: 5)
#   --model MODEL                     LLM model (default: claude-sonnet-4.6)
#   --start N                         Skip first N entities in queue (resume)
#   --skip-unchanged                  Skip entities unchanged since last pass
#
# Environment:
#   VIKUNJA_TOKEN   Overrides the built-in Vikunja API token.

set -euo pipefail

# ─── Parse arguments ───
PRIORITY="reviews"
CYCLE_DELAY=5
MODEL="claude-sonnet-4.6"
START_AT=0
# Off by default: --skip-unchanged is an opt-in flag (a 'true' default made
# passing the flag a no-op).
SKIP_UNCHANGED=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --priority)       PRIORITY="$2";       shift 2 ;;
    --cycle-delay)    CYCLE_DELAY="$2";    shift 2 ;;
    --model)          MODEL="$2";          shift 2 ;;
    --start)          START_AT="$2";       shift 2 ;;
    --skip-unchanged) SKIP_UNCHANGED=true; shift   ;;
    *)
      echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--skip-unchanged]"
      exit 1
      ;;
  esac
done

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
DOCS_DIR="$SCRIPT_DIR/.."
STATE_FILE="$SCRIPT_DIR/.improve-state.tsv"
ACTION_LOG="$SCRIPT_DIR/improve-history.log"

# Vikunja API endpoint + token, hoisted so the secret lives in one place and
# can be rotated via the environment without editing this script.
VIKUNJA_URL="https://vikunja.home.everyonce.com/api/v1/projects/2/tasks"
VIKUNJA_TOKEN="${VIKUNJA_TOKEN:-tk_ff251f3d3512775c71913bc2f8ec0dabbf5016a8}"

# Mirror all output (stdout + stderr) into the persistent log.
exec > >(tee -a "$SCRIPT_DIR/continuous-improve.log") 2>&1

log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"; }

#######################################
# Stage everything under DOCS_DIR, commit with $1 as message, and push with
# up to 3 rebase-and-retry attempts. No-op when nothing is staged. Always
# returns 0 so callers under 'set -e' are not killed by a failed push.
#######################################
do_commit() {
  local msg="$1"
  cd "$DOCS_DIR"
  git add -A
  if ! git diff --cached --quiet; then
    git commit -m "$msg"
    local attempt
    for attempt in 1 2 3; do
      if git push 2>/dev/null; then
        log "✅ Committed: $msg"
        return 0
      fi
      log "⚠️ Push attempt $attempt failed, rebasing..."
      git pull --rebase
    done
    log "❌ Push failed after 3 attempts"
  fi
}

#######################################
# Emit the work queue as TSV, one entity per line:
#   fb_id  file  review_count  mtime  last_processed  garbage_count  incomplete_name
# ordered according to $PRIORITY.
#######################################
build_queue() {
  cd "$DOCS_DIR"
  local tmpfile
  tmpfile=$(mktemp)

  find . \
    -name '*.md' \
    -not -path './.git/*' \
    -not -path './.automate/*' \
    -not -path './.kiro/*' \
    -not -path './.factbase/*' \
    -not -path './_orphans.md' \
    -print0 |
  while IFS= read -r -d '' file; do
    local fb_id
    fb_id=$(grep -oP '(?<=factbase:)[a-f0-9]{6}' "$file" 2>/dev/null | head -1)
    [[ -z "$fb_id" ]] && continue

    local review_count
    review_count=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    local mtime
    mtime=$(stat -c %Y "$file")

    # The state file is TAB-separated (see mark_processed), so match on the
    # tab-delimited first field. (The old 'grep "^id "' looked for a space
    # and never matched, leaving last_processed stuck at 0.)
    local last_processed=0
    if [[ -f "$STATE_FILE" ]]; then
      last_processed=$(awk -F'\t' -v id="$fb_id" '$1 == id { v = $2 } END { print v }' "$STATE_FILE") || true
      [[ -z "$last_processed" ]] && last_processed=0
    fi

    local garbage_count
    garbage_count=$(grep -ciP '^\[\^.*\b(not a conflict|sequential|boundary overlap|not simultaneous|malformed tag|garbled|artifact|remove)\b' "$file" 2>/dev/null) || true

    # Flag ruler docs with incomplete names (single word, alias, no space).
    local incomplete_name=0
    local parent_dir
    parent_dir=$(basename "$(dirname "$file")")
    if [[ "$parent_dir" == "rulers" ]]; then
      local doc_title
      doc_title=$(grep '^# ' "$file" 2>/dev/null | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//')
      if [[ -n "$doc_title" && "$doc_title" != *' '* ]]; then
        incomplete_name=1
      fi
    fi

    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
      "$fb_id" "$file" "$review_count" "$mtime" \
      "$last_processed" "$garbage_count" "$incomplete_name"
  done > "$tmpfile"

  case "$PRIORITY" in
    reviews) sort -t$'\t' -k7,7rn -k3,3rn -k6,6rn -k5,5n "$tmpfile" ;;
    stale)   sort -t$'\t' -k7,7rn -k5,5n  -k3,3rn "$tmpfile" ;;
    random)  shuf "$tmpfile" ;;
  esac
  rm -f "$tmpfile"
}

# Record that an entity was processed now (TSV: id <TAB> epoch-seconds).
mark_processed() {
  printf '%s\t%s\n' "$1" "$(date +%s)" >> "$STATE_FILE"
}

# ─── Bash-based mechanical cleanup (no agent needed) ───

#######################################
# Fast, purely-mechanical fixes via sed/awk/grep — no agent call.
# Arguments: $1 - markdown file path
# Returns:   0 if the file was modified, 1 otherwise
#######################################
bash_cleanup() {
  local file="$1"
  local changed=false

  # Fix corrupted title: strip trailing @t[...] and [^N] suffixes from H1
  # lines. Two anchored substitutions strip from the FIRST tag occurrence;
  # the previous single pattern used '.+?', which POSIX ERE does not define.
  if grep -qP '^# .+(\s+@t\[|\s+\[\^)' "$file" 2>/dev/null; then
    sed -i -E '/^# /{s/\s+@t\[.*$//; s/\s+\[\^.*$//}' "$file"
    changed=true
  fi

  # Delete garbage footnotes (review answers dumped as source citations).
  local garbage_pat='^\[\^\d+\]:.*\b(not a conflict|sequential role|boundary overlap|not simultaneous|malformed tag|garbled|artifact from previous|remove garbled)\b'
  if grep -qiP "$garbage_pat" "$file" 2>/dev/null; then
    # Delete bottom-up so earlier line numbers stay valid.
    local lines ln
    lines=$(grep -niP "$garbage_pat" "$file" | cut -d: -f1 | sort -rn)
    if [[ -n "$lines" ]]; then
      for ln in $lines; do
        sed -i "${ln}d" "$file"
      done
      changed=true
    fi
  fi

  # Remove review answer artifact lines in body.
  if grep -qP '^- Artifact from previous review application' "$file" 2>/dev/null; then
    sed -i '/^- Artifact from previous review application/d' "$file"
    changed=true
  fi

  # Remove the Review Queue heading once no open questions remain.
  # (The old detection pattern '^## Review Queue|' had an empty alternative
  # that matched EVERY file, and its cleanup sed also deleted every blank
  # line in the document.)
  if grep -qP '^## Review Queue' "$file" 2>/dev/null; then
    local has_open
    has_open=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    if [[ "$has_open" -eq 0 ]]; then
      sed -i '/^## Review Queue$/d' "$file"
      changed=true
    fi
  fi

  # Remove answered review Q&A lines (- [x] `@q[...]` plus the '>' answer
  # line that follows each) once no open questions remain.
  if grep -qP '^\- \[x\] `@q\[' "$file" 2>/dev/null; then
    local open_count
    open_count=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    if [[ "$open_count" -eq 0 ]]; then
      awk '
        /^\- \[x\] `@q\[/ { skip = 1; next }
        skip && /^>/      { skip = 0; next }
                          { skip = 0; print }
      ' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
      changed=true
    fi
  fi

  # Remove duplicate H1 headings (keep first, drop subsequent).
  local h1_count
  h1_count=$(grep -c '^# ' "$file" 2>/dev/null) || true
  if [[ "$h1_count" -gt 1 ]]; then
    awk '/^# / { if (++seen > 1) next } { print }' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
    changed=true
  fi

  # Trim trailing blank lines.
  sed -i -e :a -e '/^\n*$/{$d;N;ba' -e '}' "$file"

  [[ "$changed" == true ]]
}

#######################################
# Print short excerpts from up to 5 other docs that mention this doc's
# title, for inclusion in the agent prompt. Empty output if no title.
#######################################
get_related_context() {
  local file="$1"
  local title
  title=$(grep '^# ' "$file" | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//')
  [[ -z "$title" ]] && return
  cd "$DOCS_DIR"

  local related=""
  local match_file match_title excerpt
  while IFS= read -r match_file; do
    [[ "$match_file" == "$file" ]] && continue
    match_title=$(grep '^# ' "$match_file" 2>/dev/null | head -1 | sed 's/^# //')
    # -F --: titles may contain regex metacharacters ([, (, .) and can
    # start with '-'; match them literally.
    excerpt=$(grep -iF -- "$title" "$match_file" 2>/dev/null | head -3)
    if [[ -n "$excerpt" ]]; then
      related+="--- From: ${match_title} (${match_file}) ---
${excerpt}

"
    fi
  done < <(grep -rlF --include='*.md' -- "$title" . \
    | grep -v '.git\|.automate\|.kiro\|.factbase\|_orphans' \
    | head -5)

  echo "$related"
}

# ─── Fetch existing Vikunja bug titles (for dedup) ───
get_existing_bug_titles() {
  curl -s "$VIKUNJA_URL" \
    -H "Authorization: Bearer $VIKUNJA_TOKEN" \
    2>/dev/null |
    grep -oP '"title":"[^"]*"' | sed 's/"title":"//;s/"$//' || true
}

# ─── Agent-based processing (review questions + enrichment) ───

#######################################
# Run one kiro-cli agent pass over a single entity: resolve review
# questions, fix identity/organization, enrich from outside sources, and
# file factbase tool bugs/features to Vikunja.
# Arguments: $1 fb_id, $2 file, $3 title, $4 open review count (default 0)
# Outputs:   full agent transcript, then a final 'status: ...' summary line
# Returns:   1 if kiro-cli itself failed, 0 otherwise
#######################################
process_entity_agent() {
  local fb_id="$1"
  local file="$2"
  local title="$3"
  local review_count="${4:-0}"
  cd "$DOCS_DIR"

  local content related existing_bugs
  content=$(cat "$file")
  related=$(get_related_context "$file")
  existing_bugs=$(get_existing_bug_titles)

  local prompt
  prompt="You are continuously improving a factbase knowledge base, one entity at a time.
Use factbase MCP tools — call get_authoring_guide if you need formatting rules.

ENTITY FILE: $file
ENTITY ID: $fb_id

=== CURRENT DOCUMENT CONTENT ===
$content
=== END DOCUMENT CONTENT ==="

  if [[ -n "$related" ]]; then
    prompt+="

=== MENTIONS IN OTHER DOCUMENTS ===
$related
=== END MENTIONS ==="
  fi

  if [[ -n "$existing_bugs" ]]; then
    prompt+="

=== EXISTING VIKUNJA BUG REPORTS (do NOT file duplicates) ===
$existing_bugs
=== END EXISTING BUGS ==="
  fi

  # Build review step conditionally — skip the full instructions when the
  # wrapper already knows there are no open questions.
  local review_step
  if [[ "$review_count" -gt 0 ]]; then
    review_step="1. RESOLVE REVIEW QUESTIONS: Call get_review_queue(doc_id='$fb_id') — if there are open (unanswered) questions, answer them.
   Patterns learned from resolving thousands of these:
   - CONFLICT (chronological overlaps): Boundary-year overlaps in sequential reigns or periods are NOT conflicts (date granularity artifact). Concurrent roles (e.g., ruler + military commander, pharaoh + high priest) are both true simultaneously. Approximate dates that overlap by a few years reflect scholarly uncertainty, not contradiction.
   - AMBIGUOUS (terms): Expand the term AND create or update a definitions/ file so the term is not flagged again. Do NOT just answer inline — the definitions file is what prevents recurrence. Check existing definitions files first with search_content. Common: BCE, CE, polis, satrapy, pharaoh, consul, tribune, cuneiform, stele, ziggurat.
   - TEMPORAL/STALE: BCE dates are written in text, not temporal tags. CE date ranges (@t[...]) that are closed are historical, not stale. For open @t[~...] tags, search for newer scholarship.
   - MISSING: Search with search_knowledge and search_content. If not found, defer.
   IMPORTANT: Only call apply_review_answers if get_review_queue shows unanswered > 0. After applying, re-read the file with get_entity and verify apply did not corrupt it (garbage footnotes, mangled title). If it did, fix with update_document."
  else
    review_step="1. REVIEW QUESTIONS: SKIP — the wrapper already confirmed 0 open review questions for this entity."
  fi

  # Static instruction text; quoted heredoc keeps quotes/backticks literal.
  # (read -d '' returns non-zero at EOF — expected, hence '|| true'.)
  local steps_mid
  read -r -d '' steps_mid <<'EOF' || true
2. IDENTITY & ORGANIZATION:
   For ruler documents: if the title is a single name, alias, or epithet (not a full name), prioritize finding their full or commonly known name. Search local sources, check mentions in other documents, and cross-reference with civilization docs. If you find the full name, update the document title with update_document.
   For any document: if the title or file location could be improved (e.g. a ruler doc is in the wrong civilization folder, or the filename does not match the title), use the organize MCP tool to rename/move it. Use organize(action='move', doc_id=..., to=...) to relocate or update_document(id=..., title=...) to fix the title.

3. ENRICH FROM OUTSIDE SOURCES: This is the most important step.
   Use web_search to find high-quality information about this entity from scholarly and encyclopedic sources. Search for:
   - The entity name + "archaeology" or "ancient history"
   - Key events, dates, or relationships mentioned in the document
   - Recent archaeological discoveries or revised scholarly consensus
   Prefer peer-reviewed sources, university publications, museum databases, and established encyclopedias. Add any new facts not already present, with source citations, following factbase authoring conventions. Do NOT add speculative or poorly-sourced claims.

4. FACTBASE TOOL BUGS & FEATURE REQUESTS: A key purpose of this improvement loop is to make factbase itself better. As you work through steps 1-3, pay close attention to how the factbase tools behave. File a Vikunja task for EACH of the following you encounter:
   BUGS — any MCP tool error, unexpected result, silent failure, data corruption, or behavior that contradicts the tool description. Use title prefix "[factbase]".
   FEATURES — any friction, missing capability, awkward multi-step workaround, or gap that slowed you down or limited what you could accomplish. Use title prefix "[factbase][feature]".
   CRITICAL: Before filing ANY bug or feature request, check the EXISTING VIKUNJA BUG REPORTS section above. If a similar issue is already filed, do NOT file a duplicate. Only file if the issue is genuinely new.
   CRITICAL: Factbase is domain-agnostic — it is used for many different knowledge bases, not just this one. Your bug reports and feature requests MUST describe the issue in terms of factbase tool behavior, not in terms of ancient history content. For example:
   - GOOD: "apply_review_answers corrupts footnote references when document has >5 citations"
   - BAD: "apply_review_answers broke the Ramesses II document"
   - GOOD: "[feature] check_repository should detect duplicate entities across different folders"
   - BAD: "[feature] factbase should know that Babylon appears in both civilizations/ and cities/"
   Include in every task description:
   - Which MCP tool was called (or which tool is missing/needed)
   - What you expected to happen vs what actually happened
   - Specific parameters or conditions that triggered the issue
   - For features: the general use case and how it would help ANY factbase, not just this one
EOF

  # The curl example needs variable expansion (URL + token), so it is a
  # separate double-quoted chunk.
  local curl_example
  curl_example="   curl -s -X PUT \"$VIKUNJA_URL\" \\
     -H \"Authorization: Bearer $VIKUNJA_TOKEN\" \\
     -H \"Content-Type: application/json\" \\
     -d '{\"title\":\"[factbase] \",\"description\":\"\",\"priority\":3}'
   Priority: 1-2 for features/minor issues, 3 for normal bugs, 4-5 for data loss or blocking failures."

  local rules
  read -r -d '' rules <<'EOF' || true
RULES:
- The document content is already provided above — do NOT call get_entity to re-read it unless you just called apply_review_answers and need to verify the result
- Use update_document to edit — be surgical, change only what needs changing
- When calling update_document, do NOT include the comment or the # Title heading in the content — factbase adds those automatically. Start content with the first section (e.g. ## Overview). Including them causes duplicate headings.
- If the document has answered review questions (- [x] `@q[...] lines with > answer lines), ALWAYS remove them from the content — they are stale artifacts, not part of the document.
- Do NOT run git add, git commit, or git push — the wrapper script handles all git operations
- If nothing needs changing, say so and move on
EOF

  prompt+="

STEPS — work through in order, skip any that do not apply:

$review_step

$steps_mid
$curl_example

$rules

IMPORTANT: When finished, output exactly one line:
status: STATUS | $title | changes: DESCRIPTION
Status values: UPDATED (made changes), NO_CHANGE (nothing to do), ERROR (something failed)"

  local output
  if ! output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
      "$prompt" 2>&1); then
    log "❌ kiro-cli error for $title, continuing..."
    return 1
  fi
  echo "$output"

  # Extract the agent's final protocol line. (The old pattern
  # '(?<=).*(?=)' had empty lookarounds and matched every line.)
  local summary
  summary=$(echo "$output" | grep -oP '^status: .*' | tail -1) || true
  [[ -z "$summary" ]] && summary="status: UNKNOWN | $title | no summary returned"
  echo "$summary"
}

# ─── Main entity processing dispatcher ───

#######################################
# Process one entity end-to-end: mechanical cleanup, optional agent pass,
# commit, and state/history bookkeeping.
# Arguments: the 7 TSV queue fields (see build_queue)
# Returns:   0 if the entity was updated, 1 otherwise
#######################################
process_entity() {
  local fb_id="$1" file="$2" review_count="$3" mtime="$4"
  local last_processed="$5" garbage_count="$6" incomplete_name="$7"
  cd "$DOCS_DIR"

  local title
  title=$(grep '^# ' "$file" | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//')
  log "━━━ [$title] ($fb_id) reviews=$review_count garbage=$garbage_count ━━━"

  local start_time
  start_time=$(date +%s)
  local status="NO_CHANGE"
  local summary=""

  # Phase 1: Bash cleanup (milliseconds, no agent).
  if bash_cleanup "$file"; then
    log "  🧹 Bash cleanup applied"
    status="UPDATED"
    # Recount after cleanup — answered questions may have been removed.
    review_count=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    garbage_count=0
  fi

  # Phase 2: Decide if agent is needed. (A duplicate skip-unchanged elif
  # that repeated the first condition verbatim was dead code — removed.)
  local needs_agent=true
  if [[ "$SKIP_UNCHANGED" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
    needs_agent=false
    log "  ⏭️  Not modified since last pass → skipping (--skip-unchanged)"
  elif [[ "${incomplete_name:-0}" -eq 1 ]]; then
    log "  👤 Incomplete name (ruler doc) → agent needed to resolve identity"
  elif [[ "$review_count" -gt 0 ]]; then
    log "  📋 $review_count review questions → agent needed"
  else
    log "  🔍 Enrichment + review pass"
  fi

  if [[ "$needs_agent" == true ]]; then
    local agent_output
    agent_output=$(process_entity_agent "$fb_id" "$file" "$title" "$review_count") || true
    echo "$agent_output"
    summary=$(echo "$agent_output" | grep -oP '^status: .*' | tail -1) || true
    local agent_status
    agent_status=$(echo "$summary" | grep -oP '^status: \K[A-Z_]+' || echo "UNKNOWN")
    if [[ "$agent_status" == "UPDATED" ]]; then
      status="UPDATED"
    fi
  fi

  local end_time duration
  end_time=$(date +%s)
  duration=$((end_time - start_time))

  if [[ "$status" == "UPDATED" ]]; then
    do_commit "improve: $title"
  fi

  [[ -z "$summary" ]] && summary="status: $status | $title | bash-only pass"
  {
    echo "[$(date -Iseconds)] $fb_id | $title"
    echo "  $summary"
    echo "  duration: ${duration}s"
  } >> "$ACTION_LOG"

  mark_processed "$fb_id"
  log "  Done (${duration}s) — $status"

  # Exit status doubles as the "was updated?" signal for the caller.
  [[ "$status" == "UPDATED" ]]
}

# ═══════════════════════════════════════════
#  DEEP CROSS-DOCUMENT SCAN (once per cycle)
# ═══════════════════════════════════════════

#######################################
# One agent pass of repository-wide validation; commits any resulting fixes.
# Returns 1 if the agent call failed (callers must tolerate this).
#######################################
run_deep_scan() {
  log "🔬 Running deep cross-document validation scan..."
  local output
  if ! output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
      "Run check_repository with deep_check=true. Review any new issues found — answer what you can, defer what you cannot. If any factbase tool behaves unexpectedly during this process, file a bug to Vikunja (project 2, Authorization: Bearer $VIKUNJA_TOKEN). Describe issues in domain-agnostic terms — factbase is used for many knowledge bases, not just this one." 2>&1); then
    log "❌ Deep scan agent failed, continuing..."
    return 1
  fi
  echo "$output"
  do_commit "deep scan: cross-document validation"
  log "✅ Deep scan complete"
}

# ═══════════════════════════════════════════
#  MAIN LOOP
# ═══════════════════════════════════════════
log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_unchanged=$SKIP_UNCHANGED)"
log "Docs dir: $DOCS_DIR"
log "State file: $STATE_FILE"
log "Press Ctrl+C to stop"

PASS=0
while true; do
  PASS=$((PASS + 1))
  log ""
  log "═══════════════════════════════════════════"
  log "  PASS $PASS — $(TZ='America/Chicago' date '+%Y-%m-%d %r') — priority=$PRIORITY"
  log "═══════════════════════════════════════════"

  QUEUE=$(build_queue)
  # grep -c prints the count even when it is 0 (and then exits 1), so guard
  # with '|| true' — the old '|| echo 0' produced "0\n0" on an empty queue.
  TOTAL=$(grep -c . <<<"$QUEUE" || true)
  log "Queue: $TOTAL entities (starting at $((START_AT + 1)))"

  PROCESSED=0
  UPDATED=0
  POSITION=0
  while IFS=$'\t' read -r fb_id file review_count mtime last_processed garbage_count incomplete_name; do
    [[ -z "$fb_id" ]] && continue
    POSITION=$((POSITION + 1))
    if [[ $POSITION -le $START_AT ]]; then
      continue
    fi
    PROCESSED=$((PROCESSED + 1))
    log "[$POSITION/$TOTAL] Next up..."
    if process_entity "$fb_id" "$file" "$review_count" "$mtime" "$last_processed" "$garbage_count" "$incomplete_name"; then
      UPDATED=$((UPDATED + 1))
    fi
    sleep "$CYCLE_DELAY"
  done <<< "$QUEUE"

  log ""
  log "═══ Pass $PASS complete: $PROCESSED processed, $UPDATED updated ═══"

  # Deep scan failure must not abort the loop under 'set -e'.
  run_deep_scan || true

  START_AT=0
  log "Looping back to start..."
  sleep "$CYCLE_DELAY"
done