Rework continuous-improve: always enrich, add deep scan, strengthen web research

2026-02-22 22:14:18 +00:00
parent 1c8a67349f
commit a8454cf9c4
7 changed files with 50 additions and 28 deletions
--- a/.automate/continuous-improve.sh
+++ b/.automate/continuous-improve.sh
@@ -1,16 +1,15 @@
 #!/usr/bin/env bash
-# continuous-improve.sh — Entity-by-entity continuous improvement loop
+# continuous-improve.sh — Continuous enrichment and quality loop
 #
-# Iterates through every factbase entity, one at a time. Does mechanical
-# cleanup in bash (fast), then only invokes an agent for entities that
-# actually need review resolution or enrichment.
+# Each cycle: processes every entity (resolve reviews, enrich from outside
+# sources), then runs a deep cross-document validation scan.
 #
 # Usage: .automate/continuous-improve.sh [options]
 #   --priority reviews|stale|random   Queue ordering (default: reviews)
 #   --cycle-delay N                   Seconds between entities (default: 5)
 #   --model MODEL                     LLM model (default: claude-sonnet-4.6)
 #   --start N                         Skip first N entities in queue (resume)
-#   --no-skip                         Don't skip clean entities (force agent on all)
+#   --skip-unchanged                  Skip entities unchanged since last pass (only those with no review questions or incomplete name)

 set -euo pipefail

@@ -19,15 +18,15 @@ PRIORITY="reviews"
 CYCLE_DELAY=5
 MODEL="claude-sonnet-4.6"
 START_AT=0
-SKIP_CLEAN=true
+SKIP_UNCHANGED=false
 while [[ $# -gt 0 ]]; do
    case "$1" in
        --priority) PRIORITY="$2"; shift 2 ;;
        --cycle-delay) CYCLE_DELAY="$2"; shift 2 ;;
        --model) MODEL="$2"; shift 2 ;;
        --start) START_AT="$2"; shift 2 ;;
-        --no-skip) SKIP_CLEAN=false; shift ;;
-        *) echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--no-skip]"; exit 1 ;;
+        --skip-unchanged) SKIP_UNCHANGED=true; shift ;;
+        *) echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--skip-unchanged]"; exit 1 ;;
    esac
 done

@@ -90,7 +89,7 @@ build_queue() {
        local garbage_count
        garbage_count=$(grep -ciP '^\[\^.*\b(not a conflict|sequential|boundary overlap|not simultaneous|malformed tag|garbled|artifact|remove)\b' "$file" 2>/dev/null) || true

-        # Flag person docs with incomplete names (single word, alias, no space)
+        # Flag ruler docs with incomplete names (single word, alias, no space)
        local incomplete_name=0
        local parent_dir
        parent_dir=$(echo "$file" | sed 's|^\./||' | rev | cut -d/ -f2 | rev)
@@ -260,11 +259,15 @@ STEPS — work through in order, skip any that do not apply:
   tool to rename/move it. Use organize(action='"'"'move'"'"', doc_id=..., to=...) to relocate
   or update_document(id=..., title=...) to fix the title.

-3. ENRICH:
-   Search ALL your available tools for new information about this entity — factbase search,
-   web search, whatever you have. Use the entity name, aliases,
-   and known associations as search terms. Add any new facts not already present, following
-   factbase authoring conventions.
+3. ENRICH FROM OUTSIDE SOURCES:
+   This is the most important step. Use web_search to find high-quality information about
+   this entity from scholarly and encyclopedic sources. Search for:
+   - The entity name + "archaeology" or "ancient history"
+   - Key events, dates, or relationships mentioned in the document
+   - Recent archaeological discoveries or revised scholarly consensus
+   Prefer peer-reviewed sources, university publications, museum databases, and established
+   encyclopedias. Add any new facts not already present, with source citations, following
+   factbase authoring conventions. Do NOT add speculative or poorly-sourced claims.

 4. IMPROVEMENT IDEAS:
   If you notice friction or gaps in factbase tools, file a Vikunja feature request:
@@ -329,18 +332,16 @@ process_entity() {
    fi

    # Phase 2: Decide if agent is needed
-    local needs_agent=false
+    local needs_agent=true
    if [[ "${incomplete_name:-0}" -eq 1 ]]; then
-        needs_agent=true
        log "  👤 Incomplete name (ruler doc) → agent needed to resolve identity"
    elif [[ "$review_count" -gt 0 ]]; then
-        needs_agent=true
        log "  📋 $review_count review questions → agent needed"
-    elif [[ "$SKIP_CLEAN" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
-        log "  ⏭️  No questions, not modified since last pass → skipping agent"
+    elif [[ "$SKIP_UNCHANGED" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
+        needs_agent=false
+        log "  ⏭️  No questions, not modified since last pass → skipping (--skip-unchanged)"
    else
-        needs_agent=true
-        log "  🔍 Enrichment pass → agent needed"
+        log "  🔍 Enrichment + review pass"
    fi

    if [[ "$needs_agent" == true ]]; then
@@ -378,10 +379,26 @@ process_entity() {
    [[ "$status" == "UPDATED" ]] && return 0 || return 1
 }

+# ═══════════════════════════════════════════
+# DEEP CROSS-DOCUMENT SCAN (once per cycle)
+# ═══════════════════════════════════════════
+run_deep_scan() {  # Once-per-cycle agent pass: asks the agent to run check_repository (deep_check=true), then commits. Agent failure is logged and non-fatal.
+    log "🔬 Running deep cross-document validation scan..."
+    local output  # declared separately so `local` does not mask the agent's exit status
+    output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
+        "Run check_repository with deep_check=true. Review any new issues found — answer what you can, defer what you cannot. Then commit." 2>&1) || {
+        log "❌ Deep scan agent failed, continuing..."
+        return 0  # best-effort step: the main loop calls this bare under `set -e`, so a non-zero return would abort the loop instead of continuing
+    }
+    printf '%s\n' "$output"  # agent transcript (stdout+stderr); printf stays literal even if the text begins with -n/-e
+    do_commit "deep scan: cross-document validation"
+    log "✅ Deep scan complete"
+}
+
 # ═══════════════════════════════════════════
 # MAIN LOOP
 # ═══════════════════════════════════════════
-log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_clean=$SKIP_CLEAN)"
+log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_unchanged=$SKIP_UNCHANGED)"
 log "Docs dir: $DOCS_DIR"
 log "State file: $STATE_FILE"
 log "Press Ctrl+C to stop"
@@ -424,6 +441,8 @@ while true; do
    log ""
    log "═══ Pass $PASS complete: $PROCESSED processed, $UPDATED updated ═══"

+    run_deep_scan
+
    START_AT=0
    log "Looping back to start..."
    sleep "$CYCLE_DELAY"