Rework continuous-improve: always enrich, add deep scan, strengthen web research

This commit is contained in:
daniel
2026-02-22 22:14:18 +00:00
parent 1c8a67349f
commit a8454cf9c4
7 changed files with 50 additions and 28 deletions

View File

@@ -1,16 +1,15 @@
#!/usr/bin/env bash
# continuous-improve.sh — Entity-by-entity continuous improvement loop
# continuous-improve.sh — Continuous enrichment and quality loop
#
# Iterates through every factbase entity, one at a time. Does mechanical
# cleanup in bash (fast), then only invokes an agent for entities that
# actually need review resolution or enrichment.
# Each cycle: processes every entity (resolve reviews, enrich from outside
# sources), then runs a deep cross-document validation scan.
#
# Usage: .automate/continuous-improve.sh [options]
# --priority reviews|stale|random Queue ordering (default: reviews)
# --cycle-delay N Seconds between entities (default: 5)
# --model MODEL LLM model (default: claude-sonnet-4.6)
# --start N Skip first N entities in queue (resume)
# --no-skip Don't skip clean entities (force agent on all)
# --skip-unchanged Skip entities unchanged since last pass
set -euo pipefail
@@ -19,15 +18,15 @@ PRIORITY="reviews"
CYCLE_DELAY=5
MODEL="claude-sonnet-4.6"
START_AT=0
SKIP_CLEAN=true
SKIP_UNCHANGED=false
while [[ $# -gt 0 ]]; do
case "$1" in
--priority) PRIORITY="$2"; shift 2 ;;
--cycle-delay) CYCLE_DELAY="$2"; shift 2 ;;
--model) MODEL="$2"; shift 2 ;;
--start) START_AT="$2"; shift 2 ;;
--no-skip) SKIP_CLEAN=false; shift ;;
*) echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--no-skip]"; exit 1 ;;
--skip-unchanged) SKIP_UNCHANGED=true; shift ;;
*) echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--skip-unchanged]"; exit 1 ;;
esac
done
@@ -90,7 +89,7 @@ build_queue() {
local garbage_count
garbage_count=$(grep -ciP '^\[\^.*\b(not a conflict|sequential|boundary overlap|not simultaneous|malformed tag|garbled|artifact|remove)\b' "$file" 2>/dev/null) || true
# Flag person docs with incomplete names (single word, alias, no space)
# Flag ruler docs with incomplete names (single word, alias, no space)
local incomplete_name=0
local parent_dir
parent_dir=$(echo "$file" | sed 's|^\./||' | rev | cut -d/ -f2 | rev)
@@ -260,11 +259,15 @@ STEPS — work through in order, skip any that do not apply:
tool to rename/move it. Use organize(action='"'"'move'"'"', doc_id=..., to=...) to relocate
or update_document(id=..., title=...) to fix the title.
3. ENRICH:
Search ALL your available tools for new information about this entity — factbase search,
web search, whatever you have. Use the entity name, aliases,
and known associations as search terms. Add any new facts not already present, following
factbase authoring conventions.
3. ENRICH FROM OUTSIDE SOURCES:
This is the most important step. Use web_search to find high-quality information about
this entity from scholarly and encyclopedic sources. Search for:
- The entity name + "archaeology" or "ancient history"
- Key events, dates, or relationships mentioned in the document
- Recent archaeological discoveries or revised scholarly consensus
Prefer peer-reviewed sources, university publications, museum databases, and established
encyclopedias. Add any new facts not already present, with source citations, following
factbase authoring conventions. Do NOT add speculative or poorly-sourced claims.
4. IMPROVEMENT IDEAS:
If you notice friction or gaps in factbase tools, file a Vikunja feature request:
@@ -329,18 +332,16 @@ process_entity() {
fi
# Phase 2: Decide if agent is needed
local needs_agent=false
local needs_agent=true
if [[ "${incomplete_name:-0}" -eq 1 ]]; then
needs_agent=true
log " 👤 Incomplete name (ruler doc) → agent needed to resolve identity"
elif [[ "$review_count" -gt 0 ]]; then
needs_agent=true
log " 📋 $review_count review questions → agent needed"
elif [[ "$SKIP_CLEAN" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
log " ⏭️ No questions, not modified since last pass → skipping agent"
elif [[ "$SKIP_UNCHANGED" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
needs_agent=false
log " ⏭️ No questions, not modified since last pass → skipping (--skip-unchanged)"
else
needs_agent=true
log " 🔍 Enrichment pass → agent needed"
log " 🔍 Enrichment + review pass"
fi
if [[ "$needs_agent" == true ]]; then
@@ -378,10 +379,26 @@ process_entity() {
[[ "$status" == "UPDATED" ]] && return 0 || return 1
}
# ═══════════════════════════════════════════
# DEEP CROSS-DOCUMENT SCAN (once per cycle)
# ═══════════════════════════════════════════
#######################################
# Runs one agent-driven deep cross-document validation pass, prints the
# agent transcript, and commits the result.
#
# Best-effort: the script runs under `set -e` and this function is invoked
# as a bare statement in the main loop, so a non-zero return would abort the
# whole loop. An agent failure is therefore logged and reported as success
# ("continuing..."), and the commit step is skipped for that cycle.
#
# Globals:   MODEL (read) - model name passed to kiro-cli
# Arguments: none
# Outputs:   agent transcript on stdout on success; on stderr when the
#            agent run fails
# Returns:   0 when the agent run fails (failure is logged only); otherwise
#            the status of the final log call
#######################################
run_deep_scan() {
  log "🔬 Running deep cross-document validation scan..."

  local output
  if ! output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
    "Run check_repository with deep_check=true. Review any new issues found — answer what you can, defer what you cannot. Then commit." 2>&1); then
    # Keep whatever the agent printed so the failure can be diagnosed.
    if [[ -n "$output" ]]; then
      printf '%s\n' "$output" >&2
    fi
    log "❌ Deep scan agent failed, continuing..."
    # Deliberately 0: a failed scan must not terminate the improvement loop.
    return 0
  fi

  # printf rather than echo: the transcript is arbitrary text.
  printf '%s\n' "$output"
  do_commit "deep scan: cross-document validation"
  log "✅ Deep scan complete"
}
# ═══════════════════════════════════════════
# MAIN LOOP
# ═══════════════════════════════════════════
log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_clean=$SKIP_CLEAN)"
log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_unchanged=$SKIP_UNCHANGED)"
log "Docs dir: $DOCS_DIR"
log "State file: $STATE_FILE"
log "Press Ctrl+C to stop"
@@ -424,6 +441,8 @@ while true; do
log ""
log "═══ Pass $PASS complete: $PROCESSED processed, $UPDATED updated ═══"
run_deep_scan
START_AT=0
log "Looping back to start..."
sleep "$CYCLE_DELAY"