Rework continuous-improve: always enrich, add deep scan, strengthen web research
This commit is contained in:
@@ -1,16 +1,15 @@
|
||||
#!/usr/bin/env bash
|
||||
# continuous-improve.sh — Entity-by-entity continuous improvement loop
|
||||
# continuous-improve.sh — Continuous enrichment and quality loop
|
||||
#
|
||||
# Iterates through every factbase entity, one at a time. Does mechanical
|
||||
# cleanup in bash (fast), then only invokes an agent for entities that
|
||||
# actually need review resolution or enrichment.
|
||||
# Each cycle: processes every entity (resolve reviews, enrich from outside
|
||||
# sources), then runs a deep cross-document validation scan.
|
||||
#
|
||||
# Usage: .automate/continuous-improve.sh [options]
|
||||
# --priority reviews|stale|random Queue ordering (default: reviews)
|
||||
# --cycle-delay N Seconds between entities (default: 5)
|
||||
# --model MODEL LLM model (default: claude-sonnet-4.6)
|
||||
# --start N Skip first N entities in queue (resume)
|
||||
# --no-skip Don't skip clean entities (force agent on all)
|
||||
# --skip-unchanged Skip entities unchanged since last pass
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -19,15 +18,15 @@ PRIORITY="reviews"
|
||||
CYCLE_DELAY=5
|
||||
MODEL="claude-sonnet-4.6"
|
||||
START_AT=0
|
||||
SKIP_CLEAN=true
|
||||
SKIP_UNCHANGED=false
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--priority) PRIORITY="$2"; shift 2 ;;
|
||||
--cycle-delay) CYCLE_DELAY="$2"; shift 2 ;;
|
||||
--model) MODEL="$2"; shift 2 ;;
|
||||
--start) START_AT="$2"; shift 2 ;;
|
||||
--no-skip) SKIP_CLEAN=false; shift ;;
|
||||
*) echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--no-skip]"; exit 1 ;;
|
||||
--skip-unchanged) SKIP_UNCHANGED=true; shift ;;
|
||||
*) echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--skip-unchanged]"; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
@@ -90,7 +89,7 @@ build_queue() {
|
||||
local garbage_count
|
||||
garbage_count=$(grep -ciP '^\[\^.*\b(not a conflict|sequential|boundary overlap|not simultaneous|malformed tag|garbled|artifact|remove)\b' "$file" 2>/dev/null) || true
|
||||
|
||||
# Flag person docs with incomplete names (single word, alias, no space)
|
||||
# Flag ruler docs with incomplete names (single word, alias, no space)
|
||||
local incomplete_name=0
|
||||
local parent_dir
|
||||
parent_dir=$(echo "$file" | sed 's|^\./||' | rev | cut -d/ -f2 | rev)
|
||||
@@ -260,11 +259,15 @@ STEPS — work through in order, skip any that do not apply:
|
||||
tool to rename/move it. Use organize(action='"'"'move'"'"', doc_id=..., to=...) to relocate
|
||||
or update_document(id=..., title=...) to fix the title.
|
||||
|
||||
3. ENRICH:
|
||||
Search ALL your available tools for new information about this entity — factbase search,
|
||||
web search, whatever you have. Use the entity name, aliases,
|
||||
and known associations as search terms. Add any new facts not already present, following
|
||||
factbase authoring conventions.
|
||||
3. ENRICH FROM OUTSIDE SOURCES:
|
||||
This is the most important step. Use web_search to find high-quality information about
|
||||
this entity from scholarly and encyclopedic sources. Search for:
|
||||
- The entity name + "archaeology" or "ancient history"
|
||||
- Key events, dates, or relationships mentioned in the document
|
||||
- Recent archaeological discoveries or revised scholarly consensus
|
||||
Prefer peer-reviewed sources, university publications, museum databases, and established
|
||||
encyclopedias. Add any new facts not already present, with source citations, following
|
||||
factbase authoring conventions. Do NOT add speculative or poorly-sourced claims.
|
||||
|
||||
4. IMPROVEMENT IDEAS:
|
||||
If you notice friction or gaps in factbase tools, file a Vikunja feature request:
|
||||
@@ -329,18 +332,16 @@ process_entity() {
|
||||
fi
|
||||
|
||||
# Phase 2: Decide if agent is needed
|
||||
local needs_agent=false
|
||||
local needs_agent=true
|
||||
if [[ "${incomplete_name:-0}" -eq 1 ]]; then
|
||||
needs_agent=true
|
||||
log " 👤 Incomplete name (ruler doc) → agent needed to resolve identity"
|
||||
elif [[ "$review_count" -gt 0 ]]; then
|
||||
needs_agent=true
|
||||
log " 📋 $review_count review questions → agent needed"
|
||||
elif [[ "$SKIP_CLEAN" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
|
||||
log " ⏭️ No questions, not modified since last pass → skipping agent"
|
||||
elif [[ "$SKIP_UNCHANGED" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
|
||||
needs_agent=false
|
||||
log " ⏭️ No questions, not modified since last pass → skipping (--skip-unchanged)"
|
||||
else
|
||||
needs_agent=true
|
||||
log " 🔍 Enrichment pass → agent needed"
|
||||
log " 🔍 Enrichment + review pass"
|
||||
fi
|
||||
|
||||
if [[ "$needs_agent" == true ]]; then
|
||||
@@ -378,10 +379,26 @@ process_entity() {
|
||||
[[ "$status" == "UPDATED" ]] && return 0 || return 1
|
||||
}
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# DEEP CROSS-DOCUMENT SCAN (once per cycle)
|
||||
# ═══════════════════════════════════════════
|
||||
#######################################
# Run one deep cross-document validation scan via the agent CLI, then
# commit whatever it changed.
#
# The scan is best-effort: an agent failure is logged and reported as
# success (status 0) so that a bare call from a script running under
# `set -e` keeps looping instead of aborting, which is what the
# "continuing..." log message promises.
#
# Globals:   MODEL (read) - model name passed to the agent CLI
# Arguments: none
# Outputs:   the agent transcript (stdout+stderr merged) on stdout,
#            on success and on failure alike, so failures can be diagnosed
# Returns:   0 when the agent fails (non-fatal); otherwise the status of
#            the final log call
#######################################
run_deep_scan() {
  log "🔬 Running deep cross-document validation scan..."

  # Declared separately from the assignment so the command substitution's
  # exit status is not masked by `local`.
  local output
  local agent_rc=0
  output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
    "Run check_repository with deep_check=true. Review any new issues found — answer what you can, defer what you cannot. Then commit." 2>&1) || agent_rc=$?

  if (( agent_rc != 0 )); then
    # Surface whatever the agent printed before failing; otherwise the
    # only trace of the failure would be the one-line log below.
    if [[ -n "$output" ]]; then
      printf '%s\n' "$output"
    fi
    log "❌ Deep scan agent failed, continuing..."
    # Non-fatal by design: skip the commit and let the caller carry on.
    return 0
  fi

  # printf instead of echo: the transcript is arbitrary text and must not
  # be interpreted as echo options (e.g. a leading "-n").
  printf '%s\n' "$output"
  do_commit "deep scan: cross-document validation"
  log "✅ Deep scan complete"
}
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# MAIN LOOP
|
||||
# ═══════════════════════════════════════════
|
||||
log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_clean=$SKIP_CLEAN)"
|
||||
log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_unchanged=$SKIP_UNCHANGED)"
|
||||
log "Docs dir: $DOCS_DIR"
|
||||
log "State file: $STATE_FILE"
|
||||
log "Press Ctrl+C to stop"
|
||||
@@ -424,6 +441,8 @@ while true; do
|
||||
log ""
|
||||
log "═══ Pass $PASS complete: $PROCESSED processed, $UPDATED updated ═══"
|
||||
|
||||
run_deep_scan
|
||||
|
||||
START_AT=0
|
||||
log "Looping back to start..."
|
||||
sleep "$CYCLE_DELAY"
|
||||
|
||||
Reference in New Issue
Block a user