Update continuous-improve.sh: remove vikunja rebuild, fix domain language for ancient history
This commit is contained in:
430
.automate/continuous-improve.sh
Executable file
430
.automate/continuous-improve.sh
Executable file
@@ -0,0 +1,430 @@
|
||||
#!/usr/bin/env bash
# continuous-improve.sh — Entity-by-entity continuous improvement loop
#
# Iterates through every factbase entity, one at a time. Does mechanical
# cleanup in bash (fast), then only invokes an agent for entities that
# actually need review resolution or enrichment.
#
# Usage: .automate/continuous-improve.sh [options]
#   --priority reviews|stale|random   Queue ordering (default: reviews)
#   --cycle-delay N                   Seconds between entities (default: 5)
#   --model MODEL                     LLM model (default: claude-sonnet-4.6)
#   --start N                         Skip first N entities in queue (resume)
#   --no-skip                         Don't skip clean entities (force agent on all)
|
||||
set -euo pipefail

# ─── Parse arguments ───
# Defaults; each one may be overridden by the matching command-line flag.
PRIORITY="reviews"
CYCLE_DELAY=5
MODEL="claude-sonnet-4.6"
START_AT=0
SKIP_CLEAN=true

# Print the one-line usage summary on stdout.
usage() {
  echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--no-skip]"
}

# Report a bad invocation on stderr (message + usage) and exit 1.
#   $1 - human-readable description of the problem
die_usage() {
  echo "Error: $1" >&2
  usage >&2
  exit 1
}

# Parse command-line flags into PRIORITY, CYCLE_DELAY, MODEL, START_AT and
# SKIP_CLEAN, then validate them.
# Arguments: the script's own "$@"
# Exits 1 (after printing usage) on an unknown flag, a flag missing its
# value, or a value that later code could not use safely.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --priority | --cycle-delay | --model | --start)
        # Without this check a trailing flag trips `set -u` on "$2" and the
        # user only sees an "unbound variable" message.
        [[ $# -ge 2 ]] || die_usage "$1 requires a value"
        case "$1" in
          --priority) PRIORITY="$2" ;;
          --cycle-delay) CYCLE_DELAY="$2" ;;
          --model) MODEL="$2" ;;
          --start) START_AT="$2" ;;
        esac
        shift 2
        ;;
      --no-skip)
        SKIP_CLEAN=false
        shift
        ;;
      *)
        usage
        exit 1
        ;;
    esac
  done

  # An unknown priority would make the queue builder emit nothing, and the
  # main loop would then spin forever on an empty queue.
  case "$PRIORITY" in
    reviews | stale | random) ;;
    *) die_usage "--priority must be one of: reviews, stale, random (got '$PRIORITY')" ;;
  esac

  # START_AT is used in arithmetic and -le comparisons; CYCLE_DELAY is
  # handed to sleep.
  [[ "$START_AT" =~ ^[0-9]+$ ]] ||
    die_usage "--start must be a non-negative integer (got '$START_AT')"
  [[ "$CYCLE_DELAY" =~ ^[0-9]+([.][0-9]+)?$ ]] ||
    die_usage "--cycle-delay must be a non-negative number of seconds (got '$CYCLE_DELAY')"
}

parse_args "$@"
|
||||
|
||||
# Absolute directory that contains this script.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# Parent of the script directory; every find/grep/git step cds here first.
DOCS_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"
# Append-only TSV of "<entity id><TAB><epoch seconds>" rows.
STATE_FILE="$SCRIPT_DIR/.improve-state.tsv"
# Human-readable per-entity history (id, title, summary, duration).
ACTION_LOG="$SCRIPT_DIR/improve-history.log"
readonly SCRIPT_DIR DOCS_DIR STATE_FILE ACTION_LOG

# From here on, mirror everything written to stdout and stderr into the run
# log while still showing it on the original stdout.
exec > >(tee -a "$SCRIPT_DIR/continuous-improve.log") 2>&1
|
||||
|
||||
# Write one timestamped line to stdout: "[YYYY-MM-DD HH:MM:SS] <args...>".
# All arguments are joined with single spaces, as "$*" does.
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$*"
}
|
||||
|
||||
# Stage everything under DOCS_DIR, commit it, and push with up to three
# attempts, rebasing onto the remote after each failed push.
# Globals:   DOCS_DIR (read)
# Arguments: $1 - commit message
# Returns:   0 when there was nothing to commit or the push succeeded;
#            1 when cd/add/commit failed or all three pushes failed.
do_commit() {
  local msg="$1"
  local attempt

  cd "$DOCS_DIR" || {
    log "❌ Cannot cd to $DOCS_DIR"
    return 1
  }

  git add -A || {
    log "❌ git add failed"
    return 1
  }

  # `git diff --cached --quiet` exits 0 when the index matches HEAD.
  if git diff --cached --quiet; then
    return 0
  fi

  git commit -m "$msg" || {
    log "❌ git commit failed: $msg"
    return 1
  }

  for attempt in 1 2 3; do
    if git push 2>/dev/null; then
      log "✅ Committed: $msg"
      return 0
    fi
    log "⚠️ Push attempt $attempt failed, rebasing..."
    if ! git pull --rebase; then
      # Do not leave the work tree stuck mid-rebase for the next entity.
      git rebase --abort 2>/dev/null || true
      log "⚠️ Rebase failed on attempt $attempt"
    fi
  done

  log "❌ Push failed after 3 attempts"
  return 1
}
|
||||
|
||||
# Scan DOCS_DIR for entity markdown files and print the work queue, one
# tab-separated row per entity, ordered according to PRIORITY:
#   id, path, open review count, mtime, last processed, garbage count,
#   incomplete-name flag
# Files without a "factbase:<6 hex>" marker are skipped.
# Globals:   DOCS_DIR, STATE_FILE, PRIORITY (read)
# Outputs:   the sorted queue on stdout
# Returns:   0 on success, 1 if setup fails or PRIORITY is not recognised
build_queue() {
  cd "$DOCS_DIR" || return 1

  local tmpfile
  tmpfile=$(mktemp) || return 1

  local file fb_id review_count mtime last_processed garbage_count
  local incomplete_name dir parent_dir doc_title

  while IFS= read -r -d '' file; do
    fb_id=$(grep -oP '(?<=factbase:)[a-f0-9]{6}' "$file" 2>/dev/null | head -1) || true
    [[ -z "$fb_id" ]] && continue

    # Unanswered review questions: "- [ ] `@q[...".
    review_count=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true

    mtime=$(stat -c %Y "$file" 2>/dev/null) || mtime=0

    # Latest timestamp recorded for this id. The state file is tab-separated,
    # so compare the first field exactly, and as a string: all-digit ids must
    # not be compared numerically.
    last_processed=0
    if [[ -f "$STATE_FILE" ]]; then
      last_processed=$(awk -F'\t' -v id="$fb_id" \
        '($1 "") == (id "") { ts = $2 } END { print (ts == "" ? 0 : ts) }' \
        "$STATE_FILE" 2>/dev/null) || true
      [[ "$last_processed" =~ ^[0-9]+$ ]] || last_processed=0
    fi

    garbage_count=$(grep -ciP '^\[\^.*\b(not a conflict|sequential|boundary overlap|not simultaneous|malformed tag|garbled|artifact|remove)\b' "$file" 2>/dev/null) || true

    # Flag person docs with incomplete names (single word, alias, no space)
    incomplete_name=0
    dir=${file%/*}
    parent_dir=${dir##*/}
    if [[ "$parent_dir" == "rulers" ]]; then
      doc_title=$(grep '^# ' "$file" 2>/dev/null | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//')
      if [[ -n "$doc_title" && "$doc_title" != *" "* ]]; then
        incomplete_name=1
      fi
    fi

    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
      "$fb_id" "$file" "$review_count" "$mtime" "$last_processed" \
      "$garbage_count" "$incomplete_name"
  done < <(find . -name '*.md' \
    -not -path './.git/*' \
    -not -path './.automate/*' \
    -not -path './.kiro/*' \
    -not -path './.factbase/*' \
    -not -path './_orphans.md' \
    -print0) > "$tmpfile"

  # Column 7 (incomplete name) sorts first for both ordered priorities.
  local rc=0
  case "$PRIORITY" in
    reviews) sort -t$'\t' -k7,7rn -k3,3rn -k6,6rn -k5,5n "$tmpfile" ;;
    stale) sort -t$'\t' -k7,7rn -k5,5n -k3,3rn "$tmpfile" ;;
    random) shuf "$tmpfile" ;;
    *)
      echo "build_queue: unknown priority '$PRIORITY'" >&2
      rc=1
      ;;
  esac

  rm -f -- "$tmpfile"
  return "$rc"
}
|
||||
|
||||
# Append "<entity id><TAB><current epoch seconds>" to the state file.
# Globals:   STATE_FILE (appended to)
# Arguments: $1 - entity id
mark_processed() {
  # printf rather than `echo -e`, so backslashes in $1 are never interpreted.
  printf '%s\t%s\n' "$1" "$(date +%s)" >> "$STATE_FILE"
}
|
||||
|
||||
# ─── Bash-based mechanical cleanup (no agent needed) ───
|
||||
# Apply the mechanical, agent-free fixes to one entity file, in place:
#   1. strip "@t[...]" / "[^N]" suffixes from "# " title lines
#   2. delete footnotes that are really review answers
#   3. delete "- Artifact from previous review application" lines
#   4. drop the Review Queue heading/marker when no question is open, then
#      trim trailing blank lines
# Each step is guarded by a grep so an already-clean file is never rewritten
# and keeps its mtime.
# Arguments: $1 - path to the markdown file
# Returns:   0 if the file content changed, 1 otherwise (or if unreadable)
bash_cleanup() {
  local file="$1"
  local garbage_re='^\[\^\d+\]:.*\b(not a conflict|sequential role|boundary overlap|not simultaneous|malformed tag|garbled|artifact from previous|remove garbled)\b'

  # Snapshot the file so "changed" reflects a real content difference rather
  # than merely the fact that a step ran.
  local before scratch
  before=$(mktemp) || return 1
  scratch=$(mktemp) || {
    rm -f -- "$before"
    return 1
  }
  if ! cp -- "$file" "$before" 2>/dev/null; then
    rm -f -- "$before" "$scratch"
    return 1
  fi

  # Fix corrupted title: strip @t[...] and [^N] suffixes.
  # ERE has no lazy quantifier, so one substitution only removes the LAST
  # suffix; the :a/ta loop repeats until the line stops matching.
  if grep -qP '^# .+(\s+@t\[|\s+\[\^)' "$file" 2>/dev/null; then
    sed -i -E -e ':a' -e 's/^(# .+)\s+(@t\[.*|\[\^.*)$/\1/' -e 'ta' "$file"
  fi

  # Delete garbage footnotes (review answers dumped as source citations)
  if grep -qiP "$garbage_re" "$file" 2>/dev/null; then
    local rc=0
    grep -viP "$garbage_re" "$file" > "$scratch" 2>/dev/null || rc=$?
    # Exit 1 only means every line was garbage; >1 is a real grep error, in
    # which case the file is left alone.
    if ((rc <= 1)); then
      cat -- "$scratch" > "$file"
    fi
  fi

  # Remove review answer artifact lines in body
  if grep -qP '^- Artifact from previous review application' "$file" 2>/dev/null; then
    sed -i '/^- Artifact from previous review application/d' "$file"
  fi

  # Remove empty Review Queue sections and factbase:review markers
  if grep -qP '^## Review Queue|<!-- factbase:review -->' "$file" 2>/dev/null; then
    # Only remove if the review queue has no actual unanswered questions
    local has_open
    has_open=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    if [[ "$has_open" -eq 0 ]]; then
      sed -i '/^## Review Queue$/d; /^<!-- factbase:review -->$/d' "$file"
      # Trim trailing blank lines left behind at end of file
      sed -i -e :a -e '/^\n*$/{$d;N;ba' -e '}' "$file"
    fi
  fi

  local result=1
  cmp -s -- "$before" "$file" || result=0
  rm -f -- "$before" "$scratch"
  return "$result"
}
|
||||
|
||||
# Collect short excerpts from up to five OTHER documents that mention this
# entity's title, for inclusion in the agent prompt.
# Globals:   DOCS_DIR (read)
# Arguments: $1 - entity file path, relative to DOCS_DIR
# Outputs:   zero or more "--- From: <title> (<path>) ---" sections, each
#            followed by up to three matching lines; nothing when the file
#            has no title
get_related_context() {
  local file="$1"

  cd "$DOCS_DIR" || return 1

  local title
  title=$(grep '^# ' "$file" 2>/dev/null | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//')
  [[ -z "$title" ]] && return 0

  # grep -r prints paths as "./dir/file.md"; normalise our own path to that
  # form so the self-exclusion below matches however the caller spelled it.
  local self="./${file#./}"

  local related=""
  local match_file match_title excerpt
  while IFS= read -r match_file; do
    match_title=$(grep '^# ' "$match_file" 2>/dev/null | head -1 | sed 's/^# //')
    # -F: a title is literal text, not a regex ("Khufu [Cheops]", "St. X").
    excerpt=$(grep -iF -- "$title" "$match_file" 2>/dev/null | head -3)
    if [[ -n "$excerpt" ]]; then
      related+="--- From: ${match_title} (${match_file}) ---
${excerpt}

"
    fi
  done < <(grep -rlF --include='*.md' \
    --exclude-dir=.git --exclude-dir=.automate \
    --exclude-dir=.kiro --exclude-dir=.factbase \
    --exclude='*_orphans*' \
    -- "$title" . 2>/dev/null |
    grep -vxF -- "$self" |
    head -5)

  printf '%s\n' "$related"
}
|
||||
|
||||
# ─── Agent-based processing (review questions + enrichment) ───
|
||||
# Build the improvement prompt for one entity and run it through kiro-cli.
# Globals:   DOCS_DIR, MODEL (read); VIKUNJA_API_TOKEN (optional, exported
#            when set)
# Arguments: $1 - entity id
#            $2 - entity file path, relative to DOCS_DIR
#            $3 - entity title (used in log lines and the summary template)
# Outputs:   the agent's full output, then one final line
#            "<action_summary>...</action_summary>" — the agent's own summary
#            or a "status: UNKNOWN" fallback when it produced none
# Returns:   0 on success, 1 if the file is unreadable or kiro-cli fails
process_entity_agent() {
  local fb_id="$1"
  local file="$2"
  local title="$3"

  cd "$DOCS_DIR" || return 1

  local content
  content=$(cat -- "$file") || {
    log "❌ Cannot read $file"
    return 1
  }

  local related
  related=$(get_related_context "$file") || true

  local prompt
  prompt="You are continuously improving a factbase knowledge base, one entity at a time.
Use factbase MCP tools — call get_authoring_guide if you need formatting rules.

ENTITY FILE: $file
ENTITY ID: $fb_id

=== CURRENT DOCUMENT CONTENT ===
$content
=== END DOCUMENT CONTENT ==="

  if [[ -n "$related" ]]; then
    prompt+="

=== MENTIONS IN OTHER DOCUMENTS ===
$related
=== END MENTIONS ==="
  fi

  prompt+="

STEPS — work through in order, skip any that do not apply:

1. RESOLVE REVIEW QUESTIONS:
Call get_review_queue(doc_id='${fb_id}') — if there are open questions, answer them.

Patterns learned from resolving thousands of these:
- CONFLICT (chronological overlaps): Boundary-year overlaps in sequential reigns or periods
are NOT conflicts (date granularity artifact). Concurrent roles (e.g., ruler + military
commander, pharaoh + high priest) are both true simultaneously. Approximate dates that
overlap by a few years reflect scholarly uncertainty, not contradiction.
- AMBIGUOUS (terms): Expand the term AND create or update a definitions/ file so the
term is not flagged again. Do NOT just answer inline — the definitions file is what
prevents recurrence. Check existing definitions files first with search_content.
Common: BCE, CE, polis, satrapy, pharaoh, consul, tribune, cuneiform, stele, ziggurat.
- TEMPORAL/STALE: BCE dates are written in text, not temporal tags. CE date ranges
(@t[...]) that are closed are historical, not stale. For open @t[~...] tags, search
for newer scholarship.
- MISSING: Search with search_knowledge and search_content. If not found, defer.

After answering, call apply_review_answers(doc_id='${fb_id}'). Then re-read the
file with get_entity and verify apply did not corrupt it (garbage footnotes, mangled title).
If it did, fix with update_document.

2. IDENTITY & ORGANIZATION:
For ruler documents: if the title is a single name, alias, or epithet (not a full name),
prioritize finding their full or commonly known name. Search local sources, check mentions
in other documents, and cross-reference with civilization docs.
If you find the full name, update the document title with update_document.

For any document: if the title or file location could be improved (e.g. a ruler doc is in
the wrong civilization folder, or the filename does not match the title), use the organize MCP
tool to rename/move it. Use organize(action='move', doc_id=..., to=...) to relocate
or update_document(id=..., title=...) to fix the title.

3. ENRICH:
Search ALL your available tools for new information about this entity — factbase search,
web search, whatever you have. Use the entity name, aliases,
and known associations as search terms. Add any new facts not already present, following
factbase authoring conventions."

  # SECURITY: the API token used to be hard-coded here, which put a live
  # credential into version control and into kiro-cli's argv. That token
  # should be rotated. The prompt now carries only the variable NAME; the
  # agent's shell expands it from the exported environment. With no token
  # configured the step is left out.
  if [[ -n "${VIKUNJA_API_TOKEN:-}" ]]; then
    export VIKUNJA_API_TOKEN
    prompt+=$'\n\n'"$(
      cat <<'EOF'
4. IMPROVEMENT IDEAS:
If you notice friction or gaps in factbase tools, file a Vikunja feature request:
curl -s -X PUT "https://vikunja.home.everyonce.com/api/v1/projects/2/tasks" \
-H "Authorization: Bearer $VIKUNJA_API_TOKEN" \
-H "Content-Type: application/json" \
-d '{"title":"[factbase][feature] <summary>","description":"<details>","priority":2}'
Only file genuinely useful improvements, not duplicates.
EOF
    )"
  fi

  prompt+="

RULES:
- Use update_document to edit — be surgical, change only what needs changing
- If nothing needs changing, say so and move on

IMPORTANT: When finished, output exactly one line:
<action_summary>status: STATUS | $title | changes: DESCRIPTION</action_summary>
Status values: UPDATED (made changes), NO_CHANGE (nothing to do), ERROR (something failed)"

  local output
  if ! output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
    "$prompt" 2>&1); then
    # Keep whatever the CLI printed; it is the only clue to the failure.
    printf '%s\n' "$output"
    log "❌ kiro-cli error for $title, continuing..."
    return 1
  fi

  printf '%s\n' "$output"

  local summary
  summary=$(grep -oP '(?<=<action_summary>).*(?=</action_summary>)' <<< "$output" | tail -1) || true
  if [[ -z "$summary" ]]; then
    summary="status: UNKNOWN | $title | no summary returned"
  fi
  # Emit the summary in its tags so a parser of this output finds it even
  # when the agent never printed one.
  printf '<action_summary>%s</action_summary>\n' "$summary"
}
|
||||
|
||||
# ─── Main entity processing dispatcher ───
|
||||
# Process one queue row: run the bash cleanup, decide whether the agent is
# needed, run it if so, commit, and record the result.
# Globals:   DOCS_DIR, SKIP_CLEAN, ACTION_LOG (read/appended)
# Arguments: $1 id, $2 file, $3 open review count, $4 mtime,
#            $5 last-processed timestamp, $6 garbage count,
#            $7 incomplete-name flag (0/1; defaults to 0)
# Returns:   0 if the entity was UPDATED (by cleanup or agent), 1 otherwise
process_entity() {
  local fb_id="$1"
  local file="$2"
  local review_count="$3"
  local mtime="$4"
  local last_processed="$5"
  local garbage_count="$6"
  local incomplete_name="${7:-0}"

  cd "$DOCS_DIR" || return 1

  local title
  title=$(grep '^# ' "$file" | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//')

  log "━━━ [$title] ($fb_id) reviews=$review_count garbage=$garbage_count ━━━"

  local start_time
  start_time=$(date +%s)
  local status="NO_CHANGE"
  local summary=""

  # Phase 1: Bash cleanup (milliseconds, no agent)
  if bash_cleanup "$file"; then
    log " 🧹 Bash cleanup applied"
    status="UPDATED"
    # Recount after cleanup
    review_count=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    garbage_count=0
  fi

  # Phase 2: Decide if agent is needed
  local needs_agent=false
  if [[ "$incomplete_name" -eq 1 ]]; then
    needs_agent=true
    log " 👤 Incomplete name (ruler doc) → agent needed to resolve identity"
  elif [[ "$review_count" -gt 0 ]]; then
    needs_agent=true
    log " 📋 $review_count review questions → agent needed"
  elif [[ "$SKIP_CLEAN" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
    log " ⏭️ No questions, not modified since last pass → skipping agent"
  else
    needs_agent=true
    log " 🔍 Enrichment pass → agent needed"
  fi

  if [[ "$needs_agent" == true ]]; then
    local agent_output agent_status
    # A failed agent run must not abort the loop; the output is still parsed.
    agent_output=$(process_entity_agent "$fb_id" "$file" "$title") || true
    echo "$agent_output"

    summary=$(echo "$agent_output" | grep -oP '(?<=<action_summary>).*(?=</action_summary>)' | tail -1) || true
    agent_status=$(echo "$summary" | grep -oP '^status: \K[A-Z_]+' || echo "UNKNOWN")
    if [[ "$agent_status" == "UPDATED" ]]; then
      status="UPDATED"
    fi
    # Do not record an agent run that produced no summary as bash-only.
    if [[ -z "$summary" ]]; then
      summary="status: UNKNOWN | $title | no summary returned"
    fi
  fi

  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))

  # Commit after every agent run too: the agent edits files through its own
  # tools and may report NO_CHANGE/ERROR, or nothing at all, after writing.
  # do_commit does nothing when the tree is clean.
  if [[ "$status" == "UPDATED" || "$needs_agent" == true ]]; then
    do_commit "improve: $title" || log " ⚠️ Commit/push did not complete for $title"
  fi

  if [[ -z "$summary" ]]; then
    summary="status: $status | $title | bash-only pass"
  fi

  {
    echo "[$(date -Iseconds)] $fb_id | $title"
    echo " $summary"
    echo " duration: ${duration}s"
  } >> "$ACTION_LOG"

  mark_processed "$fb_id"
  log " Done (${duration}s) — $status"

  if [[ "$status" == "UPDATED" ]]; then
    return 0
  fi
  return 1
}
|
||||
|
||||
# ═══════════════════════════════════════════
|
||||
# MAIN LOOP
|
||||
# ═══════════════════════════════════════════
|
||||
log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_clean=$SKIP_CLEAN)"
log "Docs dir: $DOCS_DIR"
log "State file: $STATE_FILE"
log "Press Ctrl+C to stop"

# Each pass rebuilds the queue and walks it once; the loop runs until the
# process is interrupted.
PASS=0
while true; do
  PASS=$((PASS + 1))
  log ""
  log "═══════════════════════════════════════════"
  log " PASS $PASS — $(TZ='America/Chicago' date '+%Y-%m-%d %r') — priority=$PRIORITY"
  log "═══════════════════════════════════════════"

  QUEUE=$(build_queue) || {
    log "❌ Failed to build queue"
    exit 1
  }
  # grep -c already prints 0 when nothing matches (and exits 1); adding
  # `|| echo 0` would produce the two-line value "0\n0".
  TOTAL=$(grep -c . <<< "$QUEUE" || true)
  log "Queue: $TOTAL entities (starting at $((START_AT + 1)))"

  PROCESSED=0
  UPDATED=0
  POSITION=0

  # The queue is read on fd 3 so that nothing run inside the loop (the agent
  # CLI in particular) can swallow the remaining rows from stdin.
  while IFS=$'\t' read -r -u 3 fb_id file review_count mtime last_processed garbage_count incomplete_name; do
    [[ -z "$fb_id" ]] && continue
    POSITION=$((POSITION + 1))

    # --start N: skip the first N rows (first pass only; reset below).
    if [[ $POSITION -le $START_AT ]]; then
      continue
    fi

    PROCESSED=$((PROCESSED + 1))
    log "[$POSITION/$TOTAL] Next up..."

    if process_entity "$fb_id" "$file" "$review_count" "$mtime" "$last_processed" "$garbage_count" "$incomplete_name"; then
      UPDATED=$((UPDATED + 1))
    fi

    sleep "$CYCLE_DELAY"
  done 3<<< "$QUEUE"

  log ""
  log "═══ Pass $PASS complete: $PROCESSED processed, $UPDATED updated ═══"

  START_AT=0
  log "Looping back to start..."
  sleep "$CYCLE_DELAY"
done
|
||||
Reference in New Issue
Block a user