#!/usr/bin/env bash
# continuous-improve.sh — Continuous enrichment and quality loop
#
# Each cycle: processes every entity (resolve reviews, enrich from outside
# sources), then runs a deep cross-document validation scan.
#
# Usage: .automate/continuous-improve.sh [options]
#   --priority reviews|stale|random   Queue ordering (default: reviews)
#   --cycle-delay N                   Seconds between entities (default: 5)
#   --model MODEL                     LLM model (default: claude-sonnet-4.6)
#   --start N                         Skip first N entities in queue (resume)
#   --skip-unchanged                  Skip entities unchanged since last pass
set -euo pipefail

# ─── Parse arguments ───
PRIORITY="reviews"
CYCLE_DELAY=5
MODEL="claude-sonnet-4.6"
START_AT=0
# Off by default so the --skip-unchanged flag is meaningful. The previous
# default of `true` made the flag a no-op: passing it could never change
# anything, contradicting the usage text that documents it as opt-in.
SKIP_UNCHANGED=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    --priority) PRIORITY="$2"; shift 2 ;;
    --cycle-delay) CYCLE_DELAY="$2"; shift 2 ;;
    --model) MODEL="$2"; shift 2 ;;
    --start) START_AT="$2"; shift 2 ;;
    --skip-unchanged) SKIP_UNCHANGED=true; shift ;;
    *) echo "Usage: $0 [--priority reviews|stale|random] [--cycle-delay N] [--model MODEL] [--start N] [--skip-unchanged]"; exit 1 ;;
  esac
done
# Resolve paths relative to this script's own location.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
DOCS_DIR="$SCRIPT_DIR/.."                     # the docs tree being improved
STATE_FILE="$SCRIPT_DIR/.improve-state.tsv"   # TSV rows: <fb_id>\t<epoch of last pass>
ACTION_LOG="$SCRIPT_DIR/improve-history.log"  # human-readable per-entity summaries

# Mirror all stdout/stderr into a persistent log while still printing live.
exec > >(tee -a "$SCRIPT_DIR/continuous-improve.log") 2>&1

# Timestamped logger used throughout the script.
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*"; }
do_commit() {
  # Stage everything under DOCS_DIR and, if anything is staged, commit and
  # push. Push is retried up to 3 times, rebasing onto the remote between
  # attempts. Returns 0 when there is nothing to commit or the push eventually
  # succeeds; the final failure is logged but not fatal.
  local msg="$1"
  cd "$DOCS_DIR" || { log "❌ cannot cd to $DOCS_DIR"; return 1; }
  git add -A
  if ! git diff --cached --quiet; then
    git commit -m "$msg"
    local attempt
    for attempt in 1 2 3; do
      if git push 2>/dev/null; then
        log "✅ Committed: $msg"
        return 0
      fi
      log "⚠️ Push attempt $attempt failed, rebasing..."
      # Guard the rebase: when do_commit is called outside an `if` context
      # (e.g. from run_deep_scan), `set -e` would otherwise kill the whole
      # script on a transient rebase failure; the next push attempt (or the
      # final log line) handles it instead.
      git pull --rebase || log "⚠️ Rebase failed, retrying push anyway"
    done
    log "❌ Push failed after 3 attempts"
  fi
}
build_queue() {
  # Emit one TSV row per factbase entity, sorted according to $PRIORITY:
  #   fb_id \t file \t review_count \t mtime \t last_processed \t garbage_count \t incomplete_name
  cd "$DOCS_DIR"
  local tmpfile
  tmpfile=$(mktemp)

  find . -name '*.md' \
    -not -path './.git/*' \
    -not -path './.automate/*' \
    -not -path './.kiro/*' \
    -not -path './.factbase/*' \
    -not -path './_orphans.md' \
    -print0 | while IFS= read -r -d '' file; do

    # Only documents carrying a factbase id marker are entities.
    local fb_id
    fb_id=$(grep -oP '(?<=factbase:)[a-f0-9]{6}' "$file" 2>/dev/null | head -1)
    [[ -z "$fb_id" ]] && continue

    # Open (unanswered) review questions.
    local review_count
    review_count=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true

    local mtime
    mtime=$(stat -c %Y "$file")

    # Epoch of the last pass over this entity, from the state file.
    # NOTE: mark_processed writes tab-delimited rows, so the lookup must
    # match a tab after the id — the previous pattern used a space and
    # therefore never matched, leaving last_processed permanently 0.
    local last_processed=0
    if [[ -f "$STATE_FILE" ]]; then
      last_processed=$(grep -P "^${fb_id}\t" "$STATE_FILE" 2>/dev/null | tail -1 | cut -f2) || true
      [[ -z "$last_processed" ]] && last_processed=0
    fi

    # Footnotes that look like review answers dumped as citations (garbage).
    local garbage_count
    garbage_count=$(grep -ciP '^\[\^.*\b(not a conflict|sequential|boundary overlap|not simultaneous|malformed tag|garbled|artifact|remove)\b' "$file" 2>/dev/null) || true

    # Flag ruler docs with incomplete names (single word, alias, no space)
    local incomplete_name=0
    local parent_dir
    parent_dir=$(echo "$file" | sed 's|^\./||' | rev | cut -d/ -f2 | rev)
    if [[ "$parent_dir" == "rulers" ]]; then
      local doc_title
      doc_title=$(grep '^# ' "$file" 2>/dev/null | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//')
      if [[ -n "$doc_title" ]] && ! echo "$doc_title" | grep -q ' '; then
        incomplete_name=1
      fi
    fi

    echo -e "${fb_id}\t${file}\t${review_count}\t${mtime}\t${last_processed}\t${garbage_count}\t${incomplete_name}"
  done > "$tmpfile"

  # Sort keys by field: 3=review_count 5=last_processed 6=garbage 7=incomplete_name
  case "$PRIORITY" in
    reviews) sort -t$'\t' -k7,7rn -k3,3rn -k6,6rn -k5,5n "$tmpfile" ;;
    stale)   sort -t$'\t' -k7,7rn -k5,5n -k3,3rn "$tmpfile" ;;
    random)  shuf "$tmpfile" ;;
  esac

  rm -f "$tmpfile"
}
mark_processed() {
  # Record "<fb_id>\t<epoch seconds>" in the state file so later passes can
  # tell how recently this entity was handled.
  local entity_id="$1"
  printf '%s\t%s\n' "$entity_id" "$(date +%s)" >> "$STATE_FILE"
}
# ─── Bash-based mechanical cleanup (no agent needed) ───
bash_cleanup() {
  # In-place mechanical fixes that need no LLM: corrupted titles, garbage
  # footnotes, stale review artifacts, duplicate H1s, trailing blank lines.
  # Exit status doubles as the signal: 0 if anything changed, 1 otherwise.
  local file="$1"
  local changed=false

  # Fix corrupted title: strip @t[...] and [^N] suffixes
  if grep -qP '^# .+(\s+@t\[|\s+\[\^)' "$file" 2>/dev/null; then
    sed -i -E 's/^(# .+?)\s+(@t\[.*|\[\^.*)$/\1/' "$file"
    changed=true
  fi

  # Delete garbage footnotes (review answers dumped as source citations)
  if grep -qiP '^\[\^\d+\]:.*\b(not a conflict|sequential role|boundary overlap|not simultaneous|malformed tag|garbled|artifact from previous|remove garbled)\b' "$file" 2>/dev/null; then
    # Get line numbers of garbage footnotes, delete them
    local lines
    lines=$(grep -niP '^\[\^\d+\]:.*\b(not a conflict|sequential role|boundary overlap|not simultaneous|malformed tag|garbled|artifact from previous|remove garbled)\b' "$file" | cut -d: -f1 | sort -rn)
    if [[ -n "$lines" ]]; then
      # Delete bottom-up (sort -rn) so earlier line numbers stay valid
      # while the file shrinks.
      for ln in $lines; do
        sed -i "${ln}d" "$file"
      done
      changed=true
    fi
  fi

  # Remove review answer artifact lines in body
  if grep -qP '^- Artifact from previous review application' "$file" 2>/dev/null; then
    sed -i '/^- Artifact from previous review application/d' "$file"
    changed=true
  fi

  # Remove empty Review Queue sections and factbase:review markers
  if grep -qP '^## Review Queue|<!-- factbase:review -->' "$file" 2>/dev/null; then
    # Only remove if the review queue has no actual unanswered questions
    local has_open
    has_open=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    if [[ "$has_open" -eq 0 ]]; then
      sed -i '/^## Review Queue$/d; /^<!-- factbase:review -->$/d' "$file"
      changed=true
    fi
  fi

  # Remove answered review Q&A lines (- [x] `@q[...]` and their > answer lines)
  if grep -qP '^\- \[x\] `@q\[' "$file" 2>/dev/null; then
    local has_open
    has_open=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    if [[ "$has_open" -eq 0 ]]; then
      # Remove - [x] `@q[...] lines and the > answer line that follows each
      awk '
/^\- \[x\] `@q\[/ { skip=1; next }
skip && /^>/ { skip=0; next }
{ skip=0; print }
' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
      changed=true
    fi
  fi

  # Remove duplicate H1 headings (keep first, remove subsequent)
  local h1_count
  h1_count=$(grep -c '^# ' "$file" 2>/dev/null) || true
  if [[ "$h1_count" -gt 1 ]]; then
    # Keep the first H1, delete all others
    awk '/^# / { if (++count > 1) next } { print }' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file"
    changed=true
  fi

  # Clean up trailing blank lines (GNU sed hold-loop idiom)
  sed -i -e :a -e '/^\n*$/{$d;N;ba' -e '}' "$file"

  # Report whether any of the steps above touched the file.
  [[ "$changed" == true ]]
}
get_related_context() {
  # Collect up to 5 other documents that mention this entity's title, with up
  # to 3 matching lines from each, formatted as prompt context for the agent.
  local file="$1"
  local title
  title=$(grep '^# ' "$file" | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//')
  [[ -z "$title" ]] && return

  cd "$DOCS_DIR"
  local related=""
  while IFS= read -r match_file; do
    [[ "$match_file" == "$file" ]] && continue
    local match_title
    match_title=$(grep '^# ' "$match_file" 2>/dev/null | head -1 | sed 's/^# //')
    local excerpt
    # -F: the title is a literal string, not a regex. Titles containing
    # regex metacharacters (e.g. "[", ".") previously made grep error out
    # or mis-match, silently dropping all related context.
    excerpt=$(grep -iF -- "$title" "$match_file" 2>/dev/null | head -3)
    if [[ -n "$excerpt" ]]; then
      related+="--- From: ${match_title} (${match_file}) ---
${excerpt}

"
    fi
  done < <(grep -rlF --include='*.md' -- "$title" . \
    | grep -v '.git\|.automate\|.kiro\|.factbase\|_orphans' \
    | head -5)

  echo "$related"
}
# ─── Fetch existing Vikunja bug titles (for dedup) ───
get_existing_bug_titles() {
  # Print one task title per line from the Vikunja bug project so the agent
  # can avoid filing duplicate reports. Best-effort: network or parse
  # failures yield empty output instead of aborting (trailing || true).
  # NOTE(review): the API bearer token is hardcoded here (and in the prompts
  # below) — consider injecting it from an environment variable.
  curl -s "https://vikunja.home.everyonce.com/api/v1/projects/2/tasks" \
    -H "Authorization: Bearer tk_ff251f3d3512775c71913bc2f8ec0dabbf5016a8" \
    2>/dev/null | grep -oP '"title":"[^"]*"' | sed 's/"title":"//;s/"$//' || true
}
# ─── Agent-based processing (review questions + enrichment) ───
process_entity_agent() {
  # Build one large prompt (document content + cross-document mentions +
  # existing bug reports + step-by-step instructions) and run a single
  # kiro-cli agent invocation over it. Prints the raw agent output followed
  # by the parsed <action_summary> line; the caller greps for that marker.
  local fb_id="$1"
  local file="$2"
  local title="$3"
  local review_count="${4:-0}"

  cd "$DOCS_DIR"

  # Snapshot the document so the prompt can embed it verbatim.
  local content
  content=$(cat "$file")

  # Lines from other documents that mention this entity (may be empty).
  local related
  related=$(get_related_context "$file")

  # Already-filed Vikunja tasks, used for duplicate suppression below.
  local existing_bugs
  existing_bugs=$(get_existing_bug_titles)

  local prompt
  prompt="You are continuously improving a factbase knowledge base, one entity at a time.
Use factbase MCP tools — call get_authoring_guide if you need formatting rules.

ENTITY FILE: $file
ENTITY ID: $fb_id

=== CURRENT DOCUMENT CONTENT ===
$content
=== END DOCUMENT CONTENT ==="

  # Optional sections are only appended when non-empty, keeping the prompt small.
  if [[ -n "$related" ]]; then
    prompt+="

=== MENTIONS IN OTHER DOCUMENTS ===
$related
=== END MENTIONS ==="
  fi

  if [[ -n "$existing_bugs" ]]; then
    prompt+="

=== EXISTING VIKUNJA BUG REPORTS (do NOT file duplicates) ===
$existing_bugs
=== END EXISTING BUGS ==="
  fi

  # Build review step conditionally — full instructions only when the wrapper
  # already counted open review questions; otherwise tell the agent to skip.
  local review_step=""
  if [[ "$review_count" -gt 0 ]]; then
    review_step='1. RESOLVE REVIEW QUESTIONS:
Call get_review_queue(doc_id='"'"''"$fb_id"''"'"') — if there are open (unanswered) questions, answer them.

Patterns learned from resolving thousands of these:
- CONFLICT (chronological overlaps): Boundary-year overlaps in sequential reigns or periods
are NOT conflicts (date granularity artifact). Concurrent roles (e.g., ruler + military
commander, pharaoh + high priest) are both true simultaneously. Approximate dates that
overlap by a few years reflect scholarly uncertainty, not contradiction.
- AMBIGUOUS (terms): Expand the term AND create or update a definitions/ file so the
term is not flagged again. Do NOT just answer inline — the definitions file is what
prevents recurrence. Check existing definitions files first with search_content.
Common: BCE, CE, polis, satrapy, pharaoh, consul, tribune, cuneiform, stele, ziggurat.
- TEMPORAL/STALE: BCE dates are written in text, not temporal tags. CE date ranges
(@t[...]) that are closed are historical, not stale. For open @t[~...] tags, search
for newer scholarship.
- MISSING: Search with search_knowledge and search_content. If not found, defer.

IMPORTANT: Only call apply_review_answers if get_review_queue shows unanswered > 0.
After applying, re-read the file with get_entity and verify apply did not corrupt it
(garbage footnotes, mangled title). If it did, fix with update_document.'
  else
    review_step='1. REVIEW QUESTIONS: SKIP — the wrapper already confirmed 0 open review questions for this entity.'
  fi

  # Main instruction body. Single-quoted segments are literal; the
  # '"$var"' splices inject shell variables into the otherwise-literal text.
  prompt+='

STEPS — work through in order, skip any that do not apply:

'"$review_step"'

2. IDENTITY & ORGANIZATION:
For ruler documents: if the title is a single name, alias, or epithet (not a full name),
prioritize finding their full or commonly known name. Search local sources, check mentions
in other documents, and cross-reference with civilization docs.
If you find the full name, update the document title with update_document.

For any document: if the title or file location could be improved (e.g. a ruler doc is in
the wrong civilization folder, or the filename does not match the title), use the organize MCP
tool to rename/move it. Use organize(action='"'"'move'"'"', doc_id=..., to=...) to relocate
or update_document(id=..., title=...) to fix the title.

3. ENRICH FROM OUTSIDE SOURCES:
This is the most important step. Use web_search to find high-quality information about
this entity from scholarly and encyclopedic sources. Search for:
- The entity name + "archaeology" or "ancient history"
- Key events, dates, or relationships mentioned in the document
- Recent archaeological discoveries or revised scholarly consensus
Prefer peer-reviewed sources, university publications, museum databases, and established
encyclopedias. Add any new facts not already present, with source citations, following
factbase authoring conventions. Do NOT add speculative or poorly-sourced claims.

4. FACTBASE TOOL BUGS & FEATURE REQUESTS:
A key purpose of this improvement loop is to make factbase itself better. As you work
through steps 1-3, pay close attention to how the factbase tools behave. File a Vikunja
task for EACH of the following you encounter:

BUGS — any MCP tool error, unexpected result, silent failure, data corruption, or behavior
that contradicts the tool description. Use title prefix "[factbase]".

FEATURES — any friction, missing capability, awkward multi-step workaround, or gap that
slowed you down or limited what you could accomplish. Use title prefix "[factbase][feature]".

CRITICAL: Before filing ANY bug or feature request, check the EXISTING VIKUNJA BUG REPORTS
section above. If a similar issue is already filed, do NOT file a duplicate. Only file if
the issue is genuinely new.

CRITICAL: Factbase is domain-agnostic — it is used for many different knowledge bases, not
just this one. Your bug reports and feature requests MUST describe the issue in terms of
factbase tool behavior, not in terms of ancient history content. For example:
- GOOD: "apply_review_answers corrupts footnote references when document has >5 citations"
- BAD: "apply_review_answers broke the Ramesses II document"
- GOOD: "[feature] check_repository should detect duplicate entities across different folders"
- BAD: "[feature] factbase should know that Babylon appears in both civilizations/ and cities/"

Include in every task description:
- Which MCP tool was called (or which tool is missing/needed)
- What you expected to happen vs what actually happened
- Specific parameters or conditions that triggered the issue
- For features: the general use case and how it would help ANY factbase, not just this one

curl -s -X PUT "https://vikunja.home.everyonce.com/api/v1/projects/2/tasks" \
-H "Authorization: Bearer tk_ff251f3d3512775c71913bc2f8ec0dabbf5016a8" \
-H "Content-Type: application/json" \
-d '"'"'{"title":"[factbase] <summary>","description":"<detailed description>","priority":3}'"'"'
Priority: 1-2 for features/minor issues, 3 for normal bugs, 4-5 for data loss or blocking failures.

RULES:
- The document content is already provided above — do NOT call get_entity to re-read it
unless you just called apply_review_answers and need to verify the result
- Use update_document to edit — be surgical, change only what needs changing
- When calling update_document, do NOT include the <!-- factbase:XXXXXX --> comment or the
# Title heading in the content — factbase adds those automatically. Start content with
the first section (e.g. ## Overview). Including them causes duplicate headings.
- If the document has answered review questions (- [x] `@q[...] lines with > answer lines),
ALWAYS remove them from the content — they are stale artifacts, not part of the document.
- Do NOT run git add, git commit, or git push — the wrapper script handles all git operations
- If nothing needs changing, say so and move on

IMPORTANT: When finished, output exactly one line:
<action_summary>status: STATUS | '"$title"' | changes: DESCRIPTION</action_summary>
Status values: UPDATED (made changes), NO_CHANGE (nothing to do), ERROR (something failed)'

  # Run the agent; the `|| { ... }` guard keeps a non-zero exit from
  # tripping `set -e` and instead logs and returns 1 to the caller.
  local output
  output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
    "$prompt" 2>&1) || {
    log "❌ kiro-cli error for $title, continuing..."
    return 1
  }

  # Full transcript first (for the on-disk log), then the summary line.
  echo "$output"

  local summary
  summary=$(echo "$output" | grep -oP '(?<=<action_summary>).*(?=</action_summary>)' | tail -1)
  [[ -z "$summary" ]] && summary="status: UNKNOWN | $title | no summary returned"
  echo "$summary"
}
# ─── Main entity processing dispatcher ───
process_entity() {
  # Run the cheap bash cleanup first, then decide whether the (expensive)
  # agent pass is needed; commit, log, and record state either way.
  # Returns 0 if the entity was updated, 1 otherwise.
  local fb_id="$1"
  local file="$2"
  local review_count="$3"
  local mtime="$4"
  local last_processed="$5"
  local garbage_count="$6"
  local incomplete_name="$7"

  cd "$DOCS_DIR"

  local title
  title=$(grep '^# ' "$file" | head -1 | sed 's/^# //' | sed 's/ @t\[.*//;s/ \[\^.*//')

  log "━━━ [$title] ($fb_id) reviews=$review_count garbage=$garbage_count ━━━"

  local start_time
  start_time=$(date +%s)
  local status="NO_CHANGE"
  local summary=""

  # Phase 1: Bash cleanup (milliseconds, no agent)
  if bash_cleanup "$file"; then
    log " 🧹 Bash cleanup applied"
    status="UPDATED"
    # Recount after cleanup — it may have removed answered questions.
    review_count=$(grep -c '^\- \[ \] `@q\[' "$file" 2>/dev/null) || true
    garbage_count=0
  fi

  # Phase 2: Decide if agent is needed. The skip-unchanged check takes
  # precedence, then identity problems, then open review questions.
  # (A second, identical skip-unchanged branch used to sit after the
  # review-count check; it was unreachable dead code and has been removed.)
  local needs_agent=true
  if [[ "$SKIP_UNCHANGED" == true && "$last_processed" -gt 0 && "$mtime" -le "$last_processed" ]]; then
    needs_agent=false
    log " ⏭️ Not modified since last pass → skipping (--skip-unchanged)"
  elif [[ "${incomplete_name:-0}" -eq 1 ]]; then
    log " 👤 Incomplete name (ruler doc) → agent needed to resolve identity"
  elif [[ "$review_count" -gt 0 ]]; then
    log " 📋 $review_count review questions → agent needed"
  else
    log " 🔍 Enrichment + review pass"
  fi

  if [[ "$needs_agent" == true ]]; then
    local agent_output
    agent_output=$(process_entity_agent "$fb_id" "$file" "$title" "$review_count")
    echo "$agent_output"

    # The agent emits a machine-readable <action_summary> marker; parse the
    # last one in case the transcript echoes the instructions.
    summary=$(echo "$agent_output" | grep -oP '(?<=<action_summary>).*(?=</action_summary>)' | tail -1)
    local agent_status
    agent_status=$(echo "$summary" | grep -oP '^status: \K[A-Z_]+' || echo "UNKNOWN")
    if [[ "$agent_status" == "UPDATED" ]]; then
      status="UPDATED"
    fi
  fi

  local end_time
  end_time=$(date +%s)
  local duration=$((end_time - start_time))

  if [[ "$status" == "UPDATED" ]]; then
    do_commit "improve: $title"
  fi

  [[ -z "$summary" ]] && summary="status: $status | $title | bash-only pass"

  # Append a human-readable record for this entity.
  {
    echo "[$(date -Iseconds)] $fb_id | $title"
    echo " $summary"
    echo " duration: ${duration}s"
  } >> "$ACTION_LOG"

  mark_processed "$fb_id"
  log " Done (${duration}s) — $status"

  # Exit status tells the main loop whether to bump the UPDATED counter.
  [[ "$status" == "UPDATED" ]] && return 0 || return 1
}
# ═══════════════════════════════════════════
# DEEP CROSS-DOCUMENT SCAN (once per cycle)
# ═══════════════════════════════════════════
run_deep_scan() {
  # One agent invocation per pass that validates the whole repository
  # (check_repository with deep_check) rather than a single entity.
  # NOTE(review): the Vikunja bearer token is embedded in the prompt text —
  # consider injecting it from an environment variable instead.
  log "🔬 Running deep cross-document validation scan..."
  local output
  local rc=0
  # Capture the agent's exit code without tripping `set -e`.
  output=$(kiro-cli chat --trust-all-tools --no-interactive --model "$MODEL" \
    "Run check_repository with deep_check=true. Review any new issues found — answer what you can, defer what you cannot. If any factbase tool behaves unexpectedly during this process, file a bug to Vikunja (project 2, Authorization: Bearer tk_ff251f3d3512775c71913bc2f8ec0dabbf5016a8). Describe issues in domain-agnostic terms — factbase is used for many knowledge bases, not just this one." 2>&1) || rc=$?
  echo "$output"
  if [[ $rc -ne 0 ]]; then
    log "❌ Deep scan agent failed (exit code $rc), continuing..."
    return 1
  fi
  do_commit "deep scan: cross-document validation"
  log "✅ Deep scan complete"
}
# ═══════════════════════════════════════════
# MAIN LOOP
# ═══════════════════════════════════════════
log "🚀 Starting continuous improvement loop (priority=$PRIORITY, model=$MODEL, start=$START_AT, skip_unchanged=$SKIP_UNCHANGED)"
log "Docs dir: $DOCS_DIR"
log "State file: $STATE_FILE"
log "Press Ctrl+C to stop"

PASS=0
while true; do
  PASS=$((PASS + 1))
  log ""
  log "═══════════════════════════════════════════"
  log " PASS $PASS — $(TZ='America/Chicago' date '+%Y-%m-%d %r') — priority=$PRIORITY"
  log "═══════════════════════════════════════════"

  QUEUE=$(build_queue)
  # grep -c prints the count itself (including 0); the previous
  # `... || echo 0` form produced "0\n0" for an empty queue because
  # grep -c exits 1 on zero matches even though it already printed 0.
  TOTAL=$(grep -c . <<< "$QUEUE") || true
  log "Queue: $TOTAL entities (starting at $((START_AT + 1)))"

  PROCESSED=0
  UPDATED=0
  SKIPPED=0
  POSITION=0

  while IFS=$'\t' read -r fb_id file review_count mtime last_processed garbage_count incomplete_name; do
    [[ -z "$fb_id" ]] && continue
    POSITION=$((POSITION + 1))

    # Honor --start by skipping the first START_AT queue positions.
    if [[ $POSITION -le $START_AT ]]; then
      continue
    fi

    PROCESSED=$((PROCESSED + 1))
    log "[$POSITION/$TOTAL] Next up..."

    # Redirect stdin away from the here-string feeding this loop so nothing
    # process_entity spawns (e.g. the agent CLI) can consume queue lines.
    if process_entity "$fb_id" "$file" "$review_count" "$mtime" "$last_processed" "$garbage_count" "$incomplete_name" < /dev/null; then
      UPDATED=$((UPDATED + 1))
    fi

    sleep "$CYCLE_DELAY"
  done <<< "$QUEUE"

  log ""
  log "═══ Pass $PASS complete: $PROCESSED processed, $UPDATED updated ═══"

  run_deep_scan

  # Resume offset only applies to the first pass; later passes start fresh.
  START_AT=0
  log "Looping back to start..."
  sleep "$CYCLE_DELAY"
done