Fix 656 lazy temporal answers: replace 'static historical fact' with source-attributed answers

- Updated perspective.md: require source citations in temporal answers
- Filed feature request #75 for BCE temporal tag support (tested 7 formats, all rejected)
- Built batch script to replace all 'Static historical fact' answers with proper source attribution (ancient text date + modern publication year)
- Fixed source date detection bug (modern books about ancient figures)
- Answers now cite attesting source and its date per document footnotes
2026-02-22 23:00:39 +00:00
parent 71592a06ff
commit 01866caf6a
68 changed files with 892 additions and 731 deletions
--- a/.automate/fix-lazy-temporal.py
+++ b/.automate/fix-lazy-temporal.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+"""Replace 'Static historical fact' temporal answers with source-attributed answers."""
+import re, glob, os
+
+# Dates ('~'-prefixed, i.e. approximate) for known ancient authors, keyed by
+# the author's name in lowercase.
+# get_source_date() substring-matches these keys against the lowercased
+# citation and returns the first hit while iterating this dict, so the
+# insertion order below decides which date is used when a citation mentions
+# more than one of these names.
+ANCIENT_SOURCE_DATES = {
+    'herodotus': '~430 BCE',
+    'plutarch': '~75 CE',
+    'arrian': '~130 CE',
+    'ammianus': '~390 CE',
+    'caesar': '~50 BCE',
+    'frontinus': '~97 CE',
+    'cassius dio': '~229 CE',
+    'strabo': '~23 CE',
+    'polybius': '~150 BCE',
+    'thucydides': '~400 BCE',
+    'livy': '~10 CE',
+    'tacitus': '~110 CE',
+    'suetonius': '~121 CE',
+    'diodorus': '~50 BCE',
+    'appian': '~160 CE',
+    'xenophon': '~370 BCE',
+    'pausanias': '~175 CE',
+    'josephus': '~94 CE',
+    'pliny': '~77 CE',
+    'vitruvius': '~30 BCE',
+}
+
+def extract_year(citation):
+    """Return the first modern publication year found in a citation.
+
+    A year is a standalone four-digit number from 1800 through 2029, such as
+    the '2005' in '(Cambridge, 2005)'.
+
+    Args:
+        citation: Citation text to scan.
+
+    Returns:
+        The year as a string, or None when the citation contains no such
+        number.
+    """
+    found = re.search(r'\b(1[89]\d{2}|20[0-2]\d)\b', citation)
+    if found is None:
+        return None
+    return found.group(1)
+
+def get_source_date(citation):
+    """Return the date a cited source was written or published.
+
+    A modern publication year takes priority: a citation that contains one is
+    treated as a modern work even when it also names an ancient author
+    (e.g. "Goldsworthy, *Caesar*" followed by a year). Only citations without
+    a modern year are matched against ANCIENT_SOURCE_DATES.
+
+    Args:
+        citation: Footnote/citation text.
+
+    Returns:
+        The year string from extract_year(), or the date string of the
+        matching ANCIENT_SOURCE_DATES entry, or None when neither applies.
+    """
+    modern_year = extract_year(citation)
+    if modern_year:
+        return modern_year
+
+    cl = citation.lower()
+    # A citation can mention several known names, e.g. "Suetonius, *Lives of
+    # the Caesars*". Citations are assumed to lead with the author, so the
+    # name that occurs earliest in the text wins; dictionary order must not
+    # decide the result.
+    hits = [
+        (cl.find(name), date)
+        for name, date in ANCIENT_SOURCE_DATES.items()
+        if name in cl
+    ]
+    if not hits:
+        return None
+    return min(hits, key=lambda hit: hit[0])[1]
+
+def parse_footnotes(content):
+    """Collect the numbered footnote definitions of a markdown document.
+
+    A definition is a line of the form '[^12]: citation text' whose label
+    consists only of digits.
+
+    Args:
+        content: Full text of the document.
+
+    Returns:
+        Dict mapping each label (as a string) to its whitespace-stripped
+        text. When a label is defined more than once, the last definition
+        is the one kept.
+    """
+    definition = re.compile(r'^\[\^(\d+)\]:\s*(.+)$', re.MULTILINE)
+    return {
+        found.group(1): found.group(2).strip()
+        for found in definition.finditer(content)
+    }
+
+def extract_event_date(question_desc):
+    """Extract an event date such as '490 BCE' or '9 CE' from question text.
+
+    BCE dates are looked for first, so a question that mentions both eras
+    yields its first BCE date. Years written with thousands separators
+    ('1,200 BCE') are captured whole, and the era must be a complete token,
+    so uppercase words that merely start with 'CE' or 'BCE' do not count.
+
+    Args:
+        question_desc: The question line to scan.
+
+    Returns:
+        '<year> BCE' or '<year> CE' with the year as written in the text, or
+        None when no date is found.
+    """
+    # Try the comma-grouped form first; otherwise '1,200 BCE' would match
+    # only its trailing '200 BCE'.
+    number = r'(\d{1,3}(?:,\d{3})+|\d+)'
+    for era in ('BCE', 'CE'):
+        m = re.search(number + r'\s*' + era + r'\b', question_desc)
+        if m:
+            return f"{m.group(1)} {era}"
+    return None
+
+def build_source_attribution(footnotes):
+    """Build the source-attribution text from a document's footnotes.
+
+    Footnotes are listed in ascending numeric order. Each entry is the
+    shortened citation (the text before the first comma, or before the first
+    '(' when there is no comma), followed by the source date in parentheses
+    when get_source_date() finds one, and the footnote reference.
+
+    Args:
+        footnotes: Dict mapping footnote number (string) to citation text.
+
+    Returns:
+        The entries joined with '; ', or None when there are no footnotes.
+    """
+    entries = []
+    for label in sorted(footnotes, key=int):
+        citation = footnotes[label]
+        written = get_source_date(citation)
+        # Shorten the citation so the answer stays readable.
+        if ',' in citation:
+            short = citation.split(',')[0]
+        else:
+            short = citation.split('(')[0].strip()
+        dated = f" ({written})" if written else ""
+        entries.append(f"{short}{dated} [^{label}]")
+    if not entries:
+        return None
+    return '; '.join(entries)
+
+def fix_file(filepath):
+    """Replace lazy 'Static historical' temporal answers in one file.
+
+    A lazy answer is a line starting with '> Static historical' that directly
+    follows a line containing the '`@q[temporal]`' marker. Each one is
+    replaced by an answer citing the sources from the document's footnotes;
+    the question line and all other lines are kept as they are. The file is
+    rewritten only when at least one answer was replaced.
+
+    Args:
+        filepath: Path of the markdown file to process.
+
+    Returns:
+        The number of answers replaced (0 when the file is left untouched).
+    """
+    # Read and write as UTF-8 explicitly: the documents may contain non-ASCII
+    # text, and relying on the locale's default encoding could fail on it or
+    # re-encode it differently on write.
+    with open(filepath, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    if 'Static historical' not in content:
+        return 0
+
+    # Every replaced answer in this file cites the same document-wide list.
+    attribution = build_source_attribution(parse_footnotes(content))
+    if not attribution:
+        attribution = "source not yet identified"
+
+    count = 0
+    lines = content.split('\n')
+    new_lines = []
+    i = 0
+    while i < len(lines):
+        line = lines[i]
+        is_lazy = (i + 1 < len(lines) and
+                   '`@q[temporal]`' in line and
+                   lines[i+1].startswith('> Static historical'))
+        if not is_lazy:
+            new_lines.append(line)
+            i += 1
+            continue
+
+        new_lines.append(line)  # keep the question
+
+        old_answer = lines[i+1].lower()
+        if 'interpretation' in old_answer or 'scholarly' in old_answer:
+            new_answer = f"> Scholarly interpretation. {attribution}."
+        else:
+            # Take the date context from the question line itself.
+            event_date = extract_event_date(line)
+            date_prefix = f"{event_date} event." if event_date else "Historical event."
+            bce_note = " BCE temporal tags not yet supported by factbase." if event_date and 'BCE' in event_date else ""
+            new_answer = f"> {date_prefix} Attested by {attribution}.{bce_note}"
+
+        new_lines.append(new_answer)
+        count += 1
+        i += 2  # step over the old answer line that was just replaced
+
+    if count > 0:
+        with open(filepath, 'w', encoding='utf-8') as f:
+            f.write('\n'.join(new_lines))
+
+    return count
+
+# Run fix_file() over every markdown document under the repository root,
+# skipping paths inside VCS/tooling directories, and print per-file counts
+# followed by the overall total.
+total = 0
+for filepath in sorted(glob.glob('/home/ubuntu/work/factbase-ancient-history/**/*.md', recursive=True)):
+    if any(skipped in filepath for skipped in ('/.git/', '/.automate/', '/.kiro/', '/.factbase/')):
+        continue
+    fixed = fix_file(filepath)
+    if fixed <= 0:
+        continue
+    print(f"  {os.path.relpath(filepath, '/home/ubuntu/work/factbase-ancient-history')}: {fixed} answers fixed")
+    total += fixed
+
+print(f"\nTotal: {total} lazy temporal answers replaced with source-attributed answers")