Fix 656 lazy temporal answers: replace 'static historical fact' with source-attributed answers
- Updated perspective.md: require source citations in temporal answers
- Filed feature request #75 for BCE temporal tag support (tested 7 formats, all rejected)
- Built batch script to replace all 'Static historical fact' answers with proper source attribution (ancient text date + modern publication year)
- Fixed source date detection bug (modern books about ancient figures)
- Answers now cite attesting source and its date per document footnotes
This commit is contained in:
144
.automate/fix-lazy-temporal.py
Normal file
144
.automate/fix-lazy-temporal.py
Normal file
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Replace 'Static historical fact' temporal answers with source-attributed answers."""
|
||||
import re, glob, os
|
||||
|
||||
# Known ancient source dates.
# Keys are lowercase author names so they can be compared against a
# lowercased citation; values are the display strings inserted into answers.
ANCIENT_SOURCE_DATES = dict((
    ('herodotus', '~430 BCE'),
    ('plutarch', '~75 CE'),
    ('arrian', '~130 CE'),
    ('ammianus', '~390 CE'),
    ('caesar', '~50 BCE'),
    ('frontinus', '~97 CE'),
    ('cassius dio', '~229 CE'),
    ('strabo', '~23 CE'),
    ('polybius', '~150 BCE'),
    ('thucydides', '~400 BCE'),
    ('livy', '~10 CE'),
    ('tacitus', '~110 CE'),
    ('suetonius', '~121 CE'),
    ('diodorus', '~50 BCE'),
    ('appian', '~160 CE'),
    ('xenophon', '~370 BCE'),
    ('pausanias', '~175 CE'),
    ('josephus', '~94 CE'),
    ('pliny', '~77 CE'),
    ('vitruvius', '~30 BCE'),
))
|
||||
|
||||
def extract_year(citation):
    """Return the first modern publication year found in *citation*.

    A modern year is a standalone four-digit number from 1800 through 2029,
    such as the ``2005`` in ``'(Cambridge, 2005)'``. The year is returned as
    a string; ``None`` is returned when the citation contains no such number.
    """
    found = re.search(r'\b(1[89]\d{2}|20[0-2]\d)\b', citation)
    if found is None:
        return None
    return found.group(1)
|
||||
|
||||
def get_source_date(citation):
    """Return the date a cited source was written or published, or None.

    Resolution order:

    1. If the citation contains a modern publication year (see
       ``extract_year``), that year is returned. A modern book that merely
       mentions an ancient author (e.g. "Goldsworthy, *Caesar*") must be
       dated by its own publication year, not the ancient author's.
    2. Otherwise the citation is searched for the names listed in
       ``ANCIENT_SOURCE_DATES`` and the date of the name that appears
       *earliest* in the citation is returned. Citations lead with the
       author, so "Suetonius, The Twelve Caesars" resolves to Suetonius
       rather than to Caesar.
    3. If neither applies, ``None`` is returned.
    """
    modern_year = extract_year(citation)
    if modern_year:
        return modern_year

    cl = citation.lower()
    earliest = None  # (match position, date) of the best candidate so far
    for name, date in ANCIENT_SOURCE_DATES.items():
        # Whole-word match only, so 'caesar' does not fire on 'caesarea'.
        hit = re.search(r'\b' + re.escape(name) + r'\b', cl)
        if hit and (earliest is None or hit.start() < earliest[0]):
            earliest = (hit.start(), date)
    return earliest[1] if earliest else None
|
||||
|
||||
def parse_footnotes(content):
    """Collect numbered footnote definitions from a markdown document.

    A definition is a line that starts with ``[^N]:`` where ``N`` is one or
    more digits. Returns a dict mapping the number (as a string) to the
    definition text with surrounding whitespace removed. If a number is
    defined more than once, the last definition wins.
    """
    definition = r'^\[\^(\d+)\]:\s*(.+)$'
    return {
        hit.group(1): hit.group(2).strip()
        for hit in re.finditer(definition, content, re.MULTILINE)
    }
|
||||
|
||||
def extract_event_date(question_desc):
    """Pull the first era-tagged year out of a question description.

    BCE dates are looked for first (an optional leading ``~`` is dropped),
    then CE dates. Returns a string such as ``"490 BCE"`` or ``"14 CE"``,
    or ``None`` when no such date is present.
    """
    era_patterns = (
        (r'~?(\d+)\s*BCE', 'BCE'),
        (r'(\d+)\s*CE', 'CE'),
    )
    for pattern, era in era_patterns:
        found = re.search(pattern, question_desc)
        if found:
            return f"{found.group(1)} {era}"
    return None
|
||||
|
||||
def build_source_attribution(footnotes):
    """Render a document's footnotes as one attribution string.

    *footnotes* maps footnote numbers (digit strings) to citation text.
    Each footnote, in ascending numeric order, becomes
    ``"<short> (<date>) [^N]"`` (the date part is omitted when
    ``get_source_date`` finds none). The entries are joined with ``'; '``.
    Returns ``None`` when there are no footnotes.
    """
    def render(num, text):
        date = get_source_date(text)
        # Shorten the citation: keep what precedes the first comma, or,
        # when there is no comma, what precedes the first parenthesis.
        if ',' in text:
            short = text.split(',')[0]
        else:
            short = text.split('(')[0].strip()
        label = f"{short} ({date})" if date else short
        return f"{label} [^{num}]"

    ordered = sorted(footnotes.items(), key=lambda item: int(item[0]))
    rendered = [render(num, text) for num, text in ordered]
    if not rendered:
        return None
    return '; '.join(rendered)
|
||||
|
||||
def fix_file(filepath):
    """Fix lazy temporal answers in a single file.

    The file is read as UTF-8. If it does not contain the text
    ``'Static historical'`` it is left untouched and 0 is returned.
    Otherwise every answer line that starts with ``'> Static historical'``
    and directly follows a line containing ``@q[temporal]`` (in backticks)
    is replaced with a source-attributed answer built from the document's
    footnotes. The file is rewritten only if at least one answer changed.

    Returns the number of answers replaced.
    """
    # Explicit UTF-8 so non-ASCII text round-trips identically regardless
    # of the platform's default encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    if 'Static historical' not in content:
        return 0

    footnotes = parse_footnotes(content)
    attribution = build_source_attribution(footnotes)
    if not attribution:
        attribution = "source not yet identified"

    count = 0
    lines = content.split('\n')
    new_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        # A question line immediately followed by a lazy answer line.
        if (i + 1 < len(lines) and
                '`@q[temporal]`' in line and
                lines[i + 1].startswith('> Static historical')):

            new_lines.append(line)  # keep the question

            # Extract date context from the question
            event_date = extract_event_date(line)
            date_prefix = f"{event_date} event." if event_date else "Historical event."

            # Answers flagged as interpretation get a different lead-in.
            old_answer = lines[i + 1].lower()
            if 'interpretation' in old_answer or 'scholarly' in old_answer:
                new_answer = f"> Scholarly interpretation. {attribution}."
            else:
                bce_note = (
                    " BCE temporal tags not yet supported by factbase."
                    if event_date and 'BCE' in event_date else ""
                )
                new_answer = f"> {date_prefix} Attested by {attribution}.{bce_note}"

            new_lines.append(new_answer)
            count += 1
            i += 2  # skip the old answer line that was just replaced
        else:
            new_lines.append(line)
            i += 1

    if count > 0:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write('\n'.join(new_lines))

    return count
|
||||
|
||||
# Root of the repository whose markdown files are processed.
REPO_ROOT = '/home/ubuntu/work/factbase-ancient-history'

# Path fragments marking directories whose files must not be rewritten.
SKIPPED_DIRS = ('/.git/', '/.automate/', '/.kiro/', '/.factbase/')


def main():
    """Process all markdown files under REPO_ROOT and print a summary.

    Every ``*.md`` file found recursively (in sorted order, skipping the
    directories in SKIPPED_DIRS) is passed to ``fix_file``. One line is
    printed per file that had answers replaced, followed by the total.
    """
    total = 0
    pattern = os.path.join(REPO_ROOT, '**', '*.md')
    for filepath in sorted(glob.glob(pattern, recursive=True)):
        if any(marker in filepath for marker in SKIPPED_DIRS):
            continue
        fixed = fix_file(filepath)
        if fixed > 0:
            print(f" {os.path.relpath(filepath, REPO_ROOT)}: {fixed} answers fixed")
            total += fixed

    print(f"\nTotal: {total} lazy temporal answers replaced with source-attributed answers")


# Run only when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user