#!/usr/bin/env python3
"""Replace 'Static historical fact' temporal answers with source-attributed answers."""
import glob
import os
import re
import sys
from typing import Dict, Optional

# Known ancient source dates
ANCIENT_SOURCE_DATES = {
    'herodotus': '~430 BCE',
    'plutarch': '~75 CE',
    'arrian': '~130 CE',
    'ammianus': '~390 CE',
    'caesar': '~50 BCE',
    'frontinus': '~97 CE',
    'cassius dio': '~229 CE',
    'strabo': '~23 CE',
    'polybius': '~150 BCE',
    'thucydides': '~400 BCE',
    'livy': '~10 CE',
    'tacitus': '~110 CE',
    'suetonius': '~121 CE',
    'diodorus': '~50 BCE',
    'appian': '~160 CE',
    'xenophon': '~370 BCE',
    'pausanias': '~175 CE',
    'josephus': '~94 CE',
    'pliny': '~77 CE',
    'vitruvius': '~30 BCE',
}

# Repository processed when the script is run without arguments.
DEFAULT_ROOT = '/home/ubuntu/work/factbase-ancient-history'

# Path fragments identifying tool/VCS directories whose files are never edited.
SKIPPED_PATH_MARKERS = ('/.git/', '/.automate/', '/.kiro/', '/.factbase/')

# A four-digit year in the range 1800-2029, standing alone as a word.
_MODERN_YEAR_RE = re.compile(r'\b(1[89]\d{2}|20[0-2]\d)\b')

# A footnote definition line such as "[^3]: Herodotus, Histories 7.1".
# The negative lookahead stops an *empty* definition ("[^1]:" with nothing
# after it) from capturing the following "[^2]: ..." line as its own text,
# which would also hide footnote 2 from the scan.
_FOOTNOTE_RE = re.compile(
    r'^\[\^(\d+)\]:(?!\s*\[\^\d+\]:)\s*(.+)$', re.MULTILINE
)

_BCE_DATE_RE = re.compile(r'~?(\d+)\s*BCE')
_CE_DATE_RE = re.compile(r'(\d+)\s*CE')


def extract_year(citation: str) -> Optional[str]:
    """Extract publication year from a modern citation like '(Cambridge, 2005)'.

    Returns the first standalone year between 1800 and 2029 as a string,
    or None when the citation contains no such year.
    """
    m = _MODERN_YEAR_RE.search(citation)
    return m.group(1) if m else None


def get_source_date(citation: str) -> Optional[str]:
    """Get the date a source was written/published.

    A modern publication year takes precedence: if one is present the
    citation is treated as a modern source even when it mentions an ancient
    author's name (e.g., "Goldsworthy, *Caesar*"). Otherwise the citation is
    checked against ANCIENT_SOURCE_DATES. Returns None when neither applies.
    """
    modern_year = extract_year(citation)
    if modern_year:
        return modern_year

    # NOTE: plain case-insensitive substring match, so a key also matches
    # inside longer words (e.g. 'caesar' inside 'Caesarea').
    lowered = citation.lower()
    for name, date in ANCIENT_SOURCE_DATES.items():
        if name in lowered:
            return date
    return None


def parse_footnotes(content: str) -> Dict[str, str]:
    """Extract footnote definitions from document.

    Returns a dict mapping the footnote number (as a string) to its stripped
    definition text. Definitions with no text are skipped; if a number is
    defined more than once the last definition wins.
    """
    footnotes = {}
    for m in _FOOTNOTE_RE.finditer(content):
        footnotes[m.group(1)] = m.group(2).strip()
    return footnotes


def extract_event_date(question_desc: str) -> Optional[str]:
    """Try to extract a date from the question description.

    BCE dates are looked for first, then CE dates. Returns a string such as
    '490 BCE' or '122 CE', or None when no date is found.
    """
    m = _BCE_DATE_RE.search(question_desc)
    if m:
        return f"{m.group(1)} BCE"
    m = _CE_DATE_RE.search(question_desc)
    if m:
        return f"{m.group(1)} CE"
    return None


def build_source_attribution(footnotes: Dict[str, str]) -> Optional[str]:
    """Build a source attribution string from document footnotes.

    Footnotes are listed in numeric order as 'Label (date) [^n]' (or
    'Label [^n]' when no date is known), joined with '; '. Returns None when
    there are no footnotes.
    """
    parts = []
    for num, text in sorted(footnotes.items(), key=lambda item: int(item[0])):
        date = get_source_date(text)
        # Shorten the citation for the answer: everything before the first
        # comma, or before the first '(' when there is no comma.
        if ',' in text:
            short = text.split(',')[0].strip()
        else:
            short = text.split('(')[0].strip()
        # A citation that starts with ',' or '(' would leave an empty label;
        # fall back to the full text rather than emit a blank attribution.
        short = short or text
        if date:
            parts.append(f"{short} ({date}) [^{num}]")
        else:
            parts.append(f"{short} [^{num}]")
    return '; '.join(parts) if parts else None


def fix_file(filepath: str) -> int:
    """Fix lazy temporal answers in a single file.

    A lazy answer is a line starting with '> Static historical' that directly
    follows a line containing '`@q[temporal]`'. Each one is replaced with an
    answer built from the document's footnotes. The file is rewritten only
    when at least one answer changed.

    Returns the number of answers replaced.
    """
    # Explicit UTF-8 so non-ASCII text in the Markdown survives the round
    # trip regardless of the platform's default locale encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    if 'Static historical' not in content:
        return 0

    attribution = build_source_attribution(parse_footnotes(content))
    if not attribution:
        attribution = "source not yet identified"

    count = 0
    lines = content.split('\n')
    new_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        # Check if next line is a lazy temporal answer
        is_lazy_pair = (
            i + 1 < len(lines)
            and '`@q[temporal]`' in line
            and lines[i + 1].startswith('> Static historical')
        )
        if not is_lazy_pair:
            new_lines.append(line)
            i += 1
            continue

        new_lines.append(line)  # keep the question
        # Extract date context from the question
        event_date = extract_event_date(line)
        date_prefix = f"{event_date} event." if event_date else "Historical event."

        # Check if it's a scholarly interpretation
        old_answer = lines[i + 1].lower()
        if 'interpretation' in old_answer or 'scholarly' in old_answer:
            new_answer = f"> Scholarly interpretation. {attribution}."
        else:
            if event_date and 'BCE' in event_date:
                bce_note = " BCE temporal tags not yet supported by factbase."
            else:
                bce_note = ""
            new_answer = f"> {date_prefix} Attested by {attribution}.{bce_note}"
        new_lines.append(new_answer)
        count += 1
        i += 2  # skip the question and the answer that was just replaced

    if count > 0:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write('\n'.join(new_lines))
    return count


def main(root: str = DEFAULT_ROOT) -> int:
    """Process all markdown files under *root* and print a summary.

    Files whose path contains one of SKIPPED_PATH_MARKERS are left alone.
    Returns the total number of answers replaced.
    """
    total = 0
    pattern = os.path.join(root, '**', '*.md')
    for filepath in sorted(glob.glob(pattern, recursive=True)):
        if any(marker in filepath for marker in SKIPPED_PATH_MARKERS):
            continue
        fixed = fix_file(filepath)
        if fixed > 0:
            print(f" {os.path.relpath(filepath, root)}: {fixed} answers fixed")
            total += fixed
    print(f"\nTotal: {total} lazy temporal answers replaced with source-attributed answers")
    return total


if __name__ == '__main__':
    # An optional first argument overrides the default repository root.
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_ROOT)