refactor: slim system prompt — SOUL.md+STATUS.md only, fix duplicate time injection

2026-05-15 16:42:27 -05:00
parent 05fee423f2
commit e00702164d
2 changed files with 11 additions and 15 deletions
@@ -527,16 +527,8 @@ async def main(payload: dict, context):
    if ltm_block:
        system_prompt = system_prompt + '\n\n---\n\n' + ltm_block
-    # Inject current datetime so the model always has accurate time context
+    system_prompt += '\nAWS tools available: call_aws (any AWS API via AWS MCP Server), aws_list_lambda_functions, aws_get_cost_and_usage, aws_describe_service. Use call_aws directly for AWS API calls — do NOT say you lack AWS access.'
-    from datetime import datetime
-    from zoneinfo import ZoneInfo
-    _tz = ZoneInfo('America/Chicago')
-    _now = datetime.now(_tz)
-    _time_str = _now.strftime('%A, %B %d, %Y %I:%M %p %Z')
-    system_prompt = system_prompt + f'\n\nCurrent date/time: {_time_str}'
-    system_prompt += 'AWS tools available: call_aws (any AWS API via AWS MCP Server), aws_list_lambda_functions, aws_get_cost_and_usage, aws_describe_service. Use call_aws directly for AWS API calls — do NOT say you lack AWS access.'
    system_prompt += '\n\nSubagents available — use them aggressively to save cost and improve quality:\n- aws_agent: all AWS infrastructure, cost, resource, IAM, CloudWatch queries\n- coding_agent: code writing, builds, deployments, CodeBuild/AppRunner/ECR\n- document_agent: summarize URLs, extract data from documents, process long text\nDefault to delegating; only answer directly for simple conversational responses or tasks that don\'t fit a subagent.'
-    print(f'[main] System prompt time injection: {_time_str}')
    # Model: claude-sonnet-4-6 via cross-region inference
    # NOTE: extended thinking disabled — causes retry/duplicate issues with streaming
@@ -46,14 +46,14 @@ def _get_base_prompt(actor_id: str = '') -> str:
    s3 = boto3.client('s3')
    parts = []
-    # MEMORY.md removed — AgentCore Memory handles persistent facts via conversation history.
+    for fname in ['SOUL.md', 'STATUS.md']:
-    # Long-term memory extraction (retrieval_config + memory strategy) is the right layer for this.
-    for fname in ['SOUL.md', 'AGENTS.md', 'IDENTITY.md', 'TOOLS.md', 'HEARTBEAT.md']:
        try:
            obj = s3.get_object(Bucket=bucket, Key=fname)
            content = obj['Body'].read().decode('utf-8')
-            parts.append(f'## {fname}\n{content}')
+            if fname == 'STATUS.md':
+                parts.append(f'## Status — In Progress\n{content}')
+            else:
+                parts.append(content)
            print(f'[prompt_builder] Loaded {fname} ({len(content)} bytes)')
        except Exception as e:
            print(f'[prompt_builder] Failed to load {fname}: {e}')
@@ -74,7 +74,11 @@ def _get_base_prompt(actor_id: str = '') -> str:
        'want you to remember it.\n'
        '- If you notice a fact that seems important but may not be in LTM yet (e.g. a '
        'deadline, a preference, a name), you may say "I\'ll keep that in mind" — but do '
-        'not ask permission or make a production of it.'
+        'not ask permission or make a production of it.\n'
+        '- **In-progress tracking (STATUS.md):** When you start async work (CodeBuild job, '
+        'reminder, deployment, anything you need to check back on), update STATUS.md using '
+        "write_workspace_file('STATUS.md', content). Clear entries when complete. Check "
+        "STATUS.md at the start of sessions where Daniel asks 'what's happening' or 'any updates'."
    )
    parts.append('## Runtime\nRuntime: agent-claw | host=AgentCore | model=bedrock-claude-sonnet | channel=telegram | timezone=America/Chicago')