Disable extended thinking: causes blank responses via streaming retry
This commit is contained in:
@@ -272,15 +272,13 @@ async def main(payload: dict, context):
|
|||||||
system_prompt = system_prompt + f'\n\nCurrent date/time: {_time_str}'
|
system_prompt = system_prompt + f'\n\nCurrent date/time: {_time_str}'
|
||||||
print(f'[main] System prompt time injection: {_time_str}')
|
print(f'[main] System prompt time injection: {_time_str}')
|
||||||
|
|
||||||
# Model: claude-sonnet-4-6 via cross-region inference with extended thinking
|
# Model: claude-sonnet-4-6 via cross-region inference
|
||||||
|
# NOTE: extended thinking disabled — causes retry/duplicate issues with streaming
|
||||||
from botocore.config import Config as BotoConfig
|
from botocore.config import Config as BotoConfig
|
||||||
model = BedrockModel(
|
model = BedrockModel(
|
||||||
model_id="us.anthropic.claude-sonnet-4-6",
|
model_id="us.anthropic.claude-sonnet-4-6",
|
||||||
region_name="us-east-1",
|
region_name="us-east-1",
|
||||||
boto_client_config=BotoConfig(read_timeout=600, connect_timeout=10),
|
boto_client_config=BotoConfig(read_timeout=600, connect_timeout=10),
|
||||||
additional_model_request_fields={
|
|
||||||
"thinking": {"type": "enabled", "budget_tokens": 2000}
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
base_tools = [web_search, web_fetch, read_workspace_file, write_workspace_file,
|
base_tools = [web_search, web_fetch, read_workspace_file, write_workspace_file,
|
||||||
|
|||||||
Reference in New Issue
Block a user