Files
agent-claw/agentclaw/app/agent_claw_main/tools/web.py
2026-05-06 18:55:16 -05:00

69 lines
2.2 KiB
Python

import os
import threading
import urllib.request
import urllib.parse
import json
import boto3
# Brave Search API
# Process-wide cache for the Brave API key; stays None until
# _get_brave_key() has fetched it once.
_brave_key: str | None = None
# Held by _get_brave_key() while it populates _brave_key, so the fetch is
# not repeated by threads that arrive at the same time.
_brave_lock = threading.Lock()
def _get_brave_key() -> str:
    """Return the Brave Search API key, fetching it on first use.

    The key is read from AWS Secrets Manager and stored in the module-level
    ``_brave_key``; later calls return the stored value. The secret is
    identified by the ``BRAVE_API_KEY_SECRET_ARN`` environment variable,
    with a built-in ARN used when the variable is unset.

    Returns:
        The ``SecretString`` of the configured secret.
    """
    global _brave_key

    # Fast path: the key is already cached, no locking needed.
    if _brave_key is not None:
        return _brave_key

    with _brave_lock:
        # Re-check under the lock: another thread may have filled the cache
        # while this one was waiting.
        if _brave_key is None:
            arn = os.environ.get(
                'BRAVE_API_KEY_SECRET_ARN',
                'arn:aws:secretsmanager:us-east-1:495395224548:secret:agent-claw/brave-api-key-uUSgzi',
            )
            client = boto3.client('secretsmanager')
            secret = client.get_secret_value(SecretId=arn)
            _brave_key = secret['SecretString']
    return _brave_key
def brave_search(query: str, count: int = 5) -> str:
    """Run a web search through the Brave Search API and format the hits.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        count: Value sent as the ``count`` parameter (requested number of
            results).

    Returns:
        One entry per result, separated by blank lines. Each entry is the
        bolded title, then the URL, then the description, each on its own
        line. Returns ``'No results found.'`` when the response carries no
        web results.
    """
    token = _get_brave_key()
    query_string = urllib.parse.urlencode({'q': query, 'count': count})
    endpoint = f'https://api.search.brave.com/res/v1/web/search?{query_string}'
    request = urllib.request.Request(
        endpoint,
        headers={
            'Accept': 'application/json',
            'X-Subscription-Token': token,
        },
    )
    with urllib.request.urlopen(request, timeout=10) as response:
        payload = json.loads(response.read())

    hits = payload.get('web', {}).get('results', [])
    if not hits:
        return 'No results found.'

    # Missing fields render as empty strings rather than raising.
    return '\n\n'.join(
        f"**{hit.get('title', '')}**\n{hit.get('url', '')}\n{hit.get('description', '')}"
        for hit in hits
    )
def web_fetch(url: str) -> str:
    """Fetch an http(s) URL and return its text content with HTML stripped.

    At most 1 MiB of the response body is read. ``<script>`` and ``<style>``
    elements are removed together with their contents, remaining tags are
    replaced by spaces, runs of spaces/tabs and of blank lines are
    collapsed, and the result is truncated to 8000 characters.

    Args:
        url: Absolute ``http://`` or ``https://`` URL to fetch.

    Returns:
        The extracted text, stripped of leading/trailing whitespace.

    Raises:
        ValueError: If *url* does not use the http or https scheme.
    """
    import re

    # urlopen() also handles file://, ftp:// and data: URLs. This tool takes
    # caller-supplied URLs, so anything other than plain web schemes is
    # refused up front to avoid exposing local files.
    scheme = urllib.parse.urlsplit(url).scheme.lower()
    if scheme not in ('http', 'https'):
        raise ValueError(
            f'Unsupported URL scheme {scheme!r}; only http and https are allowed.'
        )

    req = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (compatible; agent-claw/1.0)'},
    )
    with urllib.request.urlopen(req, timeout=15) as resp:
        raw = resp.read(1024 * 1024)  # cap at 1MB
        # Honor the charset declared in Content-Type; default to UTF-8.
        charset = resp.headers.get_content_charset() or 'utf-8'

    try:
        text = raw.decode(charset, errors='ignore')
    except LookupError:
        # The server declared an encoding Python does not know.
        text = raw.decode('utf-8', errors='ignore')

    # Basic text extraction (strip HTML tags)
    text = re.sub(r'<script[^>]*>.*?</script>', '', text, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<[^>]+>', ' ', text)
    text = re.sub(r'[ \t]+', ' ', text)
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text[:8000].strip()