69 lines
2.2 KiB
Python
69 lines
2.2 KiB
Python
import os
|
|
import threading
|
|
import urllib.request
|
|
import urllib.parse
|
|
import json
|
|
import boto3
|
|
|
|
# Brave Search API
|
|
_brave_key: str | None = None
|
|
_brave_lock = threading.Lock()
|
|
|
|
|
|
def _get_brave_key() -> str:
    """Return the Brave Search API key, loading it on first use.

    The key is read from AWS Secrets Manager. The secret id is taken from the
    ``BRAVE_API_KEY_SECRET_ARN`` environment variable, falling back to a
    built-in ARN, and the secret's ``SecretString`` is stored in the
    module-level ``_brave_key`` so later calls return it without another
    lookup.
    """
    global _brave_key

    # Fast path: the key has already been loaded, no lock required.
    if _brave_key is not None:
        return _brave_key

    with _brave_lock:
        # Check again under the lock: another thread may have loaded the key
        # while this one was waiting to acquire it.
        if _brave_key is None:
            default_arn = 'arn:aws:secretsmanager:us-east-1:495395224548:secret:agent-claw/brave-api-key-uUSgzi'
            arn = os.environ.get('BRAVE_API_KEY_SECRET_ARN', default_arn)
            client = boto3.client('secretsmanager')
            secret = client.get_secret_value(SecretId=arn)
            _brave_key = secret['SecretString']
    return _brave_key
|
|
|
|
|
|
def brave_search(query: str, count: int = 5) -> str:
    """Run a Brave web search and return the hits as Markdown-style text.

    Args:
        query: Search terms, sent as the ``q`` query parameter.
        count: Value sent as the ``count`` query parameter (defaults to 5).

    Returns:
        One entry per result, separated by blank lines, each consisting of
        the bolded title, the URL and the description on consecutive lines;
        or the string ``'No results found.'`` when the response contains no
        web results.
    """
    # Resolve the key first so a secret-lookup failure surfaces before any
    # request is built.
    token = _get_brave_key()
    query_string = urllib.parse.urlencode({'q': query, 'count': count})
    endpoint = f'https://api.search.brave.com/res/v1/web/search?{query_string}'
    request_headers = {
        'Accept': 'application/json',
        'X-Subscription-Token': token,
    }
    request = urllib.request.Request(endpoint, headers=request_headers)

    with urllib.request.urlopen(request, timeout=10) as response:
        payload = json.load(response)

    hits = payload.get('web', {}).get('results', [])
    if hits:
        return '\n\n'.join(
            f"**{hit.get('title', '')}**\n{hit.get('url', '')}\n{hit.get('description', '')}"
            for hit in hits
        )
    return 'No results found.'
|
|
|
|
|
|
def web_fetch(url: str) -> str:
    """Fetch a web page and return a plain-text rendering of its body.

    The URL is requested with a custom User-Agent and a 15 second timeout,
    and at most 1 MiB of the response body is read. ``<script>`` and
    ``<style>`` elements are dropped, every remaining tag is replaced by a
    space, runs of spaces/tabs and of three or more newlines are collapsed,
    and the result is truncated to 8000 characters.

    Args:
        url: Absolute ``http://`` or ``https://`` URL to fetch.

    Returns:
        The extracted text, at most 8000 characters long, with leading and
        trailing whitespace removed.

    Raises:
        ValueError: If ``url`` does not use the ``http`` or ``https`` scheme.
        urllib.error.URLError: If the request fails (this includes
            ``HTTPError`` for error status codes).
    """
    import re  # function-scope import: the module does not import re at the top

    # urlopen() also understands file:, ftp: and data: URLs. This function is
    # meant for web pages only, so refuse anything else instead of letting a
    # caller-supplied URL read local files.
    scheme = urllib.parse.urlsplit(url).scheme.lower()
    if scheme not in ('http', 'https'):
        raise ValueError(f'unsupported URL scheme: {scheme!r} (expected http or https)')

    req = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (compatible; agent-claw/1.0)'},
    )
    with urllib.request.urlopen(req, timeout=15) as resp:
        raw = resp.read(1024 * 1024)  # cap at 1 MiB
        # Honour the charset declared in the Content-Type header, if any.
        charset = resp.headers.get_content_charset() or 'utf-8'

    try:
        text = raw.decode(charset, errors='ignore')
    except LookupError:
        # The server declared an encoding Python does not know; fall back.
        text = raw.decode('utf-8', errors='ignore')

    # Basic text extraction: drop script/style elements, then all other tags.
    text = re.sub(r'<script[^>]*>.*?</script>', '', text, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<[^>]+>', ' ', text)
    text = re.sub(r'[ \t]+', ' ', text)
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text[:8000].strip()
|