agent-claw: automated task changes
This commit is contained in:
68
agentclaw/app/agent_claw_main/tools/web.py
Normal file
68
agentclaw/app/agent_claw_main/tools/web.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import os
|
||||
import threading
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import json
|
||||
import boto3
|
||||
|
||||
# Brave Search API
|
||||
_brave_key: str | None = None
|
||||
_brave_lock = threading.Lock()
|
||||
|
||||
|
||||
def _get_brave_key() -> str:
    """Return the Brave API key, fetching it from Secrets Manager on first use.

    The secret id is read from the ``BRAVE_API_KEY_SECRET_ARN`` environment
    variable, falling back to a built-in default ARN. The fetched value is
    stored in the module-level ``_brave_key`` so later calls skip the lookup.
    """
    global _brave_key

    # Fast path: key already cached, no need to take the lock.
    if _brave_key is not None:
        return _brave_key

    with _brave_lock:
        # Re-check under the lock: another thread may have filled the cache
        # while this one was waiting.
        if _brave_key is None:
            default_arn = 'arn:aws:secretsmanager:us-east-1:495395224548:secret:agent-claw/brave-api-key-uUSgzi'
            secret_id = os.environ.get('BRAVE_API_KEY_SECRET_ARN', default_arn)
            client = boto3.client('secretsmanager')
            response = client.get_secret_value(SecretId=secret_id)
            _brave_key = response['SecretString']
    return _brave_key
|
||||
|
||||
|
||||
def brave_search(query: str, count: int = 5) -> str:
    """Search the web using the Brave Search API.

    Args:
        query: Search terms, sent as the ``q`` query parameter.
        count: Number of results to request, sent as the ``count`` parameter.

    Returns:
        One entry per result (bold title, URL, description on separate
        lines), entries separated by a blank line, or ``'No results found.'``
        when the response carries no web results.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
    """
    api_key = _get_brave_key()
    params = urllib.parse.urlencode({'q': query, 'count': count})
    req = urllib.request.Request(
        f'https://api.search.brave.com/res/v1/web/search?{params}',
        headers={
            'Accept': 'application/json',
            'X-Subscription-Token': api_key,
        },
    )
    with urllib.request.urlopen(req, timeout=10) as resp:
        data = json.loads(resp.read())

    # dict.get(key, default) only covers a *missing* key; `or` also covers a
    # key that is present but JSON null, which would otherwise raise.
    results = (data.get('web') or {}).get('results') or []
    if not results:
        return 'No results found.'

    return '\n\n'.join(
        f"**{r.get('title', '')}**\n{r.get('url', '')}\n{r.get('description', '')}"
        for r in results
    )
|
||||
|
||||
|
||||
def web_fetch(url: str) -> str:
    """Fetch a web page and return its text content.

    Args:
        url: Absolute ``http://`` or ``https://`` URL to retrieve.

    Returns:
        At most 8000 characters of text: ``<script>``/``<style>`` blocks and
        all remaining tags removed, HTML entities decoded, and runs of
        spaces/tabs and blank lines collapsed. At most 1 MB of the response
        body is read.

    Raises:
        ValueError: If ``url`` does not use the http or https scheme.
        urllib.error.URLError: If the request fails.
    """
    import html
    import re

    # urlopen() also handles file:, ftp: and data: URLs. A web-fetch tool must
    # not double as a local-file reader, so only real web schemes are allowed.
    scheme = urllib.parse.urlsplit(url).scheme.lower()
    if scheme not in ('http', 'https'):
        raise ValueError(f'unsupported URL scheme: {scheme!r}')
    # NOTE(review): the host is not filtered, so internal or link-local
    # addresses remain reachable -- confirm whether that is acceptable here.

    req = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (compatible; agent-claw/1.0)'},
    )
    with urllib.request.urlopen(req, timeout=15) as resp:
        raw = resp.read(1024 * 1024)  # cap at 1MB
        # Honour the charset declared in Content-Type; default to UTF-8.
        charset = resp.headers.get_content_charset() or 'utf-8'

    try:
        text = raw.decode(charset, errors='ignore')
    except LookupError:
        # The server declared an encoding name Python does not know.
        text = raw.decode('utf-8', errors='ignore')

    # Basic text extraction (strip HTML tags)
    text = re.sub(r'<script[^>]*>.*?</script>', '', text, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<style[^>]*>.*?</style>', '', text, flags=re.DOTALL | re.IGNORECASE)
    text = re.sub(r'<[^>]+>', ' ', text)
    # Decode entities only after tags are gone, so an escaped "&lt;b&gt;" in
    # the page text is kept as literal "<b>" instead of being stripped.
    text = html.unescape(text)
    # \xa0 is what &nbsp; decodes to; fold it in with ordinary spaces.
    text = re.sub(r'[ \t\xa0]+', ' ', text)
    text = re.sub(r'\n{3,}', '\n\n', text)
    return text[:8000].strip()
|
||||
Reference in New Issue
Block a user