agent-claw: automated task changes

This commit is contained in:
daniel
2026-05-06 18:55:16 -05:00
parent 38905bb1e9
commit 732b00fb66
8494 changed files with 2018127 additions and 4 deletions

View File

@@ -0,0 +1,255 @@
import json
import logging
import os
import re
import subprocess
import shutil
import tempfile
import zipfile
import boto3
# Root logger, emitting INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# helm and the aws cli come from the kubectl layer; put them first on PATH.
os.environ['PATH'] = ':'.join(['/opt/helm', '/opt/awscli', os.environ['PATH']])

# Scratch directory (overridable through TEST_OUTDIR) and the kubeconfig kept in it.
outdir = os.environ.get('TEST_OUTDIR', '/tmp')
kubeconfig = os.path.join(outdir, 'kubeconfig')
def get_chart_asset_from_url(chart_asset_url):
    """Download a zipped chart with ``aws s3 cp`` and unpack it under ``outdir``.

    Args:
        chart_asset_url: Source location handed to ``aws s3 cp``.

    Returns:
        Path of the directory (``<outdir>/chart``) the archive was extracted to.

    Raises:
        subprocess.CalledProcessError: if the ``aws s3 cp`` command fails.
        zipfile.BadZipFile: if the downloaded file is not a valid zip archive.
    """
    chart_zip = os.path.join(outdir, 'chart.zip')

    # Best-effort removal of a leftover download. chart_zip is normally a
    # regular file, and shutil.rmtree() only removes directories (with
    # ignore_errors=True it silently does nothing for a file), so files and
    # directories are handled separately.
    if os.path.isdir(chart_zip) and not os.path.islink(chart_zip):
        shutil.rmtree(chart_zip, ignore_errors=True)
    else:
        try:
            os.remove(chart_zip)
        except OSError:
            # Nothing to remove (or not removable); the copy below reports
            # any real problem with the destination.
            pass

    subprocess.check_call(['aws', 's3', 'cp', chart_asset_url, chart_zip])

    # Always extract into a fresh, empty directory.
    chart_dir = os.path.join(outdir, 'chart')
    shutil.rmtree(chart_dir, ignore_errors=True)
    os.mkdir(chart_dir)

    with zipfile.ZipFile(chart_zip, 'r') as zip_ref:
        zip_ref.extractall(chart_dir)

    return chart_dir
def is_ecr_public_available(region):
    """Return True when ``region`` belongs to the ``aws`` partition.

    The partition is resolved through a default ``boto3.Session``.
    """
    partition = boto3.Session().get_partition_for_region(region)
    return partition == 'aws'
def helm_handler(event, context):
    """Handle a Create/Update/Delete request for a Helm release.

    Reads the release settings from ``event['ResourceProperties']``, writes a
    kubeconfig for the target cluster with ``aws eks update-kubeconfig`` and
    then runs ``helm upgrade --install`` (Create/Update) or ``helm uninstall``
    (Delete) through ``helm()``.

    Args:
        event: Request dict. ``RequestType`` and ``ResourceProperties`` are
            required; the properties must contain ``ClusterName`` and
            ``Release``.
        context: Not used.

    Raises:
        RuntimeError: on Create/Update when neither ``Chart`` nor
            ``ChartAssetURL`` is given, when ``ChartAssetURL`` is combined
            with ``Chart``/``Repository``/``Version``, or when it is not an
            ``s3://`` URL.
        subprocess.CalledProcessError: if an ``aws`` command fails.
        Exception: whatever ``helm()`` / ``get_chart_from_oci()`` raise on
            Create/Update. Errors during Delete are logged and swallowed.
    """
    # Log the request with the ResponseURL value masked.
    logger.info(json.dumps(dict(event, ResponseURL='...')))

    request_type = event['RequestType']
    props = event['ResourceProperties']

    # resource properties
    # NOTE(review): the boolean-like flags (Wait, Atomic, CreateNamespace,
    # SkipCrds) are used by truthiness only; if the caller can deliver them as
    # strings, "false" would count as set - confirm against the provider.
    cluster_name = props['ClusterName']
    release = props['Release']
    chart = props.get('Chart', None)
    chart_asset_url = props.get('ChartAssetURL', None)
    version = props.get('Version', None)
    wait = props.get('Wait', False)
    atomic = props.get('Atomic', False)
    timeout = props.get('Timeout', None)
    namespace = props.get('Namespace', None)
    create_namespace = props.get('CreateNamespace', None)
    repository = props.get('Repository', None)
    values_text = props.get('Values', None)
    skip_crds = props.get('SkipCrds', False)

    # "log in" to the cluster
    subprocess.check_call([
        'aws', 'eks', 'update-kubeconfig',
        '--name', cluster_name,
        '--kubeconfig', kubeconfig,
    ])

    # Restrict the kubeconfig to the current user.
    if os.path.isfile(kubeconfig):
        os.chmod(kubeconfig, 0o600)

    # Write out the values to a file and include them with the install and upgrade
    values_file = None
    if request_type != "Delete" and values_text is not None:
        values = json.loads(values_text)
        values_file = os.path.join(outdir, 'values.yaml')
        with open(values_file, "w") as f:
            f.write(json.dumps(values, indent=2))

    if request_type in ('Create', 'Update'):
        # Ensure chart or chart_asset_url are set
        if chart is None and chart_asset_url is None:
            raise RuntimeError('chart or chartAsset must be specified')

        if chart_asset_url is not None:
            # Explicit raise instead of assert: asserts vanish under "python -O".
            if chart is not None or repository is not None or version is not None:
                raise RuntimeError('Chart, Repository and Version must not be set together with ChartAssetURL')
            if not chart_asset_url.startswith('s3://'):
                raise RuntimeError(f'ChartAssetURL must point to as s3 location but is {chart_asset_url}')
            # future work: support versions from s3 assets
            chart = get_chart_asset_from_url(chart_asset_url)

        tmpdir = None
        try:
            if repository is not None and repository.startswith('oci://'):
                tmpdir = tempfile.TemporaryDirectory()
                chart = get_chart_from_oci(tmpdir.name, repository, version)
                # Chart is now local - clear repository and version so helm() doesn't
                # pass --repo/--version to "helm upgrade". Helm v4 (kubectl-v35+)
                # rejects --repo with oci:// URLs ("invalid reference"), unlike v3.
                repository = None
                version = None

            # skip_crds is forwarded by keyword so the SkipCrds property takes effect.
            helm('upgrade', release, chart, repository, values_file, namespace, version,
                 wait, timeout, create_namespace, skip_crds=skip_crds, atomic=atomic)
        finally:
            # The pulled chart is only needed for the duration of the upgrade.
            if tmpdir is not None:
                tmpdir.cleanup()
    elif request_type == "Delete":
        # Deletion is deliberately best-effort: a failing uninstall is logged
        # but does not fail the request.
        try:
            helm('uninstall', release, namespace=namespace, wait=wait, timeout=timeout)
        except Exception as e:
            logger.info("delete error: %s" % e)
def get_oci_cmd(repository, version):
    """Build the commands needed to pull a chart from an OCI repository.

    Private ECR and public ECR repositories are recognised by URL pattern and
    get login commands in addition to the ``helm pull`` command.

    Args:
        repository: Repository URL, e.g. ``oci://<registry>/<path>``.
        version: Chart version passed as ``--version``; omitted when ``None``.

    Returns:
        A dict that always contains ``'helm'`` (the ``helm pull`` argv) and,
        when a registry login is required, ``'ecr_login'`` and
        ``'helm_registry_login'`` (argv lists meant to be piped together).
    """
    # Raw strings so the regex escapes (\d, \.) are not parsed as string escapes.
    private_ecr_pattern = r'oci://(?P<registry>\d+\.dkr\.ecr\.(?P<region>[a-z0-9\-]+)\.(?P<domain>[a-z0-9\.-]+))*'
    public_ecr_pattern = r'oci://(?P<registry>public\.ecr\.aws)*'

    # re.match() returns None when the URL does not start with "oci://";
    # treat that as "no registry recognised" instead of failing on None.
    private_match = re.match(private_ecr_pattern, repository)
    public_match = re.match(public_ecr_pattern, repository)
    private_registry = private_match.groupdict() if private_match else {}
    public_registry = public_match.groupdict() if public_match else {}

    # Build helm pull command as array; --version only when one was given,
    # since a None entry would make subprocess reject the argv.
    helm_cmd = ['helm', 'pull', repository]
    if version is not None:
        helm_cmd.extend(['--version', version])
    helm_cmd.append('--untar')

    if private_registry.get('registry') is not None:
        logger.info("Found AWS private repository")
        ecr_login = ['aws', 'ecr', 'get-login-password', '--region', private_registry['region']]
        helm_registry_login = ['helm', 'registry', 'login', '--username', 'AWS', '--password-stdin', private_registry['registry']]
        return {'ecr_login': ecr_login, 'helm_registry_login': helm_registry_login, 'helm': helm_cmd}

    if public_registry.get('registry') is not None:
        logger.info("Found AWS public repository, will use default region as deployment")
        region = os.environ.get('AWS_REGION', 'us-east-1')

        if is_ecr_public_available(region):
            # Public ECR auth is always in us-east-1: https://docs.aws.amazon.com/AmazonECR/latest/public/public-registry-auth.html
            ecr_login = ['aws', 'ecr-public', 'get-login-password', '--region', 'us-east-1']
            helm_registry_login = ['helm', 'registry', 'login', '--username', 'AWS', '--password-stdin', public_registry['registry']]
            return {'ecr_login': ecr_login, 'helm_registry_login': helm_registry_login, 'helm': helm_cmd}

        # No login required for public ECR in non-aws regions
        # see https://helm.sh/docs/helm/helm_registry_login/
        return {'helm': helm_cmd}

    logger.error("OCI repository format not recognized, falling back to helm pull")
    return {'helm': helm_cmd}
def get_chart_from_oci(tmpdir, repository=None, version=None):
    """Pull an OCI chart into ``tmpdir`` and return the path of the unpacked chart.

    The commands come from ``get_oci_cmd()``. When it returns login commands
    they are run before ``helm pull``. The whole sequence is retried up to
    three times when the failing command's output contains ``Broken pipe``.

    Args:
        tmpdir: Working directory for all commands; the chart is untarred here.
        repository: OCI repository URL.
        version: Chart version, forwarded to ``get_oci_cmd()``.

    Returns:
        ``<tmpdir>/<last path segment of repository>``, which is where
        ``helm pull --untar`` stores an OCI artifact.

    Raises:
        Exception: with the command output when a command fails for any
            reason other than a broken pipe, or after all attempts are used.
    """
    commands = get_oci_cmd(repository, version)
    needs_login = 'ecr_login' in commands and 'helm_registry_login' in commands

    maxAttempts = 3
    retry = maxAttempts
    output = b''
    while retry > 0:
        try:
            # Execute login commands if needed
            if needs_login:
                _oci_registry_login(commands, repository, tmpdir)

            # Execute helm pull command
            logger.info("Running helm command: %s", commands['helm'])
            output = subprocess.check_output(commands['helm'], stderr=subprocess.STDOUT, cwd=tmpdir)
            # Decode before logging so the log shows text, not a bytes repr.
            logger.info(output.decode('utf-8', errors='replace'))

            # effectively returns "$tmpDir/$lastPartOfOCIUrl", because this is how helm pull saves OCI artifact.
            # Eg. if we have oci://9999999999.dkr.ecr.us-east-1.amazonaws.com/foo/bar/pet-service repository, helm saves artifact under $tmpDir/pet-service
            return os.path.join(tmpdir, repository.rpartition('/')[-1])
        except subprocess.CalledProcessError as exc:
            output = exc.output or b''
            if b'Broken pipe' in output:
                retry = retry - 1
                logger.info("Broken pipe, retries left: %s" % retry)
            else:
                raise Exception(output)

    raise Exception(f'Operation failed after {maxAttempts} attempts: {output.decode("utf-8", errors="replace")}')


def _oci_registry_login(commands, repository, workdir):
    """Pipe ``aws ecr get-login-password`` into ``helm registry login``.

    Raises ``subprocess.CalledProcessError`` (carrying the failing command's
    stderr as ``output``) when either side of the pipeline exits non-zero.
    """
    logger.info("Running login command: %s", commands['ecr_login'])
    logger.info("Running registry login command: %s", commands['helm_registry_login'])

    # The context managers close every pipe and reap both processes on all
    # paths, including when starting the second process fails.
    with subprocess.Popen(commands['ecr_login'], stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE, cwd=workdir) as p1:
        # Start the consumer right away (no p1.wait() first) so it drains
        # p1's stdout as it is produced and the pipe buffer cannot fill up.
        with subprocess.Popen(commands['helm_registry_login'], stdin=p1.stdout,
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                              cwd=workdir) as p2:
            p1.stdout.close()  # Allow p1 to receive SIGPIPE if p2 exits early
            # Wait for p2 to finish first (ensures full pipeline completes)
            _, p2_err = p2.communicate()
        # Drain p1's stderr to EOF before it is waited on (on leaving the
        # block), so an unread stderr pipe can never stall p1.
        p1_err = p1.stderr.read()

    # Handle p1 failure
    if p1.returncode != 0:
        logger.error(
            "ECR get-login-password failed for repository %s. Error: %s",
            repository,
            p1_err.decode('utf-8', errors='replace') or "No error details"
        )
        raise subprocess.CalledProcessError(p1.returncode, commands['ecr_login'], p1_err)

    # Handle p2 failure
    if p2.returncode != 0:
        logger.error(
            "Helm registry authentication failed for repository %s. Error: %s",
            repository,
            p2_err.decode('utf-8', errors='replace') or "No error details"
        )
        raise subprocess.CalledProcessError(p2.returncode, commands['helm_registry_login'], p2_err)
def helm(verb, release, chart = None, repo = None, file = None, namespace = None, version = None, wait = False, timeout = None, create_namespace = None, skip_crds = False, atomic = False):
    """Run ``helm <verb> <release>`` against the cluster in ``kubeconfig``.

    Optional arguments are turned into the matching helm flags; ``upgrade``
    always gets ``--install``. The command runs in ``outdir`` and is retried
    up to three times when its output contains ``Broken pipe``.

    Raises:
        Exception: with the raw command output when helm fails for any other
            reason, or with a summary message once all attempts are used.
    """
    args = ['helm', verb, release]
    if chart is not None:
        args.append(chart)
    if verb == 'upgrade':
        args.append('--install')
    if create_namespace:
        args.append('--create-namespace')

    # Flags that carry a value and are emitted only when that value is given.
    valued_flags = (
        ('--repo', repo),
        ('--values', file),
        ('--version', version),
        ('--namespace', namespace),
    )
    for flag, value in valued_flags:
        if value is not None:
            args += [flag, value]

    if wait:
        args.append('--wait')
    if skip_crds:
        args.append('--skip-crds')
    if timeout is not None:
        args += ['--timeout', timeout]
    if atomic:
        args.append('--atomic')
    args += ['--kubeconfig', kubeconfig]

    # Log the full helm command for better troubleshooting
    logger.info("Running command: %s", args)

    max_attempts = 3
    for attempts_left in reversed(range(max_attempts)):
        try:
            output = subprocess.check_output(args, stderr=subprocess.STDOUT, cwd=outdir)
        except subprocess.CalledProcessError as exc:
            output = exc.output
            if b'Broken pipe' not in output:
                # Any other failure is final: log it and give up.
                error_message = output.decode('utf-8', errors='replace')
                logger.error("Command failed: %s", args)
                logger.error("Error output: %s", error_message)
                raise Exception(output)
            logger.info("Broken pipe, retries left: %s" % attempts_left)
        else:
            logger.info(output.decode('utf-8', errors='replace'))
            return

    raise Exception(f'Operation failed after {max_attempts} attempts: {output.decode("utf-8", errors="replace")}')