agent-claw: automated task changes

This commit is contained in:
daniel
2026-05-06 18:55:16 -05:00
parent 38905bb1e9
commit 732b00fb66
8494 changed files with 2018127 additions and 4 deletions

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,8 @@
import { Construct } from "constructs";
import * as lambda from "../../../aws-lambda";
export declare class ClusterResourceOnEventFunction extends lambda.Function {
constructor(scope: Construct, id: string, props?: lambda.FunctionOptions);
}
export declare class ClusterResourceIsCompleteFunction extends lambda.Function {
constructor(scope: Construct, id: string, props?: lambda.FunctionOptions);
}

View File

@@ -0,0 +1 @@
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.ClusterResourceIsCompleteFunction=exports.ClusterResourceOnEventFunction=void 0;const path=require("path"),lambda=require("../../../aws-lambda");class ClusterResourceOnEventFunction extends lambda.Function{constructor(scope,id,props){super(scope,id,{...props,code:lambda.Code.fromAsset(path.join(__dirname,"cluster-resource-handler")),handler:"index.onEvent",runtime:lambda.determineLatestNodeRuntime(scope)}),this.node.addMetadata("aws:cdk:is-custom-resource-handler-runtime-family",this.runtime.family)}}exports.ClusterResourceOnEventFunction=ClusterResourceOnEventFunction;class ClusterResourceIsCompleteFunction extends lambda.Function{constructor(scope,id,props){super(scope,id,{...props,code:lambda.Code.fromAsset(path.join(__dirname,"cluster-resource-handler")),handler:"index.isComplete",runtime:lambda.determineLatestNodeRuntime(scope)}),this.node.addMetadata("aws:cdk:is-custom-resource-handler-runtime-family",this.runtime.family)}}exports.ClusterResourceIsCompleteFunction=ClusterResourceIsCompleteFunction;

View File

@@ -0,0 +1,95 @@
import json
import logging
import os
import subprocess
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# these are coming from the kubectl layer
os.environ['PATH'] = '/opt/kubectl:/opt/awscli:' + os.environ['PATH']
outdir = os.environ.get('TEST_OUTDIR', '/tmp')
kubeconfig = os.path.join(outdir, 'kubeconfig')
def apply_handler(event, context):
logger.info(json.dumps(dict(event, ResponseURL='...')))
request_type = event['RequestType']
props = event['ResourceProperties']
# resource properties (all required)
cluster_name = props['ClusterName']
manifest_text = props['Manifest']
role_arn = props['RoleArn']
prune_label = props.get('PruneLabel', None)
overwrite = props.get('Overwrite', 'false').lower() == 'true'
skip_validation = props.get('SkipValidation', 'false').lower() == 'true'
# "log in" to the cluster
cmd = [ 'aws', 'eks', 'update-kubeconfig',
'--role-arn', role_arn,
'--name', cluster_name,
'--kubeconfig', kubeconfig
]
logger.info(f'Running command: {cmd}')
subprocess.check_call(cmd)
if os.path.isfile(kubeconfig):
os.chmod(kubeconfig, 0o600)
# write resource manifests in sequence: { r1 }{ r2 }{ r3 } (this is how
# a stream of JSON objects can be included in a k8s manifest).
manifest_list = json.loads(manifest_text)
manifest_file = os.path.join(outdir, 'manifest.yaml')
with open(manifest_file, "w") as f:
f.writelines(map(lambda obj: json.dumps(obj), manifest_list))
logger.info("manifest written to: %s" % manifest_file)
kubectl_opts = []
if skip_validation:
kubectl_opts.extend(['--validate=false'])
if request_type == 'Create':
# if "overwrite" is enabled, then we use "apply" for CREATE operations
# which technically means we can determine the desired state of an
# existing resource.
if overwrite:
kubectl('apply', manifest_file, *kubectl_opts)
else:
# --save-config will allow us to use "apply" later
kubectl_opts.extend(['--save-config'])
kubectl('create', manifest_file, *kubectl_opts)
elif request_type == 'Update':
if prune_label is not None:
kubectl_opts.extend(['--prune', '-l', prune_label])
kubectl('apply', manifest_file, *kubectl_opts)
elif request_type == "Delete":
try:
kubectl('delete', manifest_file)
except Exception as e:
logger.info("delete error: %s" % e)
def kubectl(verb, file, *opts):
maxAttempts = 3
retry = maxAttempts
while retry > 0:
try:
cmd = ['kubectl', verb, '--kubeconfig', kubeconfig, '-f', file] + list(opts)
logger.info(f'Running command: {cmd}')
output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as exc:
output = exc.output
if b'i/o timeout' in output and retry > 0:
retry = retry - 1
logger.info("kubectl timed out, retries left: %s" % retry)
else:
raise Exception(output)
else:
logger.info(output)
return
raise Exception(f'Operation failed after {maxAttempts} attempts: {output}')

View File

@@ -0,0 +1,88 @@
import json
import logging
import os
import subprocess
import time
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# these are coming from the kubectl layer
os.environ['PATH'] = '/opt/kubectl:/opt/awscli:' + os.environ['PATH']
outdir = os.environ.get('TEST_OUTDIR', '/tmp')
kubeconfig = os.path.join(outdir, 'kubeconfig')
def get_handler(event, context):
logger.info(json.dumps(dict(event, ResponseURL='...')))
request_type = event['RequestType']
props = event['ResourceProperties']
# resource properties (all required)
cluster_name = props['ClusterName']
role_arn = props['RoleArn']
# "log in" to the cluster
subprocess.check_call([ 'aws', 'eks', 'update-kubeconfig',
'--role-arn', role_arn,
'--name', cluster_name,
'--kubeconfig', kubeconfig
])
if os.path.isfile(kubeconfig):
os.chmod(kubeconfig, 0o600)
object_type = props['ObjectType']
object_name = props['ObjectName']
object_namespace = props['ObjectNamespace']
json_path = props['JsonPath']
timeout_seconds = props['TimeoutSeconds']
# json path should be surrounded with '{}'
path = '{{{0}}}'.format(json_path)
if request_type == 'Create' or request_type == 'Update':
output = wait_for_output(['get', '-n', object_namespace, object_type, object_name, "-o=jsonpath='{{{0}}}'".format(json_path)], int(timeout_seconds))
return {'Data': {'Value': output}}
elif request_type == 'Delete':
pass
else:
raise Exception("invalid request type %s" % request_type)
def wait_for_output(args, timeout_seconds):
end_time = time.time() + timeout_seconds
error = None
while time.time() < end_time:
try:
# the output is surrounded with '', so we unquote
output = kubectl(args).decode('utf-8')[1:-1]
if output:
return output
except Exception as e:
error = str(e)
# also a recoverable error
if 'NotFound' in error:
pass
time.sleep(10)
raise RuntimeError(f'Timeout waiting for output from kubectl command: {args} (last_error={error})')
def kubectl(args):
retry = 3
while retry > 0:
try:
cmd = [ 'kubectl', '--kubeconfig', kubeconfig ] + args
output = subprocess.check_output(cmd, stderr=subprocess.PIPE)
except subprocess.CalledProcessError as exc:
output = exc.output + exc.stderr
if b'i/o timeout' in output and retry > 0:
logger.info("kubectl timed out, retries left: %s" % retry)
retry = retry - 1
else:
raise Exception(output)
else:
logger.info(output)
return output

View File

@@ -0,0 +1,263 @@
import json
import logging
import os
import re
import subprocess
import shutil
import tempfile
import zipfile
import boto3
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# these are coming from the kubectl layer
os.environ['PATH'] = '/opt/helm:/opt/awscli:' + os.environ['PATH']
outdir = os.environ.get('TEST_OUTDIR', '/tmp')
kubeconfig = os.path.join(outdir, 'kubeconfig')
def get_chart_asset_from_url(chart_asset_url):
chart_zip = os.path.join(outdir, 'chart.zip')
shutil.rmtree(chart_zip, ignore_errors=True)
subprocess.check_call(['aws', 's3', 'cp', chart_asset_url, chart_zip])
chart_dir = os.path.join(outdir, 'chart')
shutil.rmtree(chart_dir, ignore_errors=True)
os.mkdir(chart_dir)
with zipfile.ZipFile(chart_zip, 'r') as zip_ref:
zip_ref.extractall(chart_dir)
return chart_dir
def is_ecr_public_available(region):
s = boto3.Session()
return s.get_partition_for_region(region) == 'aws'
def helm_handler(event, context):
logger.info(json.dumps(dict(event, ResponseURL='...')))
request_type = event['RequestType']
props = event['ResourceProperties']
# resource properties
cluster_name = props['ClusterName']
role_arn = props['RoleArn']
release = props['Release']
chart = props.get('Chart', None)
chart_asset_url = props.get('ChartAssetURL', None)
version = props.get('Version', None)
wait = props.get('Wait', False)
atomic = props.get('Atomic', False)
timeout = props.get('Timeout', None)
namespace = props.get('Namespace', None)
create_namespace = props.get('CreateNamespace', None)
repository = props.get('Repository', None)
values_text = props.get('Values', None)
skip_crds = props.get('SkipCrds', False)
# "log in" to the cluster
subprocess.check_call([ 'aws', 'eks', 'update-kubeconfig',
'--role-arn', role_arn,
'--name', cluster_name,
'--kubeconfig', kubeconfig
])
if os.path.isfile(kubeconfig):
os.chmod(kubeconfig, 0o600)
# Write out the values to a file and include them with the install and upgrade
values_file = None
if not request_type == "Delete" and not values_text is None:
values = json.loads(values_text)
values_file = os.path.join(outdir, 'values.yaml')
with open(values_file, "w") as f:
f.write(json.dumps(values, indent=2))
if request_type == 'Create' or request_type == 'Update':
# Ensure chart or chart_asset_url are set
if chart == None and chart_asset_url == None:
raise RuntimeError(f'chart or chartAsset must be specified')
if chart_asset_url != None:
assert(chart==None)
assert(repository==None)
assert(version==None)
if not chart_asset_url.startswith('s3://'):
raise RuntimeError(f'ChartAssetURL must point to as s3 location but is {chart_asset_url}')
# future work: support versions from s3 assets
chart = get_chart_asset_from_url(chart_asset_url)
if repository is not None and repository.startswith('oci://'):
tmpdir = tempfile.TemporaryDirectory()
chart_dir = get_chart_from_oci(tmpdir.name, repository, version)
chart = chart_dir
# Chart is now local — clear repository and version so helm() doesn't
# pass --repo/--version to "helm upgrade". Helm v4 (kubectl-v35+)
# rejects --repo with oci:// URLs ("invalid reference"), unlike v3.
repository = None
version = None
helm('upgrade', release, chart, repository, values_file, namespace, version, wait, timeout, create_namespace, skip_crds, atomic=atomic)
elif request_type == "Delete":
try:
helm('uninstall', release, namespace=namespace, wait=wait, timeout=timeout)
except Exception as e:
logger.error("Delete error: %s", str(e))
def get_oci_cmd(repository, version):
# Generates OCI command based on pattern. Public ECR vs Private ECR are treated differently.
private_ecr_pattern = 'oci://(?P<registry>\d+\.dkr\.ecr\.(?P<region>[a-z0-9\-]+)\.(?P<domain>[a-z0-9\.-]+))*'
public_ecr_pattern = 'oci://(?P<registry>public\.ecr\.aws)*'
private_registry = re.match(private_ecr_pattern, repository).groupdict()
public_registry = re.match(public_ecr_pattern, repository).groupdict()
# Build helm pull command as array
helm_cmd = ['helm', 'pull', repository, '--version', version , '--untar']
if private_registry['registry'] is not None:
logger.info("Found AWS private repository")
ecr_login = ['aws', 'ecr', 'get-login-password', '--region', private_registry['region']]
helm_registry_login = ['helm', 'registry', 'login', '--username', 'AWS', '--password-stdin', private_registry['registry']]
return {'ecr_login': ecr_login, 'helm_registry_login': helm_registry_login, 'helm': helm_cmd}
elif public_registry['registry'] is not None:
logger.info("Found AWS public repository, will use default region as deployment")
region = os.environ.get('AWS_REGION', 'us-east-1')
if is_ecr_public_available(region):
# Public ECR auth is always in us-east-1: https://docs.aws.amazon.com/AmazonECR/latest/public/public-registry-auth.html
ecr_login = ['aws', 'ecr-public', 'get-login-password', '--region', 'us-east-1']
helm_registry_login = ['helm', 'registry', 'login', '--username', 'AWS', '--password-stdin', public_registry['registry']]
return {'ecr_login': ecr_login, 'helm_registry_login': helm_registry_login, 'helm': helm_cmd}
else:
# No login required for public ECR in non-aws regions
# see https://helm.sh/docs/helm/helm_registry_login/
return {'helm': helm_cmd}
else:
logger.error("OCI repository format not recognized, falling back to helm pull")
return {'helm': helm_cmd}
def get_chart_from_oci(tmpdir, repository=None, version=None):
from subprocess import Popen, PIPE
commands = get_oci_cmd(repository, version)
maxAttempts = 3
retry = maxAttempts
while retry > 0:
try:
# Execute login commands if needed
if 'ecr_login' in commands and 'helm_registry_login' in commands:
logger.info("Running login command: %s", commands['ecr_login'])
logger.info("Running registry login command: %s", commands['helm_registry_login'])
# Start first process: aws ecr get-login-password
# NOTE: We do NOT call p1.wait() here before starting p2.
# Doing so could deadlock if p1's output fills the pipe buffer
# before p2 starts reading. Instead, start p2 immediately so it
# can consume p1's stdout as it's produced.
p1 = Popen(commands['ecr_login'], stdout=PIPE, stderr=PIPE, cwd=tmpdir)
# Start second process: helm registry login
p2 = Popen(commands['helm_registry_login'], stdin=p1.stdout, stdout=PIPE, stderr=PIPE, cwd=tmpdir)
p1.stdout.close() # Allow p1 to receive SIGPIPE if p2 exits early
# Wait for p2 to finish first (ensures full pipeline completes)
_, p2_err = p2.communicate()
# Now wait for p1 so we have a complete stderr and an exit code
p1.wait()
# Handle p1 failure
if p1.returncode != 0:
p1_err = p1.stderr.read().decode('utf-8', errors='replace') if p1.stderr else ''
logger.error(
"ECR get-login-password failed for repository %s. Error: %s",
repository,
p1_err or "No error details"
)
raise subprocess.CalledProcessError(p1.returncode, commands['ecr_login'], p1_err.encode())
# Handle p2 failure
if p2.returncode != 0:
p1.kill()
logger.error(
"Helm registry authentication failed for repository %s. Error: %s",
repository,
p2_err.decode('utf-8', errors='replace') or "No error details"
)
raise subprocess.CalledProcessError(p2.returncode, commands['helm_registry_login'], p2_err)
# Execute helm pull command
logger.info("Running helm command: %s", commands['helm'])
output = subprocess.check_output(commands['helm'], stderr=subprocess.STDOUT, cwd=tmpdir)
logger.info(output.decode('utf-8', errors='replace'))
# effectively returns "$tmpDir/$lastPartOfOCIUrl", because this is how helm pull saves OCI artifact.
# Eg. if we have oci://9999999999.dkr.ecr.us-east-1.amazonaws.com/foo/bar/pet-service repository, helm saves artifact under $tmpDir/pet-service
return os.path.join(tmpdir, repository.rpartition('/')[-1])
except subprocess.CalledProcessError as exc:
output = exc.output
if b'Broken pipe' in output:
retry = retry - 1
logger.info("Broken pipe, retries left: %s" % retry)
else:
error_message = output.decode('utf-8', errors='replace')
logger.error("OCI command failed: %s", commands['helm'])
logger.error("Error output: %s", error_message)
raise Exception(output)
raise Exception(f'Operation failed after {maxAttempts} attempts: {output.decode("utf-8", errors="replace")}')
def helm(verb, release, chart = None, repo = None, file = None, namespace = None, version = None, wait = False, timeout = None, create_namespace = None, skip_crds = False, atomic = False):
import subprocess
cmnd = ['helm', verb, release]
if not chart is None:
cmnd.append(chart)
if verb == 'upgrade':
cmnd.append('--install')
if create_namespace:
cmnd.append('--create-namespace')
if not repo is None:
cmnd.extend(['--repo', repo])
if not file is None:
cmnd.extend(['--values', file])
if not version is None:
cmnd.extend(['--version', version])
if not namespace is None:
cmnd.extend(['--namespace', namespace])
if wait:
cmnd.append('--wait')
if skip_crds:
cmnd.append('--skip-crds')
if not timeout is None:
cmnd.extend(['--timeout', timeout])
if atomic:
cmnd.append('--atomic')
cmnd.extend(['--kubeconfig', kubeconfig])
# Log the full helm command for better troubleshooting
logger.info("Running command: %s", cmnd)
maxAttempts = 3
retry = maxAttempts
while retry > 0:
try:
output = subprocess.check_output(cmnd, stderr=subprocess.STDOUT, cwd=outdir)
logger.info(output.decode('utf-8', errors='replace'))
return
except subprocess.CalledProcessError as exc:
output = exc.output
if b'Broken pipe' in output:
retry = retry - 1
logger.info("Broken pipe, retries left: %s" % retry)
else:
error_message = output.decode('utf-8', errors='replace')
logger.error("Command failed: %s", cmnd)
logger.error("Error output: %s", error_message)
raise Exception(output)
raise Exception(f'Operation failed after {maxAttempts} attempts: {output.decode("utf-8", errors="replace")}')

View File

@@ -0,0 +1,26 @@
import json
import logging
from apply import apply_handler
from helm import helm_handler
from patch import patch_handler
from get import get_handler
def handler(event, context):
print(json.dumps(dict(event, ResponseURL='...')))
resource_type = event['ResourceType']
if resource_type == 'Custom::AWSCDK-EKS-KubernetesResource':
return apply_handler(event, context)
if resource_type == 'Custom::AWSCDK-EKS-HelmChart':
return helm_handler(event, context)
if resource_type == 'Custom::AWSCDK-EKS-KubernetesPatch':
return patch_handler(event, context)
if resource_type == 'Custom::AWSCDK-EKS-KubernetesObjectValue':
return get_handler(event, context)
raise Exception("unknown resource type %s" % resource_type)

View File

@@ -0,0 +1,70 @@
import json
import logging
import os
import subprocess
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# these are coming from the kubectl layer
os.environ['PATH'] = '/opt/kubectl:/opt/awscli:' + os.environ['PATH']
outdir = os.environ.get('TEST_OUTDIR', '/tmp')
kubeconfig = os.path.join(outdir, 'kubeconfig')
def patch_handler(event, context):
logger.info(json.dumps(dict(event, ResponseURL='...')))
request_type = event['RequestType']
props = event['ResourceProperties']
# resource properties (all required)
cluster_name = props['ClusterName']
role_arn = props['RoleArn']
# "log in" to the cluster
subprocess.check_call([ 'aws', 'eks', 'update-kubeconfig',
'--role-arn', role_arn,
'--name', cluster_name,
'--kubeconfig', kubeconfig
])
if os.path.isfile(kubeconfig):
os.chmod(kubeconfig, 0o600)
resource_name = props['ResourceName']
resource_namespace = props['ResourceNamespace']
apply_patch_json = props['ApplyPatchJson']
restore_patch_json = props['RestorePatchJson']
patch_type = props['PatchType']
patch_json = None
if request_type == 'Create' or request_type == 'Update':
patch_json = apply_patch_json
elif request_type == 'Delete':
patch_json = restore_patch_json
else:
raise Exception("invalid request type %s" % request_type)
kubectl([ 'patch', resource_name, '-n', resource_namespace, '-p', patch_json, '--type', patch_type ])
def kubectl(args):
maxAttempts = 3
retry = maxAttempts
while retry > 0:
try:
cmd = [ 'kubectl', '--kubeconfig', kubeconfig ] + args
output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as exc:
output = exc.output
if b'i/o timeout' in output and retry > 0:
retry = retry - 1
logger.info("kubectl timed out, retries left: %s" % retry)
else:
raise Exception(output)
else:
logger.info(output)
return
raise Exception(f'Operation failed after {maxAttempts} attempts: {output}')

View File

@@ -0,0 +1,5 @@
import { Construct } from "constructs";
import * as lambda from "../../../aws-lambda";
export declare class KubectlFunction extends lambda.Function {
constructor(scope: Construct, id: string, props?: lambda.FunctionOptions);
}

View File

@@ -0,0 +1 @@
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.KubectlFunction=void 0;const path=require("path"),lambda=require("../../../aws-lambda");class KubectlFunction extends lambda.Function{constructor(scope,id,props){super(scope,id,{...props,code:lambda.Code.fromAsset(path.join(__dirname,"kubectl-handler")),handler:"index.handler",runtime:lambda.Runtime.PYTHON_3_13}),this.node.addMetadata("aws:cdk:is-custom-resource-handler-runtime-family",this.runtime.family)}}exports.KubectlFunction=KubectlFunction;