agent-claw: automated task changes

2026-05-06 18:55:16 -05:00
parent 38905bb1e9
commit 732b00fb66
8494 changed files with 2018127 additions and 4 deletions
--- a/cdk/cdk.out/asset.3423a042b818e31c1e34a19d6689ab2e5f9b70fcbe9e71df66f241b20a200bd9/index.py
+++ b/cdk/cdk.out/asset.3423a042b818e31c1e34a19d6689ab2e5f9b70fcbe9e71df66f241b20a200bd9/index.py
@@ -0,0 +1,406 @@
+import contextlib
+import json
+import logging
+import os
+import shutil
+import subprocess
+import tempfile
+import urllib.parse
+from urllib.request import Request, urlopen
+from uuid import uuid4
+from zipfile import ZipFile
+
+import boto3
+from botocore.config import Config
+from botocore.exceptions import WaiterError
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+cloudfront = boto3.client('cloudfront', config=Config(
+    retries = {
+        'max_attempts': 10,
+        'mode': 'standard',
+    }
+))
+s3 = boto3.client('s3')
+
+CFN_SUCCESS = "SUCCESS"
+CFN_FAILED = "FAILED"
+ENV_KEY_MOUNT_PATH = "MOUNT_PATH"
+ENV_KEY_SKIP_CLEANUP = "SKIP_CLEANUP"
+
+AWS_CLI_CONFIG_FILE = "/tmp/aws_cli_config"
+CUSTOM_RESOURCE_OWNER_TAG = "aws-cdk:cr-owned"
+
+os.putenv('AWS_CONFIG_FILE', AWS_CLI_CONFIG_FILE)
+
+def handler(event, context):
+
+    def cfn_error(message=None):
+        if message:
+            logger.error("| cfn_error: %s" % message.encode())
+        cfn_send(event, context, CFN_FAILED, reason=message, physicalResourceId=event.get('PhysicalResourceId', None))
+
+
+    try:
+        # We are not logging ResponseURL as this is a pre-signed S3 URL, and could be used to tamper
+        # with the response CloudFormation sees from this Custom Resource execution.
+        logger.info({ key:value for (key, value) in event.items() if key != 'ResponseURL'})
+
+        # cloudformation request type (create/update/delete)
+        request_type = event['RequestType']
+
+        # extract resource properties
+        props = event['ResourceProperties']
+        old_props = event.get('OldResourceProperties', {})
+        physical_id = event.get('PhysicalResourceId', None)
+
+        try:
+            source_bucket_names = props['SourceBucketNames']
+            source_object_keys  = props['SourceObjectKeys']
+            source_markers      = props.get('SourceMarkers', None)
+            source_markers_config = props.get('SourceMarkersConfig', None)
+            dest_bucket_name    = props['DestinationBucketName']
+            dest_bucket_prefix  = props.get('DestinationBucketKeyPrefix', '')
+            extract             = props.get('Extract', 'true') == 'true'
+            retain_on_delete    = props.get('RetainOnDelete', "true") == "true"
+            distribution_id     = props.get('DistributionId', '')
+            wait_for_distribution_invalidation = props.get('WaitForDistributionInvalidation', True)
+            user_metadata       = props.get('UserMetadata', {})
+            system_metadata     = props.get('SystemMetadata', {})
+            prune               = props.get('Prune', 'true').lower() == 'true'
+            exclude             = props.get('Exclude', [])
+            include             = props.get('Include', [])
+            sign_content        = props.get('SignContent', 'false').lower() == 'true'
+            output_object_keys  = props.get('OutputObjectKeys', 'true') == 'true'
+
+            # backwards compatibility - if "SourceMarkers" is not specified,
+            # assume all sources have an empty market map
+            if source_markers is None:
+                source_markers = [{} for i in range(len(source_bucket_names))]
+            if source_markers_config is None:
+                source_markers_config = [{} for i in range(len(source_bucket_names))]
+
+            default_distribution_path = dest_bucket_prefix
+            if not default_distribution_path.endswith("/"):
+                default_distribution_path += "/"
+            if not default_distribution_path.startswith("/"):
+                default_distribution_path = "/" + default_distribution_path
+            default_distribution_path += "*"
+
+            distribution_paths = props.get('DistributionPaths', [default_distribution_path])
+        except KeyError as e:
+            cfn_error("missing request resource property %s. props: %s" % (str(e), props))
+            return
+
+        # configure aws cli options after resetting back to the defaults for each request
+        if os.path.exists(AWS_CLI_CONFIG_FILE):
+                os.remove(AWS_CLI_CONFIG_FILE)
+        if sign_content:
+                aws_command("configure", "set", "default.s3.payload_signing_enabled", "true")
+
+        # treat "/" as if no prefix was specified
+        if dest_bucket_prefix == "/":
+            dest_bucket_prefix = ""
+
+        s3_source_zips = list(map(lambda name, key: "s3://%s/%s" % (name, key), source_bucket_names, source_object_keys))
+        s3_dest = "s3://%s/%s" % (dest_bucket_name, dest_bucket_prefix)
+        old_s3_dest = "s3://%s/%s" % (old_props.get("DestinationBucketName", ""), old_props.get("DestinationBucketKeyPrefix", ""))
+
+
+        # obviously this is not
+        if old_s3_dest == "s3:///":
+            old_s3_dest = None
+
+        logger.info("| s3_dest: %s" % sanitize_message(s3_dest))
+        logger.info("| old_s3_dest: %s" % sanitize_message(old_s3_dest))
+
+        # if we are creating a new resource, allocate a physical id for it
+        # otherwise, we expect physical id to be relayed by cloudformation
+        if request_type == "Create":
+            physical_id = "aws.cdk.s3deployment.%s" % str(uuid4())
+        else:
+            if not physical_id:
+                cfn_error("invalid request: request type is '%s' but 'PhysicalResourceId' is not defined" % request_type)
+                return
+
+        # delete or create/update (only if "retain_on_delete" is false)
+        if request_type == "Delete" and not retain_on_delete:
+            if not bucket_owned(dest_bucket_name, dest_bucket_prefix):
+                aws_command("s3", "rm", s3_dest, "--recursive")
+
+        # if we are updating without retention and the destination changed, delete first
+        if request_type == "Update" and not retain_on_delete and old_s3_dest != s3_dest:
+            if not old_s3_dest:
+                logger.warn("cannot delete old resource without old resource properties")
+                return
+
+            aws_command("s3", "rm", old_s3_dest, "--recursive")
+
+        if request_type == "Update" or request_type == "Create":
+            s3_deploy(s3_source_zips, s3_dest, user_metadata, system_metadata, prune, exclude, include, source_markers, extract, source_markers_config)
+
+        if distribution_id:
+            cloudfront_invalidate(distribution_id, distribution_paths, wait_for_distribution_invalidation)
+
+        cfn_send(event, context, CFN_SUCCESS, physicalResourceId=physical_id, responseData={
+            # Passing through the ARN sequences dependencees on the deployment
+            'DestinationBucketArn': props.get('DestinationBucketArn'),
+            **({'SourceObjectKeys': props.get('SourceObjectKeys')} if output_object_keys else {'SourceObjectKeys': []})
+        })
+    except KeyError as e:
+        cfn_error("invalid request. Missing key %s" % str(e))
+    except Exception as e:
+        logger.exception(e)
+        cfn_error(str(e))
+
+#---------------------------------------------------------------------------------------------------
+# Sanitize the message to mitigate CWE-117 and CWE-93 vulnerabilities
+def sanitize_message(message):
+    if not message:
+        return message
+
+    # Sanitize the message to prevent log injection and HTTP response splitting
+    sanitized_message = message.replace('\n', '').replace('\r', '')
+
+    # Encode the message to handle special characters
+    encoded_message = urllib.parse.quote(sanitized_message)
+
+    return encoded_message
+
+#---------------------------------------------------------------------------------------------------
+# populate all files from s3_source_zips to a destination bucket
+def s3_deploy(s3_source_zips, s3_dest, user_metadata, system_metadata, prune, exclude, include, source_markers, extract, source_markers_config):
+    # list lengths are equal
+    if len(s3_source_zips) != len(source_markers):
+        raise Exception("'source_markers' and 's3_source_zips' must be the same length")
+
+    # create a temporary working directory in /tmp or if enabled an attached efs volume
+    if ENV_KEY_MOUNT_PATH in os.environ:
+        workdir = os.getenv(ENV_KEY_MOUNT_PATH) + "/" + str(uuid4())
+        os.mkdir(workdir)
+    else:
+        workdir = tempfile.mkdtemp()
+
+    logger.info("| workdir: %s" % workdir)
+
+    # create a directory into which we extract the contents of the zip file
+    contents_dir=os.path.join(workdir, 'contents')
+    os.mkdir(contents_dir)
+
+    try:
+        # download the archive from the source and extract to "contents"
+        for i in range(len(s3_source_zips)):
+            s3_source_zip = s3_source_zips[i]
+            markers       = source_markers[i]
+            markers_config = source_markers_config[i]
+
+            if extract:
+                archive=os.path.join(workdir, str(uuid4()))
+                logger.info("archive: %s" % archive)
+                aws_command("s3", "cp", s3_source_zip, archive)
+                logger.info("| extracting archive to: %s\n" % contents_dir)
+                logger.info("| markers: %s" % markers)
+                extract_and_replace_markers(archive, contents_dir, markers, markers_config)
+            else:
+                logger.info("| copying archive to: %s\n" % contents_dir)
+                aws_command("s3", "cp", s3_source_zip, contents_dir)
+
+        # sync from "contents" to destination
+
+        s3_command = ["s3", "sync"]
+
+        if prune:
+          s3_command.append("--delete")
+
+        if exclude:
+          for filter in exclude:
+            s3_command.extend(["--exclude", filter])
+
+        if include:
+          for filter in include:
+            s3_command.extend(["--include", filter])
+
+        s3_command.extend([contents_dir, s3_dest])
+        s3_command.extend(create_metadata_args(user_metadata, system_metadata))
+        aws_command(*s3_command)
+    finally:
+        if not os.getenv(ENV_KEY_SKIP_CLEANUP):
+            shutil.rmtree(workdir)
+
+#---------------------------------------------------------------------------------------------------
+# invalidate files in the CloudFront distribution edge caches
+def cloudfront_invalidate(distribution_id, distribution_paths, wait_for_invalidation):
+    invalidation_resp = cloudfront.create_invalidation(
+        DistributionId=distribution_id,
+        InvalidationBatch={
+            'Paths': {
+                'Quantity': len(distribution_paths),
+                'Items': distribution_paths
+            },
+            'CallerReference': str(uuid4()),
+        })
+    if wait_for_invalidation:
+        try:
+            # Wait for a maximum of 13 minutes for invalidation to complete.
+            cloudfront.get_waiter('invalidation_completed').wait(
+                DistributionId=distribution_id,
+                Id=invalidation_resp['Invalidation']['Id'],
+                WaiterConfig={
+                    'Delay': 20,
+                    'MaxAttempts': (13*60)//20,
+                }
+            )
+        except WaiterError as e:
+            raise RuntimeError(f"Unable to confirm that cache invalidation was successful. This may be a CloudFront regression as reported in https://github.com/aws/aws-cdk/issues/15891") from e
+
+
+#---------------------------------------------------------------------------------------------------
+# set metadata
+def create_metadata_args(raw_user_metadata, raw_system_metadata):
+    if len(raw_user_metadata) == 0 and len(raw_system_metadata) == 0:
+        return []
+
+    format_system_metadata_key = lambda k: k.lower()
+    format_user_metadata_key = lambda k: k.lower()
+
+    system_metadata = { format_system_metadata_key(k): v for k, v in raw_system_metadata.items() }
+    user_metadata = { format_user_metadata_key(k): v for k, v in raw_user_metadata.items() }
+
+    flatten = lambda l: [item for sublist in l for item in sublist]
+    system_args = flatten([[f"--{k}", v] for k, v in system_metadata.items()])
+    user_args = ["--metadata", json.dumps(user_metadata, separators=(',', ':'))] if len(user_metadata) > 0 else []
+
+    return system_args + user_args + ["--metadata-directive", "REPLACE"]
+
+#---------------------------------------------------------------------------------------------------
+# executes an "aws" cli command
+def aws_command(*args):
+    aws="/opt/awscli/aws" # from AwsCliLayer
+    logger.info("| aws %s" % ' '.join(args))
+    subprocess.check_call([aws] + list(args))
+
+#---------------------------------------------------------------------------------------------------
+# sends a response to cloudformation
+def cfn_send(event, context, responseStatus, responseData={}, physicalResourceId=None, noEcho=False, reason=None):
+
+    responseUrl = event['ResponseURL']
+
+    responseBody = {}
+    responseBody['Status'] = responseStatus
+    responseBody['Reason'] = reason or ('See the details in CloudWatch Log Stream: ' + context.log_stream_name)
+    responseBody['PhysicalResourceId'] = physicalResourceId or context.log_stream_name
+    responseBody['StackId'] = event['StackId']
+    responseBody['RequestId'] = event['RequestId']
+    responseBody['LogicalResourceId'] = event['LogicalResourceId']
+    responseBody['NoEcho'] = noEcho
+    responseBody['Data'] = responseData
+
+    body = json.dumps(responseBody)
+    logger.info("| response body:\n" + body)
+
+    headers = {
+        'content-type' : '',
+        'content-length' : str(len(body))
+    }
+
+    try:
+        request = Request(responseUrl, method='PUT', data=bytes(body.encode('utf-8')), headers=headers)
+        with contextlib.closing(urlopen(request)) as response:
+            logger.info("| status code: " + response.reason)
+    except Exception as e:
+        logger.error("| unable to send response to CloudFormation")
+        logger.exception(e)
+
+
+#---------------------------------------------------------------------------------------------------
+# check if bucket is owned by a custom resource
+# if it is then we don't want to delete content
+def bucket_owned(bucketName, keyPrefix):
+    tag = CUSTOM_RESOURCE_OWNER_TAG
+    if keyPrefix != "":
+        tag = tag + ':' + keyPrefix
+    try:
+        request = s3.get_bucket_tagging(
+            Bucket=bucketName,
+        )
+        return any((x["Key"].startswith(tag)) for x in request["TagSet"])
+    except Exception as e:
+        logger.info("| error getting tags from bucket")
+        logger.exception(e)
+        return False
+
+# extract archive and replace markers in output files
+def extract_and_replace_markers(archive, contents_dir, markers, markers_config):
+    with ZipFile(archive, "r") as zip:
+        zip.extractall(contents_dir)
+
+        # replace markers for this source
+        for file in zip.namelist():
+            file_path = os.path.join(contents_dir, file)
+            if os.path.isdir(file_path): continue
+            replace_markers(file_path, markers, markers_config)
+
+def prepare_json_safe_markers(markers):
+    """Pre-process markers to ensure JSON-safe values"""
+    safe_markers = {}
+    for key, value in markers.items():
+        # Serialize the value as JSON to handle escaping if the value is a string
+        serialized = json.dumps(value)
+        if serialized.startswith('"') and serialized.endswith('"'):
+            json_safe_value = json.dumps(value)[1:-1]  # Remove surrounding quotes
+        else:
+            json_safe_value = serialized
+        safe_markers[key.encode('utf-8')] = json_safe_value.encode('utf-8')
+    return safe_markers
+
+def replace_markers(filename, markers, markers_config):
+    """Replace markers in a file, with special handling for JSON files."""
+    # if there are no markers, skip
+    if not markers:
+        return
+    
+    outfile = filename + '.new'
+    json_escape = markers_config.get('jsonEscape', 'false').lower()
+    if json_escape == 'true':
+        replace_tokens = prepare_json_safe_markers(markers)
+    else:
+        replace_tokens = dict([(k.encode('utf-8'), v.encode('utf-8')) for k, v in markers.items()])
+
+    # Handle content with line-by-line binary replacement
+    with open(filename, 'rb') as fi, open(outfile, 'wb') as fo:
+        # Process line by line to handle large files
+        for line in fi:
+            for token, replacement in replace_tokens.items():
+                line = line.replace(token, replacement)
+            fo.write(line)
+
+    # Delete the original file and rename the new one to the original
+    os.remove(filename)
+    os.rename(outfile, filename)
+
+def replace_markers_in_json(json_object, replace_tokens):
+    """Replace markers in JSON content with proper escaping."""
+    try:
+        def replace_in_structure(obj):
+            if isinstance(obj, str):
+                # Convert string to bytes for consistent replacement
+                result = obj.encode('utf-8')
+                for token, replacement in replace_tokens.items():
+                    result = result.replace(token, replacement)
+                # Convert back to string
+                return result.decode('utf-8')
+            elif isinstance(obj, dict):
+                return {k: replace_in_structure(v) for k, v in obj.items()}
+            elif isinstance(obj, list):
+                return [replace_in_structure(item) for item in obj]
+            return obj
+
+        # Process the whole structure
+        processed = replace_in_structure(json_object)
+        return json.dumps(processed)
+    except Exception as e:
+        logger.error(f'Error processing JSON: {e}')
+        logger.exception(e)
+        return json_object