# Build the deployment archives for the two Textract demo Lambdas.
#
#   make          (re)build both zip archives when their source changed
#   make clean    remove the generated archives
#
# NOTE(review): the generated *.zip files are build artifacts; consider
# ignoring them in .gitignore instead of committing them.

# Python Lambda files
LAMBDA1_FILE = lambda_function.py
LAMBDA2_FILE = sqs_to_csv_lambda.py

# Output zip files
LAMBDA1_ZIP = lambda_function.zip
LAMBDA2_ZIP = sqs_to_csv_lambda.zip

# These targets never create a file of the same name.  Declaring them phony
# keeps them working even if a file called "all" or "clean" appears in the
# directory.
.PHONY: all zip-lambdas clean

# Default target
all: zip-lambdas

# Zip the Lambda functions
zip-lambdas: $(LAMBDA1_ZIP) $(LAMBDA2_ZIP)

# Each archive is rebuilt only when its source file is newer than the zip.
$(LAMBDA1_ZIP): $(LAMBDA1_FILE)
	@echo "Zipping $(LAMBDA1_FILE) into $(LAMBDA1_ZIP)..."
	zip $(LAMBDA1_ZIP) $(LAMBDA1_FILE)

$(LAMBDA2_ZIP): $(LAMBDA2_FILE)
	@echo "Zipping $(LAMBDA2_FILE) into $(LAMBDA2_ZIP)..."
	zip $(LAMBDA2_ZIP) $(LAMBDA2_FILE)

# Clean the zip files
clean:
	rm -f $(LAMBDA1_ZIP) $(LAMBDA2_ZIP)
	@echo "Cleaned up old zip files!"
"""S3-triggered Lambda: run Amazon Textract on an uploaded file and publish the result to SNS.

Images (.png / .jpg / .jpeg) are sent to ``detect_document_text`` and every
LINE block is published together with its confidence score.  PDFs are sent to
``analyze_document`` with a fixed list of queries and the answers are
published keyed by query alias.
"""
import json
import os
import urllib.parse

import boto3

s3_client = boto3.client('s3')
textract_client = boto3.client('textract')
sns_client = boto3.client('sns')

SNS_TOPIC_ARN = os.environ['SNS_TOPIC_ARN']  # Environment variable for SNS topic ARN

SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg']
SUPPORTED_PDF_EXTENSION = '.pdf'

# Questions asked of every PDF; the alias becomes the key in the published result.
PDF_QUERIES = [
    {'Text': 'What is the event name?', 'Alias': 'EventName'},
    {'Text': 'What is the Location?', 'Alias': 'Location'},
]


def _format_lines_with_confidence(blocks):
    """Return one ``"<text> (Confidence: NN.NN)"`` entry per LINE block, newline-joined.

    The exact format is parsed again by the SQS-to-CSV consumer, so it must
    not change.
    """
    return '\n'.join(
        f"{block['Text']} (Confidence: {block['Confidence']:.2f})"
        for block in blocks
        if block['BlockType'] == 'LINE'
    )


def _collect_query_answers(blocks):
    """Map each query alias to the text and confidence of its first answer.

    In a Textract response the ANSWER relationship is attached to the QUERY
    block and points at QUERY_RESULT blocks (not the other way round).  A
    query without an answer has no relationship and is left out of the result.
    """
    blocks_by_id = {block['Id']: block for block in blocks}
    answers = {}
    for block in blocks:
        if block['BlockType'] != 'QUERY':
            continue
        query = block['Query']
        alias = query.get('Alias') or query['Text']
        answer_ids = [
            answer_id
            for relationship in block.get('Relationships', [])
            if relationship['Type'] == 'ANSWER'
            for answer_id in relationship['Ids']
        ]
        for answer_id in answer_ids:
            answer = blocks_by_id.get(answer_id)
            if answer is not None and answer['BlockType'] == 'QUERY_RESULT':
                answers[alias] = {
                    'Text': answer.get('Text', ''),
                    'Confidence': answer['Confidence'],
                }
                break  # keep the first listed answer only
    return answers


def _process_image(bucket_name, object_key):
    """Detect text lines in an image and publish them (with confidence) to SNS."""
    response = textract_client.detect_document_text(
        Document={'S3Object': {'Bucket': bucket_name, 'Name': object_key}}
    )
    print('Textract detect_document_text response::', response)

    extracted_text_with_confidence = _format_lines_with_confidence(response['Blocks'])
    print('extracted_text_with_confidence::', extracted_text_with_confidence)

    # Send extracted text to SNS
    sns_client.publish(
        TopicArn=SNS_TOPIC_ARN,
        Message=json.dumps({
            'bucket': bucket_name,
            'key': object_key,
            'text': extracted_text_with_confidence,
        }),
        Subject='Textract Extracted Text from Image',
    )


def _process_pdf(bucket_name, object_key):
    """Run the fixed queries against a PDF and publish the answers to SNS."""
    response = textract_client.analyze_document(
        Document={'S3Object': {'Bucket': bucket_name, 'Name': object_key}},
        FeatureTypes=['QUERIES'],
        QueriesConfig={'Queries': PDF_QUERIES},
    )
    print('Textract analyze_document response::', response)

    query_results = _collect_query_answers(response['Blocks'])
    print('query_results::', query_results)

    # Send key-value pairs to SNS
    sns_client.publish(
        TopicArn=SNS_TOPIC_ARN,
        Message=json.dumps({
            'bucket': bucket_name,
            'key': object_key,
            'key_value_pairs': query_results,
        }),
        Subject='Textract Extracted Key-Value Pairs from PDF',
    )


def _process_object(bucket_name, object_key):
    """Dispatch one S3 object to the image or PDF pipeline based on its extension.

    Raises:
        ValueError: if the object key has an unsupported extension.
    """
    lowered_key = object_key.lower()
    if lowered_key.endswith(tuple(SUPPORTED_IMAGE_EXTENSIONS)):
        _process_image(bucket_name, object_key)
    elif lowered_key.endswith(SUPPORTED_PDF_EXTENSION):
        _process_pdf(bucket_name, object_key)
    else:
        raise ValueError(f"Unsupported file extension for file: {object_key}")


def lambda_handler(event, context):
    """Handle an S3 ObjectCreated event.

    Args:
        event: S3 notification event; every entry of ``event['Records']`` is
            processed.
        context: Lambda context object (unused).

    Returns:
        dict with ``statusCode`` and a JSON-encoded ``body``:
        200 when every record was processed and published to SNS,
        400 when Textract rejected the document format,
        500 for any other failure (including an unsupported file extension).
    """
    try:
        print(f"Event: {json.dumps(event)}")

        for record in event['Records']:
            # Get S3 bucket and object key from the S3 event
            bucket_name = record['s3']['bucket']['name']
            # Keys arrive URL-encoded in S3 notifications (a space becomes
            # "+"), so decode before handing the key to Textract.
            object_key = urllib.parse.unquote_plus(record['s3']['object']['key'])

            print('bucket_name::', bucket_name, ' - object_key::', object_key)

            # Errors are deliberately NOT swallowed here: a failed PDF must
            # not be reported as "processed successfully".
            _process_object(bucket_name, object_key)

        return {
            'statusCode': 200,
            'body': json.dumps('File processed successfully and data sent to SNS')
        }

    except textract_client.exceptions.UnsupportedDocumentException as e:
        print(f"Unsupported document format: {e}")
        return {
            'statusCode': 400,
            'body': json.dumps('Unsupported document format')
        }
    except Exception as e:
        # NOTE(review): S3 invokes this function asynchronously, so returning
        # a 500 payload does not trigger Lambda's retry / on-failure handling.
        # Kept for backward compatibility; re-raise if retries are wanted.
        print(f"Error processing file: {e}")
        return {
            'statusCode': 500,
            'body': json.dumps(f"Error processing file: {str(e)}")
        }
# Textract demo pipeline:
#   S3 upload -> textract Lambda -> SNS topic -> SQS queue -> sqs-to-csv Lambda -> CSV in S3
#
# NOTE(review): terraform.tfstate and the built *.zip archives are committed
# alongside this file.  State files contain account identifiers and should
# normally live in a remote backend or be git-ignored.

provider "aws" {
  region  = var.region
  profile = var.aws_profile
}

# S3 Bucket (receives the uploads and, under var.csv_s3_prefix, the generated CSVs)
resource "aws_s3_bucket" "textract_bucket" {
  bucket_prefix = var.s3_bucket_name

  tags = {
    Name = "TextractBucket"
  }
  force_destroy = true
}

# SNS Topic
resource "aws_sns_topic" "textract_topic" {
  name = "textract-sns-topic"

  tags = {
    Name = "TextractSNSTopic"
  }
}

# Lambda Function
resource "aws_lambda_function" "textract_lambda" {
  filename         = "lambda_function.zip"
  function_name    = var.lambda_function_name
  role             = aws_iam_role.lambda_role.arn
  handler          = "lambda_function.lambda_handler"
  runtime          = "python3.11"
  source_code_hash = filebase64sha256("lambda_function.zip")
  timeout          = 60

  environment {
    variables = {
      SNS_TOPIC_ARN = aws_sns_topic.textract_topic.arn # Pass SNS Topic ARN as an environment variable
    }
  }
}

# S3 Bucket Notification to Lambda
#
# One rule per extension the handler supports (.pdf, .jpeg, .jpg, .png).
# Suffix filters are case-sensitive and must not overlap; none of these is a
# suffix of another.  The generated .csv files match no rule, so writing them
# to the same bucket does not re-trigger the function.
resource "aws_s3_bucket_notification" "bucket_notification" {
  bucket = aws_s3_bucket.textract_bucket.id

  lambda_function {
    lambda_function_arn = aws_lambda_function.textract_lambda.arn
    events              = ["s3:ObjectCreated:*"]
    filter_suffix       = ".pdf"
  }

  lambda_function {
    lambda_function_arn = aws_lambda_function.textract_lambda.arn
    events              = ["s3:ObjectCreated:*"]
    filter_suffix       = ".jpeg"
  }

  lambda_function {
    lambda_function_arn = aws_lambda_function.textract_lambda.arn
    events              = ["s3:ObjectCreated:*"]
    filter_suffix       = ".jpg"
  }

  lambda_function {
    lambda_function_arn = aws_lambda_function.textract_lambda.arn
    events              = ["s3:ObjectCreated:*"]
    filter_suffix       = ".png"
  }

  depends_on = [aws_lambda_permission.allow_s3]
}

# IAM Role for Lambda
# NOTE(review): both Lambdas share this role, so each gets the union of the
# permissions below.  Split into two roles for least privilege.
resource "aws_iam_role" "lambda_role" {
  name = "lambda-textract-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = "sts:AssumeRole"
        Effect = "Allow"
        Principal = {
          Service = "lambda.amazonaws.com"
        }
      }
    ]
  })
}

# IAM Policy for the Lambdas: S3 read/write, SNS publish, Textract, logs, SQS consume
resource "aws_iam_role_policy" "lambda_policy" {
  name = "lambda-textract-policy"
  role = aws_iam_role.lambda_role.id
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = ["s3:GetObject", "s3:PutObject"]
        Resource = "${aws_s3_bucket.textract_bucket.arn}/*"
      },
      {
        Effect   = "Allow"
        Action   = ["sns:Publish"]
        Resource = aws_sns_topic.textract_topic.arn
      },
      {
        # Textract actions do not support resource-level permissions.
        Effect   = "Allow"
        Action   = ["textract:DetectDocumentText", "textract:AnalyzeDocument"]
        Resource = "*"
      },
      {
        Effect = "Allow"
        Action = [
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:PutLogEvents"
        ]
        Resource = "*"
      },
      {
        Effect = "Allow"
        Action = [
          "sqs:ReceiveMessage",
          "sqs:DeleteMessage",
          "sqs:GetQueueAttributes"
        ]
        Resource = aws_sqs_queue.textract_queue.arn
      }
    ]
  })
}

# Allow S3 to invoke Lambda
# NOTE(review): consider adding source_account as well; bucket ARNs do not
# contain an account id.
resource "aws_lambda_permission" "allow_s3" {
  statement_id  = "AllowS3Invoke"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.textract_lambda.function_name
  principal     = "s3.amazonaws.com"
  source_arn    = aws_s3_bucket.textract_bucket.arn
}

# SQS Queue
# NOTE(review): no dead-letter queue is configured; a message that keeps
# failing is retried until the retention period expires.
resource "aws_sqs_queue" "textract_queue" {
  name = var.sqs_queue_name

  # AWS recommends a visibility timeout of at least six times the consumer
  # function's timeout (60 s) so a message is not redelivered while an
  # invocation, or its retry after throttling, is still in flight.
  visibility_timeout_seconds = 360

  tags = {
    Name = "TextractQueue"
  }
}

# SQS Queue Policy to allow SNS to publish messages
resource "aws_sqs_queue_policy" "sns_publish_policy" {
  queue_url = aws_sqs_queue.textract_queue.id
  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect = "Allow"
        Principal = {
          Service = "sns.amazonaws.com"
        }
        Action   = "sqs:SendMessage"
        Resource = aws_sqs_queue.textract_queue.arn
        Condition = {
          ArnEquals = {
            "aws:SourceArn" = aws_sns_topic.textract_topic.arn
          }
        }
      }
    ]
  })
}

# SNS Subscription for SQS
resource "aws_sns_topic_subscription" "sns_to_sqs" {
  topic_arn = aws_sns_topic.textract_topic.arn
  protocol  = "sqs"
  endpoint  = aws_sqs_queue.textract_queue.arn

  # The queue policy must exist before SNS can deliver to the queue
  depends_on = [aws_sqs_queue_policy.sns_publish_policy]
}

# Lambda that converts queued Textract results into CSV files
resource "aws_lambda_function" "sqs_to_csv_lambda" {
  filename         = "sqs_to_csv_lambda.zip" # Path to your Lambda zip file
  function_name    = var.lambda_function_name_2
  role             = aws_iam_role.lambda_role.arn               # IAM role for Lambda
  handler          = "sqs_to_csv_lambda.lambda_handler"         # Lambda function handler
  runtime          = "python3.11"                               # Lambda runtime
  source_code_hash = filebase64sha256("sqs_to_csv_lambda.zip")  # Source code hash for validation
  timeout          = 60                                         # Timeout in seconds

  environment {
    variables = {
      CSV_S3_BUCKET = aws_s3_bucket.textract_bucket.bucket # S3 bucket name
      CSV_S3_PREFIX = var.csv_s3_prefix                    # Prefix for CSV files
    }
  }
}

# Deliver queue messages to the CSV Lambda, one message per invocation
resource "aws_lambda_event_source_mapping" "sqs_trigger" {
  event_source_arn = aws_sqs_queue.textract_queue.arn
  function_name    = aws_lambda_function.sqs_to_csv_lambda.arn
  batch_size       = 1
  enabled          = true
}
"""SQS-triggered Lambda: turn Textract results published via SNS into CSV files on S3.

Each SQS record wraps an SNS notification whose ``Message`` is a JSON object
with ``bucket``, ``key`` and either

* ``text``: newline-separated ``"<text> (Confidence: NN.NN)"`` lines, or
* ``key_value_pairs``: ``{alias: {"Text": ..., "Confidence": ...}}``.

Only entries whose confidence reaches ``MIN_CONFIDENCE`` are written.
"""
import csv
import datetime
import json
import os
from io import StringIO

import boto3

# Initialize clients
s3_client = boto3.client('s3')
sqs_client = boto3.client('sqs')

# Environment variables
CSV_S3_BUCKET = os.environ['CSV_S3_BUCKET']
CSV_S3_PREFIX = os.environ['CSV_S3_PREFIX']

# Minimum Textract confidence (0-100) an entry needs to be written to the CSV.
MIN_CONFIDENCE = 70

# Separator the producer puts between a line's text and its confidence score.
_CONFIDENCE_MARKER = ' (Confidence: '


def _filter_confident_lines(extracted_text, min_confidence=MIN_CONFIDENCE):
    """Return the lines of *extracted_text* at or above *min_confidence*, as ``"<text> (NN.NN)"``.

    Lines are split on the LAST confidence marker, so text that itself
    contains the marker is handled.  Lines without a parsable confidence are
    skipped rather than aborting the whole message.
    """
    kept = []
    for line in extracted_text.splitlines():
        text, marker, tail = line.rpartition(_CONFIDENCE_MARKER)
        if not marker:
            continue
        try:
            confidence = float(tail.rstrip(')'))
        except ValueError:
            continue
        if confidence >= min_confidence:
            kept.append(f"{text.rstrip()} ({confidence:.2f})")
    return kept


def _build_csv(message_data):
    """Render one decoded message as CSV text.

    Raises:
        ValueError: if the message carries neither ``text`` nor ``key_value_pairs``.
    """
    csv_buffer = StringIO()
    csv_writer = csv.writer(csv_buffer)

    extracted_text = message_data.get('text')
    key_value_pairs = message_data.get('key_value_pairs')

    if isinstance(extracted_text, str):
        # Image result: a single cell holding every confident line.
        csv_writer.writerow(['Extracted Text'])
        final_text = '\n'.join(_filter_confident_lines(extracted_text))
        print('final_text::', final_text)
        csv_writer.writerow([final_text])
    elif isinstance(key_value_pairs, dict):
        # PDF query result: one row per answered query.
        csv_writer.writerow(['Field', 'Value', 'Confidence'])
        for alias, answer in key_value_pairs.items():
            confidence = float(answer['Confidence'])
            if confidence >= MIN_CONFIDENCE:
                csv_writer.writerow([alias, answer['Text'], f"{confidence:.2f}"])
    else:
        raise ValueError("Missing required fields in message body")

    return csv_buffer.getvalue()


def lambda_handler(event, context):
    """Write one CSV file to S3 for every well-formed SQS record in *event*.

    Malformed messages are logged and skipped: retrying them cannot succeed,
    so they are allowed to leave the queue.  Failures while uploading to S3
    are NOT caught, so the invocation fails and SQS redelivers the message
    instead of silently losing it.

    Args:
        event: SQS event; each record body is an SNS notification envelope.
        context: Lambda context object (unused).

    Returns:
        dict with ``statusCode`` 200 and a short ``body`` message.
    """
    for record in event.get('Records', []):
        try:
            # Parse the SQS message
            message_body = json.loads(record['body'])
            print(f"Received message body: {json.dumps(message_body)}")  # Log the message for inspection

            # The SNS envelope carries the producer's JSON payload as a string
            message_data = json.loads(message_body['Message'])

            bucket = message_data.get('bucket')
            key = message_data.get('key')
            if not bucket or not key:
                raise ValueError("Missing required fields in message body")

            csv_body = _build_csv(message_data)
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            # json.JSONDecodeError is a ValueError, so bad JSON lands here too.
            print(f"Error processing the SQS message: {str(e)}")
            continue

        # Define the CSV file path and name (timestamp in UTC)
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d%H%M%S")
        csv_filename = f"{CSV_S3_PREFIX}{key.replace('/', '_')}_{timestamp}.csv"

        # Upload CSV to S3
        s3_client.put_object(
            Bucket=CSV_S3_BUCKET,
            Key=csv_filename,
            Body=csv_body.encode('utf-8'),
            ContentType='text/csv',
        )

        print(f"CSV file saved to S3: {csv_filename}")

    # Return a success response
    return {
        'statusCode': 200,
        'body': 'CSV files created and saved to S3'
    }
"string" + }, + "csv_s3_prefix": { + "value": "processed/csv/", + "type": "string" + }, + "lambda_function_arn": { + "value": "arn:aws:lambda:us-east-1:730335385934:function:textract-lambda", + "type": "string" + }, + "s3_bucket_name": { + "value": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "type": "string" + }, + "sqs_queue_url": { + "value": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue", + "type": "string" + } + }, + "resources": [ + { + "mode": "managed", + "type": "aws_iam_role", + "name": "lambda_role", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "arn": "arn:aws:iam::730335385934:role/lambda-textract-role", + "assume_role_policy": "{\"Statement\":[{\"Action\":\"sts:AssumeRole\",\"Effect\":\"Allow\",\"Principal\":{\"Service\":\"lambda.amazonaws.com\"}}],\"Version\":\"2012-10-17\"}", + "create_date": "2024-11-22T14:41:26Z", + "description": "", + "force_detach_policies": false, + "id": "lambda-textract-role", + "inline_policy": [ + { + "name": "lambda-textract-policy", + "policy": "{\"Version\":\"2012-10-17\",\"Statement\":[{\"Action\":[\"s3:GetObject\",\"s3:PutObject\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:s3:::acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001/*\"},{\"Action\":[\"sns:Publish\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\"},{\"Action\":[\"textract:DetectDocumentText\"],\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":\"textract:AnalyzeDocument\",\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":[\"logs:CreateLogGroup\",\"logs:CreateLogStream\",\"logs:PutLogEvents\"],\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":[\"sqs:ReceiveMessage\",\"sqs:DeleteMessage\",\"sqs:GetQueueAttributes\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue\"}]}" + } + ], + "managed_policy_arns": [], + 
"max_session_duration": 3600, + "name": "lambda-textract-role", + "name_prefix": "", + "path": "/", + "permissions_boundary": "", + "tags": {}, + "tags_all": {}, + "unique_id": "AROA2UC3BTFHE5U2T7YTS" + }, + "sensitive_attributes": [], + "private": "bnVsbA==" + } + ] + }, + { + "mode": "managed", + "type": "aws_iam_role_policy", + "name": "lambda_policy", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "id": "lambda-textract-role:lambda-textract-policy", + "name": "lambda-textract-policy", + "name_prefix": "", + "policy": "{\"Version\":\"2012-10-17\",\"Statement\":[{\"Action\":[\"s3:GetObject\",\"s3:PutObject\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:s3:::acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001/*\"},{\"Action\":[\"sns:Publish\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\"},{\"Action\":[\"textract:DetectDocumentText\"],\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":\"textract:AnalyzeDocument\",\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":[\"logs:CreateLogGroup\",\"logs:CreateLogStream\",\"logs:PutLogEvents\"],\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":[\"sqs:ReceiveMessage\",\"sqs:DeleteMessage\",\"sqs:GetQueueAttributes\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue\"}]}", + "role": "lambda-textract-role" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_s3_bucket.textract_bucket", + "aws_sns_topic.textract_topic", + "aws_sqs_queue.textract_queue" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_lambda_event_source_mapping", + "name": "sqs_trigger", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "amazon_managed_kafka_event_source_config": [], + "arn": 
"arn:aws:lambda:us-east-1:730335385934:event-source-mapping:37919b3d-a27f-46b1-8241-656a3d122b32", + "batch_size": 1, + "bisect_batch_on_function_error": false, + "destination_config": [], + "document_db_event_source_config": [], + "enabled": true, + "event_source_arn": "arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue", + "filter_criteria": [], + "function_arn": "arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda", + "function_name": "arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda", + "function_response_types": [], + "id": "37919b3d-a27f-46b1-8241-656a3d122b32", + "kms_key_arn": "", + "last_modified": "2024-11-22T14:51:34Z", + "last_processing_result": "", + "maximum_batching_window_in_seconds": 0, + "maximum_record_age_in_seconds": 0, + "maximum_retry_attempts": 0, + "parallelization_factor": 0, + "queues": [], + "scaling_config": [], + "self_managed_event_source": [], + "self_managed_kafka_event_source_config": [], + "source_access_configuration": [], + "starting_position": "", + "starting_position_timestamp": "", + "state": "Enabled", + "state_transition_reason": "USER_INITIATED", + "tags": {}, + "tags_all": {}, + "topics": [], + "tumbling_window_in_seconds": 0, + "uuid": "37919b3d-a27f-46b1-8241-656a3d122b32" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_lambda_function.sqs_to_csv_lambda", + "aws_s3_bucket.textract_bucket", + "aws_sqs_queue.textract_queue" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_lambda_function", + "name": "sqs_to_csv_lambda", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "architectures": [ + "x86_64" + ], + "arn": "arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda", + "code_sha256": "doYv077+NdxBqF9KNhbA0fYJD5Va9iM0hndcSCPPANA=", + "code_signing_config_arn": "", + "dead_letter_config": [], + "description": "", + 
"environment": [ + { + "variables": { + "CSV_S3_BUCKET": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "CSV_S3_PREFIX": "processed/csv/" + } + } + ], + "ephemeral_storage": [ + { + "size": 512 + } + ], + "file_system_config": [], + "filename": "sqs_to_csv_lambda.zip", + "function_name": "sqs-to-csv-lambda", + "handler": "sqs_to_csv_lambda.lambda_handler", + "id": "sqs-to-csv-lambda", + "image_config": [], + "image_uri": "", + "invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda/invocations", + "kms_key_arn": "", + "last_modified": "2024-12-12T01:13:43.000+0000", + "layers": [], + "logging_config": [ + { + "application_log_level": "", + "log_format": "Text", + "log_group": "/aws/lambda/sqs-to-csv-lambda", + "system_log_level": "" + } + ], + "memory_size": 128, + "package_type": "Zip", + "publish": false, + "qualified_arn": "arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda:$LATEST", + "qualified_invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda:$LATEST/invocations", + "replace_security_groups_on_destroy": null, + "replacement_security_group_ids": null, + "reserved_concurrent_executions": -1, + "role": "arn:aws:iam::730335385934:role/lambda-textract-role", + "runtime": "python3.11", + "s3_bucket": null, + "s3_key": null, + "s3_object_version": null, + "signing_job_arn": "", + "signing_profile_version_arn": "", + "skip_destroy": false, + "snap_start": [], + "source_code_hash": "doYv077+NdxBqF9KNhbA0fYJD5Va9iM0hndcSCPPANA=", + "source_code_size": 1385, + "tags": {}, + "tags_all": {}, + "timeout": 60, + "timeouts": null, + "tracing_config": [ + { + "mode": "PassThrough" + } + ], + "version": "$LATEST", + "vpc_config": [] + }, + "sensitive_attributes": [], + "private": 
"eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6NjAwMDAwMDAwMDAwLCJ1cGRhdGUiOjYwMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_s3_bucket.textract_bucket" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_lambda_function", + "name": "textract_lambda", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "architectures": [ + "x86_64" + ], + "arn": "arn:aws:lambda:us-east-1:730335385934:function:textract-lambda", + "code_sha256": "YdYIQ7CITqehgxJY9zX5CQlgY4x/80yRirW+tmkDurI=", + "code_signing_config_arn": "", + "dead_letter_config": [], + "description": "", + "environment": [ + { + "variables": { + "SNS_TOPIC_ARN": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic" + } + } + ], + "ephemeral_storage": [ + { + "size": 512 + } + ], + "file_system_config": [], + "filename": "lambda_function.zip", + "function_name": "textract-lambda", + "handler": "lambda_function.lambda_handler", + "id": "textract-lambda", + "image_config": [], + "image_uri": "", + "invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:730335385934:function:textract-lambda/invocations", + "kms_key_arn": "", + "last_modified": "2024-12-12T00:26:58.000+0000", + "layers": [], + "logging_config": [ + { + "application_log_level": "", + "log_format": "Text", + "log_group": "/aws/lambda/textract-lambda", + "system_log_level": "" + } + ], + "memory_size": 128, + "package_type": "Zip", + "publish": false, + "qualified_arn": "arn:aws:lambda:us-east-1:730335385934:function:textract-lambda:$LATEST", + "qualified_invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:730335385934:function:textract-lambda:$LATEST/invocations", + "replace_security_groups_on_destroy": null, + "replacement_security_group_ids": null, + "reserved_concurrent_executions": -1, + "role": 
"arn:aws:iam::730335385934:role/lambda-textract-role", + "runtime": "python3.11", + "s3_bucket": null, + "s3_key": null, + "s3_object_version": null, + "signing_job_arn": "", + "signing_profile_version_arn": "", + "skip_destroy": false, + "snap_start": [], + "source_code_hash": "YdYIQ7CITqehgxJY9zX5CQlgY4x/80yRirW+tmkDurI=", + "source_code_size": 1620, + "tags": {}, + "tags_all": {}, + "timeout": 60, + "timeouts": null, + "tracing_config": [ + { + "mode": "PassThrough" + } + ], + "version": "$LATEST", + "vpc_config": [] + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6NjAwMDAwMDAwMDAwLCJ1cGRhdGUiOjYwMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_sns_topic.textract_topic" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_lambda_permission", + "name": "allow_s3", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "action": "lambda:InvokeFunction", + "event_source_token": null, + "function_name": "textract-lambda", + "function_url_auth_type": null, + "id": "AllowS3Invoke", + "principal": "s3.amazonaws.com", + "principal_org_id": null, + "qualifier": "", + "source_account": null, + "source_arn": "arn:aws:s3:::acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "statement_id": "AllowS3Invoke", + "statement_id_prefix": "" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_lambda_function.textract_lambda", + "aws_s3_bucket.textract_bucket", + "aws_sns_topic.textract_topic" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_s3_bucket", + "name": "textract_bucket", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "acceleration_status": "", + "acl": null, + "arn": 
"arn:aws:s3:::acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "bucket": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "bucket_domain_name": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001.s3.amazonaws.com", + "bucket_prefix": "acdkochi2024-textract-s3-bucket-demo-", + "bucket_regional_domain_name": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001.s3.us-east-1.amazonaws.com", + "cors_rule": [], + "force_destroy": true, + "grant": [ + { + "id": "c89bce84d3a5eb193efe6a3c6cdcec152c336289e819a195d5963b7f14bee28e", + "permissions": [ + "FULL_CONTROL" + ], + "type": "CanonicalUser", + "uri": "" + } + ], + "hosted_zone_id": "Z3AQBSTGFYJSTF", + "id": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "lifecycle_rule": [], + "logging": [], + "object_lock_configuration": [], + "object_lock_enabled": false, + "policy": "", + "region": "us-east-1", + "replication_configuration": [], + "request_payer": "BucketOwner", + "server_side_encryption_configuration": [ + { + "rule": [ + { + "apply_server_side_encryption_by_default": [ + { + "kms_master_key_id": "", + "sse_algorithm": "AES256" + } + ], + "bucket_key_enabled": false + } + ] + } + ], + "tags": { + "Name": "TextractBucket" + }, + "tags_all": { + "Name": "TextractBucket" + }, + "timeouts": null, + "versioning": [ + { + "enabled": false, + "mfa_delete": false + } + ], + "website": [], + "website_domain": null, + "website_endpoint": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjM2MDAwMDAwMDAwMDAsInJlYWQiOjEyMDAwMDAwMDAwMDAsInVwZGF0ZSI6MTIwMDAwMDAwMDAwMH19" + } + ] + }, + { + "mode": "managed", + "type": "aws_s3_bucket_notification", + "name": "bucket_notification", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "bucket": 
"acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "eventbridge": false, + "id": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "lambda_function": [ + { + "events": [ + "s3:ObjectCreated:*" + ], + "filter_prefix": "", + "filter_suffix": ".pdf", + "id": "tf-s3-lambda-20241122145127431100000002", + "lambda_function_arn": "arn:aws:lambda:us-east-1:730335385934:function:textract-lambda" + }, + { + "events": [ + "s3:ObjectCreated:*" + ], + "filter_prefix": "", + "filter_suffix": ".jpeg", + "id": "tf-s3-lambda-20241122145127431100000003", + "lambda_function_arn": "arn:aws:lambda:us-east-1:730335385934:function:textract-lambda" + } + ], + "queue": [], + "topic": [] + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_lambda_function.textract_lambda", + "aws_lambda_permission.allow_s3", + "aws_s3_bucket.textract_bucket", + "aws_sns_topic.textract_topic" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_sns_topic", + "name": "textract_topic", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "application_failure_feedback_role_arn": "", + "application_success_feedback_role_arn": "", + "application_success_feedback_sample_rate": 0, + "archive_policy": "", + "arn": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic", + "beginning_archive_time": "", + "content_based_deduplication": false, + "delivery_policy": "", + "display_name": "", + "fifo_topic": false, + "firehose_failure_feedback_role_arn": "", + "firehose_success_feedback_role_arn": "", + "firehose_success_feedback_sample_rate": 0, + "http_failure_feedback_role_arn": "", + "http_success_feedback_role_arn": "", + "http_success_feedback_sample_rate": 0, + "id": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic", + "kms_master_key_id": "", + "lambda_failure_feedback_role_arn": "", + "lambda_success_feedback_role_arn": "", + 
"lambda_success_feedback_sample_rate": 0, + "name": "textract-sns-topic", + "name_prefix": "", + "owner": "730335385934", + "policy": "{\"Id\":\"__default_policy_ID\",\"Statement\":[{\"Action\":[\"SNS:GetTopicAttributes\",\"SNS:SetTopicAttributes\",\"SNS:AddPermission\",\"SNS:RemovePermission\",\"SNS:DeleteTopic\",\"SNS:Subscribe\",\"SNS:ListSubscriptionsByTopic\",\"SNS:Publish\"],\"Condition\":{\"StringEquals\":{\"AWS:SourceOwner\":\"730335385934\"}},\"Effect\":\"Allow\",\"Principal\":{\"AWS\":\"*\"},\"Resource\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\",\"Sid\":\"__default_statement_ID\"}],\"Version\":\"2008-10-17\"}", + "signature_version": 0, + "sqs_failure_feedback_role_arn": "", + "sqs_success_feedback_role_arn": "", + "sqs_success_feedback_sample_rate": 0, + "tags": { + "Name": "TextractSNSTopic" + }, + "tags_all": { + "Name": "TextractSNSTopic" + }, + "tracing_config": "" + }, + "sensitive_attributes": [], + "private": "bnVsbA==" + } + ] + }, + { + "mode": "managed", + "type": "aws_sns_topic_subscription", + "name": "sns_to_sqs", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "arn": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic:3057bb09-b74d-46b8-b182-6d9f09406c41", + "confirmation_timeout_in_minutes": 1, + "confirmation_was_authenticated": true, + "delivery_policy": "", + "endpoint": "arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue", + "endpoint_auto_confirms": false, + "filter_policy": "", + "filter_policy_scope": "", + "id": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic:3057bb09-b74d-46b8-b182-6d9f09406c41", + "owner_id": "730335385934", + "pending_confirmation": false, + "protocol": "sqs", + "raw_message_delivery": false, + "redrive_policy": "", + "replay_policy": "", + "subscription_role_arn": "", + "topic_arn": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + 
"dependencies": [ + "aws_sns_topic.textract_topic", + "aws_sqs_queue.textract_queue", + "aws_sqs_queue_policy.sns_publish_policy" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_sqs_queue", + "name": "textract_queue", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "arn": "arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue", + "content_based_deduplication": false, + "deduplication_scope": "", + "delay_seconds": 0, + "fifo_queue": false, + "fifo_throughput_limit": "", + "id": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue", + "kms_data_key_reuse_period_seconds": 300, + "kms_master_key_id": "", + "max_message_size": 262144, + "message_retention_seconds": 345600, + "name": "textract-sqs-queue", + "name_prefix": "", + "policy": "{\"Statement\":[{\"Action\":\"sqs:SendMessage\",\"Condition\":{\"ArnEquals\":{\"aws:SourceArn\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\"}},\"Effect\":\"Allow\",\"Principal\":{\"Service\":\"sns.amazonaws.com\"},\"Resource\":\"arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue\"}],\"Version\":\"2012-10-17\"}", + "receive_wait_time_seconds": 0, + "redrive_allow_policy": "", + "redrive_policy": "", + "sqs_managed_sse_enabled": true, + "tags": { + "Name": "TextractQueue" + }, + "tags_all": { + "Name": "TextractQueue" + }, + "url": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue", + "visibility_timeout_seconds": 60 + }, + "sensitive_attributes": [], + "private": "bnVsbA==" + } + ] + }, + { + "mode": "managed", + "type": "aws_sqs_queue_policy", + "name": "sns_publish_policy", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 1, + "attributes": { + "id": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue", + "policy": 
"{\"Statement\":[{\"Action\":\"sqs:SendMessage\",\"Condition\":{\"ArnEquals\":{\"aws:SourceArn\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\"}},\"Effect\":\"Allow\",\"Principal\":{\"Service\":\"sns.amazonaws.com\"},\"Resource\":\"arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue\"}],\"Version\":\"2012-10-17\"}", + "queue_url": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue" + }, + "sensitive_attributes": [], + "private": "eyJzY2hlbWFfdmVyc2lvbiI6IjEifQ==", + "dependencies": [ + "aws_sns_topic.textract_topic", + "aws_sqs_queue.textract_queue" + ] + } + ] + } + ], + "check_results": [ + { + "object_kind": "var", + "config_addr": "var.region", + "status": "pass", + "objects": [ + { + "object_addr": "var.region", + "status": "pass" + } + ] + } + ] +} diff --git a/iac/demo/textract/terraform.tfstate.backup b/iac/demo/textract/terraform.tfstate.backup new file mode 100644 index 0000000..1eeb6ba --- /dev/null +++ b/iac/demo/textract/terraform.tfstate.backup @@ -0,0 +1,643 @@ +{ + "version": 4, + "terraform_version": "1.9.8", + "serial": 116, + "lineage": "9648e928-c2fd-927c-5318-8f60366d97c1", + "outputs": { + "csv_lambda_function_arn": { + "value": "arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda", + "type": "string" + }, + "csv_s3_prefix": { + "value": "processed/csv/", + "type": "string" + }, + "lambda_function_arn": { + "value": "arn:aws:lambda:us-east-1:730335385934:function:textract-lambda", + "type": "string" + }, + "s3_bucket_name": { + "value": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "type": "string" + }, + "sqs_queue_url": { + "value": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue", + "type": "string" + } + }, + "resources": [ + { + "mode": "managed", + "type": "aws_iam_role", + "name": "lambda_role", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "arn": 
"arn:aws:iam::730335385934:role/lambda-textract-role", + "assume_role_policy": "{\"Statement\":[{\"Action\":\"sts:AssumeRole\",\"Effect\":\"Allow\",\"Principal\":{\"Service\":\"lambda.amazonaws.com\"}}],\"Version\":\"2012-10-17\"}", + "create_date": "2024-11-22T14:41:26Z", + "description": "", + "force_detach_policies": false, + "id": "lambda-textract-role", + "inline_policy": [ + { + "name": "lambda-textract-policy", + "policy": "{\"Version\":\"2012-10-17\",\"Statement\":[{\"Action\":[\"s3:GetObject\",\"s3:PutObject\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:s3:::acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001/*\"},{\"Action\":[\"sns:Publish\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\"},{\"Action\":[\"textract:DetectDocumentText\"],\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":\"textract:AnalyzeDocument\",\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":[\"logs:CreateLogGroup\",\"logs:CreateLogStream\",\"logs:PutLogEvents\"],\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":[\"sqs:ReceiveMessage\",\"sqs:DeleteMessage\",\"sqs:GetQueueAttributes\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue\"}]}" + } + ], + "managed_policy_arns": [], + "max_session_duration": 3600, + "name": "lambda-textract-role", + "name_prefix": "", + "path": "/", + "permissions_boundary": "", + "tags": {}, + "tags_all": {}, + "unique_id": "AROA2UC3BTFHE5U2T7YTS" + }, + "sensitive_attributes": [], + "private": "bnVsbA==" + } + ] + }, + { + "mode": "managed", + "type": "aws_iam_role_policy", + "name": "lambda_policy", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "id": "lambda-textract-role:lambda-textract-policy", + "name": "lambda-textract-policy", + "name_prefix": "", + "policy": 
"{\"Version\":\"2012-10-17\",\"Statement\":[{\"Action\":[\"s3:GetObject\",\"s3:PutObject\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:s3:::acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001/*\"},{\"Action\":[\"sns:Publish\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\"},{\"Action\":[\"textract:DetectDocumentText\"],\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":\"textract:AnalyzeDocument\",\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":[\"logs:CreateLogGroup\",\"logs:CreateLogStream\",\"logs:PutLogEvents\"],\"Effect\":\"Allow\",\"Resource\":\"*\"},{\"Action\":[\"sqs:ReceiveMessage\",\"sqs:DeleteMessage\",\"sqs:GetQueueAttributes\"],\"Effect\":\"Allow\",\"Resource\":\"arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue\"}]}", + "role": "lambda-textract-role" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_s3_bucket.textract_bucket", + "aws_sns_topic.textract_topic", + "aws_sqs_queue.textract_queue" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_lambda_event_source_mapping", + "name": "sqs_trigger", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "amazon_managed_kafka_event_source_config": [], + "arn": "arn:aws:lambda:us-east-1:730335385934:event-source-mapping:37919b3d-a27f-46b1-8241-656a3d122b32", + "batch_size": 1, + "bisect_batch_on_function_error": false, + "destination_config": [], + "document_db_event_source_config": [], + "enabled": true, + "event_source_arn": "arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue", + "filter_criteria": [], + "function_arn": "arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda", + "function_name": "arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda", + "function_response_types": [], + "id": "37919b3d-a27f-46b1-8241-656a3d122b32", + "kms_key_arn": "", + 
"last_modified": "2024-11-22T14:51:34Z", + "last_processing_result": "", + "maximum_batching_window_in_seconds": 0, + "maximum_record_age_in_seconds": 0, + "maximum_retry_attempts": 0, + "parallelization_factor": 0, + "queues": [], + "scaling_config": [], + "self_managed_event_source": [], + "self_managed_kafka_event_source_config": [], + "source_access_configuration": [], + "starting_position": "", + "starting_position_timestamp": "", + "state": "Enabled", + "state_transition_reason": "USER_INITIATED", + "tags": {}, + "tags_all": {}, + "topics": [], + "tumbling_window_in_seconds": 0, + "uuid": "37919b3d-a27f-46b1-8241-656a3d122b32" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_lambda_function.sqs_to_csv_lambda", + "aws_s3_bucket.textract_bucket", + "aws_sqs_queue.textract_queue" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_lambda_function", + "name": "sqs_to_csv_lambda", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "architectures": [ + "x86_64" + ], + "arn": "arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda", + "code_sha256": "m6R9w5Dk9gGfEC0SomXgR/vzifQhrOfiwo02iYaAAks=", + "code_signing_config_arn": "", + "dead_letter_config": [], + "description": "", + "environment": [ + { + "variables": { + "CSV_S3_BUCKET": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "CSV_S3_PREFIX": "processed/csv/" + } + } + ], + "ephemeral_storage": [ + { + "size": 512 + } + ], + "file_system_config": [], + "filename": "sqs_to_csv_lambda.zip", + "function_name": "sqs-to-csv-lambda", + "handler": "sqs_to_csv_lambda.lambda_handler", + "id": "sqs-to-csv-lambda", + "image_config": [], + "image_uri": "", + "invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda/invocations", + "kms_key_arn": "", + 
"last_modified": "2024-12-12T01:10:23.000+0000", + "layers": [], + "logging_config": [ + { + "application_log_level": "", + "log_format": "Text", + "log_group": "/aws/lambda/sqs-to-csv-lambda", + "system_log_level": "" + } + ], + "memory_size": 128, + "package_type": "Zip", + "publish": false, + "qualified_arn": "arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda:$LATEST", + "qualified_invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:730335385934:function:sqs-to-csv-lambda:$LATEST/invocations", + "replace_security_groups_on_destroy": null, + "replacement_security_group_ids": null, + "reserved_concurrent_executions": -1, + "role": "arn:aws:iam::730335385934:role/lambda-textract-role", + "runtime": "python3.11", + "s3_bucket": null, + "s3_key": null, + "s3_object_version": null, + "signing_job_arn": "", + "signing_profile_version_arn": "", + "skip_destroy": false, + "snap_start": [], + "source_code_hash": "m6R9w5Dk9gGfEC0SomXgR/vzifQhrOfiwo02iYaAAks=", + "source_code_size": 1396, + "tags": {}, + "tags_all": {}, + "timeout": 60, + "timeouts": null, + "tracing_config": [ + { + "mode": "PassThrough" + } + ], + "version": "$LATEST", + "vpc_config": [] + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6NjAwMDAwMDAwMDAwLCJ1cGRhdGUiOjYwMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_s3_bucket.textract_bucket" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_lambda_function", + "name": "textract_lambda", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "architectures": [ + "x86_64" + ], + "arn": "arn:aws:lambda:us-east-1:730335385934:function:textract-lambda", + "code_sha256": "YdYIQ7CITqehgxJY9zX5CQlgY4x/80yRirW+tmkDurI=", + "code_signing_config_arn": "", + "dead_letter_config": [], + "description": 
"", + "environment": [ + { + "variables": { + "SNS_TOPIC_ARN": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic" + } + } + ], + "ephemeral_storage": [ + { + "size": 512 + } + ], + "file_system_config": [], + "filename": "lambda_function.zip", + "function_name": "textract-lambda", + "handler": "lambda_function.lambda_handler", + "id": "textract-lambda", + "image_config": [], + "image_uri": "", + "invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:730335385934:function:textract-lambda/invocations", + "kms_key_arn": "", + "last_modified": "2024-12-12T00:26:58.000+0000", + "layers": [], + "logging_config": [ + { + "application_log_level": "", + "log_format": "Text", + "log_group": "/aws/lambda/textract-lambda", + "system_log_level": "" + } + ], + "memory_size": 128, + "package_type": "Zip", + "publish": false, + "qualified_arn": "arn:aws:lambda:us-east-1:730335385934:function:textract-lambda:$LATEST", + "qualified_invoke_arn": "arn:aws:apigateway:us-east-1:lambda:path/2015-03-31/functions/arn:aws:lambda:us-east-1:730335385934:function:textract-lambda:$LATEST/invocations", + "replace_security_groups_on_destroy": null, + "replacement_security_group_ids": null, + "reserved_concurrent_executions": -1, + "role": "arn:aws:iam::730335385934:role/lambda-textract-role", + "runtime": "python3.11", + "s3_bucket": null, + "s3_key": null, + "s3_object_version": null, + "signing_job_arn": "", + "signing_profile_version_arn": "", + "skip_destroy": false, + "snap_start": [], + "source_code_hash": "YdYIQ7CITqehgxJY9zX5CQlgY4x/80yRirW+tmkDurI=", + "source_code_size": 1620, + "tags": {}, + "tags_all": {}, + "timeout": 60, + "timeouts": null, + "tracing_config": [ + { + "mode": "PassThrough" + } + ], + "version": "$LATEST", + "vpc_config": [] + }, + "sensitive_attributes": [], + "private": 
"eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjo2MDAwMDAwMDAwMDAsImRlbGV0ZSI6NjAwMDAwMDAwMDAwLCJ1cGRhdGUiOjYwMDAwMDAwMDAwMH19", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_sns_topic.textract_topic" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_lambda_permission", + "name": "allow_s3", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "action": "lambda:InvokeFunction", + "event_source_token": null, + "function_name": "textract-lambda", + "function_url_auth_type": null, + "id": "AllowS3Invoke", + "principal": "s3.amazonaws.com", + "principal_org_id": null, + "qualifier": "", + "source_account": null, + "source_arn": "arn:aws:s3:::acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "statement_id": "AllowS3Invoke", + "statement_id_prefix": "" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_lambda_function.textract_lambda", + "aws_s3_bucket.textract_bucket", + "aws_sns_topic.textract_topic" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_s3_bucket", + "name": "textract_bucket", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "acceleration_status": "", + "acl": null, + "arn": "arn:aws:s3:::acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "bucket": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "bucket_domain_name": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001.s3.amazonaws.com", + "bucket_prefix": "acdkochi2024-textract-s3-bucket-demo-", + "bucket_regional_domain_name": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001.s3.us-east-1.amazonaws.com", + "cors_rule": [], + "force_destroy": true, + "grant": [ + { + "id": "c89bce84d3a5eb193efe6a3c6cdcec152c336289e819a195d5963b7f14bee28e", + "permissions": [ 
+ "FULL_CONTROL" + ], + "type": "CanonicalUser", + "uri": "" + } + ], + "hosted_zone_id": "Z3AQBSTGFYJSTF", + "id": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "lifecycle_rule": [], + "logging": [], + "object_lock_configuration": [], + "object_lock_enabled": false, + "policy": "", + "region": "us-east-1", + "replication_configuration": [], + "request_payer": "BucketOwner", + "server_side_encryption_configuration": [ + { + "rule": [ + { + "apply_server_side_encryption_by_default": [ + { + "kms_master_key_id": "", + "sse_algorithm": "AES256" + } + ], + "bucket_key_enabled": false + } + ] + } + ], + "tags": { + "Name": "TextractBucket" + }, + "tags_all": { + "Name": "TextractBucket" + }, + "timeouts": null, + "versioning": [ + { + "enabled": false, + "mfa_delete": false + } + ], + "website": [], + "website_domain": null, + "website_endpoint": null + }, + "sensitive_attributes": [], + "private": "eyJlMmJmYjczMC1lY2FhLTExZTYtOGY4OC0zNDM2M2JjN2M0YzAiOnsiY3JlYXRlIjoxMjAwMDAwMDAwMDAwLCJkZWxldGUiOjM2MDAwMDAwMDAwMDAsInJlYWQiOjEyMDAwMDAwMDAwMDAsInVwZGF0ZSI6MTIwMDAwMDAwMDAwMH19" + } + ] + }, + { + "mode": "managed", + "type": "aws_s3_bucket_notification", + "name": "bucket_notification", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "bucket": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "eventbridge": false, + "id": "acdkochi2024-textract-s3-bucket-demo-20241122145119754300000001", + "lambda_function": [ + { + "events": [ + "s3:ObjectCreated:*" + ], + "filter_prefix": "", + "filter_suffix": ".pdf", + "id": "tf-s3-lambda-20241122145127431100000002", + "lambda_function_arn": "arn:aws:lambda:us-east-1:730335385934:function:textract-lambda" + }, + { + "events": [ + "s3:ObjectCreated:*" + ], + "filter_prefix": "", + "filter_suffix": ".jpeg", + "id": "tf-s3-lambda-20241122145127431100000003", + "lambda_function_arn": 
"arn:aws:lambda:us-east-1:730335385934:function:textract-lambda" + } + ], + "queue": [], + "topic": [] + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "aws_iam_role.lambda_role", + "aws_lambda_function.textract_lambda", + "aws_lambda_permission.allow_s3", + "aws_s3_bucket.textract_bucket", + "aws_sns_topic.textract_topic" + ] + } + ] + }, + { + "mode": "managed", + "type": "aws_sns_topic", + "name": "textract_topic", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "application_failure_feedback_role_arn": "", + "application_success_feedback_role_arn": "", + "application_success_feedback_sample_rate": 0, + "archive_policy": "", + "arn": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic", + "beginning_archive_time": "", + "content_based_deduplication": false, + "delivery_policy": "", + "display_name": "", + "fifo_topic": false, + "firehose_failure_feedback_role_arn": "", + "firehose_success_feedback_role_arn": "", + "firehose_success_feedback_sample_rate": 0, + "http_failure_feedback_role_arn": "", + "http_success_feedback_role_arn": "", + "http_success_feedback_sample_rate": 0, + "id": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic", + "kms_master_key_id": "", + "lambda_failure_feedback_role_arn": "", + "lambda_success_feedback_role_arn": "", + "lambda_success_feedback_sample_rate": 0, + "name": "textract-sns-topic", + "name_prefix": "", + "owner": "730335385934", + "policy": 
"{\"Id\":\"__default_policy_ID\",\"Statement\":[{\"Action\":[\"SNS:GetTopicAttributes\",\"SNS:SetTopicAttributes\",\"SNS:AddPermission\",\"SNS:RemovePermission\",\"SNS:DeleteTopic\",\"SNS:Subscribe\",\"SNS:ListSubscriptionsByTopic\",\"SNS:Publish\"],\"Condition\":{\"StringEquals\":{\"AWS:SourceOwner\":\"730335385934\"}},\"Effect\":\"Allow\",\"Principal\":{\"AWS\":\"*\"},\"Resource\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\",\"Sid\":\"__default_statement_ID\"}],\"Version\":\"2008-10-17\"}", + "signature_version": 0, + "sqs_failure_feedback_role_arn": "", + "sqs_success_feedback_role_arn": "", + "sqs_success_feedback_sample_rate": 0, + "tags": { + "Name": "TextractSNSTopic" + }, + "tags_all": { + "Name": "TextractSNSTopic" + }, + "tracing_config": "" + }, + "sensitive_attributes": [], + "private": "bnVsbA==" + } + ] + }, + { + "mode": "managed", + "type": "aws_sns_topic_subscription", + "name": "sns_to_sqs", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "arn": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic:3057bb09-b74d-46b8-b182-6d9f09406c41", + "confirmation_timeout_in_minutes": 1, + "confirmation_was_authenticated": true, + "delivery_policy": "", + "endpoint": "arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue", + "endpoint_auto_confirms": false, + "filter_policy": "", + "filter_policy_scope": "", + "id": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic:3057bb09-b74d-46b8-b182-6d9f09406c41", + "owner_id": "730335385934", + "pending_confirmation": false, + "protocol": "sqs", + "raw_message_delivery": false, + "redrive_policy": "", + "replay_policy": "", + "subscription_role_arn": "", + "topic_arn": "arn:aws:sns:us-east-1:730335385934:textract-sns-topic" + }, + "sensitive_attributes": [], + "private": "bnVsbA==", + "dependencies": [ + "aws_sns_topic.textract_topic", + "aws_sqs_queue.textract_queue", + "aws_sqs_queue_policy.sns_publish_policy" + ] + 
} + ] + }, + { + "mode": "managed", + "type": "aws_sqs_queue", + "name": "textract_queue", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 0, + "attributes": { + "arn": "arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue", + "content_based_deduplication": false, + "deduplication_scope": "", + "delay_seconds": 0, + "fifo_queue": false, + "fifo_throughput_limit": "", + "id": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue", + "kms_data_key_reuse_period_seconds": 300, + "kms_master_key_id": "", + "max_message_size": 262144, + "message_retention_seconds": 345600, + "name": "textract-sqs-queue", + "name_prefix": "", + "policy": "{\"Statement\":[{\"Action\":\"sqs:SendMessage\",\"Condition\":{\"ArnEquals\":{\"aws:SourceArn\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\"}},\"Effect\":\"Allow\",\"Principal\":{\"Service\":\"sns.amazonaws.com\"},\"Resource\":\"arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue\"}],\"Version\":\"2012-10-17\"}", + "receive_wait_time_seconds": 0, + "redrive_allow_policy": "", + "redrive_policy": "", + "sqs_managed_sse_enabled": true, + "tags": { + "Name": "TextractQueue" + }, + "tags_all": { + "Name": "TextractQueue" + }, + "url": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue", + "visibility_timeout_seconds": 60 + }, + "sensitive_attributes": [], + "private": "bnVsbA==" + } + ] + }, + { + "mode": "managed", + "type": "aws_sqs_queue_policy", + "name": "sns_publish_policy", + "provider": "provider[\"registry.terraform.io/hashicorp/aws\"]", + "instances": [ + { + "schema_version": 1, + "attributes": { + "id": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue", + "policy": 
"{\"Statement\":[{\"Action\":\"sqs:SendMessage\",\"Condition\":{\"ArnEquals\":{\"aws:SourceArn\":\"arn:aws:sns:us-east-1:730335385934:textract-sns-topic\"}},\"Effect\":\"Allow\",\"Principal\":{\"Service\":\"sns.amazonaws.com\"},\"Resource\":\"arn:aws:sqs:us-east-1:730335385934:textract-sqs-queue\"}],\"Version\":\"2012-10-17\"}", + "queue_url": "https://sqs.us-east-1.amazonaws.com/730335385934/textract-sqs-queue" + }, + "sensitive_attributes": [], + "private": "eyJzY2hlbWFfdmVyc2lvbiI6IjEifQ==", + "dependencies": [ + "aws_sns_topic.textract_topic", + "aws_sqs_queue.textract_queue" + ] + } + ] + } + ], + "check_results": [ + { + "object_kind": "var", + "config_addr": "var.region", + "status": "pass", + "objects": [ + { + "object_addr": "var.region", + "status": "pass" + } + ] + } + ] +} diff --git a/iac/demo/textract/variables.tf b/iac/demo/textract/variables.tf new file mode 100644 index 0000000..dd934e9 --- /dev/null +++ b/iac/demo/textract/variables.tf @@ -0,0 +1,40 @@ +variable "region" { + description = "AWS region to deploy resources" + default = "us-east-1" + + validation { + condition = contains(["us-east-1"], var.region) + error_message = "Only us-east-1 is allowed for deployment." + } +} + + +variable "lambda_function_name" { + description = "Lambda function name" + default = "textract-lambda" +} + +variable "s3_bucket_name" { + description = "Name of the S3 bucket" + default = "acdkochi2024-textract-s3-bucket-demo-" +} + +variable "sqs_queue_name" { + description = "Name of the SQS queue" + default = "textract-sqs-queue" +} + +variable "csv_s3_prefix" { + description = "S3 prefix for storing CSV files" + default = "processed/csv/" +} + +variable "lambda_function_name_2" { + description = "Name for the second Lambda function" + default = "sqs-to-csv-lambda" +} + +variable "aws_profile" { + description = "AWS credentials profile to use" + default = "dev" +} \ No newline at end of file