# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import json
import logging
import os
import uuid

import boto3
import textractcaller as tc
import textractmanifest as tm

# Module-level logger named after this module; its level is set on every
# invocation inside lambda_handler from the LOG_LEVEL environment variable.
logger = logging.getLogger(__name__)
# Version of this handler, reported in the first log line of each invocation.
__version__ = "0.0.8"
# S3 client created once at import time and reused by lambda_handler to
# upload the Textract response.
s3 = boto3.client('s3')


def lambda_handler(event, _):
    """Run Textract AnalyzeID on the manifest's pages and store the result in S3.

    Configuration is read from environment variables:

    * ``LOG_LEVEL`` - logger level name, defaults to ``INFO``.
    * ``S3_OUTPUT_BUCKET`` - bucket the JSON response is written to (required).
    * ``S3_OUTPUT_PREFIX`` - key prefix for the JSON response (required).
    * ``TEXTRACT_ENDPOINT_URL`` - optional custom endpoint for the Textract
      client.

    Args:
        event: Invocation payload. Must contain a non-empty ``"manifest"``
            entry that ``tm.IDPManifestSchema().load`` can deserialize and
            whose ``document_pages`` is non-empty.
        _: Second handler argument (unused).

    Returns:
        A dict ``{"TextractOutputJsonPath": "s3://<bucket>/<key>"}`` pointing
        at the uploaded response. The key is
        ``<prefix>/<uuid4>/<base name of first page without extension>.json``.

    Raises:
        ValueError: If ``S3_OUTPUT_BUCKET`` or ``S3_OUTPUT_PREFIX`` is unset
            or empty, or if the manifest has no ``document_pages``.
        Exception: If the event carries no manifest.
    """
    log_level = os.environ.get('LOG_LEVEL', 'INFO')
    logger.setLevel(log_level)
    logger.info(
        f"version: {__version__}\ntextractmanifest version: {tm.__version__}\nboto3 version: {boto3.__version__}\ntextractcaller version: {tc.__version__}."
    )
    logger.info(json.dumps(event))
    textract_endpoint_url = os.environ.get('TEXTRACT_ENDPOINT_URL', None)
    s3_output_bucket = os.environ.get('S3_OUTPUT_BUCKET')
    s3_output_prefix = os.environ.get('S3_OUTPUT_PREFIX')

    # Fail fast on missing output configuration, before any service call.
    if not s3_output_bucket or not s3_output_prefix:
        raise ValueError(
            f"no s3_output_bucket: {s3_output_bucket} or s3_output_prefix: {s3_output_prefix} defined."
        )
    logger.info(f"LOG_LEVEL: {log_level} \n \
                S3_OUTPUT_BUCKET: {s3_output_bucket} \n \
                S3_OUTPUT_PREFIX: {s3_output_prefix} \n \
                TEXTRACT_ENDPOINT_URL: {textract_endpoint_url}")

    # Only pass endpoint_url when one is configured so the default endpoint
    # resolution is used otherwise.
    if textract_endpoint_url:
        textract = boto3.client("textract", endpoint_url=textract_endpoint_url)
    else:
        textract = boto3.client("textract")

    if not ("manifest" in event and event['manifest']):
        raise Exception(f"no manifest in event: {event}")
    manifest: tm.IDPManifest = tm.IDPManifestSchema().load(
        event['manifest'])  #type: ignore
    logger.debug(manifest)

    # Validate BEFORE indexing: reading document_pages[0] first would raise
    # IndexError/TypeError on an empty or missing list and hide this error.
    if not manifest.document_pages:
        raise ValueError(f"no document_pages in manifest file: {manifest}")

    # The output object is named after the first page of the document.
    s3_path = manifest.document_pages[0]
    s3_filename = os.path.splitext(os.path.basename(s3_path))[0]

    logger.debug(f"before call_textract\n \
        input_document: {s3_path} \n ")
    textract_response = tc.call_textract_analyzeid(
        document_pages=manifest.document_pages,  #type: ignore
        boto3_textract_client=textract)

    # A fresh UUID path segment keeps repeated runs on the same input from
    # overwriting each other's output.
    output_bucket_key = "/".join(
        (s3_output_prefix, str(uuid.uuid4()), s3_filename + ".json"))
    s3.put_object(Body=json.dumps(textract_response,
                                  indent=4).encode('UTF-8'),
                  Bucket=s3_output_bucket,
                  Key=output_bucket_key)
    return {
        "TextractOutputJsonPath":
        f"s3://{s3_output_bucket}/{output_bucket_key}"
    }
