# SPDX-License-Identifier: Apache-2.0
# (C) Copyright IBM Corp. 2024.
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

import os
import sys

from data_processing.utils import ParamsUtils
from data_processing_ray.runtime.ray import RayTransformLauncher
from dpk_ededup.ray.runtime import (
    EdedupRayTransformRuntimeConfiguration,
    hash_cpu_cli_params,
    num_hashes_cli_params,
)
from dpk_ededup.transform_base import (
    doc_column_name_cli_param,
    int_column_name_cli_param,
    snapshot_directory_cli_param,
    use_snapshot_cli_param,
)


# Local driver script: configures the ededup transform for the Ray runtime
# and launches it against the test data that ships next to this package.

# create launcher
launcher = RayTransformLauncher(EdedupRayTransformRuntimeConfiguration())

# create parameters
# Input/output locations are resolved relative to this file, so the script
# behaves the same regardless of the current working directory.
input_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../test-data/input"))
output_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), "../output"))
local_conf = {
    "input_folder": input_folder,
    "output_folder": output_folder,
}
worker_options = {"num_cpus": 0.5}
# Placeholder values only; presumably recorded as run metadata -- TODO confirm.
code_location = {"github": "github", "commit_hash": "12345", "path": "path"}
params = {
    # where to run
    "run_locally": True,
    # Data access. Only required parameters are specified
    "data_local_config": ParamsUtils.convert_to_ast(local_conf),
    # orchestrator
    "runtime_worker_options": ParamsUtils.convert_to_ast(worker_options),
    "runtime_num_workers": 2,
    "runtime_pipeline_id": "pipeline_id",
    "runtime_job_id": "job_id",
    "runtime_creation_delay": 0,
    "runtime_code_location": ParamsUtils.convert_to_ast(code_location),
    # ededup parameters
    hash_cpu_cli_params: 0.5,
    num_hashes_cli_params: 2,
    doc_column_name_cli_param: "contents",
    int_column_name_cli_param: "document_id",
    use_snapshot_cli_param: True,
    # Built with os.path.join (like the folders above) rather than string
    # concatenation, so the separator matches the platform.
    snapshot_directory_cli_param: os.path.join(input_folder, "snapshot"),
}
# The parameters are handed over as command-line arguments: sys.argv is
# replaced with the argument list generated from the dictionary above.
sys.argv = ParamsUtils.dict_to_req(d=params)

# launch
launcher.launch()
