# generated by datamodel-codegen:
#   filename:  metadataIngestion/databaseServiceProfilerPipeline.json
#   timestamp: 2024-11-26T06:50:54+00:00

from __future__ import annotations

from enum import Enum
from typing import Optional

from pydantic import ConfigDict, Field
from typing_extensions import Annotated

from metadata.ingestion.models.custom_pydantic import BaseModel

from ..entity.data import table
from ..type import filterPattern


# Closed set of values accepted by the `type` field of the profiler pipeline
# configuration. Only one value is defined.
# NOTE: kept as comments rather than a docstring so the enum's `__doc__`
# (and anything derived from it) stays exactly as generated.
class ProfilerConfigType(Enum):
    Profiler = "Profiler"


# Configuration model for a database-service profiler pipeline.
#
# Every field is optional and carries a default, so an empty payload yields a
# fully populated configuration. Unknown keys are rejected (`extra='forbid'`).
# NOTE: documented with comments instead of a class docstring so the model's
# `__doc__` (and anything derived from it) stays exactly as generated.
class DatabaseServiceProfilerPipeline(BaseModel):
    model_config = ConfigDict(extra='forbid')

    # --- Pipeline discriminator -------------------------------------------
    type: Annotated[
        Optional[ProfilerConfigType],
        Field(default=ProfilerConfigType.Profiler, description='Pipeline type'),
    ]

    # --- Entity selection ---------------------------------------------------
    # The three filter patterns default to None (no pattern supplied).
    schemaFilterPattern: Annotated[
        Optional[filterPattern.FilterPattern],
        Field(
            default=None,
            description='Regex to only fetch tables or databases that matches the pattern.',
            title='Schema Filter Pattern',
        ),
    ]
    tableFilterPattern: Annotated[
        Optional[filterPattern.FilterPattern],
        Field(
            default=None,
            description='Regex exclude tables or databases that matches the pattern.',
            title='Table Filter Pattern',
        ),
    ]
    databaseFilterPattern: Annotated[
        Optional[filterPattern.FilterPattern],
        Field(
            default=None,
            description='Regex to only fetch databases that matches the pattern.',
            title='Database Filter Pattern',
        ),
    ]
    # Views are included unless explicitly switched off.
    includeViews: Annotated[
        Optional[bool],
        Field(
            default=True,
            description='Optional configuration to turn off fetching metadata for views.',
            title='Include Views',
        ),
    ]
    # Off by default: patterns are matched against the raw name.
    useFqnForFiltering: Annotated[
        Optional[bool],
        Field(
            default=False,
            description='Regex will be applied on fully qualified name (e.g service_name.db_name.schema_name.table_name) instead of raw name (e.g. table_name)',
            title='Use FQN For Filtering',
        ),
    ]

    # --- What the profiler produces ---------------------------------------
    generateSampleData: Annotated[
        Optional[bool],
        Field(
            default=True,
            description='Option to turn on/off generating sample data. If enabled, profiler will ingest sample data for each table.',
            title='Generate Sample Data',
        ),
    ]
    computeMetrics: Annotated[
        Optional[bool],
        Field(
            default=True,
            description='Option to turn on/off computing profiler metrics.',
            title='Compute Metrics',
        ),
    ]

    # --- PII auto-tagging ---------------------------------------------------
    processPiiSensitive: Annotated[
        Optional[bool],
        Field(
            default=False,
            description='Optional configuration to automatically tag columns that might contain sensitive information',
            title='Auto Tag PII',
        ),
    ]
    # Expressed on a 0-100 scale per the description; the default is 80.
    confidence: Annotated[
        Optional[float],
        Field(
            default=80,
            description='Set the Confidence value for which you want the column to be tagged as PII. Confidence value ranges from 0 to 100. A higher number will yield less false positives but more false negatives. A lower number will yield more false positives but less false negatives.',
            title='PII Inference Confidence Level',
        ),
    ]

    # --- Sampling -----------------------------------------------------------
    # `profileSample` is a percentage or a row count (see its description);
    # `profileSampleType` defaults to PERCENTAGE.
    profileSampleType: Annotated[
        Optional[table.ProfileSampleType],
        Field(default=table.ProfileSampleType.PERCENTAGE, title='Profile Sample Type'),
    ]
    profileSample: Annotated[
        Optional[float],
        Field(
            default=None,
            description='Percentage of data or no. of rows used to compute the profiler metrics and run data quality tests',
            title='Profile Sample',
        ),
    ]
    sampleDataCount: Annotated[
        Optional[int],
        Field(
            default=50,
            description="Number of sample rows to ingest when 'Generate Sample Data' is enabled",
            title='Sample Data Rows Count',
        ),
    ]

    # --- Execution limits ---------------------------------------------------
    # NOTE(review): typed as float although it counts threads; presumably this
    # mirrors the source JSON schema type, so any change belongs in the schema
    # rather than in this generated file - confirm.
    threadCount: Annotated[
        Optional[float],
        Field(
            default=5,
            description='Number of threads to use during metric computations',
            title='Thread Count',
        ),
    ]
    # Default of 43200 seconds is 12 hours.
    timeoutSeconds: Annotated[
        Optional[int],
        Field(
            default=43200,
            description='Profiler Timeout in Seconds',
            title='Timeout (in sec.)',
        ),
    ]
