# generated by datamodel-codegen:
#   filename:  metadataIngestion/databaseServiceProfilerPipeline.json
#   timestamp: 2024-07-04T09:01:46+00:00

from __future__ import annotations

from enum import Enum
from typing import Optional

from pydantic import BaseModel, Extra, Field

from ..entity.data import table
from ..type import filterPattern


# Enumerates the values accepted by `DatabaseServiceProfilerPipeline.type`.
# There is exactly one member, so the field can only ever hold 'Profiler'.
# NOTE: this file is generated by datamodel-codegen; lasting changes belong in
# metadataIngestion/databaseServiceProfilerPipeline.json, not here.
class ProfilerConfigType(Enum):
    # Also used as the default of `DatabaseServiceProfilerPipeline.type`.
    Profiler = 'Profiler'


# Pydantic model describing the profiler pipeline configuration of a database
# service. Every field is Optional and carries a default, so the model can be
# built with no arguments.
#
# NOTE: generated by datamodel-codegen from
# metadataIngestion/databaseServiceProfilerPipeline.json. The `description` and
# `title` strings below are copied from that schema verbatim; fix wording
# upstream rather than in this file.
class DatabaseServiceProfilerPipeline(BaseModel):
    class Config:
        # Extra (undeclared) fields are forbidden on this model.
        extra = Extra.forbid

    # --- pipeline discriminator -------------------------------------------
    type: Optional[ProfilerConfigType] = Field(
        default=ProfilerConfigType.Profiler,
        description='Pipeline type',
    )

    # --- entity filters ----------------------------------------------------
    # NOTE(review): the description talks about "tables or databases" although
    # the field is the schema filter -- wording comes from the JSON schema.
    schemaFilterPattern: Optional[filterPattern.FilterPattern] = Field(
        default=None,
        description='Regex to only fetch tables or databases that matches the pattern.',
        title='Schema Filter Pattern',
    )
    tableFilterPattern: Optional[filterPattern.FilterPattern] = Field(
        default=None,
        description='Regex exclude tables or databases that matches the pattern.',
        title='Table Filter Pattern',
    )
    databaseFilterPattern: Optional[filterPattern.FilterPattern] = Field(
        default=None,
        description='Regex to only fetch databases that matches the pattern.',
        title='Database Filter Pattern',
    )
    includeViews: Optional[bool] = Field(
        default=True,
        description='Optional configuration to turn off fetching metadata for views.',
        title='Include Views',
    )
    useFqnForFiltering: Optional[bool] = Field(
        default=False,
        description='Regex will be applied on fully qualified name (e.g service_name.db_name.schema_name.table_name) instead of raw name (e.g. table_name)',
        title='Use FQN For Filtering',
    )

    # --- profiler feature toggles -------------------------------------------
    generateSampleData: Optional[bool] = Field(
        default=True,
        description='Option to turn on/off generating sample data. If enabled, profiler will ingest sample data for each table.',
        title='Generate Sample Data',
    )
    computeMetrics: Optional[bool] = Field(
        default=True,
        description='Option to turn on/off computing profiler metrics.',
        title='Compute Metrics',
    )
    processPiiSensitive: Optional[bool] = Field(
        default=False,
        description='Optional configuration to automatically tag columns that might contain sensitive information',
        title='Auto Tag PII',
    )
    # NOTE(review): the description states a 0-100 range, but no bounds are
    # declared on this Field, so the range is not enforced here.
    confidence: Optional[float] = Field(
        default=80,
        description='Set the Confidence value for which you want the column to be tagged as PII. Confidence value ranges from 0 to 100. A higher number will yield less false positives but more false negatives. A lower number will yield more false positives but less false negatives.',
        title='PII Inference Confidence Level',
    )

    # --- sampling ------------------------------------------------------------
    profileSampleType: Optional[table.ProfileSampleType] = Field(
        default=table.ProfileSampleType.PERCENTAGE,
        title='Profile Sample Type',
    )
    profileSample: Optional[float] = Field(
        default=None,
        description='Percentage of data or no. of rows used to compute the profiler metrics and run data quality tests',
        title='Profile Sample',
    )
    sampleDataCount: Optional[int] = Field(
        default=50,
        description="Number of sample rows to ingest when 'Generate Sample Data' is enabled",
        title='Sample Data Rows Count',
    )

    # --- execution limits ------------------------------------------------------
    # NOTE(review): annotated as float although a thread count is presumably
    # integral -- confirm against the JSON schema before changing.
    threadCount: Optional[float] = Field(
        default=5,
        description='Number of threads to use during metric computations',
        title='Thread Count',
    )
    timeoutSeconds: Optional[int] = Field(
        default=43200,
        description='Profiler Timeout in Seconds',
        title='Timeout (in sec.)',
    )
