import logging
import os
import re
import sqlite3
from contextlib import contextmanager
from functools import wraps
from time import sleep
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Sequence,
    Tuple,
    Type,
    Union,
)

import sqlalchemy
from attrs import frozen
from sqlalchemy import MetaData, Table, UniqueConstraint, exists, select
from sqlalchemy.dialects import sqlite
from sqlalchemy.schema import CreateIndex, CreateTable, DropTable
from sqlalchemy.sql import func
from sqlalchemy.sql.expression import bindparam, cast

import dql.sql.sqlite
from dql.data_storage import AbstractMetastore, AbstractWarehouse
from dql.data_storage.db_engine import DatabaseEngine
from dql.data_storage.id_generator import AbstractIDGenerator
from dql.data_storage.schema import (
    DefaultSchema,
    SignalsTable,
    convert_rows_custom_column_types,
)
from dql.dataset import DatasetRecord
from dql.error import DQLError, InconsistentSignalType
from dql.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
from dql.sql.types import SQLType
from dql.storage import StorageURI
from dql.utils import DQLDir

if TYPE_CHECKING:
    from sqlalchemy.schema import SchemaItem
    from sqlalchemy.sql.elements import ColumnClause, ColumnElement, TextClause
    from sqlalchemy.types import TypeEngine

    from dql.data_storage import schema


# Logger registered under the name "dql".
logger = logging.getLogger("dql")

# Backoff parameters used by get_retry_sleep_sec() / retry_sqlite_locks():
# the delay for retry number n is RETRY_START_SEC * RETRY_FACTOR**n seconds,
# and an operation is attempted at most RETRY_MAX_TIMES times.
RETRY_START_SEC = 0.01
RETRY_MAX_TIMES = 10
RETRY_FACTOR = 2

# Accepted forms of a column reference: a plain string, a column clause,
# or a text clause.
Column = Union[str, "ColumnClause[Any]", "TextClause"]

# Module-level side effect: runs once when this module is imported.
# NOTE(review): presumably registers SQLite-specific SQL compilation hooks;
# confirm in dql.sql.sqlite.
dql.sql.sqlite.setup()

# Identifier-quoting helpers of the SQLite dialect, used below when raw SQL
# strings (ALTER TABLE ...) are assembled by hand.
quote_schema = sqlite_dialect.identifier_preparer.quote_schema
quote = sqlite_dialect.identifier_preparer.quote


def get_retry_sleep_sec(retry_count: int) -> float:
    """
    Return how long to sleep, in seconds, after a failed attempt.

    The delay grows exponentially with the zero-based attempt number:
    ``RETRY_START_SEC * RETRY_FACTOR**retry_count``. Because RETRY_START_SEC
    is fractional, the result is a float (not an int).
    """
    return RETRY_START_SEC * (RETRY_FACTOR**retry_count)


def retry_sqlite_locks(func):
    """
    Decorator that retries `func` when it raises sqlite3.OperationalError.

    This retries the database modification in case of concurrent access.
    `func` is attempted up to RETRY_MAX_TIMES times, sleeping
    get_retry_sleep_sec(attempt) seconds between attempts. If the last attempt
    also fails, its exception is re-raised immediately (there is no point in
    sleeping when no further attempt will follow).

    Note that every sqlite3.OperationalError is retried, not only lock errors.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        last_attempt = RETRY_MAX_TIMES - 1
        for retry_count in range(RETRY_MAX_TIMES):
            try:
                return func(*args, **kwargs)
            except sqlite3.OperationalError:
                if retry_count >= last_attempt:
                    # Out of retries: propagate the original error as-is.
                    raise
                sleep(get_retry_sleep_sec(retry_count))
        # Only reachable with a non-positive retry budget: run once, unguarded.
        return func(*args, **kwargs)

    return wrapper


@frozen
class SQLiteDatabaseEngine(DatabaseEngine):
    """
    DatabaseEngine implementation backed by a single `sqlite3` connection.

    Queries are compiled by the engine and executed directly on the raw
    connection, or on an explicitly supplied cursor / connection.
    """

    dialect = sqlite_dialect

    # Raw sqlite3 connection that statements are executed on.
    db: sqlite3.Connection
    # Value the engine was created with; reused by clone() / clone_params()
    # to open the same database again. A falsy value means the default
    # DQL database file.
    db_file: Optional[str]

    @classmethod
    def from_db_file(cls, db_file: Optional[str] = None) -> "SQLiteDatabaseEngine":
        """
        Open a connection to `db_file` and wrap it in a new engine.

        ":memory:" opens a shared-cache in-memory database; a falsy `db_file`
        falls back to `DQLDir.find().db`. The connection is switched to
        autocommit mode and configured through a series of PRAGMAs.

        Raises DQLError if a RuntimeError occurs during setup.
        NOTE(review): sqlite3 connection failures are sqlite3.Error, which is
        not a RuntimeError, so they propagate unchanged.
        """
        detect_types = sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES

        try:
            if db_file == ":memory:":
                # Enable multithreaded usage of the same in-memory db
                db = sqlite3.connect(
                    "file::memory:?cache=shared", uri=True, detect_types=detect_types
                )
            else:
                db = sqlite3.connect(
                    db_file or DQLDir.find().db, detect_types=detect_types
                )
            create_user_defined_sql_functions(db)
            # The SQLAlchemy engine always hands out this one connection.
            engine = sqlalchemy.create_engine(
                "sqlite+pysqlite:///", creator=lambda: db, future=True
            )

            db.isolation_level = None  # Use autocommit mode
            db.execute("PRAGMA foreign_keys = ON")
            db.execute("PRAGMA cache_size = -102400")  # 100 MiB
            # Enable Write-Ahead Log Journaling
            db.execute("PRAGMA journal_mode = WAL")
            db.execute("PRAGMA synchronous = NORMAL")
            db.execute("PRAGMA case_sensitive_like = ON")
            if os.environ.get("DEBUG_SHOW_SQL_QUERIES"):
                db.set_trace_callback(print)

            return cls(engine, MetaData(), db, db_file)
        except RuntimeError as exc:
            # Chain the cause so the underlying failure stays visible.
            raise DQLError("Can't connect to SQLite DB") from exc

    def clone(self) -> "SQLiteDatabaseEngine":
        """Clones DatabaseEngine implementation."""
        return SQLiteDatabaseEngine.from_db_file(self.db_file)

    def clone_params(self) -> Tuple[Callable[..., Any], List[Any], Dict[str, Any]]:
        """
        Returns the function, args, and kwargs needed to instantiate a cloned copy
        of this DatabaseEngine implementation, for use in separate processes
        or machines.
        """
        return (
            SQLiteDatabaseEngine.from_db_file,
            [self.db_file],
            {},
        )

    @retry_sqlite_locks
    def execute(
        self,
        query,
        cursor: Optional[sqlite3.Cursor] = None,
        conn=None,
    ) -> sqlite3.Cursor:
        """
        Compile `query` and execute it, returning the resulting cursor.

        The statement runs on `cursor` if given, otherwise on `conn` if given,
        otherwise on the engine's own connection. When `query` is a CreateTable
        whose table declares indexes, each index is created afterwards (with
        IF NOT EXISTS) on the same cursor / connection.
        """
        if cursor is not None:
            executor = cursor
        elif conn is not None:
            executor = conn
        else:
            executor = self.db
        result = executor.execute(*self.compile_to_args(query))

        if isinstance(query, CreateTable) and query.element.indexes:
            for index in query.element.indexes:
                # Forward both cursor and conn so the index DDL runs in the
                # same place as the CREATE TABLE it belongs to.
                self.execute(
                    CreateIndex(index, if_not_exists=True),
                    cursor=cursor,
                    conn=conn,
                )
        return result

    @retry_sqlite_locks
    def executemany(
        self, query, params, cursor: Optional[sqlite3.Cursor] = None
    ) -> sqlite3.Cursor:
        """
        Compile `query` once and execute it for every entry of `params`,
        on `cursor` if given, otherwise on the engine's own connection.
        """
        executor = cursor if cursor is not None else self.db
        return executor.executemany(self.compile(query).string, params)

    def execute_str(self, sql: str, parameters=None) -> sqlite3.Cursor:
        """Execute a raw SQL string, optionally with bound `parameters`."""
        if parameters is None:
            return self.db.execute(sql)
        return self.db.execute(sql, parameters)

    def insert_dataframe(self, table_name: str, df) -> int:
        """Append the rows of `df` to `table_name` via `df.to_sql`."""
        return df.to_sql(table_name, self.db, if_exists="append", index=False)

    def cursor(self, factory=None):
        """Return a new cursor on the connection, using `factory` if given."""
        if factory is None:
            return self.db.cursor()
        return self.db.cursor(factory)

    def close(self) -> None:
        """Close the underlying sqlite3 connection."""
        self.db.close()

    @contextmanager
    def transaction(self):
        """
        Context manager that runs its body inside an explicit transaction.

        The connection is in autocommit mode, so the transaction is opened
        with an explicit "begin"; the sqlite3 connection context manager then
        commits on normal exit and rolls back if the body raises. Yields the
        raw connection.
        """
        db = self.db
        with db:
            db.execute("begin")
            yield db

    def has_table(self, name: str) -> bool:
        """
        Return True if a table exists with the given name

        We cannot simply use `inspect(engine).has_table(name)` like the
        parent class does because that will return False for a table
        created during a pending transaction. Instead, we check the
        sqlite_master table.
        """
        query = select(
            exists(
                select(1)
                .select_from(sqlalchemy.table("sqlite_master"))
                .where(
                    (sqlalchemy.column("type") == "table")
                    & (sqlalchemy.column("name") == name)
                )
            )
        )
        return bool(next(self.execute(query))[0])

    def create_table(self, table: "Table", if_not_exists: bool = True) -> None:
        """Create `table` (and, through execute(), its declared indexes)."""
        self.execute(CreateTable(table, if_not_exists=if_not_exists))

    def drop_table(self, table: "Table", if_exists: bool = False) -> None:
        """Drop `table`, optionally tolerating its absence."""
        self.execute(DropTable(table, if_exists=if_exists))

    def rename_table(self, old_name: str, new_name: str) -> None:
        """Rename a table; both names are quoted before being placed in the SQL."""
        comp_old_name = quote_schema(old_name)
        comp_new_name = quote_schema(new_name)
        self.execute_str(f"ALTER TABLE {comp_old_name} RENAME TO {comp_new_name}")


class SQLiteIDGenerator(AbstractIDGenerator):
    """ID generator that keeps a `last_id` counter per URI in a SQLite table."""

    _db: "SQLiteDatabaseEngine"

    def __init__(
        self,
        db: Optional["SQLiteDatabaseEngine"] = None,
        table_prefix: Optional[str] = None,
        skip_db_init: bool = False,
        db_file: Optional[str] = None,
    ):
        # Open a fresh engine only when the caller did not hand one in.
        engine = db if db else SQLiteDatabaseEngine.from_db_file(db_file)

        super().__init__(engine, table_prefix, skip_db_init)

    def clone(self) -> "AbstractIDGenerator":
        """Return a new SQLiteIDGenerator working on a cloned engine."""
        cloned_db = self._db.clone()
        return SQLiteIDGenerator(cloned_db, self._table_prefix, skip_db_init=True)

    def clone_params(self) -> Tuple[Callable[..., Any], List[Any], Dict[str, Any]]:
        """
        Describe how to rebuild this generator elsewhere.

        Returns a (function, args, kwargs) triple which, when called, creates
        a cloned SQLiteIDGenerator; meant for use in separate processes or
        machines.
        """
        kwargs: Dict[str, Any] = {
            "db_params": self._db.clone_params(),
            "table_prefix": self._table_prefix,
        }
        return (SQLiteIDGenerator.init_after_clone, [], kwargs)

    @classmethod
    def init_after_clone(
        cls,
        *,
        db_params: Tuple[Callable, List, Dict[str, Any]],
        table_prefix: Optional[str] = None,
    ) -> "SQLiteIDGenerator":
        """
        Build an instance from the values produced by clone_params().

        `db_params` is the (function, args, kwargs) triple that recreates
        the database engine.
        """
        make_db, args, kwargs = db_params
        engine = make_db(*args, **kwargs)
        return cls(db=engine, table_prefix=table_prefix, skip_db_init=True)

    @property
    def db(self) -> "SQLiteDatabaseEngine":
        return self._db

    def init_id(self, uri: str) -> None:
        """Insert a zero `last_id` row for `uri`; an existing row is left alone."""
        insert_stmt = sqlite.insert(self._table).values(uri=uri, last_id=0)
        self._db.execute(insert_stmt.on_conflict_do_nothing())

    def get_next_ids(self, uri: str, count: int) -> range:
        """Reserve `count` consecutive IDs for `uri` and return them as a range."""

        # NOTE: we can't use RETURNING clause here because it is only available
        # in sqlalchemy v2, see
        # https://github.com/sqlalchemy/sqlalchemy/issues/6195#issuecomment-1248700677
        # After we upgrade to sqlalchemy v2, we can use the following code,
        # leaving fallback to the current implementation for older versions of SQLite,
        # which is still supported, for example, in Ubuntu 20.04 LTS (Focal Fossa),
        # where SQLite version 3.31.1 is used.

        # sqlite_version = version.parse(sqlite3.sqlite_version)
        # if sqlite_version >= version.parse("3.35.0"):
        #     # RETURNING is supported on SQLite 3.35.0 (2021-03-12) or newer
        #     stmt = (
        #         sqlite.insert(self._table)
        #         .values(uri=uri, last_id=count)
        #         .on_conflict_do_update(
        #             index_elements=["uri"],
        #             set_={"last_id": self._table.c.last_id + count},
        #         )
        #         .returning(self._table.c.last_id)
        #     )
        #     last_id = self._db.execute(stmt).fetchone()[0]
        # else:
        #     (fallback to the current implementation with a transaction)

        table = self._table

        # UPSERT syntax was added to SQLite with version 3.24.0 (2018-06-04).
        bump_counter = (
            sqlite.insert(table)
            .values(uri=uri, last_id=count)
            .on_conflict_do_update(
                index_elements=["uri"],
                set_={"last_id": table.c.last_id + count},
            )
        )
        read_counter = (
            select(table.c.last_id).select_from(table).where(table.c.uri == uri)
        )

        # Transactions ensure no concurrency conflicts
        with self._db.transaction() as conn:
            self._db.execute(bump_counter, conn=conn)
            row = self._db.execute(read_counter, conn=conn).fetchone()
            last_id = row[0]

        first_id = last_id - count + 1
        return range(first_id, last_id + 1)


class SQLiteMetastore(AbstractMetastore):
    """
    Metastore implementation that keeps its tables in a local SQLite3 database.
    This is currently used for the local cli.
    """

    db: "SQLiteDatabaseEngine"

    def __init__(
        self,
        id_generator: "AbstractIDGenerator",
        uri: StorageURI = StorageURI(""),
        partial_id: Optional[int] = None,
        db: Optional["SQLiteDatabaseEngine"] = None,
        db_file: Optional[str] = None,
    ):
        self.schema: "DefaultSchema" = DefaultSchema()
        super().__init__(id_generator, uri, partial_id)

        # Creation order of the default tables; tests rely on it to drop
        # tables in the correct order because of foreign keys.
        self.default_table_names: List[str] = []

        self.db = db or SQLiteDatabaseEngine.from_db_file(db_file)

        self._init_storage_table()
        self._init_datasets_tables()

    def clone(
        self, uri: StorageURI = StorageURI(""), partial_id: Optional[int] = None
    ) -> "SQLiteMetastore":
        """
        Return a new metastore with cloned id generator and engine.

        When `uri` is empty, this instance's uri (and its partial_id, if
        truthy) is inherited. Raises ValueError if `partial_id` is given
        without a `uri`.
        """
        if not uri and partial_id is not None:
            raise ValueError("if partial_id is used, uri cannot be empty")
        if not uri and self.uri:
            uri = self.uri
            if self.partial_id:
                partial_id = self.partial_id

        return SQLiteMetastore(
            self.id_generator.clone(),
            uri=uri,
            partial_id=partial_id,
            db=self.db.clone(),
        )

    def clone_params(self) -> Tuple[Callable[..., Any], List[Any], Dict[str, Any]]:
        """
        Describe how to rebuild this metastore elsewhere.

        Returns a (function, args, kwargs) triple which, when called, creates
        a cloned SQLiteMetastore; meant for use in separate processes or
        machines.
        """
        kwargs: Dict[str, Any] = {
            "id_generator_params": self.id_generator.clone_params(),
            "uri": self.uri,
            "partial_id": self.partial_id,
            "db_params": self.db.clone_params(),
        }
        return (SQLiteMetastore.init_after_clone, [], kwargs)

    @classmethod
    def init_after_clone(
        cls,
        *,
        id_generator_params: Tuple[Callable, List, Dict[str, Any]],
        uri: StorageURI,
        partial_id: Optional[int],
        db_params: Tuple[Callable, List, Dict[str, Any]],
    ) -> "SQLiteMetastore":
        """Build an instance from the values produced by clone_params()."""
        make_id_generator, id_gen_args, id_gen_kwargs = id_generator_params
        make_db, db_args, db_kwargs = db_params

        # The id generator is constructed first, then the database engine.
        id_generator = make_id_generator(*id_gen_args, **id_gen_kwargs)
        engine = make_db(*db_args, **db_kwargs)
        return cls(
            id_generator=id_generator,
            uri=uri,
            partial_id=partial_id,
            db=engine,
        )

    def _create_default_table(self, table: "Table") -> None:
        """Create `table` if it is missing and record its name as a default table."""
        self.db.create_table(table, if_not_exists=True)
        self.default_table_names.append(table.name)

    def _init_storage_table(self) -> None:
        """Initialize only tables related to storage, e.g. s3"""
        self._create_default_table(self.storages)

    def _init_datasets_tables(self) -> None:
        """Initialize the datasets, dataset versions and dependencies tables."""
        self._create_default_table(self.datasets)
        self._create_default_table(self.datasets_versions)
        self._create_default_table(self.datasets_dependencies)

    @classmethod
    def buckets_columns(cls) -> List["SchemaItem"]:
        """Buckets (storages) table columns, plus a unique constraint on uri."""
        return [*super().buckets_columns(), UniqueConstraint("uri")]

    @classmethod
    def datasets_columns(cls) -> List["SchemaItem"]:
        """Datasets table columns, plus a unique constraint on name."""
        return [*super().datasets_columns(), UniqueConstraint("name")]

    def init_db(self, uri: StorageURI, partial_id: int) -> None:
        """Create the partials table for `uri`; raises ValueError on empty uri."""
        if not uri:
            raise ValueError("uri for init_db() cannot be empty")
        self.db.create_table(self.partials_table(uri), if_not_exists=True)

    # SQLite-dialect INSERT constructs for each metastore table.

    def storages_insert(self):
        return sqlite.insert(self.storages)

    def partials_insert(self):
        return sqlite.insert(self.partials)

    def datasets_insert(self):
        return sqlite.insert(self.datasets)

    def datasets_versions_insert(self):
        return sqlite.insert(self.datasets_versions)

    def datasets_dependencies_insert(self):
        return sqlite.insert(self.datasets_dependencies)

    #
    # Storages
    #

    def mark_storage_not_indexed(self, uri: StorageURI) -> None:
        """
        Mark storage as not indexed by deleting its row from the storages table.
        This method should be called when storage index is deleted.
        """
        delete_stmt = self.storages_delete().where(self.storages.c.uri == uri)
        self.db.execute(delete_stmt)

    #
    # Dataset dependencies
    #

    def dataset_dependencies_select_columns(self) -> List["SchemaItem"]:
        """Columns selected when reading dataset dependencies."""
        dependency_cols = self.datasets_dependencies.c
        dataset_cols = self.datasets.c
        version_cols = self.datasets_versions.c
        return [
            dependency_cols.id,
            dependency_cols.dataset_id,
            dependency_cols.dataset_version_id,
            dependency_cols.bucket_id,
            dependency_cols.bucket_version,
            dataset_cols.name,
            dataset_cols.created_at,
            version_cols.version,
            version_cols.created_at,
            self.storages.c.uri,
        ]

    def insert_dataset_dependency(self, data: Dict[str, Any]) -> None:
        """Insert one row, given as column/value pairs, into datasets_dependencies."""
        insert_stmt = sqlite.insert(self.datasets_dependencies).values(**data)
        self.db.execute(insert_stmt)


class SQLiteWarehouse(AbstractWarehouse):
    """
    SQLite Warehouse uses SQLite3 for storing indexed data locally.
    This is currently used for the local cli.
    """

    db: "SQLiteDatabaseEngine"

    # Cache for our defined column types to dialect specific TypeEngine relations
    # (keyed by SQLType subclass, shared by all instances).
    _col_python_type: Dict[Type, "TypeEngine"] = {}

    def __init__(
        self,
        id_generator: "AbstractIDGenerator",
        uri: StorageURI = StorageURI(""),
        partial_id: Optional[int] = None,
        db: Optional["SQLiteDatabaseEngine"] = None,
        db_file: Optional[str] = None,
    ):
        self.schema: "DefaultSchema" = DefaultSchema()
        super().__init__(id_generator, uri, partial_id)

        self.db = db or SQLiteDatabaseEngine.from_db_file(db_file)

        # Only tables whose name matches the listing-table pattern are
        # reflected into the metadata at construction time.
        self.listing_table_pattern = re.compile(
            f"^{self.BUCKET_TABLE_NAME_PREFIX}[a-z0-9-._]+_[0-9]+$"
        )
        self._reflect_tables(
            filter_tables=lambda t, _: bool(self.listing_table_pattern.match(t))
        )

    def clone(
        self, uri: StorageURI = StorageURI(""), partial_id: Optional[int] = None
    ) -> "SQLiteWarehouse":
        """
        Return a new warehouse with cloned id generator and engine.

        When `uri` is empty, this instance's uri (and its partial_id, if
        truthy) is inherited. Raises ValueError if `partial_id` is given
        without a `uri`.
        """
        if not uri:
            if partial_id is not None:
                raise ValueError("if partial_id is used, uri cannot be empty")
            if self.uri:
                uri = self.uri
                if self.partial_id:
                    partial_id = self.partial_id
        return SQLiteWarehouse(
            self.id_generator.clone(),
            uri=uri,
            partial_id=partial_id,
            db=self.db.clone(),
        )

    def clone_params(self) -> Tuple[Callable[..., Any], List[Any], Dict[str, Any]]:
        """
        Returns the function, args, and kwargs needed to instantiate a cloned copy
        of this SQLiteWarehouse implementation, for use in separate processes
        or machines.
        """
        return (
            SQLiteWarehouse.init_after_clone,
            [],
            {
                "id_generator_params": self.id_generator.clone_params(),
                "uri": self.uri,
                "partial_id": self.partial_id,
                "db_params": self.db.clone_params(),
            },
        )

    @classmethod
    def init_after_clone(
        cls,
        *,
        id_generator_params: Tuple[Callable, List, Dict[str, Any]],
        uri: StorageURI,
        partial_id: Optional[int],
        db_params: Tuple[Callable, List, Dict[str, Any]],
    ) -> "SQLiteWarehouse":
        """Build an instance from the values produced by clone_params()."""
        (
            id_generator_class,
            id_generator_args,
            id_generator_kwargs,
        ) = id_generator_params
        (db_class, db_args, db_kwargs) = db_params
        return cls(
            id_generator=id_generator_class(*id_generator_args, **id_generator_kwargs),
            uri=uri,
            partial_id=partial_id,
            db=db_class(*db_args, **db_kwargs),
        )

    def _reflect_tables(self, filter_tables=None):
        """
        Since some tables are prone to schema extension, meaning we can add
        additional columns to it, we should reflect changes in metadata
        to have the latest columns when dealing with those tables.
        If filter function is defined, it's used to filter out tables to reflect,
        otherwise all tables are reflected
        """
        self.db.metadata.reflect(
            bind=self.db.engine,
            extend_existing=True,
            only=filter_tables,
        )

    def _non_default_columns(self, table: Table) -> List["sqlalchemy.Column"]:
        """Return the columns of `table` that are not default dataset row columns."""
        # Collect the default names once instead of once per inspected column.
        default_names = {
            c.name for c in self.schema.dataset_row_cls.default_columns()
        }
        return [c for c in table.c if c.name not in default_names]

    def init_db(self, uri: StorageURI, partial_id: int) -> None:
        """Create the nodes table for `uri` / `partial_id` if it is missing.

        Raises ValueError when `uri` is empty.
        """
        if not uri:
            raise ValueError("uri for init_db() cannot be empty")
        nodes_table = self.nodes_table(uri, partial_id).table
        self.db.create_table(nodes_table, if_not_exists=True)

    def create_dataset_rows_table(
        self,
        name: str,
        custom_columns: Sequence["sqlalchemy.Column"] = (),
        if_not_exists: bool = True,
    ) -> Table:
        """Define a dataset rows table called `name`, create it, and return it."""
        table = self.schema.dataset_row_cls.new_table(
            name, custom_columns=custom_columns, metadata=self.db.metadata
        )
        self.db.create_table(table, if_not_exists=if_not_exists)
        return table

    def dataset_rows_select(
        self, select_query: sqlalchemy.sql.selectable.Select, **kwargs
    ):
        """
        Execute `select_query` and yield its rows after passing them through
        convert_rows_custom_column_types. Extra keyword arguments are forwarded
        to the engine's execute().
        """
        rows = self.db.execute(select_query, **kwargs)
        yield from convert_rows_custom_column_types(
            select_query.columns, rows, sqlite_dialect
        )

    def get_dataset_sources(
        self, dataset: DatasetRecord, version: Optional[int]
    ) -> List[StorageURI]:
        """Return the distinct `source` values of the dataset rows as StorageURIs."""
        dr = self.dataset_rows(dataset, version)
        query = dr.select(dr.c.source).distinct()
        cur = self.db.cursor()
        # sqlite3.Row allows reading the result column by name.
        cur.row_factory = sqlite3.Row  # type: ignore[assignment]

        return [
            StorageURI(dict(row)["source"])
            for row in self.db.execute(query, cursor=cur)
        ]

    def merge_dataset_rows(
        self,
        src: DatasetRecord,
        dst: DatasetRecord,
        src_version: Optional[int] = None,
        dst_version: Optional[int] = None,
    ) -> None:
        """
        Insert the rows of `src` (at `src_version`) into `dst` (at `dst_version`).

        Does nothing if the source table does not exist. If the destination
        table does not exist it is created with the source's custom columns
        and filled from the source alone. Otherwise the source rows are
        unioned with the rows of the latest destination version below
        `dst_version` and inserted with INSERT OR IGNORE. The `id` column is
        never copied.
        """
        dst_empty = False

        if not self.db.has_table(self.dataset_table_name(src.name, src_version)):
            # source table doesn't exist, nothing to do
            return

        src_dr = self.dataset_rows(src, src_version).table

        # The same (versioned) name must be used for the existence check and
        # for the creation; otherwise a versioned destination is never created.
        dst_table_name = self.dataset_table_name(dst.name, dst_version)
        if not self.db.has_table(dst_table_name):
            # destination table doesn't exist, create it
            self.create_dataset_rows_table(
                dst_table_name, custom_columns=self._non_default_columns(src_dr)
            )
            dst_empty = True

        dst_dr = self.dataset_rows(dst, dst_version).table
        merge_fields = [c.name for c in src_dr.c if c.name != "id"]
        select_src = select(*(getattr(src_dr.c, f) for f in merge_fields))

        if dst_empty:
            # we don't need union, but just select from source to destination
            insert_query = sqlite.insert(dst_dr).from_select(merge_fields, select_src)
        else:
            dst_version_latest = None
            if dst_version and dst.versions:
                # find the previous version of the destination dataset
                dst_previous_versions = [
                    v.version for v in dst.versions if v.version < dst_version
                ]
                if dst_previous_versions:
                    dst_version_latest = max(dst_previous_versions)
            dst_dr_latest = self.dataset_rows(dst, dst_version_latest).table

            select_dst_latest = select(
                *(getattr(dst_dr_latest.c, f) for f in merge_fields)
            )
            union_query = sqlalchemy.union(select_src, select_dst_latest)
            insert_query = (
                sqlite.insert(dst_dr)
                .from_select(merge_fields, union_query)
                .prefix_with("OR IGNORE")
            )

        self.db.execute(insert_query)

    def copy_shadow_dataset_rows(self, src: DatasetRecord, dst: DatasetRecord) -> None:
        """
        Copy all rows (except the `id` column) from shadow dataset `src` to
        shadow dataset `dst`, creating the destination table if needed.
        Does nothing if the source table does not exist.
        """
        assert src.shadow
        assert dst.shadow

        if not self.db.has_table(self.dataset_table_name(src.name)):
            # source table doesn't exist, nothing to do
            return

        src_dr = self.dataset_rows(src).table

        dst_table_name = self.dataset_table_name(dst.name)
        if not self.db.has_table(dst_table_name):
            # Destination table doesn't exist, create it
            self.create_dataset_rows_table(
                dst_table_name, custom_columns=self._non_default_columns(src_dr)
            )

        dst_dr = self.dataset_rows(dst).table

        # Not including id
        src_fields = [c.name for c in src_dr.c if c.name != "id"]
        select_src = select(*(getattr(src_dr.c, f) for f in src_fields))
        insert_query = sqlite.insert(dst_dr).from_select(src_fields, select_src)

        self.db.execute(insert_query)

    async def insert_node(self, entry: Dict[str, Any]) -> int:
        """Insert one node and return its rowid (0 if the cursor reports none)."""
        return (
            self.db.execute(
                self.nodes.insert().values(self._prepare_node(entry))
            ).lastrowid
            or 0
        )

    async def insert_nodes(self, entries: Iterable[Dict[str, Any]]) -> None:
        """Insert many nodes with a single executemany call."""
        self.db.executemany(
            # The first entry of node_fields is left out of the insert.
            self.nodes.insert().values({f: bindparam(f) for f in self.node_fields[1:]}),
            map(self._prepare_node, entries),
        )

    def insert_rows(self, table: Table, rows: Iterable[Dict[str, Any]]) -> None:
        """
        Insert `rows` into `table`; a no-op for an empty iterable.
        The keys of the first row determine the inserted columns.
        """
        rows = list(rows)
        if not rows:
            return
        self.db.executemany(
            table.insert().values({f: bindparam(f) for f in rows[0]}),
            rows,
        )

    def insert_dataset_rows(
        self, df, dataset: DatasetRecord, version: Optional[int] = None
    ) -> int:
        """Append the dataframe `df` to the dataset rows table of `dataset`."""
        dr = self.dataset_rows(dataset, version)
        return self.db.insert_dataframe(dr.table.name, df)

    def instr(self, source, target) -> "ColumnElement":
        """Return the SQL `instr(source, target)` expression cast to Boolean."""
        return cast(func.instr(source, target), sqlalchemy.Boolean)

    def get_table(self, name: str) -> sqlalchemy.Table:
        """Return the table called `name`, reflected with its latest schema."""
        # load table with latest schema to metadata
        self._reflect_tables(filter_tables=lambda t, _: t == name)
        return self.db.metadata.tables[name]

    def python_type(self, col_type: Union["TypeEngine", "SQLType"]) -> Any:
        """Return the Python type corresponding to the given column type."""
        if isinstance(col_type, SQLType):
            # converting our defined column types to dialect specific TypeEngine
            col_type_cls = type(col_type)
            if col_type_cls not in self._col_python_type:
                self._col_python_type[col_type_cls] = col_type.type_engine(
                    sqlite_dialect
                )
            col_type = self._col_python_type[col_type_cls]

        return col_type.python_type

    def add_column(
        self, table: Table, col_name: str, col_type: Union["TypeEngine", "SQLType"]
    ):
        """
        Adds a column to a table.

        If a column with that name already exists and has the same Python
        type, nothing happens; if its Python type differs,
        InconsistentSignalType is raised.
        """
        # trying to find the same column in a table
        table_col = table.c.get(col_name, None)

        if isinstance(col_type, SQLType):
            # converting our defined column types to dialect specific TypeEngine
            col_type = col_type.type_engine(sqlite_dialect)

        if table_col is not None:
            if table_col.type.python_type != col_type.python_type:
                raise InconsistentSignalType(
                    f"Column {col_name} already exists with a type:"
                    f" {table_col.type.python_type}"
                    f", but trying to create it with different type:"
                    f" {col_type.python_type}"
                )
            # column with the same name and type already exist, nothing to do
            return

        table_name = quote_schema(table.name)
        col_name_comp = quote(col_name)
        col_type_comp = col_type.compile(dialect=sqlite_dialect)
        q = f"ALTER TABLE {table_name} ADD COLUMN {col_name_comp} {col_type_comp}"
        self.db.execute_str(q)

        # reload the table to self.db.metadata so the table object
        # self.db.metadata.tables[table.name] includes the new column
        self._reflect_tables(filter_tables=lambda t, _: t == table.name)

    def dataset_column_types(
        self,
        dataset: DatasetRecord,
        version: Optional[int] = None,
        custom: bool = False,
    ) -> List[Dict[str, str]]:
        """
        Return the name and Python type name of each dataset rows column;
        only the custom columns when `custom` is True.
        """
        dr = self.dataset_rows(dataset, version)
        columns = dr.custom_columns if custom else dr.columns
        return [{"name": c.name, "type": c.type.python_type.__name__} for c in columns]

    def dataset_table_export_file_names(
        self, dataset: DatasetRecord, version: int
    ) -> List[str]:
        """Not supported by the SQLite warehouse; always raises NotImplementedError."""
        raise NotImplementedError("Exporting dataset table not implemented for SQLite")

    def export_dataset_table(
        self,
        bucket_uri: str,
        dataset: DatasetRecord,
        version: int,
        client_config=None,
    ) -> List[str]:
        """Not supported by the SQLite warehouse; always raises NotImplementedError."""
        raise NotImplementedError("Exporting dataset table not implemented for SQLite")

    #
    # Signals
    #

    def create_signals_table(self) -> SignalsTable:
        """
        Create an empty signals table for storing signals entries.
        """
        tbl_name = self.signals_table_name(self.uri)
        tbl = SignalsTable(tbl_name, [], self.db.metadata)
        self.db.create_table(tbl.table, if_not_exists=True)
        return tbl

    def extend_index_with_signals(self, index: "schema.Table", signals: SignalsTable):
        """
        Extend a nodes table with a signals table.
        This will result in the original index table being replaced
        with a table that is a join between the signals table and the
        index table (joining on the id column).
        """

        with self.db.transaction():
            # Create temporary table.
            join_tbl_name = "tmp_" + index.name

            signal_columns = [c for c in signals.table.c if c.name != "id"]

            join_tbl = self.schema.node_cls.new_table(
                join_tbl_name,
                [self.schema.node_cls.copy_signal_column(c) for c in signal_columns],
                self.db.metadata,
            )
            try:
                self.db.create_table(join_tbl, if_not_exists=True)

                # Query joining original index table and signals table.
                index_cols = {c.name for c in index.table.c}
                duplicate_signal_cols = {
                    c.name
                    for c in signals.table.c
                    if c.name in index_cols and c.name != "id"
                }
                select_cols = [
                    # columns from index table
                    *[c for c in index.table.c if c.name not in duplicate_signal_cols],
                    # coalesce columns already in index table
                    *[
                        func.coalesce(*cc).label(cc[0].name)
                        for cc in zip(
                            [index.table.c[col] for col in duplicate_signal_cols],
                            [signals.table.c[col] for col in duplicate_signal_cols],
                        )
                    ],
                    # columns from signals table
                    *[c for c in signal_columns if c.name not in duplicate_signal_cols],
                ]
                q = sqlalchemy.select(*select_cols).select_from(
                    index.table.outerjoin(
                        signals.table, index.c.id == signals.table.c.id
                    )
                )

                cols = [c.name for c in select_cols]

                # Write results of query to the new index table.
                self.db.execute(sqlalchemy.insert(join_tbl).from_select(cols, q))
                # Replace original table with extended one.
                self.db.drop_table(index.table)
                self.db.rename_table(join_tbl_name, index.name)
            finally:
                # After a successful rename the temporary name no longer
                # exists, hence if_exists=True.
                self.db.drop_table(join_tbl, if_exists=True)
