Edit on GitHub

sqlglot.parser

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
  10from sqlglot.tokens import Token, Tokenizer, TokenType
  11from sqlglot.trie import in_trie, new_trie
  12
  13logger = logging.getLogger("sqlglot")
  14
  15E = t.TypeVar("E", bound=exp.Expression)
  16
  17
  18def parse_var_map(args: t.Sequence) -> exp.Expression:
  19    if len(args) == 1 and args[0].is_star:
  20        return exp.StarMap(this=args[0])
  21
  22    keys = []
  23    values = []
  24    for i in range(0, len(args), 2):
  25        keys.append(args[i])
  26        values.append(args[i + 1])
  27    return exp.VarMap(
  28        keys=exp.Array(expressions=keys),
  29        values=exp.Array(expressions=values),
  30    )
  31
  32
  33def parse_like(args):
  34    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  35    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  36
  37
  38def binary_range_parser(
  39    expr_type: t.Type[exp.Expression],
  40) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  41    return lambda self, this: self._parse_escape(
  42        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  43    )
  44
  45
  46class _Parser(type):
  47    def __new__(cls, clsname, bases, attrs):
  48        klass = super().__new__(cls, clsname, bases, attrs)
  49        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  50        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  51
  52        return klass
  53
  54
  55class Parser(metaclass=_Parser):
  56    """
  57    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  58    a parsed syntax tree.
  59
  60    Args:
  61        error_level: the desired error level.
  62            Default: ErrorLevel.RAISE
  63        error_message_context: determines the amount of context to capture from a
  64            query string when displaying the error message (in number of characters).
  65            Default: 50.
  66        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  67            Default: 0
  68        alias_post_tablesample: If the table alias comes after tablesample.
  69            Default: False
  70        max_errors: Maximum number of error messages to include in a raised ParseError.
  71            This is only relevant if error_level is ErrorLevel.RAISE.
  72            Default: 3
  73        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  74            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  75            Default: "nulls_are_small"
  76    """
  77
  78    FUNCTIONS: t.Dict[str, t.Callable] = {
  79        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  80        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  81            this=seq_get(args, 0),
  82            to=exp.DataType(this=exp.DataType.Type.TEXT),
  83        ),
  84        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  85        "IFNULL": exp.Coalesce.from_arg_list,
  86        "LIKE": parse_like,
  87        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  88            this=seq_get(args, 0),
  89            to=exp.DataType(this=exp.DataType.Type.TEXT),
  90        ),
  91        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  92            this=exp.Cast(
  93                this=seq_get(args, 0),
  94                to=exp.DataType(this=exp.DataType.Type.TEXT),
  95            ),
  96            start=exp.Literal.number(1),
  97            length=exp.Literal.number(10),
  98        ),
  99        "VAR_MAP": parse_var_map,
 100    }
 101
 102    NO_PAREN_FUNCTIONS = {
 103        TokenType.CURRENT_DATE: exp.CurrentDate,
 104        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 105        TokenType.CURRENT_TIME: exp.CurrentTime,
 106        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 107        TokenType.CURRENT_USER: exp.CurrentUser,
 108    }
 109
 110    JOIN_HINTS: t.Set[str] = set()
 111
 112    NESTED_TYPE_TOKENS = {
 113        TokenType.ARRAY,
 114        TokenType.MAP,
 115        TokenType.NULLABLE,
 116        TokenType.STRUCT,
 117    }
 118
 119    TYPE_TOKENS = {
 120        TokenType.BIT,
 121        TokenType.BOOLEAN,
 122        TokenType.TINYINT,
 123        TokenType.UTINYINT,
 124        TokenType.SMALLINT,
 125        TokenType.USMALLINT,
 126        TokenType.INT,
 127        TokenType.UINT,
 128        TokenType.BIGINT,
 129        TokenType.UBIGINT,
 130        TokenType.INT128,
 131        TokenType.UINT128,
 132        TokenType.INT256,
 133        TokenType.UINT256,
 134        TokenType.FLOAT,
 135        TokenType.DOUBLE,
 136        TokenType.CHAR,
 137        TokenType.NCHAR,
 138        TokenType.VARCHAR,
 139        TokenType.NVARCHAR,
 140        TokenType.TEXT,
 141        TokenType.MEDIUMTEXT,
 142        TokenType.LONGTEXT,
 143        TokenType.MEDIUMBLOB,
 144        TokenType.LONGBLOB,
 145        TokenType.BINARY,
 146        TokenType.VARBINARY,
 147        TokenType.JSON,
 148        TokenType.JSONB,
 149        TokenType.INTERVAL,
 150        TokenType.TIME,
 151        TokenType.TIMESTAMP,
 152        TokenType.TIMESTAMPTZ,
 153        TokenType.TIMESTAMPLTZ,
 154        TokenType.DATETIME,
 155        TokenType.DATETIME64,
 156        TokenType.DATE,
 157        TokenType.DECIMAL,
 158        TokenType.BIGDECIMAL,
 159        TokenType.UUID,
 160        TokenType.GEOGRAPHY,
 161        TokenType.GEOMETRY,
 162        TokenType.HLLSKETCH,
 163        TokenType.HSTORE,
 164        TokenType.PSEUDO_TYPE,
 165        TokenType.SUPER,
 166        TokenType.SERIAL,
 167        TokenType.SMALLSERIAL,
 168        TokenType.BIGSERIAL,
 169        TokenType.XML,
 170        TokenType.UNIQUEIDENTIFIER,
 171        TokenType.MONEY,
 172        TokenType.SMALLMONEY,
 173        TokenType.ROWVERSION,
 174        TokenType.IMAGE,
 175        TokenType.VARIANT,
 176        TokenType.OBJECT,
 177        TokenType.INET,
 178        *NESTED_TYPE_TOKENS,
 179    }
 180
 181    SUBQUERY_PREDICATES = {
 182        TokenType.ANY: exp.Any,
 183        TokenType.ALL: exp.All,
 184        TokenType.EXISTS: exp.Exists,
 185        TokenType.SOME: exp.Any,
 186    }
 187
 188    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 189
 190    DB_CREATABLES = {
 191        TokenType.DATABASE,
 192        TokenType.SCHEMA,
 193        TokenType.TABLE,
 194        TokenType.VIEW,
 195    }
 196
 197    CREATABLES = {
 198        TokenType.COLUMN,
 199        TokenType.FUNCTION,
 200        TokenType.INDEX,
 201        TokenType.PROCEDURE,
 202        *DB_CREATABLES,
 203    }
 204
 205    ID_VAR_TOKENS = {
 206        TokenType.VAR,
 207        TokenType.ANTI,
 208        TokenType.APPLY,
 209        TokenType.AUTO_INCREMENT,
 210        TokenType.BEGIN,
 211        TokenType.BOTH,
 212        TokenType.BUCKET,
 213        TokenType.CACHE,
 214        TokenType.CASCADE,
 215        TokenType.COLLATE,
 216        TokenType.COMMAND,
 217        TokenType.COMMENT,
 218        TokenType.COMMIT,
 219        TokenType.COMPOUND,
 220        TokenType.CONSTRAINT,
 221        TokenType.DEFAULT,
 222        TokenType.DELETE,
 223        TokenType.DESCRIBE,
 224        TokenType.DIV,
 225        TokenType.END,
 226        TokenType.EXECUTE,
 227        TokenType.ESCAPE,
 228        TokenType.FALSE,
 229        TokenType.FIRST,
 230        TokenType.FILTER,
 231        TokenType.FOLLOWING,
 232        TokenType.FORMAT,
 233        TokenType.FULL,
 234        TokenType.IF,
 235        TokenType.IS,
 236        TokenType.ISNULL,
 237        TokenType.INTERVAL,
 238        TokenType.KEEP,
 239        TokenType.LAZY,
 240        TokenType.LEADING,
 241        TokenType.LEFT,
 242        TokenType.LOCAL,
 243        TokenType.MATERIALIZED,
 244        TokenType.MERGE,
 245        TokenType.NATURAL,
 246        TokenType.NEXT,
 247        TokenType.OFFSET,
 248        TokenType.ONLY,
 249        TokenType.OPTIONS,
 250        TokenType.ORDINALITY,
 251        TokenType.OVERWRITE,
 252        TokenType.PARTITION,
 253        TokenType.PERCENT,
 254        TokenType.PIVOT,
 255        TokenType.PRAGMA,
 256        TokenType.PRECEDING,
 257        TokenType.RANGE,
 258        TokenType.REFERENCES,
 259        TokenType.RIGHT,
 260        TokenType.ROW,
 261        TokenType.ROWS,
 262        TokenType.SEED,
 263        TokenType.SEMI,
 264        TokenType.SET,
 265        TokenType.SETTINGS,
 266        TokenType.SHOW,
 267        TokenType.SORTKEY,
 268        TokenType.TEMPORARY,
 269        TokenType.TOP,
 270        TokenType.TRAILING,
 271        TokenType.TRUE,
 272        TokenType.UNBOUNDED,
 273        TokenType.UNIQUE,
 274        TokenType.UNLOGGED,
 275        TokenType.UNPIVOT,
 276        TokenType.VOLATILE,
 277        TokenType.WINDOW,
 278        *CREATABLES,
 279        *SUBQUERY_PREDICATES,
 280        *TYPE_TOKENS,
 281        *NO_PAREN_FUNCTIONS,
 282    }
 283
 284    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 285
 286    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 287        TokenType.APPLY,
 288        TokenType.FULL,
 289        TokenType.LEFT,
 290        TokenType.LOCK,
 291        TokenType.NATURAL,
 292        TokenType.OFFSET,
 293        TokenType.RIGHT,
 294        TokenType.WINDOW,
 295    }
 296
 297    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 298
 299    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 300
 301    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 302
 303    FUNC_TOKENS = {
 304        TokenType.COMMAND,
 305        TokenType.CURRENT_DATE,
 306        TokenType.CURRENT_DATETIME,
 307        TokenType.CURRENT_TIMESTAMP,
 308        TokenType.CURRENT_TIME,
 309        TokenType.CURRENT_USER,
 310        TokenType.FILTER,
 311        TokenType.FIRST,
 312        TokenType.FORMAT,
 313        TokenType.GLOB,
 314        TokenType.IDENTIFIER,
 315        TokenType.INDEX,
 316        TokenType.ISNULL,
 317        TokenType.ILIKE,
 318        TokenType.LIKE,
 319        TokenType.MERGE,
 320        TokenType.OFFSET,
 321        TokenType.PRIMARY_KEY,
 322        TokenType.RANGE,
 323        TokenType.REPLACE,
 324        TokenType.ROW,
 325        TokenType.UNNEST,
 326        TokenType.VAR,
 327        TokenType.LEFT,
 328        TokenType.RIGHT,
 329        TokenType.DATE,
 330        TokenType.DATETIME,
 331        TokenType.TABLE,
 332        TokenType.TIMESTAMP,
 333        TokenType.TIMESTAMPTZ,
 334        TokenType.WINDOW,
 335        *TYPE_TOKENS,
 336        *SUBQUERY_PREDICATES,
 337    }
 338
 339    CONJUNCTION = {
 340        TokenType.AND: exp.And,
 341        TokenType.OR: exp.Or,
 342    }
 343
 344    EQUALITY = {
 345        TokenType.EQ: exp.EQ,
 346        TokenType.NEQ: exp.NEQ,
 347        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 348    }
 349
 350    COMPARISON = {
 351        TokenType.GT: exp.GT,
 352        TokenType.GTE: exp.GTE,
 353        TokenType.LT: exp.LT,
 354        TokenType.LTE: exp.LTE,
 355    }
 356
 357    BITWISE = {
 358        TokenType.AMP: exp.BitwiseAnd,
 359        TokenType.CARET: exp.BitwiseXor,
 360        TokenType.PIPE: exp.BitwiseOr,
 361        TokenType.DPIPE: exp.DPipe,
 362    }
 363
 364    TERM = {
 365        TokenType.DASH: exp.Sub,
 366        TokenType.PLUS: exp.Add,
 367        TokenType.MOD: exp.Mod,
 368        TokenType.COLLATE: exp.Collate,
 369    }
 370
 371    FACTOR = {
 372        TokenType.DIV: exp.IntDiv,
 373        TokenType.LR_ARROW: exp.Distance,
 374        TokenType.SLASH: exp.Div,
 375        TokenType.STAR: exp.Mul,
 376    }
 377
 378    TIMESTAMPS = {
 379        TokenType.TIME,
 380        TokenType.TIMESTAMP,
 381        TokenType.TIMESTAMPTZ,
 382        TokenType.TIMESTAMPLTZ,
 383    }
 384
 385    SET_OPERATIONS = {
 386        TokenType.UNION,
 387        TokenType.INTERSECT,
 388        TokenType.EXCEPT,
 389    }
 390
 391    JOIN_SIDES = {
 392        TokenType.LEFT,
 393        TokenType.RIGHT,
 394        TokenType.FULL,
 395    }
 396
 397    JOIN_KINDS = {
 398        TokenType.INNER,
 399        TokenType.OUTER,
 400        TokenType.CROSS,
 401        TokenType.SEMI,
 402        TokenType.ANTI,
 403    }
 404
 405    LAMBDAS = {
 406        TokenType.ARROW: lambda self, expressions: self.expression(
 407            exp.Lambda,
 408            this=self._replace_lambda(
 409                self._parse_conjunction(),
 410                {node.name for node in expressions},
 411            ),
 412            expressions=expressions,
 413        ),
 414        TokenType.FARROW: lambda self, expressions: self.expression(
 415            exp.Kwarg,
 416            this=exp.Var(this=expressions[0].name),
 417            expression=self._parse_conjunction(),
 418        ),
 419    }
 420
 421    COLUMN_OPERATORS = {
 422        TokenType.DOT: None,
 423        TokenType.DCOLON: lambda self, this, to: self.expression(
 424            exp.Cast if self.STRICT_CAST else exp.TryCast,
 425            this=this,
 426            to=to,
 427        ),
 428        TokenType.ARROW: lambda self, this, path: self.expression(
 429            exp.JSONExtract,
 430            this=this,
 431            expression=path,
 432        ),
 433        TokenType.DARROW: lambda self, this, path: self.expression(
 434            exp.JSONExtractScalar,
 435            this=this,
 436            expression=path,
 437        ),
 438        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 439            exp.JSONBExtract,
 440            this=this,
 441            expression=path,
 442        ),
 443        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 444            exp.JSONBExtractScalar,
 445            this=this,
 446            expression=path,
 447        ),
 448        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 449            exp.JSONBContains,
 450            this=this,
 451            expression=key,
 452        ),
 453    }
 454
 455    EXPRESSION_PARSERS = {
 456        exp.Column: lambda self: self._parse_column(),
 457        exp.DataType: lambda self: self._parse_types(),
 458        exp.From: lambda self: self._parse_from(),
 459        exp.Group: lambda self: self._parse_group(),
 460        exp.Identifier: lambda self: self._parse_id_var(),
 461        exp.Lateral: lambda self: self._parse_lateral(),
 462        exp.Join: lambda self: self._parse_join(),
 463        exp.Order: lambda self: self._parse_order(),
 464        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 465        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 466        exp.Lambda: lambda self: self._parse_lambda(),
 467        exp.Limit: lambda self: self._parse_limit(),
 468        exp.Offset: lambda self: self._parse_offset(),
 469        exp.TableAlias: lambda self: self._parse_table_alias(),
 470        exp.Table: lambda self: self._parse_table(),
 471        exp.Condition: lambda self: self._parse_conjunction(),
 472        exp.Expression: lambda self: self._parse_statement(),
 473        exp.Properties: lambda self: self._parse_properties(),
 474        exp.Where: lambda self: self._parse_where(),
 475        exp.Ordered: lambda self: self._parse_ordered(),
 476        exp.Having: lambda self: self._parse_having(),
 477        exp.With: lambda self: self._parse_with(),
 478        exp.Window: lambda self: self._parse_named_window(),
 479        exp.Qualify: lambda self: self._parse_qualify(),
 480        exp.Returning: lambda self: self._parse_returning(),
 481        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 482    }
 483
 484    STATEMENT_PARSERS = {
 485        TokenType.ALTER: lambda self: self._parse_alter(),
 486        TokenType.BEGIN: lambda self: self._parse_transaction(),
 487        TokenType.CACHE: lambda self: self._parse_cache(),
 488        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 489        TokenType.COMMENT: lambda self: self._parse_comment(),
 490        TokenType.CREATE: lambda self: self._parse_create(),
 491        TokenType.DELETE: lambda self: self._parse_delete(),
 492        TokenType.DESC: lambda self: self._parse_describe(),
 493        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 494        TokenType.DROP: lambda self: self._parse_drop(),
 495        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 496        TokenType.INSERT: lambda self: self._parse_insert(),
 497        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 498        TokenType.MERGE: lambda self: self._parse_merge(),
 499        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 500        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 501        TokenType.SET: lambda self: self._parse_set(),
 502        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 503        TokenType.UPDATE: lambda self: self._parse_update(),
 504        TokenType.USE: lambda self: self.expression(
 505            exp.Use,
 506            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 507            and exp.Var(this=self._prev.text),
 508            this=self._parse_table(schema=False),
 509        ),
 510    }
 511
 512    UNARY_PARSERS = {
 513        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 514        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 515        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 516        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 517    }
 518
 519    PRIMARY_PARSERS = {
 520        TokenType.STRING: lambda self, token: self.expression(
 521            exp.Literal, this=token.text, is_string=True
 522        ),
 523        TokenType.NUMBER: lambda self, token: self.expression(
 524            exp.Literal, this=token.text, is_string=False
 525        ),
 526        TokenType.STAR: lambda self, _: self.expression(
 527            exp.Star,
 528            **{"except": self._parse_except(), "replace": self._parse_replace()},
 529        ),
 530        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 531        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 532        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 533        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 534        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 535        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 536        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 537        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 538        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 539    }
 540
 541    PLACEHOLDER_PARSERS = {
 542        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 543        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 544        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 545        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 546        else None,
 547    }
 548
 549    RANGE_PARSERS = {
 550        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 551        TokenType.GLOB: binary_range_parser(exp.Glob),
 552        TokenType.ILIKE: binary_range_parser(exp.ILike),
 553        TokenType.IN: lambda self, this: self._parse_in(this),
 554        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 555        TokenType.IS: lambda self, this: self._parse_is(this),
 556        TokenType.LIKE: binary_range_parser(exp.Like),
 557        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 558        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 559        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 560    }
 561
 562    PROPERTY_PARSERS = {
 563        "AFTER": lambda self: self._parse_afterjournal(
 564            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 565        ),
 566        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 567        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 568        "BEFORE": lambda self: self._parse_journal(
 569            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 570        ),
 571        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 572        "CHARACTER SET": lambda self: self._parse_character_set(),
 573        "CHECKSUM": lambda self: self._parse_checksum(),
 574        "CLUSTER BY": lambda self: self.expression(
 575            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 576        ),
 577        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 578        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 579        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 580            default=self._prev.text.upper() == "DEFAULT"
 581        ),
 582        "DEFINER": lambda self: self._parse_definer(),
 583        "DETERMINISTIC": lambda self: self.expression(
 584            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 585        ),
 586        "DISTKEY": lambda self: self._parse_distkey(),
 587        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 588        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 589        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 590        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 591        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 592        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 593        "FREESPACE": lambda self: self._parse_freespace(),
 594        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 595        "IMMUTABLE": lambda self: self.expression(
 596            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 597        ),
 598        "JOURNAL": lambda self: self._parse_journal(
 599            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 600        ),
 601        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 602        "LIKE": lambda self: self._parse_create_like(),
 603        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 604        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 605        "LOCK": lambda self: self._parse_locking(),
 606        "LOCKING": lambda self: self._parse_locking(),
 607        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 608        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 609        "MAX": lambda self: self._parse_datablocksize(),
 610        "MAXIMUM": lambda self: self._parse_datablocksize(),
 611        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 612            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 613        ),
 614        "MIN": lambda self: self._parse_datablocksize(),
 615        "MINIMUM": lambda self: self._parse_datablocksize(),
 616        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 617        "NO": lambda self: self._parse_noprimaryindex(),
 618        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 619        "ON": lambda self: self._parse_oncommit(),
 620        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 621        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 622        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 623        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 624        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 625        "RETURNS": lambda self: self._parse_returns(),
 626        "ROW": lambda self: self._parse_row(),
 627        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 628        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 629        "SETTINGS": lambda self: self.expression(
 630            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 631        ),
 632        "SORTKEY": lambda self: self._parse_sortkey(),
 633        "STABLE": lambda self: self.expression(
 634            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 635        ),
 636        "STORED": lambda self: self._parse_stored(),
 637        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 638        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 639        "TEMP": lambda self: self._parse_temporary(global_=False),
 640        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 641        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 642        "TTL": lambda self: self._parse_ttl(),
 643        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 644        "VOLATILE": lambda self: self._parse_volatile_property(),
 645        "WITH": lambda self: self._parse_with_property(),
 646    }
 647
 648    CONSTRAINT_PARSERS = {
 649        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 650        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 651        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 652        "CHARACTER SET": lambda self: self.expression(
 653            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 654        ),
 655        "CHECK": lambda self: self.expression(
 656            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 657        ),
 658        "COLLATE": lambda self: self.expression(
 659            exp.CollateColumnConstraint, this=self._parse_var()
 660        ),
 661        "COMMENT": lambda self: self.expression(
 662            exp.CommentColumnConstraint, this=self._parse_string()
 663        ),
 664        "COMPRESS": lambda self: self._parse_compress(),
 665        "DEFAULT": lambda self: self.expression(
 666            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 667        ),
 668        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 669        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 670        "FORMAT": lambda self: self.expression(
 671            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 672        ),
 673        "GENERATED": lambda self: self._parse_generated_as_identity(),
 674        "IDENTITY": lambda self: self._parse_auto_increment(),
 675        "INLINE": lambda self: self._parse_inline(),
 676        "LIKE": lambda self: self._parse_create_like(),
 677        "NOT": lambda self: self._parse_not_constraint(),
 678        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 679        "ON": lambda self: self._match(TokenType.UPDATE)
 680        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 681        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 682        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 683        "REFERENCES": lambda self: self._parse_references(match=False),
 684        "TITLE": lambda self: self.expression(
 685            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 686        ),
 687        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 688        "UNIQUE": lambda self: self._parse_unique(),
 689        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 690    }
 691
 692    ALTER_PARSERS = {
 693        "ADD": lambda self: self._parse_alter_table_add(),
 694        "ALTER": lambda self: self._parse_alter_table_alter(),
 695        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 696        "DROP": lambda self: self._parse_alter_table_drop(),
 697        "RENAME": lambda self: self._parse_alter_table_rename(),
 698    }
 699
 700    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 701
 702    NO_PAREN_FUNCTION_PARSERS = {
 703        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 704        TokenType.CASE: lambda self: self._parse_case(),
 705        TokenType.IF: lambda self: self._parse_if(),
 706        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 707            exp.NextValueFor,
 708            this=self._parse_column(),
 709            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 710        ),
 711    }
 712
 713    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 714        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 715        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 716        "DECODE": lambda self: self._parse_decode(),
 717        "EXTRACT": lambda self: self._parse_extract(),
 718        "JSON_OBJECT": lambda self: self._parse_json_object(),
 719        "LOG": lambda self: self._parse_logarithm(),
 720        "MATCH": lambda self: self._parse_match_against(),
 721        "OPENJSON": lambda self: self._parse_open_json(),
 722        "POSITION": lambda self: self._parse_position(),
 723        "STRING_AGG": lambda self: self._parse_string_agg(),
 724        "SUBSTRING": lambda self: self._parse_substring(),
 725        "TRIM": lambda self: self._parse_trim(),
 726        "TRY_CAST": lambda self: self._parse_cast(False),
 727        "TRY_CONVERT": lambda self: self._parse_convert(False),
 728    }
 729
 730    QUERY_MODIFIER_PARSERS = {
 731        "joins": lambda self: list(iter(self._parse_join, None)),
 732        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 733        "match": lambda self: self._parse_match_recognize(),
 734        "where": lambda self: self._parse_where(),
 735        "group": lambda self: self._parse_group(),
 736        "having": lambda self: self._parse_having(),
 737        "qualify": lambda self: self._parse_qualify(),
 738        "windows": lambda self: self._parse_window_clause(),
 739        "order": lambda self: self._parse_order(),
 740        "limit": lambda self: self._parse_limit(),
 741        "offset": lambda self: self._parse_offset(),
 742        "locks": lambda self: self._parse_locks(),
 743        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 744    }
 745
    # Parsers for the modifier that can prefix a SET statement's assignment.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects populate these with their own handlers.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords accepted after INSERT OR ...
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords accepted inside a CLONE ... ( <kind> => ... ) clause.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # ROWS is excluded so "WINDOW w AS (... ROWS ...)" isn't read as an alias.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect knobs; subclasses override these flags to tweak behavior.
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False
 788
    # Fix the instance attribute set (no per-instance __dict__), reducing
    # per-parser memory and catching attribute typos early.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 808
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """
        Args:
            error_level: how parse errors are surfaced; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: number of characters of surrounding SQL included
                on each side of the highlighted span in error messages.
            index_offset: stored for use by parsing helpers (array index base).
            unnest_column_only: dialect flag stored for use by parsing helpers.
            alias_post_tablesample: dialect flag stored for use by parsing helpers.
            max_errors: maximum number of error messages concatenated when raising.
            null_ordering: dialect default NULL ordering, stored for parsing helpers.
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()
 827
    def reset(self):
        """Clear all per-parse state so this instance can be reused (called by
        `__init__` and at the start of `_parse`)."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
 837
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        # The unbound method is passed so _parse can invoke it once per
        # statement chunk via parse_method(self).
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
 855
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: if a requested type has no registered parser.
            ParseError: if none of the candidate types could be parsed.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag the failure with the type that was attempted so the
                # merged error report shows which parse produced it.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        # Every candidate type failed; chain from the last failure.
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
 889
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split `raw_tokens` on semicolons and parse each chunk with `parse_method`,
        returning one (possibly None) expression per chunk."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Only open a new chunk if the semicolon isn't the final token,
                # so a trailing semicolon doesn't yield an empty statement.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()  # position _curr on the chunk's first token

            expressions.append(parse_method(self))

            # Any token left unconsumed means the statement had trailing junk.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 923
 924    def check_errors(self) -> None:
 925        """
 926        Logs or raises any found errors, depending on the chosen error level setting.
 927        """
 928        if self.error_level == ErrorLevel.WARN:
 929            for error in self.errors:
 930                logger.error(str(error))
 931        elif self.error_level == ErrorLevel.RAISE and self.errors:
 932            raise ParseError(
 933                concat_messages(self.errors, self.max_errors),
 934                errors=merge_errors(self.errors),
 935            )
 936
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Fall back to the current, then previous, then an empty token so a
        # position can always be reported.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m / \033[0m underline the offending SQL span in terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
 964
 965    def expression(
 966        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 967    ) -> E:
 968        """
 969        Creates a new, validated Expression.
 970
 971        Args:
 972            exp_class: the expression class to instantiate.
 973            comments: an optional list of comments to attach to the expression.
 974            kwargs: the arguments to set for the expression along with their respective values.
 975
 976        Returns:
 977            The target expression.
 978        """
 979        instance = exp_class(**kwargs)
 980        instance.add_comments(comments) if comments else self._add_comments(instance)
 981        self.validate_expression(instance)
 982        return instance
 983
 984    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 985        if expression and self._prev_comments:
 986            expression.add_comments(self._prev_comments)
 987            self._prev_comments = None
 988
 989    def validate_expression(
 990        self, expression: exp.Expression, args: t.Optional[t.List] = None
 991    ) -> None:
 992        """
 993        Validates an already instantiated expression, making sure that all its mandatory arguments
 994        are set.
 995
 996        Args:
 997            expression: the expression to validate.
 998            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 999        """
1000        if self.error_level == ErrorLevel.IGNORE:
1001            return
1002
1003        for error_message in expression.error_messages(args):
1004            self.raise_error(error_message)
1005
1006    def _find_sql(self, start: Token, end: Token) -> str:
1007        return self.sql[start.start : end.end]
1008
1009    def _advance(self, times: int = 1) -> None:
1010        self._index += times
1011        self._curr = seq_get(self._tokens, self._index)
1012        self._next = seq_get(self._tokens, self._index + 1)
1013        if self._index > 0:
1014            self._prev = self._tokens[self._index - 1]
1015            self._prev_comments = self._prev.comments
1016        else:
1017            self._prev = None
1018            self._prev_comments = None
1019
1020    def _retreat(self, index: int) -> None:
1021        if index != self._index:
1022            self._advance(index - self._index)
1023
    def _parse_command(self) -> exp.Command:
        # Fallback parser: wrap the previous token's text plus an optional
        # trailing string literal into an opaque Command node.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1026
    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse a COMMENT ON <kind> <target> IS <string> statement.

        Falls back to an opaque Command when the target kind isn't a known
        creatable.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        # `kind` is the matched creatable token, or False if none matched.
        kind = self._match_set(self.CREATABLES) and self._prev

        if not kind:
            return self._parse_as_command(start)

        # The target is parsed differently depending on what is being commented.
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )
1052
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into a MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action
            # (DELETE / RECOMPRESS <expr> / TO DISK <str> / TO VOLUME <str>).
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # Bare expression with no action keyword.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # SET <aggregations> is only valid after a GROUP BY.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1090
    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse one full statement: a registered statement, a command, or a
        (possibly set-op-combined, modifier-carrying) expression/select."""
        # Empty chunk (e.g. input was just a semicolon).
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Tokens the tokenizer flags as commands are kept opaque.
        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
1104
    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP statement; falls back to a Command for unknown kinds."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        # NOTE: keyword-argument order matters — Python evaluates these left
        # to right, which is exactly the order the tokens must be consumed in
        # (IF EXISTS, then the table, then trailing options).
        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )
1124
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches "IF [NOT] EXISTS". The `and` chain both consumes tokens and
        # computes the result: truthy only when the full sequence matched,
        # otherwise the falsy result of the first failing _match. NOT is only
        # required when `not_` is set.
        # NOTE(review): if IF matches but the rest doesn't, the IF token stays
        # consumed (no retreat) — confirm this is intended.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
1131
1132    def _parse_create(self) -> t.Optional[exp.Expression]:
1133        start = self._prev
1134        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1135            TokenType.OR, TokenType.REPLACE
1136        )
1137        unique = self._match(TokenType.UNIQUE)
1138
1139        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1140            self._match(TokenType.TABLE)
1141
1142        properties = None
1143        create_token = self._match_set(self.CREATABLES) and self._prev
1144
1145        if not create_token:
1146            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1147            create_token = self._match_set(self.CREATABLES) and self._prev
1148
1149            if not properties or not create_token:
1150                return self._parse_as_command(start)
1151
1152        exists = self._parse_exists(not_=True)
1153        this = None
1154        expression = None
1155        indexes = None
1156        no_schema_binding = None
1157        begin = None
1158        clone = None
1159
1160        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1161            this = self._parse_user_defined_function(kind=create_token.token_type)
1162            temp_properties = self._parse_properties()
1163            if properties and temp_properties:
1164                properties.expressions.extend(temp_properties.expressions)
1165            elif temp_properties:
1166                properties = temp_properties
1167
1168            self._match(TokenType.ALIAS)
1169            begin = self._match(TokenType.BEGIN)
1170            return_ = self._match_text_seq("RETURN")
1171            expression = self._parse_statement()
1172
1173            if return_:
1174                expression = self.expression(exp.Return, this=expression)
1175        elif create_token.token_type == TokenType.INDEX:
1176            this = self._parse_index()
1177        elif create_token.token_type in self.DB_CREATABLES:
1178            table_parts = self._parse_table_parts(schema=True)
1179
1180            # exp.Properties.Location.POST_NAME
1181            if self._match(TokenType.COMMA):
1182                temp_properties = self._parse_properties(before=True)
1183                if properties and temp_properties:
1184                    properties.expressions.extend(temp_properties.expressions)
1185                elif temp_properties:
1186                    properties = temp_properties
1187
1188            this = self._parse_schema(this=table_parts)
1189
1190            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1191            temp_properties = self._parse_properties()
1192            if properties and temp_properties:
1193                properties.expressions.extend(temp_properties.expressions)
1194            elif temp_properties:
1195                properties = temp_properties
1196
1197            self._match(TokenType.ALIAS)
1198
1199            # exp.Properties.Location.POST_ALIAS
1200            if not (
1201                self._match(TokenType.SELECT, advance=False)
1202                or self._match(TokenType.WITH, advance=False)
1203                or self._match(TokenType.L_PAREN, advance=False)
1204            ):
1205                temp_properties = self._parse_properties()
1206                if properties and temp_properties:
1207                    properties.expressions.extend(temp_properties.expressions)
1208                elif temp_properties:
1209                    properties = temp_properties
1210
1211            expression = self._parse_ddl_select()
1212
1213            if create_token.token_type == TokenType.TABLE:
1214                # exp.Properties.Location.POST_EXPRESSION
1215                temp_properties = self._parse_properties()
1216                if properties and temp_properties:
1217                    properties.expressions.extend(temp_properties.expressions)
1218                elif temp_properties:
1219                    properties = temp_properties
1220
1221                indexes = []
1222                while True:
1223                    index = self._parse_create_table_index()
1224
1225                    # exp.Properties.Location.POST_INDEX
1226                    if self._match(TokenType.PARTITION_BY, advance=False):
1227                        temp_properties = self._parse_properties()
1228                        if properties and temp_properties:
1229                            properties.expressions.extend(temp_properties.expressions)
1230                        elif temp_properties:
1231                            properties = temp_properties
1232
1233                    if not index:
1234                        break
1235                    else:
1236                        indexes.append(index)
1237            elif create_token.token_type == TokenType.VIEW:
1238                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1239                    no_schema_binding = True
1240
1241            if self._match_text_seq("CLONE"):
1242                clone = self._parse_table(schema=True)
1243                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1244                clone_kind = (
1245                    self._match(TokenType.L_PAREN)
1246                    and self._match_texts(self.CLONE_KINDS)
1247                    and self._prev.text.upper()
1248                )
1249                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1250                self._match(TokenType.R_PAREN)
1251                clone = self.expression(
1252                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1253                )
1254
1255        return self.expression(
1256            exp.Create,
1257            this=this,
1258            kind=create_token.text,
1259            replace=replace,
1260            unique=unique,
1261            expression=expression,
1262            exists=exists,
1263            properties=properties,
1264            indexes=indexes,
1265            no_schema_binding=no_schema_binding,
1266            begin=begin,
1267            clone=clone,
1268        )
1269
1270    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1271        self._match(TokenType.COMMA)
1272
1273        # parsers look to _prev for no/dual/default, so need to consume first
1274        self._match_text_seq("NO")
1275        self._match_text_seq("DUAL")
1276        self._match_text_seq("DEFAULT")
1277
1278        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1279            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1280
1281        return None
1282
    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/view property, or None if nothing matches."""
        # Keyword-driven properties registered by the dialect.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` assignment where the key is an identifier or
        # a string literal (lookahead only; tokens consumed below).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None
1306
1307    def _parse_stored(self) -> exp.Expression:
1308        self._match(TokenType.ALIAS)
1309
1310        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1311        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1312
1313        return self.expression(
1314            exp.FileFormatProperty,
1315            this=self.expression(
1316                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1317            )
1318            if input_format or output_format
1319            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1320        )
1321
    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        """Parse `[= | AS] <field>` and wrap the field in *exp_class*."""
        # Both separators are optional; either (or neither) may appear.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())
1326
1327    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1328        properties = []
1329
1330        while True:
1331            if before:
1332                identified_property = self._parse_property_before()
1333            else:
1334                identified_property = self._parse_property()
1335
1336            if not identified_property:
1337                break
1338            for p in ensure_list(identified_property):
1339                properties.append(p)
1340
1341        if properties:
1342            return self.expression(exp.Properties, expressions=properties)
1343
1344        return None
1345
1346    def _parse_fallback(self, no=False) -> exp.Expression:
1347        self._match_text_seq("FALLBACK")
1348        return self.expression(
1349            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1350        )
1351
1352    def _parse_volatile_property(self) -> exp.Expression:
1353        if self._index >= 2:
1354            pre_volatile_token = self._tokens[self._index - 2]
1355        else:
1356            pre_volatile_token = None
1357
1358        if pre_volatile_token and pre_volatile_token.token_type in (
1359            TokenType.CREATE,
1360            TokenType.REPLACE,
1361            TokenType.UNIQUE,
1362        ):
1363            return exp.VolatileProperty()
1364
1365        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1366
    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse the property (or property list) following a WITH keyword."""
        self._match(TokenType.WITH)
        # WITH ( prop, prop, ... )
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        # Order matters: "DATA" must be tried before "NO DATA".
        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()
1386
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parse `DEFINER = user@host` into a DefinerProperty, or None if
        either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # The host may tokenize as an identifier or as a bare `%` wildcard.
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")
1399
    def _parse_withjournaltable(self) -> exp.Expression:
        """Parse `WITH JOURNAL [TABLE] [=] <table>` (JOURNAL already consumed)."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1404
    def _parse_log(self, no: bool = False) -> exp.Expression:
        """Parse a `[NO] LOG` property (the NO was consumed by the caller)."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)
1408
    def _parse_journal(self, no: bool = False, dual: bool = False) -> exp.Expression:
        """Parse a `[NO|DUAL] [BEFORE] JOURNAL` property (NO/DUAL consumed by caller)."""
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1413
    def _parse_afterjournal(self, no: bool = False, dual: bool = False, local=None) -> exp.Expression:
        """Parse a `[NOT] [LOCAL] AFTER JOURNAL` property (leading modifiers
        consumed by the caller and threaded through as arguments)."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1419
1420    def _parse_checksum(self) -> exp.Expression:
1421        self._match_text_seq("CHECKSUM")
1422        self._match(TokenType.EQ)
1423
1424        on = None
1425        if self._match(TokenType.ON):
1426            on = True
1427        elif self._match_text_seq("OFF"):
1428            on = False
1429        default = self._match(TokenType.DEFAULT)
1430
1431        return self.expression(
1432            exp.ChecksumProperty,
1433            on=on,
1434            default=default,
1435        )
1436
    def _parse_freespace(self) -> exp.Expression:
        """Parse `FREESPACE [=] <number> [PERCENT]` into a FreespaceProperty."""
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        # Keyword args evaluate left-to-right: the number is parsed before the
        # optional PERCENT token is matched.
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )
1443
1444    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1445        self._match_text_seq("MERGEBLOCKRATIO")
1446        if self._match(TokenType.EQ):
1447            return self.expression(
1448                exp.MergeBlockRatioProperty,
1449                this=self._parse_number(),
1450                percent=self._match(TokenType.PERCENT),
1451            )
1452        else:
1453            return self.expression(
1454                exp.MergeBlockRatioProperty,
1455                no=no,
1456                default=default,
1457            )
1458
    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse the DATABLOCKSIZE property in its DEFAULT, MIN/MAX, or
        explicit `= <size> [units]` forms."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            # MAX is encoded as min=False (min=None means unspecified).
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1477
    def _parse_blockcompression(self) -> exp.Expression:
        """Parse `BLOCKCOMPRESSION [=] ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP (...)]`."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        # At most one of these matches; the rest stay False.
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )
1497
    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`
        (the WITH was consumed by the caller)."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        # At most one FOR-variant can match; the remaining flags stay False.
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )
1513
    def _parse_locking(self) -> exp.Expression:
        """Parse a LOCKING property:
        `[DATABASE|TABLE|VIEW|ROW] [<name>] [FOR|IN] <lock type> [OVERRIDE]`.
        Every component is optional; unmatched parts are recorded as None."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named lockable objects carry a name; ROW does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1563
1564    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1565        if self._match(TokenType.PARTITION_BY):
1566            return self._parse_csv(self._parse_conjunction)
1567        return []
1568
1569    def _parse_partitioned_by(self) -> exp.Expression:
1570        self._match(TokenType.EQ)
1571        return self.expression(
1572            exp.PartitionedByProperty,
1573            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1574        )
1575
1576    def _parse_withdata(self, no=False) -> exp.Expression:
1577        if self._match_text_seq("AND", "STATISTICS"):
1578            statistics = True
1579        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1580            statistics = False
1581        else:
1582            statistics = None
1583
1584        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1585
1586    def _parse_noprimaryindex(self) -> exp.Expression:
1587        self._match_text_seq("PRIMARY", "INDEX")
1588        return exp.NoPrimaryIndexProperty()
1589
1590    def _parse_oncommit(self) -> exp.Expression:
1591        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1592        return exp.OnCommitProperty()
1593
1594    def _parse_distkey(self) -> exp.Expression:
1595        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1596
1597    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1598        table = self._parse_table(schema=True)
1599        options = []
1600        while self._match_texts(("INCLUDING", "EXCLUDING")):
1601            this = self._prev.text.upper()
1602            id_var = self._parse_id_var()
1603
1604            if not id_var:
1605                return None
1606
1607            options.append(
1608                self.expression(
1609                    exp.Property,
1610                    this=this,
1611                    value=exp.Var(this=id_var.this.upper()),
1612                )
1613            )
1614        return self.expression(exp.LikeProperty, this=table, expressions=options)
1615
1616    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1617        return self.expression(
1618            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1619        )
1620
1621    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1622        self._match(TokenType.EQ)
1623        return self.expression(
1624            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1625        )
1626
1627    def _parse_returns(self) -> exp.Expression:
1628        value: t.Optional[exp.Expression]
1629        is_table = self._match(TokenType.TABLE)
1630
1631        if is_table:
1632            if self._match(TokenType.LT):
1633                value = self.expression(
1634                    exp.Schema,
1635                    this="TABLE",
1636                    expressions=self._parse_csv(self._parse_struct_types),
1637                )
1638                if not self._match(TokenType.GT):
1639                    self.raise_error("Expecting >")
1640            else:
1641                value = self._parse_schema(exp.Var(this="TABLE"))
1642        else:
1643            value = self._parse_types()
1644
1645        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1646
1647    def _parse_temporary(self, global_=False) -> exp.Expression:
1648        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
1649        return self.expression(exp.TemporaryProperty, global_=global_)
1650
1651    def _parse_describe(self) -> exp.Expression:
1652        kind = self._match_set(self.CREATABLES) and self._prev.text
1653        this = self._parse_table()
1654
1655        return self.expression(exp.Describe, this=this, kind=kind)
1656
1657    def _parse_insert(self) -> exp.Expression:
1658        overwrite = self._match(TokenType.OVERWRITE)
1659        local = self._match(TokenType.LOCAL)
1660        alternative = None
1661
1662        if self._match_text_seq("DIRECTORY"):
1663            this: t.Optional[exp.Expression] = self.expression(
1664                exp.Directory,
1665                this=self._parse_var_or_string(),
1666                local=local,
1667                row_format=self._parse_row_format(match_row=True),
1668            )
1669        else:
1670            if self._match(TokenType.OR):
1671                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
1672
1673            self._match(TokenType.INTO)
1674            self._match(TokenType.TABLE)
1675            this = self._parse_table(schema=True)
1676
1677        return self.expression(
1678            exp.Insert,
1679            this=this,
1680            exists=self._parse_exists(),
1681            partition=self._parse_partition(),
1682            expression=self._parse_ddl_select(),
1683            conflict=self._parse_on_conflict(),
1684            returning=self._parse_returning(),
1685            overwrite=overwrite,
1686            alternative=alternative,
1687        )
1688
1689    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
1690        conflict = self._match_text_seq("ON", "CONFLICT")
1691        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
1692
1693        if not (conflict or duplicate):
1694            return None
1695
1696        nothing = None
1697        expressions = None
1698        key = None
1699        constraint = None
1700
1701        if conflict:
1702            if self._match_text_seq("ON", "CONSTRAINT"):
1703                constraint = self._parse_id_var()
1704            else:
1705                key = self._parse_csv(self._parse_value)
1706
1707        self._match_text_seq("DO")
1708        if self._match_text_seq("NOTHING"):
1709            nothing = True
1710        else:
1711            self._match(TokenType.UPDATE)
1712            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
1713
1714        return self.expression(
1715            exp.OnConflict,
1716            duplicate=duplicate,
1717            expressions=expressions,
1718            nothing=nothing,
1719            key=key,
1720            constraint=constraint,
1721        )
1722
1723    def _parse_returning(self) -> t.Optional[exp.Expression]:
1724        if not self._match(TokenType.RETURNING):
1725            return None
1726
1727        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1728
1729    def _parse_row(self) -> t.Optional[exp.Expression]:
1730        if not self._match(TokenType.FORMAT):
1731            return None
1732        return self._parse_row_format()
1733
1734    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
1735        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
1736            return None
1737
1738        if self._match_text_seq("SERDE"):
1739            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())
1740
1741        self._match_text_seq("DELIMITED")
1742
1743        kwargs = {}
1744
1745        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
1746            kwargs["fields"] = self._parse_string()
1747            if self._match_text_seq("ESCAPED", "BY"):
1748                kwargs["escaped"] = self._parse_string()
1749        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
1750            kwargs["collection_items"] = self._parse_string()
1751        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1752            kwargs["map_keys"] = self._parse_string()
1753        if self._match_text_seq("LINES", "TERMINATED", "BY"):
1754            kwargs["lines"] = self._parse_string()
1755        if self._match_text_seq("NULL", "DEFINED", "AS"):
1756            kwargs["null"] = self._parse_string()
1757
1758        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1759
1760    def _parse_load_data(self) -> exp.Expression:
1761        local = self._match(TokenType.LOCAL)
1762        self._match_text_seq("INPATH")
1763        inpath = self._parse_string()
1764        overwrite = self._match(TokenType.OVERWRITE)
1765        self._match_pair(TokenType.INTO, TokenType.TABLE)
1766
1767        return self.expression(
1768            exp.LoadData,
1769            this=self._parse_table(schema=True),
1770            local=local,
1771            overwrite=overwrite,
1772            inpath=inpath,
1773            partition=self._parse_partition(),
1774            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
1775            serde=self._match_text_seq("SERDE") and self._parse_string(),
1776        )
1777
1778    def _parse_delete(self) -> exp.Expression:
1779        self._match(TokenType.FROM)
1780
1781        return self.expression(
1782            exp.Delete,
1783            this=self._parse_table(),
1784            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1785            where=self._parse_where(),
1786            returning=self._parse_returning(),
1787        )
1788
1789    def _parse_update(self) -> exp.Expression:
1790        return self.expression(
1791            exp.Update,
1792            **{  # type: ignore
1793                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1794                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1795                "from": self._parse_from(modifiers=True),
1796                "where": self._parse_where(),
1797                "returning": self._parse_returning(),
1798            },
1799        )
1800
1801    def _parse_uncache(self) -> exp.Expression:
1802        if not self._match(TokenType.TABLE):
1803            self.raise_error("Expecting TABLE after UNCACHE")
1804
1805        return self.expression(
1806            exp.Uncache,
1807            exists=self._parse_exists(),
1808            this=self._parse_table(schema=True),
1809        )
1810
1811    def _parse_cache(self) -> exp.Expression:
1812        lazy = self._match(TokenType.LAZY)
1813        self._match(TokenType.TABLE)
1814        table = self._parse_table(schema=True)
1815        options = []
1816
1817        if self._match(TokenType.OPTIONS):
1818            self._match_l_paren()
1819            k = self._parse_string()
1820            self._match(TokenType.EQ)
1821            v = self._parse_string()
1822            options = [k, v]
1823            self._match_r_paren()
1824
1825        self._match(TokenType.ALIAS)
1826        return self.expression(
1827            exp.Cache,
1828            this=table,
1829            lazy=lazy,
1830            options=options,
1831            expression=self._parse_select(nested=True),
1832        )
1833
1834    def _parse_partition(self) -> t.Optional[exp.Expression]:
1835        if not self._match(TokenType.PARTITION):
1836            return None
1837
1838        return self.expression(
1839            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1840        )
1841
1842    def _parse_value(self) -> exp.Expression:
1843        if self._match(TokenType.L_PAREN):
1844            expressions = self._parse_csv(self._parse_conjunction)
1845            self._match_r_paren()
1846            return self.expression(exp.Tuple, expressions=expressions)
1847
1848        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1849        # Source: https://prestodb.io/docs/current/sql/values.html
1850        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1851
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query.

        Handles, in order: a WITH clause followed by a statement, a plain
        SELECT, a parenthesized nested select/table (when `nested`/`table`
        is set), and a bare VALUES clause. Trailing set operations
        (UNION/INTERSECT/EXCEPT) are attached last.

        Args:
            nested: allow a parenthesized nested select.
            table: inside parentheses, parse a table instead of a select.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression, or None if nothing matched.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when the error level tolerates failures

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE (BigQuery-style);
            # False unless ALIAS followed by STRUCT or VALUE is present.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                # DISTINCT may carry an ON (<exprs>) target list.
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1931
1932    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
1933        if not skip_with_token and not self._match(TokenType.WITH):
1934            return None
1935
1936        comments = self._prev_comments
1937        recursive = self._match(TokenType.RECURSIVE)
1938
1939        expressions = []
1940        while True:
1941            expressions.append(self._parse_cte())
1942
1943            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
1944                break
1945            else:
1946                self._match(TokenType.WITH)
1947
1948        return self.expression(
1949            exp.With, comments=comments, expressions=expressions, recursive=recursive
1950        )
1951
1952    def _parse_cte(self) -> exp.Expression:
1953        alias = self._parse_table_alias()
1954        if not alias or not alias.this:
1955            self.raise_error("Expected CTE to have alias")
1956
1957        self._match(TokenType.ALIAS)
1958
1959        return self.expression(
1960            exp.CTE,
1961            this=self._parse_wrapped(self._parse_statement),
1962            alias=alias,
1963        )
1964
1965    def _parse_table_alias(
1966        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1967    ) -> t.Optional[exp.Expression]:
1968        any_token = self._match(TokenType.ALIAS)
1969        alias = (
1970            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1971            or self._parse_string_as_identifier()
1972        )
1973
1974        index = self._index
1975        if self._match(TokenType.L_PAREN):
1976            columns = self._parse_csv(self._parse_function_parameter)
1977            self._match_r_paren() if columns else self._retreat(index)
1978        else:
1979            columns = None
1980
1981        if not alias and not columns:
1982            return None
1983
1984        return self.expression(exp.TableAlias, this=alias, columns=columns)
1985
1986    def _parse_subquery(
1987        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1988    ) -> exp.Expression:
1989        return self.expression(
1990            exp.Subquery,
1991            this=this,
1992            pivots=self._parse_pivots(),
1993            alias=self._parse_table_alias() if parse_alias else None,
1994        )
1995
1996    def _parse_query_modifiers(
1997        self, this: t.Optional[exp.Expression]
1998    ) -> t.Optional[exp.Expression]:
1999        if isinstance(this, self.MODIFIABLES):
2000            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
2001                expression = parser(self)
2002
2003                if expression:
2004                    this.set(key, expression)
2005        return this
2006
2007    def _parse_hint(self) -> t.Optional[exp.Expression]:
2008        if self._match(TokenType.HINT):
2009            hints = self._parse_csv(self._parse_function)
2010            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2011                self.raise_error("Expected */ after HINT")
2012            return self.expression(exp.Hint, expressions=hints)
2013
2014        return None
2015
2016    def _parse_into(self) -> t.Optional[exp.Expression]:
2017        if not self._match(TokenType.INTO):
2018            return None
2019
2020        temp = self._match(TokenType.TEMPORARY)
2021        unlogged = self._match(TokenType.UNLOGGED)
2022        self._match(TokenType.TABLE)
2023
2024        return self.expression(
2025            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2026        )
2027
2028    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2029        if not self._match(TokenType.FROM):
2030            return None
2031
2032        comments = self._prev_comments
2033        this = self._parse_table()
2034
2035        return self.expression(
2036            exp.From,
2037            comments=comments,
2038            this=self._parse_query_modifiers(this) if modifiers else this,
2039        )
2040
2041    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2042        if not self._match(TokenType.MATCH_RECOGNIZE):
2043            return None
2044
2045        self._match_l_paren()
2046
2047        partition = self._parse_partition_by()
2048        order = self._parse_order()
2049        measures = (
2050            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2051        )
2052
2053        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2054            rows = exp.Var(this="ONE ROW PER MATCH")
2055        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2056            text = "ALL ROWS PER MATCH"
2057            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2058                text += f" SHOW EMPTY MATCHES"
2059            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2060                text += f" OMIT EMPTY MATCHES"
2061            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2062                text += f" WITH UNMATCHED ROWS"
2063            rows = exp.Var(this=text)
2064        else:
2065            rows = None
2066
2067        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2068            text = "AFTER MATCH SKIP"
2069            if self._match_text_seq("PAST", "LAST", "ROW"):
2070                text += f" PAST LAST ROW"
2071            elif self._match_text_seq("TO", "NEXT", "ROW"):
2072                text += f" TO NEXT ROW"
2073            elif self._match_text_seq("TO", "FIRST"):
2074                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2075            elif self._match_text_seq("TO", "LAST"):
2076                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2077            after = exp.Var(this=text)
2078        else:
2079            after = None
2080
2081        if self._match_text_seq("PATTERN"):
2082            self._match_l_paren()
2083
2084            if not self._curr:
2085                self.raise_error("Expecting )", self._curr)
2086
2087            paren = 1
2088            start = self._curr
2089
2090            while self._curr and paren > 0:
2091                if self._curr.token_type == TokenType.L_PAREN:
2092                    paren += 1
2093                if self._curr.token_type == TokenType.R_PAREN:
2094                    paren -= 1
2095                end = self._prev
2096                self._advance()
2097            if paren > 0:
2098                self.raise_error("Expecting )", self._curr)
2099            pattern = exp.Var(this=self._find_sql(start, end))
2100        else:
2101            pattern = None
2102
2103        define = (
2104            self._parse_csv(
2105                lambda: self.expression(
2106                    exp.Alias,
2107                    alias=self._parse_id_var(any_token=True),
2108                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2109                )
2110            )
2111            if self._match_text_seq("DEFINE")
2112            else None
2113        )
2114
2115        self._match_r_paren()
2116
2117        return self.expression(
2118            exp.MatchRecognize,
2119            partition_by=partition,
2120            order=order,
2121            measures=measures,
2122            rows=rows,
2123            after=after,
2124            pattern=pattern,
2125            define=define,
2126            alias=self._parse_table_alias(),
2127        )
2128
2129    def _parse_lateral(self) -> t.Optional[exp.Expression]:
2130        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
2131        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
2132
2133        if outer_apply or cross_apply:
2134            this = self._parse_select(table=True)
2135            view = None
2136            outer = not cross_apply
2137        elif self._match(TokenType.LATERAL):
2138            this = self._parse_select(table=True)
2139            view = self._match(TokenType.VIEW)
2140            outer = self._match(TokenType.OUTER)
2141        else:
2142            return None
2143
2144        if not this:
2145            this = self._parse_function() or self._parse_id_var(any_token=False)
2146            while self._match(TokenType.DOT):
2147                this = exp.Dot(
2148                    this=this,
2149                    expression=self._parse_function() or self._parse_id_var(any_token=False),
2150                )
2151
2152        table_alias: t.Optional[exp.Expression]
2153
2154        if view:
2155            table = self._parse_id_var(any_token=False)
2156            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
2157            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
2158        else:
2159            table_alias = self._parse_table_alias()
2160
2161        expression = self.expression(
2162            exp.Lateral,
2163            this=this,
2164            view=view,
2165            outer=outer,
2166            alias=table_alias,
2167        )
2168
2169        return expression
2170
2171    def _parse_join_side_and_kind(
2172        self,
2173    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2174        return (
2175            self._match(TokenType.NATURAL) and self._prev,
2176            self._match_set(self.JOIN_SIDES) and self._prev,
2177            self._match_set(self.JOIN_KINDS) and self._prev,
2178        )
2179
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join clause (including comma joins and OUTER/CROSS APPLY).

        Args:
            skip_join_token: when True, the JOIN keyword was already consumed
                by the caller.

        Returns:
            An exp.Join, or None when no join is present.
        """
        # A bare comma is an implicit cross join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Modifiers without a JOIN keyword: rewind and discard them.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        # Third argument False: peek without consuming yet.
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY behaves like a LEFT join for downstream handling.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2223
2224    def _parse_index(self) -> exp.Expression:
2225        index = self._parse_id_var()
2226        self._match(TokenType.ON)
2227        self._match(TokenType.TABLE)  # hive
2228
2229        return self.expression(
2230            exp.Index,
2231            this=index,
2232            table=self.expression(exp.Table, this=self._parse_id_var()),
2233            columns=self._parse_expression(),
2234        )
2235
2236    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2237        unique = self._match(TokenType.UNIQUE)
2238        primary = self._match_text_seq("PRIMARY")
2239        amp = self._match_text_seq("AMP")
2240        if not self._match(TokenType.INDEX):
2241            return None
2242        index = self._parse_id_var()
2243        columns = None
2244        if self._match(TokenType.L_PAREN, advance=False):
2245            columns = self._parse_wrapped_csv(self._parse_column)
2246        return self.expression(
2247            exp.Index,
2248            this=index,
2249            columns=columns,
2250            unique=unique,
2251            primary=primary,
2252            amp=amp,
2253        )
2254
2255    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2256        return (
2257            (not schema and self._parse_function())
2258            or self._parse_id_var(any_token=False)
2259            or self._parse_string_as_identifier()
2260            or self._parse_placeholder()
2261        )
2262
2263    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
2264        catalog = None
2265        db = None
2266        table = self._parse_table_part(schema=schema)
2267
2268        while self._match(TokenType.DOT):
2269            if catalog:
2270                # This allows nesting the table in arbitrarily many dot expressions if needed
2271                table = self.expression(
2272                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2273                )
2274            else:
2275                catalog = db
2276                db = table
2277                table = self._parse_table_part(schema=schema)
2278
2279        if not table:
2280            self.raise_error(f"Expected table name but got {self._curr}")
2281
2282        return self.expression(
2283            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2284        )
2285
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a
        (possibly qualified and aliased) table name.

        Args:
            schema: parse the target as a schema definition (CREATE-style).
            alias_tokens: token set permitted as alias names; defaults to
                TABLE_ALIAS_TOKENS.
        """
        # Each alternative below consumes tokens only when it matches,
        # so the cascade order is significant.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            # Schema targets take no alias/sample/hint decorations.
            return self._parse_schema(this=this)

        # Dialect flag: some dialects place TABLESAMPLE before the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (<hints>) — table hints.
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        # Exactly one of the two branches above assigned table_sample.
        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
2337
2338    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2339        if not self._match(TokenType.UNNEST):
2340            return None
2341
2342        expressions = self._parse_wrapped_csv(self._parse_type)
2343        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2344        alias = self._parse_table_alias()
2345
2346        if alias and self.unnest_column_only:
2347            if alias.args.get("columns"):
2348                self.raise_error("Unexpected extra column alias in unnest.")
2349            alias.set("columns", [alias.this])
2350            alias.set("this", None)
2351
2352        offset = None
2353        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2354            self._match(TokenType.ALIAS)
2355            offset = self._parse_id_var() or exp.Identifier(this="offset")
2356
2357        return self.expression(
2358            exp.Unnest,
2359            expressions=expressions,
2360            ordinality=ordinality,
2361            alias=alias,
2362            offset=offset,
2363        )
2364
2365    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2366        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2367        if not is_derived and not self._match(TokenType.VALUES):
2368            return None
2369
2370        expressions = self._parse_csv(self._parse_value)
2371
2372        if is_derived:
2373            self._match_r_paren()
2374
2375        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2376
2377    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2378        if not self._match(TokenType.TABLE_SAMPLE) and not (
2379            as_modifier and self._match_text_seq("USING", "SAMPLE")
2380        ):
2381            return None
2382
2383        bucket_numerator = None
2384        bucket_denominator = None
2385        bucket_field = None
2386        percent = None
2387        rows = None
2388        size = None
2389        seed = None
2390
2391        kind = (
2392            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2393        )
2394        method = self._parse_var(tokens=(TokenType.ROW,))
2395
2396        self._match(TokenType.L_PAREN)
2397
2398        num = self._parse_number()
2399
2400        if self._match(TokenType.BUCKET):
2401            bucket_numerator = self._parse_number()
2402            self._match(TokenType.OUT_OF)
2403            bucket_denominator = bucket_denominator = self._parse_number()
2404            self._match(TokenType.ON)
2405            bucket_field = self._parse_field()
2406        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2407            percent = num
2408        elif self._match(TokenType.ROWS):
2409            rows = num
2410        else:
2411            size = num
2412
2413        self._match(TokenType.R_PAREN)
2414
2415        if self._match(TokenType.L_PAREN):
2416            method = self._parse_var()
2417            seed = self._match(TokenType.COMMA) and self._parse_number()
2418            self._match_r_paren()
2419        elif self._match_texts(("SEED", "REPEATABLE")):
2420            seed = self._parse_wrapped(self._parse_number)
2421
2422        return self.expression(
2423            exp.TableSample,
2424            method=method,
2425            bucket_numerator=bucket_numerator,
2426            bucket_denominator=bucket_denominator,
2427            bucket_field=bucket_field,
2428            percent=percent,
2429            rows=rows,
2430            size=size,
2431            seed=seed,
2432            kind=kind,
2433        )
2434
2435    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2436        return list(iter(self._parse_pivot, None))
2437
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause, backtracking (returning None) if the
        keyword isn't actually followed by a parenthesized pivot body."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # The keyword was something else (e.g. an identifier); rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # An alias is only parsed if another PIVOT/UNPIVOT does not follow.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot by
            # combining each IN-list value with each aggregation name.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Dialect setting controls whether the aggregation name is
                    # a prefix or a suffix of the generated column name.
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2497
2498    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2499        return [agg.alias for agg in aggregations]
2500
2501    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2502        if not skip_where_token and not self._match(TokenType.WHERE):
2503            return None
2504
2505        return self.expression(
2506            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2507        )
2508
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and
        TOTALS modifiers; `skip_group_by_token` means GROUP BY was already consumed."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # The different element kinds may be interleaved, so accumulate them
        # in a loop until no modifier is matched in a full pass.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # Bare `WITH ROLLUP` / `WITH CUBE` store True; the function forms
            # ROLLUP(...) / CUBE(...) store the parsed column list instead.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2545
2546    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2547        if not self._match(TokenType.GROUPING_SETS):
2548            return None
2549
2550        return self._parse_wrapped_csv(self._parse_grouping_set)
2551
2552    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2553        if self._match(TokenType.L_PAREN):
2554            grouping_set = self._parse_csv(self._parse_column)
2555            self._match_r_paren()
2556            return self.expression(exp.Tuple, expressions=grouping_set)
2557
2558        return self._parse_column()
2559
2560    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2561        if not skip_having_token and not self._match(TokenType.HAVING):
2562            return None
2563        return self.expression(exp.Having, this=self._parse_conjunction())
2564
2565    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2566        if not self._match(TokenType.QUALIFY):
2567            return None
2568        return self.expression(exp.Qualify, this=self._parse_conjunction())
2569
2570    def _parse_order(
2571        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2572    ) -> t.Optional[exp.Expression]:
2573        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2574            return this
2575
2576        return self.expression(
2577            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2578        )
2579
2580    def _parse_sort(
2581        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2582    ) -> t.Optional[exp.Expression]:
2583        if not self._match(token_type):
2584            return None
2585        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2586
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item: an expression with optional ASC/DESC and
        NULLS FIRST/LAST.

        When the null ordering is not explicit in the SQL, `nulls_first` is
        derived from the dialect's `null_ordering` setting so the node carries
        an unambiguous ordering.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # Infer nulls_first only when the query didn't say, based on whether
        # the source dialect sorts NULLs as small, large, or always last.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2608
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a LIMIT clause (or TOP when `top=True`), or an ANSI FETCH clause.

        Returns `this` unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP may wrap its count in parentheses, e.g. TOP (10).
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # FETCH {FIRST | NEXT} <count> [PERCENT] {ROW | ROWS} {ONLY | WITH TIES}
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2647
2648    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2649        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2650            return this
2651
2652        count = self._parse_number()
2653        self._match_set((TokenType.ROW, TokenType.ROWS))
2654        return self.expression(exp.Offset, this=this, expression=count)
2655
    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse zero or more row-locking clauses: FOR UPDATE, FOR SHARE, or
        LOCK IN SHARE MODE, each with an optional OF table list and an
        optional NOWAIT / WAIT <n> / SKIP LOCKED modifier."""
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes three states: True -> NOWAIT, an expression ->
            # WAIT <n>, False -> SKIP LOCKED, None -> no modifier.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks
2687
2688    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2689        if not self._match_set(self.SET_OPERATIONS):
2690            return this
2691
2692        token_type = self._prev.token_type
2693
2694        if token_type == TokenType.UNION:
2695            expression = exp.Union
2696        elif token_type == TokenType.EXCEPT:
2697            expression = exp.Except
2698        else:
2699            expression = exp.Intersect
2700
2701        return self.expression(
2702            expression,
2703            this=this,
2704            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2705            expression=self._parse_set_operations(self._parse_select(nested=True)),
2706        )
2707
2708    def _parse_expression(self) -> t.Optional[exp.Expression]:
2709        return self._parse_alias(self._parse_conjunction())
2710
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse left-associative chains of the operators in `self.CONJUNCTION`."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2713
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse left-associative chains of the operators in `self.EQUALITY`."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2716
    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse left-associative chains of the operators in `self.COMPARISON`."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2719
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates registered in `self.RANGE_PARSERS`
        (e.g. BETWEEN, IN, LIKE), the ISNULL/NOTNULL shorthands, an optional
        leading NOT, and a trailing IS expression."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # NOT consumed above negates the whole range predicate, e.g. x NOT BETWEEN ...
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2746
2747    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2748        index = self._index - 1
2749        negate = self._match(TokenType.NOT)
2750        if self._match(TokenType.DISTINCT_FROM):
2751            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2752            return self.expression(klass, this=this, expression=self._parse_expression())
2753
2754        expression = self._parse_null() or self._parse_boolean()
2755        if not expression:
2756            self._retreat(index)
2757            return None
2758
2759        this = self.expression(exp.Is, this=this, expression=expression)
2760        return self.expression(exp.Not, this=this) if negate else this
2761
2762    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2763        unnest = self._parse_unnest()
2764        if unnest:
2765            this = self.expression(exp.In, this=this, unnest=unnest)
2766        elif self._match(TokenType.L_PAREN):
2767            expressions = self._parse_csv(self._parse_select_or_expression)
2768
2769            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2770                this = self.expression(exp.In, this=this, query=expressions[0])
2771            else:
2772                this = self.expression(exp.In, this=this, expressions=expressions)
2773
2774            self._match_r_paren(this)
2775        else:
2776            this = self.expression(exp.In, this=this, field=self._parse_field())
2777
2778        return this
2779
2780    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2781        low = self._parse_bitwise()
2782        self._match(TokenType.AND)
2783        high = self._parse_bitwise()
2784        return self.expression(exp.Between, this=this, low=low, high=high)
2785
2786    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2787        if not self._match(TokenType.ESCAPE):
2788            return this
2789        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2790
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing literals to the canonical
        `INTERVAL '<n>' <unit>` shape; returns None if INTERVAL is absent."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split a combined literal like '5 day' into value + unit.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2811
2812    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2813        this = self._parse_term()
2814
2815        while True:
2816            if self._match_set(self.BITWISE):
2817                this = self.expression(
2818                    self.BITWISE[self._prev.token_type],
2819                    this=this,
2820                    expression=self._parse_term(),
2821                )
2822            elif self._match_pair(TokenType.LT, TokenType.LT):
2823                this = self.expression(
2824                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2825                )
2826            elif self._match_pair(TokenType.GT, TokenType.GT):
2827                this = self.expression(
2828                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2829                )
2830            else:
2831                break
2832
2833        return this
2834
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse left-associative chains of the operators in `self.TERM`."""
        return self._parse_tokens(self._parse_factor, self.TERM)
2837
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse left-associative chains of the operators in `self.FACTOR`."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
2840
2841    def _parse_unary(self) -> t.Optional[exp.Expression]:
2842        if self._match_set(self.UNARY_PARSERS):
2843            return self.UNARY_PARSERS[self._prev.token_type](self)
2844        return self._parse_at_time_zone(self._parse_type())
2845
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a `<type> <literal>` cast form, a bare data type,
        or fall back to a column expression, backtracking where ambiguous."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' is treated as a cast of the literal.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by a non-literal was probably a
                # column reference after all — rewind and reparse as one.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2867
2868    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2869        this = self._parse_type()
2870        if not this:
2871            return None
2872
2873        return self.expression(
2874            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2875        )
2876
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, returning an exp.DataType (or exp.PseudoType /
        exp.Interval) and restoring the token position on failure.

        When `check_func` is True, a parenthesized form that could also be a
        function call (e.g. DATE(...)) is only accepted as a type if a string
        literal follows it; otherwise the parser backtracks and returns None.
        """
        index = self._index

        # Allow an optional `SYSUDTLIB.` prefix before the type name.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # <name>(...) is ambiguous with a function call; remember this so
            # the check_func disambiguation below can kick in.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Bracketed array types, e.g. INT[] and INT[][]: wrap once, then
            # keep nesting for every additional [] pair.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A `[` that isn't immediately closed means this wasn't a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a: INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal value list after the closing >, in () or [].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH [LOCAL] TIME ZONE variants onto dedicated types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek for a string literal: only then is <name>(...) a type and
            # not a function call. Either way, restore the position we peeked from.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2989
2990    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2991        this = self._parse_type() or self._parse_id_var()
2992        self._match(TokenType.COLON)
2993        return self._parse_column_def(this)
2994
2995    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2996        if not self._match(TokenType.AT_TIME_ZONE):
2997            return this
2998        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2999
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, then repeatedly apply any operators from
        `self.COLUMN_OPERATORS` (dots, `::` casts, arrow-style accessors) and
        bracket subscripts."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # <expr>::<type> cast syntax — the RHS must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators with a registered handler take the next token as a
                # literal key (number or string).
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the name parts over one slot: in a.b.c the existing
                # parts become table/db/catalog and `field` is the column.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3052
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal via `self.PRIMARY_PARSERS`,
        a leading-dot number, or a parenthesized expression / subquery / tuple.
        Returns None when none of these apply."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are folded into a single CONCAT.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 becomes the number literal 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # (a, b, ...) parses as a tuple
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                # Attach comments seen at the opening paren to the result node.
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3096
3097    def _parse_field(
3098        self,
3099        any_token: bool = False,
3100        tokens: t.Optional[t.Collection[TokenType]] = None,
3101    ) -> t.Optional[exp.Expression]:
3102        return (
3103            self._parse_primary()
3104            or self._parse_function()
3105            or self._parse_id_var(any_token=any_token, tokens=tokens)
3106        )
3107
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current position.

        Args:
            functions: optional name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: when True, bypass known-function builders and always
                produce an exp.Anonymous node.

        Returns the parsed function (possibly wrapped in a window by
        `_parse_window`), or None if the tokens don't form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parenthesis follows, so only parenless builtins can match.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) — parse the body as a subquery predicate.
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown (or forced-anonymous) functions keep their raw name.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3159
3160    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3161        return self._parse_column_def(self._parse_id_var())
3162
3163    def _parse_user_defined_function(
3164        self, kind: t.Optional[TokenType] = None
3165    ) -> t.Optional[exp.Expression]:
3166        this = self._parse_id_var()
3167
3168        while self._match(TokenType.DOT):
3169            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3170
3171        if not self._match(TokenType.L_PAREN):
3172            return this
3173
3174        expressions = self._parse_csv(self._parse_function_parameter)
3175        self._match_r_paren()
3176        return self.expression(
3177            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3178        )
3179
3180    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3181        literal = self._parse_primary()
3182        if literal:
3183            return self.expression(exp.Introducer, this=token.text, expression=literal)
3184
3185        return self.expression(exp.Identifier, this=token.text)
3186
3187    def _parse_national(self, token: Token) -> exp.Expression:
3188        return self.expression(exp.National, this=exp.Literal.string(token.text))
3189
3190    def _parse_session_parameter(self) -> exp.Expression:
3191        kind = None
3192        this = self._parse_id_var() or self._parse_primary()
3193
3194        if this and self._match(TokenType.DOT):
3195            kind = this.name
3196            this = self._parse_var() or self._parse_primary()
3197
3198        return self.expression(exp.SessionParameter, this=this, kind=kind)
3199
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda argument (`x -> expr` or `(x, y) -> expr`).

        When no lambda arrow follows, the parser rewinds and falls back to parsing
        the argument as DISTINCT(...), or a plain select/expression argument.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all; rewind the probe.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda arrow: rewind and parse an ordinary function argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                # Rewrite `name = value` arguments so the LHS is a Var, not a Column.
                if isinstance(left, exp.Column):
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3231
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`."""
        index = self._index

        # Lookahead probe: if a nested SELECT parses here, this is not a schema.
        # The probe is best-effort, so any parse failure is swallowed, and the
        # finally-clause always rewinds the token stream before we proceed/return.
        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3252
3253    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3254        # column defs are not really columns, they're identifiers
3255        if isinstance(this, exp.Column):
3256            this = this.this
3257        kind = self._parse_types()
3258
3259        if self._match_text_seq("FOR", "ORDINALITY"):
3260            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3261
3262        constraints = []
3263        while True:
3264            constraint = self._parse_column_constraint()
3265            if not constraint:
3266                break
3267            constraints.append(constraint)
3268
3269        if not kind and not constraints:
3270            return this
3271
3272        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3273
3274    def _parse_auto_increment(self) -> exp.Expression:
3275        start = None
3276        increment = None
3277
3278        if self._match(TokenType.L_PAREN, advance=False):
3279            args = self._parse_wrapped_csv(self._parse_bitwise)
3280            start = seq_get(args, 0)
3281            increment = seq_get(args, 1)
3282        elif self._match_text_seq("START"):
3283            start = self._parse_bitwise()
3284            self._match_text_seq("INCREMENT")
3285            increment = self._parse_bitwise()
3286
3287        if start and increment:
3288            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3289
3290        return exp.AutoIncrementColumnConstraint()
3291
3292    def _parse_compress(self) -> exp.Expression:
3293        if self._match(TokenType.L_PAREN, advance=False):
3294            return self.expression(
3295                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3296            )
3297
3298        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3299
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED { ALWAYS | BY DEFAULT [ON NULL] } AS IDENTITY [(options)].

        The `this` arg of the resulting constraint is True for ALWAYS and False
        for BY DEFAULT; sequence options (start/increment/min/max/cycle) are set
        from the optional parenthesized tail.
        """
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Each sequence option is optional and consumed in this fixed order.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3329
3330    def _parse_inline(self) -> t.Optional[exp.Expression]:
3331        self._match_text_seq("LENGTH")
3332        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3333
3334    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3335        if self._match_text_seq("NULL"):
3336            return self.expression(exp.NotNullColumnConstraint)
3337        if self._match_text_seq("CASESPECIFIC"):
3338            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3339        return None
3340
3341    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3342        if self._match(TokenType.CONSTRAINT):
3343            this = self._parse_id_var()
3344        else:
3345            this = None
3346
3347        if self._match_texts(self.CONSTRAINT_PARSERS):
3348            return self.expression(
3349                exp.ColumnConstraint,
3350                this=this,
3351                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3352            )
3353
3354        return this
3355
3356    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3357        if not self._match(TokenType.CONSTRAINT):
3358            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3359
3360        this = self._parse_id_var()
3361        expressions = []
3362
3363        while True:
3364            constraint = self._parse_unnamed_constraint() or self._parse_function()
3365            if not constraint:
3366                break
3367            expressions.append(constraint)
3368
3369        return self.expression(exp.Constraint, this=this, expressions=expressions)
3370
3371    def _parse_unnamed_constraint(
3372        self, constraints: t.Optional[t.Collection[str]] = None
3373    ) -> t.Optional[exp.Expression]:
3374        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3375            return None
3376
3377        constraint = self._prev.text.upper()
3378        if constraint not in self.CONSTRAINT_PARSERS:
3379            self.raise_error(f"No parser found for schema constraint {constraint}.")
3380
3381        return self.CONSTRAINT_PARSERS[constraint](self)
3382
3383    def _parse_unique(self) -> exp.Expression:
3384        if not self._match(TokenType.L_PAREN, advance=False):
3385            return self.expression(exp.UniqueColumnConstraint)
3386        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3387
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Consume trailing key-constraint options, returning them as raw strings.

        Options are consumed greedily in a loop until no known option matches;
        the relative order of the elif arms determines matching precedence.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the trigger (e.g. DELETE / UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # Unrecognized token: stop, leaving it for the caller to handle.
                break

        return options
3424
3425    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3426        if match and not self._match(TokenType.REFERENCES):
3427            return None
3428
3429        expressions = None
3430        this = self._parse_id_var()
3431
3432        if self._match(TokenType.L_PAREN, advance=False):
3433            expressions = self._parse_wrapped_id_vars()
3434
3435        options = self._parse_key_constraint_options()
3436        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3437
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] [ON DELETE|UPDATE <action>]...

        Collected ON actions become keyword args ("delete"/"update") of the
        resulting exp.ForeignKey node.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                # SET NULL / SET DEFAULT; _prev holds whichever keyword matched.
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE / RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3463
3464    def _parse_primary_key(self) -> exp.Expression:
3465        desc = (
3466            self._match_set((TokenType.ASC, TokenType.DESC))
3467            and self._prev.token_type == TokenType.DESC
3468        )
3469
3470        if not self._match(TokenType.L_PAREN, advance=False):
3471            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3472
3473        expressions = self._parse_wrapped_csv(self._parse_field)
3474        options = self._parse_key_constraint_options()
3475        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3476
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` (subscript/array) or `{...}` (struct) following `this`.

        Recurses on itself at the end to handle chained brackets, e.g. `x[0][1]`.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. `x[:n]`.
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Real subscript: normalize indices for the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
3505
3506    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3507        if self._match(TokenType.COLON):
3508            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3509        return this
3510
3511    def _parse_case(self) -> t.Optional[exp.Expression]:
3512        ifs = []
3513        default = None
3514
3515        expression = self._parse_conjunction()
3516
3517        while self._match(TokenType.WHEN):
3518            this = self._parse_conjunction()
3519            self._match(TokenType.THEN)
3520            then = self._parse_conjunction()
3521            ifs.append(self.expression(exp.If, this=this, true=then))
3522
3523        if self._match(TokenType.ELSE):
3524            default = self._parse_conjunction()
3525
3526        if not self._match(TokenType.END):
3527            self.raise_error("Expected END after CASE", self._prev)
3528
3529        return self._parse_window(
3530            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3531        )
3532
3533    def _parse_if(self) -> t.Optional[exp.Expression]:
3534        if self._match(TokenType.L_PAREN):
3535            args = self._parse_csv(self._parse_conjunction)
3536            this = exp.If.from_arg_list(args)
3537            self.validate_expression(this, args)
3538            self._match_r_paren()
3539        else:
3540            index = self._index - 1
3541            condition = self._parse_conjunction()
3542
3543            if not condition:
3544                self._retreat(index)
3545                return None
3546
3547            self._match(TokenType.THEN)
3548            true = self._parse_conjunction()
3549            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
3550            self._match(TokenType.END)
3551            this = self.expression(exp.If, this=condition, true=true, false=false)
3552
3553        return self._parse_window(this)
3554
3555    def _parse_extract(self) -> exp.Expression:
3556        this = self._parse_function() or self._parse_var() or self._parse_type()
3557
3558        if self._match(TokenType.FROM):
3559            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3560
3561        if not self._match(TokenType.COMMA):
3562            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3563
3564        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3565
3566    def _parse_cast(self, strict: bool) -> exp.Expression:
3567        this = self._parse_conjunction()
3568
3569        if not self._match(TokenType.ALIAS):
3570            if self._match(TokenType.COMMA):
3571                return self.expression(
3572                    exp.CastToStrType, this=this, expression=self._parse_string()
3573                )
3574            else:
3575                self.raise_error("Expected AS after CAST")
3576
3577        to = self._parse_types()
3578
3579        if not to:
3580            self.raise_error("Expected TYPE after CAST")
3581        elif to.this == exp.DataType.Type.CHAR:
3582            if self._match(TokenType.CHARACTER_SET):
3583                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
3584
3585        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3586
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT-style arguments into exp.GroupConcat.

        Handles DISTINCT, an inline ORDER BY before the closing paren (Postgres),
        and a trailing WITHIN GROUP (ORDER BY ...) clause (e.g. T-SQL).
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            # Rewind the R_PAREN match so the caller's _match_r_paren can consume it.
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3615
3616    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3617        to: t.Optional[exp.Expression]
3618        this = self._parse_bitwise()
3619
3620        if self._match(TokenType.USING):
3621            to = self.expression(exp.CharacterSet, this=self._parse_var())
3622        elif self._match(TokenType.COMMA):
3623            to = self._parse_bitwise()
3624        else:
3625            to = None
3626
3627        # Swap the argument order if needed to produce the correct AST
3628        if self.CONVERT_TYPE_FIRST:
3629            this, to = to, this
3630
3631        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3632
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        # Fewer than 3 args can only be the (bin, charset) variant.
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; an odd leftover arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL never equals anything, so match it explicitly with IS NULL.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may evaluate to NULL at runtime, so
                # also treat "both sides NULL" as a match (DECODE's NULL semantics).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3679
3680    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3681        self._match_text_seq("KEY")
3682        key = self._parse_field()
3683        self._match(TokenType.COLON)
3684        self._match_text_seq("VALUE")
3685        value = self._parse_field()
3686        if not key and not value:
3687            return None
3688        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3689
    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments: key/value pairs plus its optional clauses.

        Clauses are consumed in the SQL-defined order: NULL handling, UNIQUE KEYS,
        RETURNING <type>, FORMAT JSON, ENCODING <var>.
        """
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # The trailing KEYS keyword of WITH/WITHOUT UNIQUE KEYS is optional here.
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
3720
3721    def _parse_logarithm(self) -> exp.Expression:
3722        # Default argument order is base, expression
3723        args = self._parse_csv(self._parse_range)
3724
3725        if len(args) > 1:
3726            if not self.LOG_BASE_FIRST:
3727                args.reverse()
3728            return exp.Log.from_arg_list(args)
3729
3730        return self.expression(
3731            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3732        )
3733
    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL MATCH(<cols>) AGAINST(<string> [modifier]).

        The modifier text sequences are tried in this exact order so that the
        longest applicable phrase is consumed from the token stream.
        """
        expressions = self._parse_csv(self._parse_column)

        # Consume the ") AGAINST (" bridge between the column list and the query.
        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
3755
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL OPENJSON(<expr>[, <path>]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause column: <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        # ") WITH" closes the OPENJSON args and opens the column definition list.
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3776
3777    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3778        args = self._parse_csv(self._parse_bitwise)
3779
3780        if self._match(TokenType.IN):
3781            return self.expression(
3782                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3783            )
3784
3785        if haystack_first:
3786            haystack = seq_get(args, 0)
3787            needle = seq_get(args, 1)
3788        else:
3789            needle = seq_get(args, 0)
3790            haystack = seq_get(args, 1)
3791
3792        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3793
3794        self.validate_expression(this, args)
3795
3796        return this
3797
3798    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3799        args = self._parse_csv(self._parse_table)
3800        return exp.JoinHint(this=func_name.upper(), expressions=args)
3801
3802    def _parse_substring(self) -> exp.Expression:
3803        # Postgres supports the form: substring(string [from int] [for int])
3804        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3805
3806        args = self._parse_csv(self._parse_bitwise)
3807
3808        if self._match(TokenType.FROM):
3809            args.append(self._parse_bitwise())
3810            if self._match(TokenType.FOR):
3811                args.append(self._parse_bitwise())
3812
3813        this = exp.Substring.from_arg_list(args)
3814        self.validate_expression(this, args)
3815
3816        return this
3817
3818    def _parse_trim(self) -> exp.Expression:
3819        # https://www.w3resource.com/sql/character-functions/trim.php
3820        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3821
3822        position = None
3823        collation = None
3824
3825        if self._match_set(self.TRIM_TYPES):
3826            position = self._prev.text.upper()
3827
3828        expression = self._parse_bitwise()
3829        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3830            this = self._parse_bitwise()
3831        else:
3832            this = expression
3833            expression = None
3834
3835        if self._match(TokenType.COLLATE):
3836            collation = self._parse_bitwise()
3837
3838        return self.expression(
3839            exp.Trim,
3840            this=this,
3841            position=position,
3842            expression=expression,
3843            collation=collation,
3844        )
3845
3846    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3847        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3848
3849    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3850        return self._parse_window(self._parse_id_var(), alias=True)
3851
3852    def _parse_respect_or_ignore_nulls(
3853        self, this: t.Optional[exp.Expression]
3854    ) -> t.Optional[exp.Expression]:
3855        if self._match(TokenType.IGNORE_NULLS):
3856            return self.expression(exp.IgnoreNulls, this=this)
3857        if self._match(TokenType.RESPECT_NULLS):
3858            return self.expression(exp.RespectNulls, this=this)
3859        return this
3860
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes around `this`.

        Handles FILTER (WHERE ...), WITHIN GROUP (ORDER BY ...), IGNORE/RESPECT
        NULLS, and an OVER (...) / OVER <name> clause. With `alias=True`, parses
        a BigQuery-style named window definition (`<name> AS (<spec>)`) instead.
        Returns `this` unchanged when no window syntax follows.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        # `OVER <name>` without parens references a named window.
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FROM FIRST / FROM LAST modifier (e.g. for NTH_VALUE).
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3943
3944    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3945        self._match(TokenType.BETWEEN)
3946
3947        return {
3948            "value": (
3949                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3950            )
3951            or self._parse_bitwise(),
3952            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3953        }
3954
3955    def _parse_alias(
3956        self, this: t.Optional[exp.Expression], explicit: bool = False
3957    ) -> t.Optional[exp.Expression]:
3958        any_token = self._match(TokenType.ALIAS)
3959
3960        if explicit and not any_token:
3961            return this
3962
3963        if self._match(TokenType.L_PAREN):
3964            aliases = self.expression(
3965                exp.Aliases,
3966                this=this,
3967                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3968            )
3969            self._match_r_paren(aliases)
3970            return aliases
3971
3972        alias = self._parse_id_var(any_token)
3973
3974        if alias:
3975            return self.expression(exp.Alias, this=this, alias=alias)
3976
3977        return this
3978
3979    def _parse_id_var(
3980        self,
3981        any_token: bool = True,
3982        tokens: t.Optional[t.Collection[TokenType]] = None,
3983        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3984    ) -> t.Optional[exp.Expression]:
3985        identifier = self._parse_identifier()
3986
3987        if identifier:
3988            return identifier
3989
3990        prefix = ""
3991
3992        if prefix_tokens:
3993            while self._match_set(prefix_tokens):
3994                prefix += self._prev.text
3995
3996        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3997            quoted = self._prev.token_type == TokenType.STRING
3998            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3999
4000        return None
4001
4002    def _parse_string(self) -> t.Optional[exp.Expression]:
4003        if self._match(TokenType.STRING):
4004            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4005        return self._parse_placeholder()
4006
4007    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4008        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4009
4010    def _parse_number(self) -> t.Optional[exp.Expression]:
4011        if self._match(TokenType.NUMBER):
4012            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4013        return self._parse_placeholder()
4014
4015    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4016        if self._match(TokenType.IDENTIFIER):
4017            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4018        return self._parse_placeholder()
4019
4020    def _parse_var(
4021        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4022    ) -> t.Optional[exp.Expression]:
4023        if (
4024            (any_token and self._advance_any())
4025            or self._match(TokenType.VAR)
4026            or (self._match_set(tokens) if tokens else False)
4027        ):
4028            return self.expression(exp.Var, this=self._prev.text)
4029        return self._parse_placeholder()
4030
4031    def _advance_any(self) -> t.Optional[Token]:
4032        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4033            self._advance()
4034            return self._prev
4035        return None
4036
4037    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4038        return self._parse_var() or self._parse_string()
4039
4040    def _parse_null(self) -> t.Optional[exp.Expression]:
4041        if self._match(TokenType.NULL):
4042            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4043        return None
4044
4045    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4046        if self._match(TokenType.TRUE):
4047            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4048        if self._match(TokenType.FALSE):
4049            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4050        return None
4051
4052    def _parse_star(self) -> t.Optional[exp.Expression]:
4053        if self._match(TokenType.STAR):
4054            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4055        return None
4056
4057    def _parse_parameter(self) -> exp.Expression:
4058        wrapped = self._match(TokenType.L_BRACE)
4059        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4060        self._match(TokenType.R_BRACE)
4061        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4062
4063    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4064        if self._match_set(self.PLACEHOLDER_PARSERS):
4065            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4066            if placeholder:
4067                return placeholder
4068            self._advance(-1)
4069        return None
4070
4071    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4072        if not self._match(TokenType.EXCEPT):
4073            return None
4074        if self._match(TokenType.L_PAREN, advance=False):
4075            return self._parse_wrapped_csv(self._parse_column)
4076        return self._parse_csv(self._parse_column)
4077
4078    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4079        if not self._match(TokenType.REPLACE):
4080            return None
4081        if self._match(TokenType.L_PAREN, advance=False):
4082            return self._parse_wrapped_csv(self._parse_expression)
4083        return self._parse_csv(self._parse_expression)
4084
4085    def _parse_csv(
4086        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4087    ) -> t.List[t.Optional[exp.Expression]]:
4088        parse_result = parse_method()
4089        items = [parse_result] if parse_result is not None else []
4090
4091        while self._match(sep):
4092            self._add_comments(parse_result)
4093            parse_result = parse_method()
4094            if parse_result is not None:
4095                items.append(parse_result)
4096
4097        return items
4098
4099    def _parse_tokens(
4100        self, parse_method: t.Callable, expressions: t.Dict
4101    ) -> t.Optional[exp.Expression]:
4102        this = parse_method()
4103
4104        while self._match_set(expressions):
4105            this = self.expression(
4106                expressions[self._prev.token_type],
4107                this=this,
4108                comments=self._prev_comments,
4109                expression=parse_method(),
4110            )
4111
4112        return this
4113
4114    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
4115        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4116
4117    def _parse_wrapped_csv(
4118        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4119    ) -> t.List[t.Optional[exp.Expression]]:
4120        return self._parse_wrapped(
4121            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4122        )
4123
4124    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4125        wrapped = self._match(TokenType.L_PAREN)
4126        if not wrapped and not optional:
4127            self.raise_error("Expecting (")
4128        parse_result = parse_method()
4129        if wrapped:
4130            self._match_r_paren()
4131        return parse_result
4132
4133    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
4134        return self._parse_select() or self._parse_set_operations(self._parse_expression())
4135
4136    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4137        return self._parse_set_operations(
4138            self._parse_select(nested=True, parse_subquery_alias=False)
4139        )
4140
4141    def _parse_transaction(self) -> exp.Expression:
4142        this = None
4143        if self._match_texts(self.TRANSACTION_KIND):
4144            this = self._prev.text
4145
4146        self._match_texts({"TRANSACTION", "WORK"})
4147
4148        modes = []
4149        while True:
4150            mode = []
4151            while self._match(TokenType.VAR):
4152                mode.append(self._prev.text)
4153
4154            if mode:
4155                modes.append(" ".join(mode))
4156            if not self._match(TokenType.COMMA):
4157                break
4158
4159        return self.expression(exp.Transaction, this=this, modes=modes)
4160
4161    def _parse_commit_or_rollback(self) -> exp.Expression:
4162        chain = None
4163        savepoint = None
4164        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4165
4166        self._match_texts({"TRANSACTION", "WORK"})
4167
4168        if self._match_text_seq("TO"):
4169            self._match_text_seq("SAVEPOINT")
4170            savepoint = self._parse_id_var()
4171
4172        if self._match(TokenType.AND):
4173            chain = not self._match_text_seq("NO")
4174            self._match_text_seq("CHAIN")
4175
4176        if is_rollback:
4177            return self.expression(exp.Rollback, savepoint=savepoint)
4178        return self.expression(exp.Commit, chain=chain)
4179
4180    def _parse_add_column(self) -> t.Optional[exp.Expression]:
4181        if not self._match_text_seq("ADD"):
4182            return None
4183
4184        self._match(TokenType.COLUMN)
4185        exists_column = self._parse_exists(not_=True)
4186        expression = self._parse_column_def(self._parse_field(any_token=True))
4187
4188        if expression:
4189            expression.set("exists", exists_column)
4190
4191            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
4192            if self._match_texts(("FIRST", "AFTER")):
4193                position = self._prev.text
4194                column_position = self.expression(
4195                    exp.ColumnPosition, this=self._parse_column(), position=position
4196                )
4197                expression.set("position", column_position)
4198
4199        return expression
4200
4201    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4202        drop = self._match(TokenType.DROP) and self._parse_drop()
4203        if drop and not isinstance(drop, exp.Command):
4204            drop.set("kind", drop.args.get("kind", "COLUMN"))
4205        return drop
4206
4207    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4208    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4209        return self.expression(
4210            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4211        )
4212
4213    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
4214        this = None
4215        kind = self._prev.token_type
4216
4217        if kind == TokenType.CONSTRAINT:
4218            this = self._parse_id_var()
4219
4220            if self._match_text_seq("CHECK"):
4221                expression = self._parse_wrapped(self._parse_conjunction)
4222                enforced = self._match_text_seq("ENFORCED")
4223
4224                return self.expression(
4225                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
4226                )
4227
4228        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
4229            expression = self._parse_foreign_key()
4230        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
4231            expression = self._parse_primary_key()
4232        else:
4233            expression = None
4234
4235        return self.expression(exp.AddConstraint, this=this, expression=expression)
4236
4237    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4238        index = self._index - 1
4239
4240        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4241            return self._parse_csv(self._parse_add_constraint)
4242
4243        self._retreat(index)
4244        return self._parse_csv(self._parse_add_column)
4245
4246    def _parse_alter_table_alter(self) -> exp.Expression:
4247        self._match(TokenType.COLUMN)
4248        column = self._parse_field(any_token=True)
4249
4250        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4251            return self.expression(exp.AlterColumn, this=column, drop=True)
4252        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4253            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4254
4255        self._match_text_seq("SET", "DATA")
4256        return self.expression(
4257            exp.AlterColumn,
4258            this=column,
4259            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4260            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4261            using=self._match(TokenType.USING) and self._parse_conjunction(),
4262        )
4263
4264    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4265        index = self._index - 1
4266
4267        partition_exists = self._parse_exists()
4268        if self._match(TokenType.PARTITION, advance=False):
4269            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4270
4271        self._retreat(index)
4272        return self._parse_csv(self._parse_drop_column)
4273
4274    def _parse_alter_table_rename(self) -> exp.Expression:
4275        self._match_text_seq("TO")
4276        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4277
4278    def _parse_alter(self) -> t.Optional[exp.Expression]:
4279        start = self._prev
4280
4281        if not self._match(TokenType.TABLE):
4282            return self._parse_as_command(start)
4283
4284        exists = self._parse_exists()
4285        this = self._parse_table(schema=True)
4286
4287        if self._next:
4288            self._advance()
4289        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
4290
4291        if parser:
4292            actions = ensure_list(parser(self))
4293
4294            if not self._curr:
4295                return self.expression(
4296                    exp.AlterTable,
4297                    this=this,
4298                    exists=exists,
4299                    actions=actions,
4300                )
4301        return self._parse_as_command(start)
4302
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement: MERGE INTO <target> USING <source> ON <cond>
        followed by any number of WHEN [NOT] MATCHED ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # `source` is False for BY TARGET, True for BY SOURCE, and False
            # when neither qualifier is present.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # Bare `INSERT *` form.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # Bare `UPDATE SET *` form.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4368
4369    def _parse_show(self) -> t.Optional[exp.Expression]:
4370        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4371        if parser:
4372            return parser(self)
4373        self._advance()
4374        return self.expression(exp.Show, this=self._prev.text.upper())
4375
4376    def _parse_set_item_assignment(
4377        self, kind: t.Optional[str] = None
4378    ) -> t.Optional[exp.Expression]:
4379        index = self._index
4380
4381        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
4382            return self._parse_set_transaction(global_=kind == "GLOBAL")
4383
4384        left = self._parse_primary() or self._parse_id_var()
4385
4386        if not self._match_texts(("=", "TO")):
4387            self._retreat(index)
4388            return None
4389
4390        right = self._parse_statement() or self._parse_id_var()
4391        this = self.expression(
4392            exp.EQ,
4393            this=left,
4394            expression=right,
4395        )
4396
4397        return self.expression(
4398            exp.SetItem,
4399            this=this,
4400            kind=kind,
4401        )
4402
4403    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4404        self._match_text_seq("TRANSACTION")
4405        characteristics = self._parse_csv(
4406            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4407        )
4408        return self.expression(
4409            exp.SetItem,
4410            expressions=characteristics,
4411            kind="TRANSACTION",
4412            **{"global": global_},  # type: ignore
4413        )
4414
4415    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4416        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4417        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4418
4419    def _parse_set(self) -> exp.Expression:
4420        index = self._index
4421        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4422
4423        if self._curr:
4424            self._retreat(index)
4425            return self._parse_as_command(self._prev)
4426
4427        return set_
4428
4429    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4430        for option in options:
4431            if self._match_text_seq(*option.split(" ")):
4432                return exp.Var(this=option)
4433        return None
4434
4435    def _parse_as_command(self, start: Token) -> exp.Command:
4436        while self._curr:
4437            self._advance()
4438        text = self._find_sql(start, self._prev)
4439        size = len(start.text)
4440        return exp.Command(this=text[:size], expression=text[size:])
4441
4442    def _find_parser(
4443        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
4444    ) -> t.Optional[t.Callable]:
4445        if not self._curr:
4446            return None
4447
4448        index = self._index
4449        this = []
4450        while True:
4451            # The current token might be multiple words
4452            curr = self._curr.text.upper()
4453            key = curr.split(" ")
4454            this.append(curr)
4455            self._advance()
4456            result, trie = in_trie(trie, key)
4457            if result == 0:
4458                break
4459            if result == 2:
4460                subparser = parsers[" ".join(this)]
4461                return subparser
4462        self._retreat(index)
4463        return None
4464
4465    def _match(self, token_type, advance=True, expression=None):
4466        if not self._curr:
4467            return None
4468
4469        if self._curr.token_type == token_type:
4470            if advance:
4471                self._advance()
4472            self._add_comments(expression)
4473            return True
4474
4475        return None
4476
4477    def _match_set(self, types, advance=True):
4478        if not self._curr:
4479            return None
4480
4481        if self._curr.token_type in types:
4482            if advance:
4483                self._advance()
4484            return True
4485
4486        return None
4487
4488    def _match_pair(self, token_type_a, token_type_b, advance=True):
4489        if not self._curr or not self._next:
4490            return None
4491
4492        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4493            if advance:
4494                self._advance(2)
4495            return True
4496
4497        return None
4498
4499    def _match_l_paren(self, expression=None):
4500        if not self._match(TokenType.L_PAREN, expression=expression):
4501            self.raise_error("Expecting (")
4502
4503    def _match_r_paren(self, expression=None):
4504        if not self._match(TokenType.R_PAREN, expression=expression):
4505            self.raise_error("Expecting )")
4506
4507    def _match_texts(self, texts, advance=True):
4508        if self._curr and self._curr.text.upper() in texts:
4509            if advance:
4510                self._advance()
4511            return True
4512        return False
4513
4514    def _match_text_seq(self, *texts, advance=True):
4515        index = self._index
4516        for text in texts:
4517            if self._curr and self._curr.text.upper() == text:
4518                self._advance()
4519            else:
4520                self._retreat(index)
4521                return False
4522
4523        if not advance:
4524            self._retreat(index)
4525
4526        return True
4527
4528    def _replace_columns_with_dots(self, this):
4529        if isinstance(this, exp.Dot):
4530            exp.replace_children(this, self._replace_columns_with_dots)
4531        elif isinstance(this, exp.Column):
4532            exp.replace_children(this, self._replace_columns_with_dots)
4533            table = this.args.get("table")
4534            this = (
4535                self.expression(exp.Dot, this=table, expression=this.this)
4536                if table
4537                else self.expression(exp.Var, this=this.name)
4538            )
4539        elif isinstance(this, exp.Identifier):
4540            this = self.expression(exp.Var, this=this.name)
4541        return this
4542
    def _replace_lambda(self, node, lambda_variables):
        """Replace Column references to *lambda_variables* inside *node* with
        plain identifiers (or dotted paths), returning the possibly-new node."""
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Walk up to the outermost enclosing Dot so the whole dotted
                # path is replaced at once.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No break occurred (i.e. no enclosing Dot chain was
                    # replaced): replace the column itself, swapping out the
                    # whole node if the column IS the node.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: Sequence) -> sqlglot.expressions.Expression:
19def parse_var_map(args: t.Sequence) -> exp.Expression:
20    if len(args) == 1 and args[0].is_star:
21        return exp.StarMap(this=args[0])
22
23    keys = []
24    values = []
25    for i in range(0, len(args), 2):
26        keys.append(args[i])
27        values.append(args[i + 1])
28    return exp.VarMap(
29        keys=exp.Array(expressions=keys),
30        values=exp.Array(expressions=values),
31    )
def parse_like(args):
34def parse_like(args):
35    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
36    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
def binary_range_parser( expr_type: Type[sqlglot.expressions.Expression]) -> Callable[[sqlglot.parser.Parser, Optional[sqlglot.expressions.Expression]], Optional[sqlglot.expressions.Expression]]:
39def binary_range_parser(
40    expr_type: t.Type[exp.Expression],
41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
42    return lambda self, this: self._parse_escape(
43        self.expression(expr_type, this=this, expression=self._parse_bitwise())
44    )
class Parser:
  56class Parser(metaclass=_Parser):
  57    """
  58    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  59    a parsed syntax tree.
  60
  61    Args:
  62        error_level: the desired error level.
  63            Default: ErrorLevel.RAISE
  64        error_message_context: determines the amount of context to capture from a
  65            query string when displaying the error message (in number of characters).
  66            Default: 50.
  67        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  68            Default: 0
  69        alias_post_tablesample: If the table alias comes after tablesample.
  70            Default: False
  71        max_errors: Maximum number of error messages to include in a raised ParseError.
  72            This is only relevant if error_level is ErrorLevel.RAISE.
  73            Default: 3
  74        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  75            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  76            Default: "nulls_are_small"
  77    """
  78
  79    FUNCTIONS: t.Dict[str, t.Callable] = {
  80        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  81        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  82            this=seq_get(args, 0),
  83            to=exp.DataType(this=exp.DataType.Type.TEXT),
  84        ),
  85        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  86        "IFNULL": exp.Coalesce.from_arg_list,
  87        "LIKE": parse_like,
  88        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  89            this=seq_get(args, 0),
  90            to=exp.DataType(this=exp.DataType.Type.TEXT),
  91        ),
  92        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  93            this=exp.Cast(
  94                this=seq_get(args, 0),
  95                to=exp.DataType(this=exp.DataType.Type.TEXT),
  96            ),
  97            start=exp.Literal.number(1),
  98            length=exp.Literal.number(10),
  99        ),
 100        "VAR_MAP": parse_var_map,
 101    }
 102
 103    NO_PAREN_FUNCTIONS = {
 104        TokenType.CURRENT_DATE: exp.CurrentDate,
 105        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 106        TokenType.CURRENT_TIME: exp.CurrentTime,
 107        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 108        TokenType.CURRENT_USER: exp.CurrentUser,
 109    }
 110
 111    JOIN_HINTS: t.Set[str] = set()
 112
 113    NESTED_TYPE_TOKENS = {
 114        TokenType.ARRAY,
 115        TokenType.MAP,
 116        TokenType.NULLABLE,
 117        TokenType.STRUCT,
 118    }
 119
 120    TYPE_TOKENS = {
 121        TokenType.BIT,
 122        TokenType.BOOLEAN,
 123        TokenType.TINYINT,
 124        TokenType.UTINYINT,
 125        TokenType.SMALLINT,
 126        TokenType.USMALLINT,
 127        TokenType.INT,
 128        TokenType.UINT,
 129        TokenType.BIGINT,
 130        TokenType.UBIGINT,
 131        TokenType.INT128,
 132        TokenType.UINT128,
 133        TokenType.INT256,
 134        TokenType.UINT256,
 135        TokenType.FLOAT,
 136        TokenType.DOUBLE,
 137        TokenType.CHAR,
 138        TokenType.NCHAR,
 139        TokenType.VARCHAR,
 140        TokenType.NVARCHAR,
 141        TokenType.TEXT,
 142        TokenType.MEDIUMTEXT,
 143        TokenType.LONGTEXT,
 144        TokenType.MEDIUMBLOB,
 145        TokenType.LONGBLOB,
 146        TokenType.BINARY,
 147        TokenType.VARBINARY,
 148        TokenType.JSON,
 149        TokenType.JSONB,
 150        TokenType.INTERVAL,
 151        TokenType.TIME,
 152        TokenType.TIMESTAMP,
 153        TokenType.TIMESTAMPTZ,
 154        TokenType.TIMESTAMPLTZ,
 155        TokenType.DATETIME,
 156        TokenType.DATETIME64,
 157        TokenType.DATE,
 158        TokenType.DECIMAL,
 159        TokenType.BIGDECIMAL,
 160        TokenType.UUID,
 161        TokenType.GEOGRAPHY,
 162        TokenType.GEOMETRY,
 163        TokenType.HLLSKETCH,
 164        TokenType.HSTORE,
 165        TokenType.PSEUDO_TYPE,
 166        TokenType.SUPER,
 167        TokenType.SERIAL,
 168        TokenType.SMALLSERIAL,
 169        TokenType.BIGSERIAL,
 170        TokenType.XML,
 171        TokenType.UNIQUEIDENTIFIER,
 172        TokenType.MONEY,
 173        TokenType.SMALLMONEY,
 174        TokenType.ROWVERSION,
 175        TokenType.IMAGE,
 176        TokenType.VARIANT,
 177        TokenType.OBJECT,
 178        TokenType.INET,
 179        *NESTED_TYPE_TOKENS,
 180    }
 181
 182    SUBQUERY_PREDICATES = {
 183        TokenType.ANY: exp.Any,
 184        TokenType.ALL: exp.All,
 185        TokenType.EXISTS: exp.Exists,
 186        TokenType.SOME: exp.Any,
 187    }
 188
 189    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 190
 191    DB_CREATABLES = {
 192        TokenType.DATABASE,
 193        TokenType.SCHEMA,
 194        TokenType.TABLE,
 195        TokenType.VIEW,
 196    }
 197
 198    CREATABLES = {
 199        TokenType.COLUMN,
 200        TokenType.FUNCTION,
 201        TokenType.INDEX,
 202        TokenType.PROCEDURE,
 203        *DB_CREATABLES,
 204    }
 205
 206    ID_VAR_TOKENS = {
 207        TokenType.VAR,
 208        TokenType.ANTI,
 209        TokenType.APPLY,
 210        TokenType.AUTO_INCREMENT,
 211        TokenType.BEGIN,
 212        TokenType.BOTH,
 213        TokenType.BUCKET,
 214        TokenType.CACHE,
 215        TokenType.CASCADE,
 216        TokenType.COLLATE,
 217        TokenType.COMMAND,
 218        TokenType.COMMENT,
 219        TokenType.COMMIT,
 220        TokenType.COMPOUND,
 221        TokenType.CONSTRAINT,
 222        TokenType.DEFAULT,
 223        TokenType.DELETE,
 224        TokenType.DESCRIBE,
 225        TokenType.DIV,
 226        TokenType.END,
 227        TokenType.EXECUTE,
 228        TokenType.ESCAPE,
 229        TokenType.FALSE,
 230        TokenType.FIRST,
 231        TokenType.FILTER,
 232        TokenType.FOLLOWING,
 233        TokenType.FORMAT,
 234        TokenType.FULL,
 235        TokenType.IF,
 236        TokenType.IS,
 237        TokenType.ISNULL,
 238        TokenType.INTERVAL,
 239        TokenType.KEEP,
 240        TokenType.LAZY,
 241        TokenType.LEADING,
 242        TokenType.LEFT,
 243        TokenType.LOCAL,
 244        TokenType.MATERIALIZED,
 245        TokenType.MERGE,
 246        TokenType.NATURAL,
 247        TokenType.NEXT,
 248        TokenType.OFFSET,
 249        TokenType.ONLY,
 250        TokenType.OPTIONS,
 251        TokenType.ORDINALITY,
 252        TokenType.OVERWRITE,
 253        TokenType.PARTITION,
 254        TokenType.PERCENT,
 255        TokenType.PIVOT,
 256        TokenType.PRAGMA,
 257        TokenType.PRECEDING,
 258        TokenType.RANGE,
 259        TokenType.REFERENCES,
 260        TokenType.RIGHT,
 261        TokenType.ROW,
 262        TokenType.ROWS,
 263        TokenType.SEED,
 264        TokenType.SEMI,
 265        TokenType.SET,
 266        TokenType.SETTINGS,
 267        TokenType.SHOW,
 268        TokenType.SORTKEY,
 269        TokenType.TEMPORARY,
 270        TokenType.TOP,
 271        TokenType.TRAILING,
 272        TokenType.TRUE,
 273        TokenType.UNBOUNDED,
 274        TokenType.UNIQUE,
 275        TokenType.UNLOGGED,
 276        TokenType.UNPIVOT,
 277        TokenType.VOLATILE,
 278        TokenType.WINDOW,
 279        *CREATABLES,
 280        *SUBQUERY_PREDICATES,
 281        *TYPE_TOKENS,
 282        *NO_PAREN_FUNCTIONS,
 283    }
 284
 285    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 286
 287    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 288        TokenType.APPLY,
 289        TokenType.FULL,
 290        TokenType.LEFT,
 291        TokenType.LOCK,
 292        TokenType.NATURAL,
 293        TokenType.OFFSET,
 294        TokenType.RIGHT,
 295        TokenType.WINDOW,
 296    }
 297
 298    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 299
 300    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 301
 302    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 303
 304    FUNC_TOKENS = {
 305        TokenType.COMMAND,
 306        TokenType.CURRENT_DATE,
 307        TokenType.CURRENT_DATETIME,
 308        TokenType.CURRENT_TIMESTAMP,
 309        TokenType.CURRENT_TIME,
 310        TokenType.CURRENT_USER,
 311        TokenType.FILTER,
 312        TokenType.FIRST,
 313        TokenType.FORMAT,
 314        TokenType.GLOB,
 315        TokenType.IDENTIFIER,
 316        TokenType.INDEX,
 317        TokenType.ISNULL,
 318        TokenType.ILIKE,
 319        TokenType.LIKE,
 320        TokenType.MERGE,
 321        TokenType.OFFSET,
 322        TokenType.PRIMARY_KEY,
 323        TokenType.RANGE,
 324        TokenType.REPLACE,
 325        TokenType.ROW,
 326        TokenType.UNNEST,
 327        TokenType.VAR,
 328        TokenType.LEFT,
 329        TokenType.RIGHT,
 330        TokenType.DATE,
 331        TokenType.DATETIME,
 332        TokenType.TABLE,
 333        TokenType.TIMESTAMP,
 334        TokenType.TIMESTAMPTZ,
 335        TokenType.WINDOW,
 336        *TYPE_TOKENS,
 337        *SUBQUERY_PREDICATES,
 338    }
 339
 340    CONJUNCTION = {
 341        TokenType.AND: exp.And,
 342        TokenType.OR: exp.Or,
 343    }
 344
 345    EQUALITY = {
 346        TokenType.EQ: exp.EQ,
 347        TokenType.NEQ: exp.NEQ,
 348        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 349    }
 350
 351    COMPARISON = {
 352        TokenType.GT: exp.GT,
 353        TokenType.GTE: exp.GTE,
 354        TokenType.LT: exp.LT,
 355        TokenType.LTE: exp.LTE,
 356    }
 357
 358    BITWISE = {
 359        TokenType.AMP: exp.BitwiseAnd,
 360        TokenType.CARET: exp.BitwiseXor,
 361        TokenType.PIPE: exp.BitwiseOr,
 362        TokenType.DPIPE: exp.DPipe,
 363    }
 364
 365    TERM = {
 366        TokenType.DASH: exp.Sub,
 367        TokenType.PLUS: exp.Add,
 368        TokenType.MOD: exp.Mod,
 369        TokenType.COLLATE: exp.Collate,
 370    }
 371
 372    FACTOR = {
 373        TokenType.DIV: exp.IntDiv,
 374        TokenType.LR_ARROW: exp.Distance,
 375        TokenType.SLASH: exp.Div,
 376        TokenType.STAR: exp.Mul,
 377    }
 378
 379    TIMESTAMPS = {
 380        TokenType.TIME,
 381        TokenType.TIMESTAMP,
 382        TokenType.TIMESTAMPTZ,
 383        TokenType.TIMESTAMPLTZ,
 384    }
 385
 386    SET_OPERATIONS = {
 387        TokenType.UNION,
 388        TokenType.INTERSECT,
 389        TokenType.EXCEPT,
 390    }
 391
 392    JOIN_SIDES = {
 393        TokenType.LEFT,
 394        TokenType.RIGHT,
 395        TokenType.FULL,
 396    }
 397
 398    JOIN_KINDS = {
 399        TokenType.INNER,
 400        TokenType.OUTER,
 401        TokenType.CROSS,
 402        TokenType.SEMI,
 403        TokenType.ANTI,
 404    }
 405
 406    LAMBDAS = {
 407        TokenType.ARROW: lambda self, expressions: self.expression(
 408            exp.Lambda,
 409            this=self._replace_lambda(
 410                self._parse_conjunction(),
 411                {node.name for node in expressions},
 412            ),
 413            expressions=expressions,
 414        ),
 415        TokenType.FARROW: lambda self, expressions: self.expression(
 416            exp.Kwarg,
 417            this=exp.Var(this=expressions[0].name),
 418            expression=self._parse_conjunction(),
 419        ),
 420    }
 421
 422    COLUMN_OPERATORS = {
 423        TokenType.DOT: None,
 424        TokenType.DCOLON: lambda self, this, to: self.expression(
 425            exp.Cast if self.STRICT_CAST else exp.TryCast,
 426            this=this,
 427            to=to,
 428        ),
 429        TokenType.ARROW: lambda self, this, path: self.expression(
 430            exp.JSONExtract,
 431            this=this,
 432            expression=path,
 433        ),
 434        TokenType.DARROW: lambda self, this, path: self.expression(
 435            exp.JSONExtractScalar,
 436            this=this,
 437            expression=path,
 438        ),
 439        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 440            exp.JSONBExtract,
 441            this=this,
 442            expression=path,
 443        ),
 444        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 445            exp.JSONBExtractScalar,
 446            this=this,
 447            expression=path,
 448        ),
 449        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 450            exp.JSONBContains,
 451            this=this,
 452            expression=key,
 453        ),
 454    }
 455
 456    EXPRESSION_PARSERS = {
 457        exp.Column: lambda self: self._parse_column(),
 458        exp.DataType: lambda self: self._parse_types(),
 459        exp.From: lambda self: self._parse_from(),
 460        exp.Group: lambda self: self._parse_group(),
 461        exp.Identifier: lambda self: self._parse_id_var(),
 462        exp.Lateral: lambda self: self._parse_lateral(),
 463        exp.Join: lambda self: self._parse_join(),
 464        exp.Order: lambda self: self._parse_order(),
 465        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 466        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 467        exp.Lambda: lambda self: self._parse_lambda(),
 468        exp.Limit: lambda self: self._parse_limit(),
 469        exp.Offset: lambda self: self._parse_offset(),
 470        exp.TableAlias: lambda self: self._parse_table_alias(),
 471        exp.Table: lambda self: self._parse_table(),
 472        exp.Condition: lambda self: self._parse_conjunction(),
 473        exp.Expression: lambda self: self._parse_statement(),
 474        exp.Properties: lambda self: self._parse_properties(),
 475        exp.Where: lambda self: self._parse_where(),
 476        exp.Ordered: lambda self: self._parse_ordered(),
 477        exp.Having: lambda self: self._parse_having(),
 478        exp.With: lambda self: self._parse_with(),
 479        exp.Window: lambda self: self._parse_named_window(),
 480        exp.Qualify: lambda self: self._parse_qualify(),
 481        exp.Returning: lambda self: self._parse_returning(),
 482        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 483    }
 484
 485    STATEMENT_PARSERS = {
 486        TokenType.ALTER: lambda self: self._parse_alter(),
 487        TokenType.BEGIN: lambda self: self._parse_transaction(),
 488        TokenType.CACHE: lambda self: self._parse_cache(),
 489        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 490        TokenType.COMMENT: lambda self: self._parse_comment(),
 491        TokenType.CREATE: lambda self: self._parse_create(),
 492        TokenType.DELETE: lambda self: self._parse_delete(),
 493        TokenType.DESC: lambda self: self._parse_describe(),
 494        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 495        TokenType.DROP: lambda self: self._parse_drop(),
 496        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 497        TokenType.INSERT: lambda self: self._parse_insert(),
 498        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 499        TokenType.MERGE: lambda self: self._parse_merge(),
 500        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 501        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 502        TokenType.SET: lambda self: self._parse_set(),
 503        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 504        TokenType.UPDATE: lambda self: self._parse_update(),
 505        TokenType.USE: lambda self: self.expression(
 506            exp.Use,
 507            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 508            and exp.Var(this=self._prev.text),
 509            this=self._parse_table(schema=False),
 510        ),
 511    }
 512
 513    UNARY_PARSERS = {
 514        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 515        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 516        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 517        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 518    }
 519
 520    PRIMARY_PARSERS = {
 521        TokenType.STRING: lambda self, token: self.expression(
 522            exp.Literal, this=token.text, is_string=True
 523        ),
 524        TokenType.NUMBER: lambda self, token: self.expression(
 525            exp.Literal, this=token.text, is_string=False
 526        ),
 527        TokenType.STAR: lambda self, _: self.expression(
 528            exp.Star,
 529            **{"except": self._parse_except(), "replace": self._parse_replace()},
 530        ),
 531        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 532        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 533        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 534        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 535        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 536        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 537        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 538        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 539        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 540    }
 541
 542    PLACEHOLDER_PARSERS = {
 543        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 544        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 545        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 546        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 547        else None,
 548    }
 549
 550    RANGE_PARSERS = {
 551        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 552        TokenType.GLOB: binary_range_parser(exp.Glob),
 553        TokenType.ILIKE: binary_range_parser(exp.ILike),
 554        TokenType.IN: lambda self, this: self._parse_in(this),
 555        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 556        TokenType.IS: lambda self, this: self._parse_is(this),
 557        TokenType.LIKE: binary_range_parser(exp.Like),
 558        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 559        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 560        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 561    }
 562
 563    PROPERTY_PARSERS = {
 564        "AFTER": lambda self: self._parse_afterjournal(
 565            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 566        ),
 567        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 568        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 569        "BEFORE": lambda self: self._parse_journal(
 570            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 571        ),
 572        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 573        "CHARACTER SET": lambda self: self._parse_character_set(),
 574        "CHECKSUM": lambda self: self._parse_checksum(),
 575        "CLUSTER BY": lambda self: self.expression(
 576            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 577        ),
 578        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 579        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 580        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 581            default=self._prev.text.upper() == "DEFAULT"
 582        ),
 583        "DEFINER": lambda self: self._parse_definer(),
 584        "DETERMINISTIC": lambda self: self.expression(
 585            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 586        ),
 587        "DISTKEY": lambda self: self._parse_distkey(),
 588        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 589        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 590        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 591        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 592        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 593        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 594        "FREESPACE": lambda self: self._parse_freespace(),
 595        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 596        "IMMUTABLE": lambda self: self.expression(
 597            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 598        ),
 599        "JOURNAL": lambda self: self._parse_journal(
 600            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 601        ),
 602        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 603        "LIKE": lambda self: self._parse_create_like(),
 604        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 605        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 606        "LOCK": lambda self: self._parse_locking(),
 607        "LOCKING": lambda self: self._parse_locking(),
 608        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 609        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 610        "MAX": lambda self: self._parse_datablocksize(),
 611        "MAXIMUM": lambda self: self._parse_datablocksize(),
 612        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 613            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 614        ),
 615        "MIN": lambda self: self._parse_datablocksize(),
 616        "MINIMUM": lambda self: self._parse_datablocksize(),
 617        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 618        "NO": lambda self: self._parse_noprimaryindex(),
 619        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 620        "ON": lambda self: self._parse_oncommit(),
 621        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 622        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 623        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 624        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 625        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 626        "RETURNS": lambda self: self._parse_returns(),
 627        "ROW": lambda self: self._parse_row(),
 628        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 629        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 630        "SETTINGS": lambda self: self.expression(
 631            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 632        ),
 633        "SORTKEY": lambda self: self._parse_sortkey(),
 634        "STABLE": lambda self: self.expression(
 635            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 636        ),
 637        "STORED": lambda self: self._parse_stored(),
 638        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 639        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 640        "TEMP": lambda self: self._parse_temporary(global_=False),
 641        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 642        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 643        "TTL": lambda self: self._parse_ttl(),
 644        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 645        "VOLATILE": lambda self: self._parse_volatile_property(),
 646        "WITH": lambda self: self._parse_with_property(),
 647    }
 648
 649    CONSTRAINT_PARSERS = {
 650        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 651        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 652        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 653        "CHARACTER SET": lambda self: self.expression(
 654            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 655        ),
 656        "CHECK": lambda self: self.expression(
 657            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 658        ),
 659        "COLLATE": lambda self: self.expression(
 660            exp.CollateColumnConstraint, this=self._parse_var()
 661        ),
 662        "COMMENT": lambda self: self.expression(
 663            exp.CommentColumnConstraint, this=self._parse_string()
 664        ),
 665        "COMPRESS": lambda self: self._parse_compress(),
 666        "DEFAULT": lambda self: self.expression(
 667            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 668        ),
 669        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 670        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 671        "FORMAT": lambda self: self.expression(
 672            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 673        ),
 674        "GENERATED": lambda self: self._parse_generated_as_identity(),
 675        "IDENTITY": lambda self: self._parse_auto_increment(),
 676        "INLINE": lambda self: self._parse_inline(),
 677        "LIKE": lambda self: self._parse_create_like(),
 678        "NOT": lambda self: self._parse_not_constraint(),
 679        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 680        "ON": lambda self: self._match(TokenType.UPDATE)
 681        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 682        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 683        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 684        "REFERENCES": lambda self: self._parse_references(match=False),
 685        "TITLE": lambda self: self.expression(
 686            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 687        ),
 688        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 689        "UNIQUE": lambda self: self._parse_unique(),
 690        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 691    }
 692
 693    ALTER_PARSERS = {
 694        "ADD": lambda self: self._parse_alter_table_add(),
 695        "ALTER": lambda self: self._parse_alter_table_alter(),
 696        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 697        "DROP": lambda self: self._parse_alter_table_drop(),
 698        "RENAME": lambda self: self._parse_alter_table_rename(),
 699    }
 700
 701    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 702
 703    NO_PAREN_FUNCTION_PARSERS = {
 704        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 705        TokenType.CASE: lambda self: self._parse_case(),
 706        TokenType.IF: lambda self: self._parse_if(),
 707        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 708            exp.NextValueFor,
 709            this=self._parse_column(),
 710            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 711        ),
 712    }
 713
 714    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 715        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 716        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 717        "DECODE": lambda self: self._parse_decode(),
 718        "EXTRACT": lambda self: self._parse_extract(),
 719        "JSON_OBJECT": lambda self: self._parse_json_object(),
 720        "LOG": lambda self: self._parse_logarithm(),
 721        "MATCH": lambda self: self._parse_match_against(),
 722        "OPENJSON": lambda self: self._parse_open_json(),
 723        "POSITION": lambda self: self._parse_position(),
 724        "STRING_AGG": lambda self: self._parse_string_agg(),
 725        "SUBSTRING": lambda self: self._parse_substring(),
 726        "TRIM": lambda self: self._parse_trim(),
 727        "TRY_CAST": lambda self: self._parse_cast(False),
 728        "TRY_CONVERT": lambda self: self._parse_convert(False),
 729    }
 730
 731    QUERY_MODIFIER_PARSERS = {
 732        "joins": lambda self: list(iter(self._parse_join, None)),
 733        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 734        "match": lambda self: self._parse_match_recognize(),
 735        "where": lambda self: self._parse_where(),
 736        "group": lambda self: self._parse_group(),
 737        "having": lambda self: self._parse_having(),
 738        "qualify": lambda self: self._parse_qualify(),
 739        "windows": lambda self: self._parse_window_clause(),
 740        "order": lambda self: self._parse_order(),
 741        "limit": lambda self: self._parse_limit(),
 742        "offset": lambda self: self._parse_offset(),
 743        "locks": lambda self: self._parse_locks(),
 744        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 745    }
 746
 747    SET_PARSERS = {
 748        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
 749        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
 750        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
 751        "TRANSACTION": lambda self: self._parse_set_transaction(),
 752    }
 753
 754    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 755
 756    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}
 757
 758    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 759
 760    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 761
 762    TRANSACTION_CHARACTERISTICS = {
 763        "ISOLATION LEVEL REPEATABLE READ",
 764        "ISOLATION LEVEL READ COMMITTED",
 765        "ISOLATION LEVEL READ UNCOMMITTED",
 766        "ISOLATION LEVEL SERIALIZABLE",
 767        "READ WRITE",
 768        "READ ONLY",
 769    }
 770
 771    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 772
 773    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
 774
 775    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 776    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
 777
 778    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 779
 780    STRICT_CAST = True
 781
 782    CONVERT_TYPE_FIRST = False
 783
 784    PREFIXED_PIVOT_COLUMNS = False
 785    IDENTIFY_PIVOT_STRINGS = False
 786
 787    LOG_BASE_FIRST = True
 788    LOG_DEFAULTS_TO_LN = False
 789
 790    __slots__ = (
 791        "error_level",
 792        "error_message_context",
 793        "sql",
 794        "errors",
 795        "index_offset",
 796        "unnest_column_only",
 797        "alias_post_tablesample",
 798        "max_errors",
 799        "null_ordering",
 800        "_tokens",
 801        "_index",
 802        "_curr",
 803        "_next",
 804        "_prev",
 805        "_prev_comments",
 806        "_show_trie",
 807        "_set_trie",
 808    )
 809
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        """
        Args:
            error_level: how errors are surfaced (defaults to ErrorLevel.IMMEDIATE).
            error_message_context: number of characters of SQL shown on each side
                of an error location (see raise_error).
            index_offset: dialect index-offset setting — TODO confirm exact use at call sites.
            unnest_column_only: dialect UNNEST behavior toggle — confirm against dialect usage.
            alias_post_tablesample: whether aliases come after TABLESAMPLE (dialect toggle).
            max_errors: cap on the number of messages concatenated into a raised ParseError.
            null_ordering: dialect NULL-ordering setting — confirm against dialect usage.
        """
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        # Initialize all mutable parsing state.
        self.reset()
 828
 829    def reset(self):
 830        self.sql = ""
 831        self.errors = []
 832        self._tokens = []
 833        self._index = 0
 834        self._curr = None
 835        self._next = None
 836        self._prev = None
 837        self._prev_comments = None
 838
 839    def parse(
 840        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 841    ) -> t.List[t.Optional[exp.Expression]]:
 842        """
 843        Parses a list of tokens and returns a list of syntax trees, one tree
 844        per parsed SQL statement.
 845
 846        Args:
 847            raw_tokens: the list of tokens.
 848            sql: the original SQL string, used to produce helpful debug messages.
 849
 850        Returns:
 851            The list of syntax trees.
 852        """
 853        return self._parse(
 854            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 855        )
 856
 857    def parse_into(
 858        self,
 859        expression_types: exp.IntoType,
 860        raw_tokens: t.List[Token],
 861        sql: t.Optional[str] = None,
 862    ) -> t.List[t.Optional[exp.Expression]]:
 863        """
 864        Parses a list of tokens into a given Expression type. If a collection of Expression
 865        types is given instead, this method will try to parse the token list into each one
 866        of them, stopping at the first for which the parsing succeeds.
 867
 868        Args:
 869            expression_types: the expression type(s) to try and parse the token list into.
 870            raw_tokens: the list of tokens.
 871            sql: the original SQL string, used to produce helpful debug messages.
 872
 873        Returns:
 874            The target Expression.
 875        """
 876        errors = []
 877        for expression_type in ensure_collection(expression_types):
 878            parser = self.EXPRESSION_PARSERS.get(expression_type)
 879            if not parser:
 880                raise TypeError(f"No parser registered for {expression_type}")
 881            try:
 882                return self._parse(parser, raw_tokens, sql)
 883            except ParseError as e:
 884                e.errors[0]["into_expression"] = expression_type
 885                errors.append(e)
 886        raise ParseError(
 887            f"Failed to parse into {expression_types}",
 888            errors=merge_errors(errors),
 889        ) from errors[-1]
 890
 891    def _parse(
 892        self,
 893        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 894        raw_tokens: t.List[Token],
 895        sql: t.Optional[str] = None,
 896    ) -> t.List[t.Optional[exp.Expression]]:
 897        self.reset()
 898        self.sql = sql or ""
 899        total = len(raw_tokens)
 900        chunks: t.List[t.List[Token]] = [[]]
 901
 902        for i, token in enumerate(raw_tokens):
 903            if token.token_type == TokenType.SEMICOLON:
 904                if i < total - 1:
 905                    chunks.append([])
 906            else:
 907                chunks[-1].append(token)
 908
 909        expressions = []
 910
 911        for tokens in chunks:
 912            self._index = -1
 913            self._tokens = tokens
 914            self._advance()
 915
 916            expressions.append(parse_method(self))
 917
 918            if self._index < len(self._tokens):
 919                self.raise_error("Invalid expression / Unexpected token")
 920
 921            self.check_errors()
 922
 923        return expressions
 924
 925    def check_errors(self) -> None:
 926        """
 927        Logs or raises any found errors, depending on the chosen error level setting.
 928        """
 929        if self.error_level == ErrorLevel.WARN:
 930            for error in self.errors:
 931                logger.error(str(error))
 932        elif self.error_level == ErrorLevel.RAISE and self.errors:
 933            raise ParseError(
 934                concat_messages(self.errors, self.max_errors),
 935                errors=merge_errors(self.errors),
 936            )
 937
 938    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 939        """
 940        Appends an error in the list of recorded errors or raises it, depending on the chosen
 941        error level setting.
 942        """
 943        token = token or self._curr or self._prev or Token.string("")
 944        start = token.start
 945        end = token.end
 946        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 947        highlight = self.sql[start:end]
 948        end_context = self.sql[end : end + self.error_message_context]
 949
 950        error = ParseError.new(
 951            f"{message}. Line {token.line}, Col: {token.col}.\n"
 952            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 953            description=message,
 954            line=token.line,
 955            col=token.col,
 956            start_context=start_context,
 957            highlight=highlight,
 958            end_context=end_context,
 959        )
 960
 961        if self.error_level == ErrorLevel.IMMEDIATE:
 962            raise error
 963
 964        self.errors.append(error)
 965
 966    def expression(
 967        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 968    ) -> E:
 969        """
 970        Creates a new, validated Expression.
 971
 972        Args:
 973            exp_class: the expression class to instantiate.
 974            comments: an optional list of comments to attach to the expression.
 975            kwargs: the arguments to set for the expression along with their respective values.
 976
 977        Returns:
 978            The target expression.
 979        """
 980        instance = exp_class(**kwargs)
 981        instance.add_comments(comments) if comments else self._add_comments(instance)
 982        self.validate_expression(instance)
 983        return instance
 984
 985    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 986        if expression and self._prev_comments:
 987            expression.add_comments(self._prev_comments)
 988            self._prev_comments = None
 989
 990    def validate_expression(
 991        self, expression: exp.Expression, args: t.Optional[t.List] = None
 992    ) -> None:
 993        """
 994        Validates an already instantiated expression, making sure that all its mandatory arguments
 995        are set.
 996
 997        Args:
 998            expression: the expression to validate.
 999            args: an optional list of items that was used to instantiate the expression, if it's a Func.
1000        """
1001        if self.error_level == ErrorLevel.IGNORE:
1002            return
1003
1004        for error_message in expression.error_messages(args):
1005            self.raise_error(error_message)
1006
1007    def _find_sql(self, start: Token, end: Token) -> str:
1008        return self.sql[start.start : end.end]
1009
1010    def _advance(self, times: int = 1) -> None:
1011        self._index += times
1012        self._curr = seq_get(self._tokens, self._index)
1013        self._next = seq_get(self._tokens, self._index + 1)
1014        if self._index > 0:
1015            self._prev = self._tokens[self._index - 1]
1016            self._prev_comments = self._prev.comments
1017        else:
1018            self._prev = None
1019            self._prev_comments = None
1020
1021    def _retreat(self, index: int) -> None:
1022        if index != self._index:
1023            self._advance(index - self._index)
1024
1025    def _parse_command(self) -> exp.Command:
1026        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1027
1028    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1029        start = self._prev
1030        exists = self._parse_exists() if allow_exists else None
1031
1032        self._match(TokenType.ON)
1033
1034        kind = self._match_set(self.CREATABLES) and self._prev
1035
1036        if not kind:
1037            return self._parse_as_command(start)
1038
1039        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1040            this = self._parse_user_defined_function(kind=kind.token_type)
1041        elif kind.token_type == TokenType.TABLE:
1042            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1043        elif kind.token_type == TokenType.COLUMN:
1044            this = self._parse_column()
1045        else:
1046            this = self._parse_id_var()
1047
1048        self._match(TokenType.IS)
1049
1050        return self.expression(
1051            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1052        )
1053
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause into exp.MergeTreeTTL."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each TTL entry is an expression, optionally followed by an action.
            # Probe order matters: each _match_text_seq consumes tokens on success.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # No action keyword: the bare expression is the TTL entry.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # SET aggregates are only valid after a GROUP BY clause.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1091
1092    def _parse_statement(self) -> t.Optional[exp.Expression]:
1093        if self._curr is None:
1094            return None
1095
1096        if self._match_set(self.STATEMENT_PARSERS):
1097            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1098
1099        if self._match_set(Tokenizer.COMMANDS):
1100            return self._parse_command()
1101
1102        expression = self._parse_expression()
1103        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1104        return self._parse_query_modifiers(expression)
1105
1106    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1107        start = self._prev
1108        temporary = self._match(TokenType.TEMPORARY)
1109        materialized = self._match(TokenType.MATERIALIZED)
1110        kind = self._match_set(self.CREATABLES) and self._prev.text
1111        if not kind:
1112            return self._parse_as_command(start)
1113
1114        return self.expression(
1115            exp.Drop,
1116            exists=self._parse_exists(),
1117            this=self._parse_table(schema=True),
1118            kind=kind,
1119            temporary=temporary,
1120            materialized=materialized,
1121            cascade=self._match(TokenType.CASCADE),
1122            constraints=self._match_text_seq("CONSTRAINTS"),
1123            purge=self._match_text_seq("PURGE"),
1124        )
1125
1126    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1127        return (
1128            self._match(TokenType.IF)
1129            and (not not_ or self._match(TokenType.NOT))
1130            and self._match(TokenType.EXISTS)
1131        )
1132
1133    def _parse_create(self) -> t.Optional[exp.Expression]:
1134        start = self._prev
1135        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1136            TokenType.OR, TokenType.REPLACE
1137        )
1138        unique = self._match(TokenType.UNIQUE)
1139
1140        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1141            self._match(TokenType.TABLE)
1142
1143        properties = None
1144        create_token = self._match_set(self.CREATABLES) and self._prev
1145
1146        if not create_token:
1147            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1148            create_token = self._match_set(self.CREATABLES) and self._prev
1149
1150            if not properties or not create_token:
1151                return self._parse_as_command(start)
1152
1153        exists = self._parse_exists(not_=True)
1154        this = None
1155        expression = None
1156        indexes = None
1157        no_schema_binding = None
1158        begin = None
1159        clone = None
1160
1161        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1162            this = self._parse_user_defined_function(kind=create_token.token_type)
1163            temp_properties = self._parse_properties()
1164            if properties and temp_properties:
1165                properties.expressions.extend(temp_properties.expressions)
1166            elif temp_properties:
1167                properties = temp_properties
1168
1169            self._match(TokenType.ALIAS)
1170            begin = self._match(TokenType.BEGIN)
1171            return_ = self._match_text_seq("RETURN")
1172            expression = self._parse_statement()
1173
1174            if return_:
1175                expression = self.expression(exp.Return, this=expression)
1176        elif create_token.token_type == TokenType.INDEX:
1177            this = self._parse_index()
1178        elif create_token.token_type in self.DB_CREATABLES:
1179            table_parts = self._parse_table_parts(schema=True)
1180
1181            # exp.Properties.Location.POST_NAME
1182            if self._match(TokenType.COMMA):
1183                temp_properties = self._parse_properties(before=True)
1184                if properties and temp_properties:
1185                    properties.expressions.extend(temp_properties.expressions)
1186                elif temp_properties:
1187                    properties = temp_properties
1188
1189            this = self._parse_schema(this=table_parts)
1190
1191            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1192            temp_properties = self._parse_properties()
1193            if properties and temp_properties:
1194                properties.expressions.extend(temp_properties.expressions)
1195            elif temp_properties:
1196                properties = temp_properties
1197
1198            self._match(TokenType.ALIAS)
1199
1200            # exp.Properties.Location.POST_ALIAS
1201            if not (
1202                self._match(TokenType.SELECT, advance=False)
1203                or self._match(TokenType.WITH, advance=False)
1204                or self._match(TokenType.L_PAREN, advance=False)
1205            ):
1206                temp_properties = self._parse_properties()
1207                if properties and temp_properties:
1208                    properties.expressions.extend(temp_properties.expressions)
1209                elif temp_properties:
1210                    properties = temp_properties
1211
1212            expression = self._parse_ddl_select()
1213
1214            if create_token.token_type == TokenType.TABLE:
1215                # exp.Properties.Location.POST_EXPRESSION
1216                temp_properties = self._parse_properties()
1217                if properties and temp_properties:
1218                    properties.expressions.extend(temp_properties.expressions)
1219                elif temp_properties:
1220                    properties = temp_properties
1221
1222                indexes = []
1223                while True:
1224                    index = self._parse_create_table_index()
1225
1226                    # exp.Properties.Location.POST_INDEX
1227                    if self._match(TokenType.PARTITION_BY, advance=False):
1228                        temp_properties = self._parse_properties()
1229                        if properties and temp_properties:
1230                            properties.expressions.extend(temp_properties.expressions)
1231                        elif temp_properties:
1232                            properties = temp_properties
1233
1234                    if not index:
1235                        break
1236                    else:
1237                        indexes.append(index)
1238            elif create_token.token_type == TokenType.VIEW:
1239                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1240                    no_schema_binding = True
1241
1242            if self._match_text_seq("CLONE"):
1243                clone = self._parse_table(schema=True)
1244                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1245                clone_kind = (
1246                    self._match(TokenType.L_PAREN)
1247                    and self._match_texts(self.CLONE_KINDS)
1248                    and self._prev.text.upper()
1249                )
1250                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1251                self._match(TokenType.R_PAREN)
1252                clone = self.expression(
1253                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1254                )
1255
1256        return self.expression(
1257            exp.Create,
1258            this=this,
1259            kind=create_token.text,
1260            replace=replace,
1261            unique=unique,
1262            expression=expression,
1263            exists=exists,
1264            properties=properties,
1265            indexes=indexes,
1266            no_schema_binding=no_schema_binding,
1267            begin=begin,
1268            clone=clone,
1269        )
1270
1271    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1272        self._match(TokenType.COMMA)
1273
1274        # parsers look to _prev for no/dual/default, so need to consume first
1275        self._match_text_seq("NO")
1276        self._match_text_seq("DUAL")
1277        self._match_text_seq("DEFAULT")
1278
1279        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1280            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1281
1282        return None
1283
1284    def _parse_property(self) -> t.Optional[exp.Expression]:
1285        if self._match_texts(self.PROPERTY_PARSERS):
1286            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1287
1288        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1289            return self._parse_character_set(default=True)
1290
1291        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1292            return self._parse_sortkey(compound=True)
1293
1294        if self._match_text_seq("SQL", "SECURITY"):
1295            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1296
1297        assignment = self._match_pair(
1298            TokenType.VAR, TokenType.EQ, advance=False
1299        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1300
1301        if assignment:
1302            key = self._parse_var_or_string()
1303            self._match(TokenType.EQ)
1304            return self.expression(exp.Property, this=key, value=self._parse_column())
1305
1306        return None
1307
1308    def _parse_stored(self) -> exp.Expression:
1309        self._match(TokenType.ALIAS)
1310
1311        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1312        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1313
1314        return self.expression(
1315            exp.FileFormatProperty,
1316            this=self.expression(
1317                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1318            )
1319            if input_format or output_format
1320            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1321        )
1322
1323    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1324        self._match(TokenType.EQ)
1325        self._match(TokenType.ALIAS)
1326        return self.expression(exp_class, this=self._parse_field())
1327
1328    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1329        properties = []
1330
1331        while True:
1332            if before:
1333                identified_property = self._parse_property_before()
1334            else:
1335                identified_property = self._parse_property()
1336
1337            if not identified_property:
1338                break
1339            for p in ensure_list(identified_property):
1340                properties.append(p)
1341
1342        if properties:
1343            return self.expression(exp.Properties, expressions=properties)
1344
1345        return None
1346
1347    def _parse_fallback(self, no=False) -> exp.Expression:
1348        self._match_text_seq("FALLBACK")
1349        return self.expression(
1350            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1351        )
1352
1353    def _parse_volatile_property(self) -> exp.Expression:
1354        if self._index >= 2:
1355            pre_volatile_token = self._tokens[self._index - 2]
1356        else:
1357            pre_volatile_token = None
1358
1359        if pre_volatile_token and pre_volatile_token.token_type in (
1360            TokenType.CREATE,
1361            TokenType.REPLACE,
1362            TokenType.UNIQUE,
1363        ):
1364            return exp.VolatileProperty()
1365
1366        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1367
1368    def _parse_with_property(
1369        self,
1370    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1371        self._match(TokenType.WITH)
1372        if self._match(TokenType.L_PAREN, advance=False):
1373            return self._parse_wrapped_csv(self._parse_property)
1374
1375        if self._match_text_seq("JOURNAL"):
1376            return self._parse_withjournaltable()
1377
1378        if self._match_text_seq("DATA"):
1379            return self._parse_withdata(no=False)
1380        elif self._match_text_seq("NO", "DATA"):
1381            return self._parse_withdata(no=True)
1382
1383        if not self._next:
1384            return None
1385
1386        return self._parse_withisolatedloading()
1387
1388    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1389    def _parse_definer(self) -> t.Optional[exp.Expression]:
1390        self._match(TokenType.EQ)
1391
1392        user = self._parse_id_var()
1393        self._match(TokenType.PARAMETER)
1394        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1395
1396        if not user or not host:
1397            return None
1398
1399        return exp.DefinerProperty(this=f"{user}@{host}")
1400
1401    def _parse_withjournaltable(self) -> exp.Expression:
1402        self._match(TokenType.TABLE)
1403        self._match(TokenType.EQ)
1404        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1405
1406    def _parse_log(self, no=False) -> exp.Expression:
1407        self._match_text_seq("LOG")
1408        return self.expression(exp.LogProperty, no=no)
1409
1410    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1411        before = self._match_text_seq("BEFORE")
1412        self._match_text_seq("JOURNAL")
1413        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1414
1415    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1416        self._match_text_seq("NOT")
1417        self._match_text_seq("LOCAL")
1418        self._match_text_seq("AFTER", "JOURNAL")
1419        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1420
1421    def _parse_checksum(self) -> exp.Expression:
1422        self._match_text_seq("CHECKSUM")
1423        self._match(TokenType.EQ)
1424
1425        on = None
1426        if self._match(TokenType.ON):
1427            on = True
1428        elif self._match_text_seq("OFF"):
1429            on = False
1430        default = self._match(TokenType.DEFAULT)
1431
1432        return self.expression(
1433            exp.ChecksumProperty,
1434            on=on,
1435            default=default,
1436        )
1437
1438    def _parse_freespace(self) -> exp.Expression:
1439        self._match_text_seq("FREESPACE")
1440        self._match(TokenType.EQ)
1441        return self.expression(
1442            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1443        )
1444
1445    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1446        self._match_text_seq("MERGEBLOCKRATIO")
1447        if self._match(TokenType.EQ):
1448            return self.expression(
1449                exp.MergeBlockRatioProperty,
1450                this=self._parse_number(),
1451                percent=self._match(TokenType.PERCENT),
1452            )
1453        else:
1454            return self.expression(
1455                exp.MergeBlockRatioProperty,
1456                no=no,
1457                default=default,
1458            )
1459
1460    def _parse_datablocksize(self, default=None) -> exp.Expression:
1461        if default:
1462            self._match_text_seq("DATABLOCKSIZE")
1463            return self.expression(exp.DataBlocksizeProperty, default=True)
1464        elif self._match_texts(("MIN", "MINIMUM")):
1465            self._match_text_seq("DATABLOCKSIZE")
1466            return self.expression(exp.DataBlocksizeProperty, min=True)
1467        elif self._match_texts(("MAX", "MAXIMUM")):
1468            self._match_text_seq("DATABLOCKSIZE")
1469            return self.expression(exp.DataBlocksizeProperty, min=False)
1470
1471        self._match_text_seq("DATABLOCKSIZE")
1472        self._match(TokenType.EQ)
1473        size = self._parse_number()
1474        units = None
1475        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1476            units = self._prev.text
1477        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1478
1479    def _parse_blockcompression(self) -> exp.Expression:
1480        self._match_text_seq("BLOCKCOMPRESSION")
1481        self._match(TokenType.EQ)
1482        always = self._match_text_seq("ALWAYS")
1483        manual = self._match_text_seq("MANUAL")
1484        never = self._match_text_seq("NEVER")
1485        default = self._match_text_seq("DEFAULT")
1486        autotemp = None
1487        if self._match_text_seq("AUTOTEMP"):
1488            autotemp = self._parse_schema()
1489
1490        return self.expression(
1491            exp.BlockCompressionProperty,
1492            always=always,
1493            manual=manual,
1494            never=never,
1495            default=default,
1496            autotemp=autotemp,
1497        )
1498
1499    def _parse_withisolatedloading(self) -> exp.Expression:
1500        no = self._match_text_seq("NO")
1501        concurrent = self._match_text_seq("CONCURRENT")
1502        self._match_text_seq("ISOLATED", "LOADING")
1503        for_all = self._match_text_seq("FOR", "ALL")
1504        for_insert = self._match_text_seq("FOR", "INSERT")
1505        for_none = self._match_text_seq("FOR", "NONE")
1506        return self.expression(
1507            exp.IsolatedLoadingProperty,
1508            no=no,
1509            concurrent=concurrent,
1510            for_all=for_all,
1511            for_insert=for_insert,
1512            for_none=for_none,
1513        )
1514
    def _parse_locking(self) -> exp.Expression:
        """Parse a LOCKING clause into exp.LockingProperty.

        Shape: LOCKING <kind> [<name>] FOR|IN <lock type> [OVERRIDE].
        Every probe below consumes tokens on success, so the order of the
        if/elif chains is significant.
        """
        # Object kind being locked.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Named kinds are followed by the object's (possibly dotted) name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        # Connective keyword preceding the lock type.
        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        # The lock type itself; EXCL is normalized to EXCLUSIVE.
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
1564
1565    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1566        if self._match(TokenType.PARTITION_BY):
1567            return self._parse_csv(self._parse_conjunction)
1568        return []
1569
1570    def _parse_partitioned_by(self) -> exp.Expression:
1571        self._match(TokenType.EQ)
1572        return self.expression(
1573            exp.PartitionedByProperty,
1574            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1575        )
1576
1577    def _parse_withdata(self, no=False) -> exp.Expression:
1578        if self._match_text_seq("AND", "STATISTICS"):
1579            statistics = True
1580        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1581            statistics = False
1582        else:
1583            statistics = None
1584
1585        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1586
1587    def _parse_noprimaryindex(self) -> exp.Expression:
1588        self._match_text_seq("PRIMARY", "INDEX")
1589        return exp.NoPrimaryIndexProperty()
1590
1591    def _parse_oncommit(self) -> exp.Expression:
1592        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
1593        return exp.OnCommitProperty()
1594
1595    def _parse_distkey(self) -> exp.Expression:
1596        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1597
1598    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1599        table = self._parse_table(schema=True)
1600        options = []
1601        while self._match_texts(("INCLUDING", "EXCLUDING")):
1602            this = self._prev.text.upper()
1603            id_var = self._parse_id_var()
1604
1605            if not id_var:
1606                return None
1607
1608            options.append(
1609                self.expression(
1610                    exp.Property,
1611                    this=this,
1612                    value=exp.Var(this=id_var.this.upper()),
1613                )
1614            )
1615        return self.expression(exp.LikeProperty, this=table, expressions=options)
1616
1617    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1618        return self.expression(
1619            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1620        )
1621
1622    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1623        self._match(TokenType.EQ)
1624        return self.expression(
1625            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1626        )
1627
1628    def _parse_returns(self) -> exp.Expression:
1629        value: t.Optional[exp.Expression]
1630        is_table = self._match(TokenType.TABLE)
1631
1632        if is_table:
1633            if self._match(TokenType.LT):
1634                value = self.expression(
1635                    exp.Schema,
1636                    this="TABLE",
1637                    expressions=self._parse_csv(self._parse_struct_types),
1638                )
1639                if not self._match(TokenType.GT):
1640                    self.raise_error("Expecting >")
1641            else:
1642                value = self._parse_schema(exp.Var(this="TABLE"))
1643        else:
1644            value = self._parse_types()
1645
1646        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1647
    def _parse_temporary(self, global_=False) -> exp.Expression:
        # `global_` is True when this was entered via GLOBAL; in that case the
        # TEMPORARY token may or may not follow, so the match is optional.
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)
1651
1652    def _parse_describe(self) -> exp.Expression:
1653        kind = self._match_set(self.CREATABLES) and self._prev.text
1654        this = self._parse_table()
1655
1656        return self.expression(exp.Describe, this=this, kind=kind)
1657
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement (entered after the INSERT token).

        Supports the INSERT OVERWRITE [LOCAL] DIRECTORY form (e.g. Hive) as
        well as INSERT [OR <alternative>] INTO <table> variants.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... DIRECTORY '<path>' [ROW FORMAT ...] writes to a path
            # instead of a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE; False-y when the keyword
                # after OR is not a recognized alternative.
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # NOTE: keyword arguments are evaluated in order; each parser below
        # consumes tokens in turn, so this ordering is significant.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1689
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse an ON CONFLICT ... or ON DUPLICATE KEY ... clause, or None."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # ON CONFLICT targets either a named constraint or a key tuple.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET a = ..., b = ...; both UPDATE and SET are matched
            # leniently, and `expressions` stays False-y if SET is absent.
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1723
1724    def _parse_returning(self) -> t.Optional[exp.Expression]:
1725        if not self._match(TokenType.RETURNING):
1726            return None
1727
1728        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1729
1730    def _parse_row(self) -> t.Optional[exp.Expression]:
1731        if not self._match(TokenType.FORMAT):
1732            return None
1733        return self._parse_row_format()
1734
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a ROW FORMAT SERDE '<name>' or ROW FORMAT DELIMITED ... clause.

        When `match_row` is True, the leading ROW FORMAT token pair must be
        present, otherwise None is returned.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional; matched ones capture a string.
        # The checks must stay in this order since each consumes tokens.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1760
    def _parse_load_data(self) -> exp.Expression:
        """Parse a LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ... statement."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        # NOTE: keyword arguments are evaluated in order, consuming tokens as
        # they go -- table, then partition, then INPUTFORMAT/SERDE strings.
        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
1778
1779    def _parse_delete(self) -> exp.Expression:
1780        self._match(TokenType.FROM)
1781
1782        return self.expression(
1783            exp.Delete,
1784            this=self._parse_table(),
1785            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1786            where=self._parse_where(),
1787            returning=self._parse_returning(),
1788        )
1789
1790    def _parse_update(self) -> exp.Expression:
1791        return self.expression(
1792            exp.Update,
1793            **{  # type: ignore
1794                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1795                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1796                "from": self._parse_from(modifiers=True),
1797                "where": self._parse_where(),
1798                "returning": self._parse_returning(),
1799            },
1800        )
1801
1802    def _parse_uncache(self) -> exp.Expression:
1803        if not self._match(TokenType.TABLE):
1804            self.raise_error("Expecting TABLE after UNCACHE")
1805
1806        return self.expression(
1807            exp.Uncache,
1808            exists=self._parse_exists(),
1809            this=self._parse_table(schema=True),
1810        )
1811
1812    def _parse_cache(self) -> exp.Expression:
1813        lazy = self._match(TokenType.LAZY)
1814        self._match(TokenType.TABLE)
1815        table = self._parse_table(schema=True)
1816        options = []
1817
1818        if self._match(TokenType.OPTIONS):
1819            self._match_l_paren()
1820            k = self._parse_string()
1821            self._match(TokenType.EQ)
1822            v = self._parse_string()
1823            options = [k, v]
1824            self._match_r_paren()
1825
1826        self._match(TokenType.ALIAS)
1827        return self.expression(
1828            exp.Cache,
1829            this=table,
1830            lazy=lazy,
1831            options=options,
1832            expression=self._parse_select(nested=True),
1833        )
1834
1835    def _parse_partition(self) -> t.Optional[exp.Expression]:
1836        if not self._match(TokenType.PARTITION):
1837            return None
1838
1839        return self.expression(
1840            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1841        )
1842
1843    def _parse_value(self) -> exp.Expression:
1844        if self._match(TokenType.L_PAREN):
1845            expressions = self._parse_csv(self._parse_conjunction)
1846            self._match_r_paren()
1847            return self.expression(exp.Tuple, expressions=expressions)
1848
1849        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1850        # Source: https://prestodb.io/docs/current/sql/values.html
1851        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1852
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query.

        Handles four shapes: a WITH (CTE) prefix followed by a statement, a
        SELECT, a parenthesized subquery/table (when `nested` or `table`), or
        a bare VALUES clause.

        Args:
            nested: allow a parenthesized nested select.
            table: parse the parenthesized form as a table instead of a select.
            parse_subquery_alias: whether to parse a trailing subquery alias.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / AS VALUE -- records the keyword as the kind.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                # DISTINCT [ON (...)]
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1932
1933    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
1934        if not skip_with_token and not self._match(TokenType.WITH):
1935            return None
1936
1937        comments = self._prev_comments
1938        recursive = self._match(TokenType.RECURSIVE)
1939
1940        expressions = []
1941        while True:
1942            expressions.append(self._parse_cte())
1943
1944            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
1945                break
1946            else:
1947                self._match(TokenType.WITH)
1948
1949        return self.expression(
1950            exp.With, comments=comments, expressions=expressions, recursive=recursive
1951        )
1952
1953    def _parse_cte(self) -> exp.Expression:
1954        alias = self._parse_table_alias()
1955        if not alias or not alias.this:
1956            self.raise_error("Expected CTE to have alias")
1957
1958        self._match(TokenType.ALIAS)
1959
1960        return self.expression(
1961            exp.CTE,
1962            this=self._parse_wrapped(self._parse_statement),
1963            alias=alias,
1964        )
1965
1966    def _parse_table_alias(
1967        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1968    ) -> t.Optional[exp.Expression]:
1969        any_token = self._match(TokenType.ALIAS)
1970        alias = (
1971            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1972            or self._parse_string_as_identifier()
1973        )
1974
1975        index = self._index
1976        if self._match(TokenType.L_PAREN):
1977            columns = self._parse_csv(self._parse_function_parameter)
1978            self._match_r_paren() if columns else self._retreat(index)
1979        else:
1980            columns = None
1981
1982        if not alias and not columns:
1983            return None
1984
1985        return self.expression(exp.TableAlias, this=alias, columns=columns)
1986
1987    def _parse_subquery(
1988        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1989    ) -> exp.Expression:
1990        return self.expression(
1991            exp.Subquery,
1992            this=this,
1993            pivots=self._parse_pivots(),
1994            alias=self._parse_table_alias() if parse_alias else None,
1995        )
1996
1997    def _parse_query_modifiers(
1998        self, this: t.Optional[exp.Expression]
1999    ) -> t.Optional[exp.Expression]:
2000        if isinstance(this, self.MODIFIABLES):
2001            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
2002                expression = parser(self)
2003
2004                if expression:
2005                    this.set(key, expression)
2006        return this
2007
2008    def _parse_hint(self) -> t.Optional[exp.Expression]:
2009        if self._match(TokenType.HINT):
2010            hints = self._parse_csv(self._parse_function)
2011            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2012                self.raise_error("Expected */ after HINT")
2013            return self.expression(exp.Hint, expressions=hints)
2014
2015        return None
2016
2017    def _parse_into(self) -> t.Optional[exp.Expression]:
2018        if not self._match(TokenType.INTO):
2019            return None
2020
2021        temp = self._match(TokenType.TEMPORARY)
2022        unlogged = self._match(TokenType.UNLOGGED)
2023        self._match(TokenType.TABLE)
2024
2025        return self.expression(
2026            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2027        )
2028
2029    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2030        if not self._match(TokenType.FROM):
2031            return None
2032
2033        comments = self._prev_comments
2034        this = self._parse_table()
2035
2036        return self.expression(
2037            exp.From,
2038            comments=comments,
2039            this=self._parse_query_modifiers(this) if modifiers else this,
2040        )
2041
2042    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2043        if not self._match(TokenType.MATCH_RECOGNIZE):
2044            return None
2045
2046        self._match_l_paren()
2047
2048        partition = self._parse_partition_by()
2049        order = self._parse_order()
2050        measures = (
2051            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2052        )
2053
2054        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2055            rows = exp.Var(this="ONE ROW PER MATCH")
2056        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2057            text = "ALL ROWS PER MATCH"
2058            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2059                text += f" SHOW EMPTY MATCHES"
2060            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2061                text += f" OMIT EMPTY MATCHES"
2062            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2063                text += f" WITH UNMATCHED ROWS"
2064            rows = exp.Var(this=text)
2065        else:
2066            rows = None
2067
2068        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2069            text = "AFTER MATCH SKIP"
2070            if self._match_text_seq("PAST", "LAST", "ROW"):
2071                text += f" PAST LAST ROW"
2072            elif self._match_text_seq("TO", "NEXT", "ROW"):
2073                text += f" TO NEXT ROW"
2074            elif self._match_text_seq("TO", "FIRST"):
2075                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2076            elif self._match_text_seq("TO", "LAST"):
2077                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2078            after = exp.Var(this=text)
2079        else:
2080            after = None
2081
2082        if self._match_text_seq("PATTERN"):
2083            self._match_l_paren()
2084
2085            if not self._curr:
2086                self.raise_error("Expecting )", self._curr)
2087
2088            paren = 1
2089            start = self._curr
2090
2091            while self._curr and paren > 0:
2092                if self._curr.token_type == TokenType.L_PAREN:
2093                    paren += 1
2094                if self._curr.token_type == TokenType.R_PAREN:
2095                    paren -= 1
2096                end = self._prev
2097                self._advance()
2098            if paren > 0:
2099                self.raise_error("Expecting )", self._curr)
2100            pattern = exp.Var(this=self._find_sql(start, end))
2101        else:
2102            pattern = None
2103
2104        define = (
2105            self._parse_csv(
2106                lambda: self.expression(
2107                    exp.Alias,
2108                    alias=self._parse_id_var(any_token=True),
2109                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2110                )
2111            )
2112            if self._match_text_seq("DEFINE")
2113            else None
2114        )
2115
2116        self._match_r_paren()
2117
2118        return self.expression(
2119            exp.MatchRecognize,
2120            partition_by=partition,
2121            order=order,
2122            measures=measures,
2123            rows=rows,
2124            after=after,
2125            pattern=pattern,
2126            define=define,
2127            alias=self._parse_table_alias(),
2128        )
2129
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY table expressions.

        Returns None when none of the introducing tokens are present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            # OUTER APPLY is modeled as the outer variant; CROSS APPLY isn't.
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: fall back to a (possibly dotted) function call
            # or identifier, e.g. LATERAL VIEW explode(...).
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW form: the trailing id is the table alias and the
            # AS list (if any) names the generated columns.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2171
2172    def _parse_join_side_and_kind(
2173        self,
2174    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2175        return (
2176            self._match(TokenType.NATURAL) and self._prev,
2177            self._match_set(self.JOIN_SIDES) and self._prev,
2178            self._match_set(self.JOIN_KINDS) and self._prev,
2179        )
2180
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, including comma joins and CROSS/OUTER APPLY.

        When `skip_join_token` is True the JOIN keyword itself is optional.
        Returns None when no join construct is present.
        """
        if self._match(TokenType.COMMA):
            # Implicit comma join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword followed: rewind and discard side/kind tokens.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-side join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2224
2225    def _parse_index(self) -> exp.Expression:
2226        index = self._parse_id_var()
2227        self._match(TokenType.ON)
2228        self._match(TokenType.TABLE)  # hive
2229
2230        return self.expression(
2231            exp.Index,
2232            this=index,
2233            table=self.expression(exp.Table, this=self._parse_id_var()),
2234            columns=self._parse_expression(),
2235        )
2236
2237    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2238        unique = self._match(TokenType.UNIQUE)
2239        primary = self._match_text_seq("PRIMARY")
2240        amp = self._match_text_seq("AMP")
2241        if not self._match(TokenType.INDEX):
2242            return None
2243        index = self._parse_id_var()
2244        columns = None
2245        if self._match(TokenType.L_PAREN, advance=False):
2246            columns = self._parse_wrapped_csv(self._parse_column)
2247        return self.expression(
2248            exp.Index,
2249            this=index,
2250            columns=columns,
2251            unique=unique,
2252            primary=primary,
2253            amp=amp,
2254        )
2255
2256    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2257        return (
2258            (not schema and self._parse_function())
2259            or self._parse_id_var(any_token=False)
2260            or self._parse_string_as_identifier()
2261            or self._parse_placeholder()
2262        )
2263
2264    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
2265        catalog = None
2266        db = None
2267        table = self._parse_table_part(schema=schema)
2268
2269        while self._match(TokenType.DOT):
2270            if catalog:
2271                # This allows nesting the table in arbitrarily many dot expressions if needed
2272                table = self.expression(
2273                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2274                )
2275            else:
2276                catalog = db
2277                db = table
2278                table = self._parse_table_part(schema=schema)
2279
2280        if not table:
2281            self.raise_error(f"Expected table name but got {self._curr}")
2282
2283        return self.expression(
2284            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2285        )
2286
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table expression: lateral/apply, unnest, values, subquery,
        or a plain (possibly aliased, sampled, pivoted, hinted) table reference.

        Args:
            schema: parse the target as a schema (delegates to _parse_schema).
            alias_tokens: token types permitted to start an alias.
        """
        # Each specialized form short-circuits when its introducing token matches.
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # alias_post_tablesample controls whether the sample clause is parsed
        # before or after the alias; exactly one of the two branches assigns it.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # Table hints, e.g. WITH (NOLOCK) -- parsed as functions or vars.
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            # The sample node wraps the table it applies to.
            table_sample.set("this", this)
            this = table_sample

        return this
2338
2339    def _parse_unnest(self) -> t.Optional[exp.Expression]:
2340        if not self._match(TokenType.UNNEST):
2341            return None
2342
2343        expressions = self._parse_wrapped_csv(self._parse_type)
2344        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
2345        alias = self._parse_table_alias()
2346
2347        if alias and self.unnest_column_only:
2348            if alias.args.get("columns"):
2349                self.raise_error("Unexpected extra column alias in unnest.")
2350            alias.set("columns", [alias.this])
2351            alias.set("this", None)
2352
2353        offset = None
2354        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
2355            self._match(TokenType.ALIAS)
2356            offset = self._parse_id_var() or exp.Identifier(this="offset")
2357
2358        return self.expression(
2359            exp.Unnest,
2360            expressions=expressions,
2361            ordinality=ordinality,
2362            alias=alias,
2363            offset=offset,
2364        )
2365
2366    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2367        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2368        if not is_derived and not self._match(TokenType.VALUES):
2369            return None
2370
2371        expressions = self._parse_csv(self._parse_value)
2372
2373        if is_derived:
2374            self._match_r_paren()
2375
2376        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2377
2378    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2379        if not self._match(TokenType.TABLE_SAMPLE) and not (
2380            as_modifier and self._match_text_seq("USING", "SAMPLE")
2381        ):
2382            return None
2383
2384        bucket_numerator = None
2385        bucket_denominator = None
2386        bucket_field = None
2387        percent = None
2388        rows = None
2389        size = None
2390        seed = None
2391
2392        kind = (
2393            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2394        )
2395        method = self._parse_var(tokens=(TokenType.ROW,))
2396
2397        self._match(TokenType.L_PAREN)
2398
2399        num = self._parse_number()
2400
2401        if self._match(TokenType.BUCKET):
2402            bucket_numerator = self._parse_number()
2403            self._match(TokenType.OUT_OF)
2404            bucket_denominator = bucket_denominator = self._parse_number()
2405            self._match(TokenType.ON)
2406            bucket_field = self._parse_field()
2407        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2408            percent = num
2409        elif self._match(TokenType.ROWS):
2410            rows = num
2411        else:
2412            size = num
2413
2414        self._match(TokenType.R_PAREN)
2415
2416        if self._match(TokenType.L_PAREN):
2417            method = self._parse_var()
2418            seed = self._match(TokenType.COMMA) and self._parse_number()
2419            self._match_r_paren()
2420        elif self._match_texts(("SEED", "REPEATABLE")):
2421            seed = self._parse_wrapped(self._parse_number)
2422
2423        return self.expression(
2424            exp.TableSample,
2425            method=method,
2426            bucket_numerator=bucket_numerator,
2427            bucket_denominator=bucket_denominator,
2428            bucket_field=bucket_field,
2429            percent=percent,
2430            rows=rows,
2431            size=size,
2432            seed=seed,
2433            kind=kind,
2434        )
2435
2436    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2437        return list(iter(self._parse_pivot, None))
2438
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT or UNPIVOT clause.

        Returns None (restoring the token position) when the upcoming tokens do
        not form a pivot, so callers can safely probe for one.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        # PIVOT/UNPIVOT not followed by "(" is not a pivot clause -- back out.
        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        # UNPIVOT lists plain columns; PIVOT lists (optionally aliased) aggregations.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        # The FOR column together with its IN (...) list is stored as an exp.In.
        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach a table alias if another pivot doesn't immediately follow.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the pivot's output column names by combining each IN
            # value with each aggregation alias (order depends on dialect flags).
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2498
2499    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
2500        return [agg.alias for agg in aggregations]
2501
2502    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2503        if not skip_where_token and not self._match(TokenType.WHERE):
2504            return None
2505
2506        return self.expression(
2507            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2508        )
2509
2510    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
2511        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
2512            return None
2513
2514        elements = defaultdict(list)
2515
2516        while True:
2517            expressions = self._parse_csv(self._parse_conjunction)
2518            if expressions:
2519                elements["expressions"].extend(expressions)
2520
2521            grouping_sets = self._parse_grouping_sets()
2522            if grouping_sets:
2523                elements["grouping_sets"].extend(grouping_sets)
2524
2525            rollup = None
2526            cube = None
2527            totals = None
2528
2529            with_ = self._match(TokenType.WITH)
2530            if self._match(TokenType.ROLLUP):
2531                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
2532                elements["rollup"].extend(ensure_list(rollup))
2533
2534            if self._match(TokenType.CUBE):
2535                cube = with_ or self._parse_wrapped_csv(self._parse_column)
2536                elements["cube"].extend(ensure_list(cube))
2537
2538            if self._match_text_seq("TOTALS"):
2539                totals = True
2540                elements["totals"] = True  # type: ignore
2541
2542            if not (grouping_sets or rollup or cube or totals):
2543                break
2544
2545        return self.expression(exp.Group, **elements)  # type: ignore
2546
2547    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2548        if not self._match(TokenType.GROUPING_SETS):
2549            return None
2550
2551        return self._parse_wrapped_csv(self._parse_grouping_set)
2552
2553    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2554        if self._match(TokenType.L_PAREN):
2555            grouping_set = self._parse_csv(self._parse_column)
2556            self._match_r_paren()
2557            return self.expression(exp.Tuple, expressions=grouping_set)
2558
2559        return self._parse_column()
2560
2561    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2562        if not skip_having_token and not self._match(TokenType.HAVING):
2563            return None
2564        return self.expression(exp.Having, this=self._parse_conjunction())
2565
2566    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2567        if not self._match(TokenType.QUALIFY):
2568            return None
2569        return self.expression(exp.Qualify, this=self._parse_conjunction())
2570
2571    def _parse_order(
2572        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2573    ) -> t.Optional[exp.Expression]:
2574        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2575            return this
2576
2577        return self.expression(
2578            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2579        )
2580
2581    def _parse_sort(
2582        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2583    ) -> t.Optional[exp.Expression]:
2584        if not self._match(token_type):
2585            return None
2586        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2587
2588    def _parse_ordered(self) -> exp.Expression:
2589        this = self._parse_conjunction()
2590        self._match(TokenType.ASC)
2591        is_desc = self._match(TokenType.DESC)
2592        is_nulls_first = self._match(TokenType.NULLS_FIRST)
2593        is_nulls_last = self._match(TokenType.NULLS_LAST)
2594        desc = is_desc or False
2595        asc = not desc
2596        nulls_first = is_nulls_first or False
2597        explicitly_null_ordered = is_nulls_first or is_nulls_last
2598        if (
2599            not explicitly_null_ordered
2600            and (
2601                (asc and self.null_ordering == "nulls_are_small")
2602                or (desc and self.null_ordering != "nulls_are_small")
2603            )
2604            and self.null_ordering != "nulls_are_last"
2605        ):
2606            nulls_first = True
2607
2608        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2609
2610    def _parse_limit(
2611        self, this: t.Optional[exp.Expression] = None, top: bool = False
2612    ) -> t.Optional[exp.Expression]:
2613        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2614            limit_paren = self._match(TokenType.L_PAREN)
2615            limit_exp = self.expression(
2616                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2617            )
2618
2619            if limit_paren:
2620                self._match_r_paren()
2621
2622            return limit_exp
2623
2624        if self._match(TokenType.FETCH):
2625            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2626            direction = self._prev.text if direction else "FIRST"
2627
2628            count = self._parse_number()
2629            percent = self._match(TokenType.PERCENT)
2630
2631            self._match_set((TokenType.ROW, TokenType.ROWS))
2632
2633            only = self._match(TokenType.ONLY)
2634            with_ties = self._match_text_seq("WITH", "TIES")
2635
2636            if only and with_ties:
2637                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")
2638
2639            return self.expression(
2640                exp.Fetch,
2641                direction=direction,
2642                count=count,
2643                percent=percent,
2644                with_ties=with_ties,
2645            )
2646
2647        return this
2648
2649    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2650        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2651            return this
2652
2653        count = self._parse_number()
2654        self._match_set((TokenType.ROW, TokenType.ROWS))
2655        return self.expression(exp.Offset, this=this, expression=count)
2656
2657    def _parse_locks(self) -> t.List[exp.Expression]:
2658        # Lists are invariant, so we need to use a type hint here
2659        locks: t.List[exp.Expression] = []
2660
2661        while True:
2662            if self._match_text_seq("FOR", "UPDATE"):
2663                update = True
2664            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
2665                "LOCK", "IN", "SHARE", "MODE"
2666            ):
2667                update = False
2668            else:
2669                break
2670
2671            expressions = None
2672            if self._match_text_seq("OF"):
2673                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
2674
2675            wait: t.Optional[bool | exp.Expression] = None
2676            if self._match_text_seq("NOWAIT"):
2677                wait = True
2678            elif self._match_text_seq("WAIT"):
2679                wait = self._parse_primary()
2680            elif self._match_text_seq("SKIP", "LOCKED"):
2681                wait = False
2682
2683            locks.append(
2684                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
2685            )
2686
2687        return locks
2688
2689    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2690        if not self._match_set(self.SET_OPERATIONS):
2691            return this
2692
2693        token_type = self._prev.token_type
2694
2695        if token_type == TokenType.UNION:
2696            expression = exp.Union
2697        elif token_type == TokenType.EXCEPT:
2698            expression = exp.Except
2699        else:
2700            expression = exp.Intersect
2701
2702        return self.expression(
2703            expression,
2704            this=this,
2705            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2706            expression=self._parse_set_operations(self._parse_select(nested=True)),
2707        )
2708
2709    def _parse_expression(self) -> t.Optional[exp.Expression]:
2710        return self._parse_alias(self._parse_conjunction())
2711
    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse boolean connectives (self.CONJUNCTION) over equality expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2714
    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators (self.EQUALITY) over comparisons."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2717
    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators (self.COMPARISON) over range expressions."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2720
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (self.RANGE_PARSERS), ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        # A NOT here negates the range predicate that follows, e.g. `x NOT LIKE y`.
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The dialect-specific parser declined; keep the plain expression.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2747
2748    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2749        index = self._index - 1
2750        negate = self._match(TokenType.NOT)
2751        if self._match(TokenType.DISTINCT_FROM):
2752            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2753            return self.expression(klass, this=this, expression=self._parse_expression())
2754
2755        expression = self._parse_null() or self._parse_boolean()
2756        if not expression:
2757            self._retreat(index)
2758            return None
2759
2760        this = self.expression(exp.Is, this=this, expression=expression)
2761        return self.expression(exp.Not, this=this) if negate else this
2762
2763    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2764        unnest = self._parse_unnest()
2765        if unnest:
2766            this = self.expression(exp.In, this=this, unnest=unnest)
2767        elif self._match(TokenType.L_PAREN):
2768            expressions = self._parse_csv(self._parse_select_or_expression)
2769
2770            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2771                this = self.expression(exp.In, this=this, query=expressions[0])
2772            else:
2773                this = self.expression(exp.In, this=this, expressions=expressions)
2774
2775            self._match_r_paren(this)
2776        else:
2777            this = self.expression(exp.In, this=this, field=self._parse_field())
2778
2779        return this
2780
2781    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2782        low = self._parse_bitwise()
2783        self._match(TokenType.AND)
2784        high = self._parse_bitwise()
2785        return self.expression(exp.Between, this=this, low=low, high=high)
2786
2787    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2788        if not self._match(TokenType.ESCAPE):
2789            return this
2790        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2791
2792    def _parse_interval(self) -> t.Optional[exp.Expression]:
2793        if not self._match(TokenType.INTERVAL):
2794            return None
2795
2796        this = self._parse_primary() or self._parse_term()
2797        unit = self._parse_function() or self._parse_var()
2798
2799        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
2800        # each INTERVAL expression into this canonical form so it's easy to transpile
2801        if this and isinstance(this, exp.Literal):
2802            if this.is_number:
2803                this = exp.Literal.string(this.name)
2804
2805            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
2806            parts = this.name.split()
2807            if not unit and len(parts) <= 2:
2808                this = exp.Literal.string(seq_get(parts, 0))
2809                unit = self.expression(exp.Var, this=seq_get(parts, 1))
2810
2811        return self.expression(exp.Interval, this=this, unit=unit)
2812
2813    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2814        this = self._parse_term()
2815
2816        while True:
2817            if self._match_set(self.BITWISE):
2818                this = self.expression(
2819                    self.BITWISE[self._prev.token_type],
2820                    this=this,
2821                    expression=self._parse_term(),
2822                )
2823            elif self._match_pair(TokenType.LT, TokenType.LT):
2824                this = self.expression(
2825                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2826                )
2827            elif self._match_pair(TokenType.GT, TokenType.GT):
2828                this = self.expression(
2829                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2830                )
2831            else:
2832                break
2833
2834        return this
2835
    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse term-level binary operators (self.TERM) over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)
2838
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse factor-level binary operators (self.FACTOR) over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)
2841
    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Dispatch to a unary-operator parser, else fall back to a typed expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
2846
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an expression that may begin with a type: an interval, a typed
        literal (e.g. DATE '...'), a bare data type, or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # TYPE 'literal' syntax: delegate to a registered literal parser
                # for this type if any, otherwise treat it as a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Ambiguous: a bare type name followed by a non-literal is more
                # likely a column reference -- rewind and reparse as such.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2868
2869    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2870        this = self._parse_type()
2871        if not this:
2872            return None
2873
2874        return self.expression(
2875            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2876        )
2877
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type into an exp.DataType (or related node), or None.

        Handles nested types (STRUCT/ARRAY with (...) or <...> arguments),
        postfix [] array syntax, timestamp/time-zone variants, and INTERVAL.

        Args:
            check_func: when True, back out (returning None) if the tokens could
                instead be a function call rather than a type.
        """
        index = self._index

        # Optional "SYSUDTLIB." qualifier before the type name.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Invalid argument list -- this wasn't a type after all.
                self._retreat(index)
                return None

            # NAME(...) could also be a function call; remembered for check_func.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postfix [] array syntax, possibly repeated for nested arrays.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone [ means a bracket expression follows, not an array type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket argument form, e.g. ARRAY<INT> or STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, in [...] or (...).
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH [LOCAL] TIME ZONE / WITHOUT TIME ZONE variants.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # A time-zone suffix rules out the function-call interpretation.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # NAME(...) not followed by a string literal: let the caller
                # reparse these tokens as a function call instead of a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2990
2991    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2992        this = self._parse_type() or self._parse_id_var()
2993        self._match(TokenType.COLON)
2994        return self._parse_column_def(this)
2995
2996    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2997        if not self._match(TokenType.AT_TIME_ZONE):
2998            return this
2999        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
3000
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, including bracket access
        and column operators such as `::` casts and dotted qualification."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `::` must be followed by a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators with a registered handler take the next token's text
                # as a literal argument.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: column -> table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3053
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, an adjacent-string concatenation,
        a leading-dot number, or a parenthesized expression/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are folded into a single Concat node.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Numbers written without a leading zero, e.g. `.25`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple literal.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3097
3098    def _parse_field(
3099        self,
3100        any_token: bool = False,
3101        tokens: t.Optional[t.Collection[TokenType]] = None,
3102    ) -> t.Optional[exp.Expression]:
3103        return (
3104            self._parse_primary()
3105            or self._parse_function()
3106            or self._parse_id_var(any_token=any_token, tokens=tokens)
3107        )
3108
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: optional name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: when True, always build an exp.Anonymous node instead of a
                dedicated function expression.

        Returns:
            The parsed function (with any trailing window clause applied), or None.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        # Constructs parsed without parentheses via a dedicated parser.
        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # Subquery predicate whose argument is a full query.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown function names are preserved as-is via exp.Anonymous.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3160
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one function-definition parameter: an identifier column definition."""
        return self._parse_column_def(self._parse_id_var())
3163
3164    def _parse_user_defined_function(
3165        self, kind: t.Optional[TokenType] = None
3166    ) -> t.Optional[exp.Expression]:
3167        this = self._parse_id_var()
3168
3169        while self._match(TokenType.DOT):
3170            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3171
3172        if not self._match(TokenType.L_PAREN):
3173            return this
3174
3175        expressions = self._parse_csv(self._parse_function_parameter)
3176        self._match_r_paren()
3177        return self.expression(
3178            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3179        )
3180
3181    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3182        literal = self._parse_primary()
3183        if literal:
3184            return self.expression(exp.Introducer, this=token.text, expression=literal)
3185
3186        return self.expression(exp.Identifier, this=token.text)
3187
    def _parse_national(self, token: Token) -> exp.Expression:
        """Wrap the token's text in a National string-literal node."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))
3190
3191    def _parse_session_parameter(self) -> exp.Expression:
3192        kind = None
3193        this = self._parse_id_var() or self._parse_primary()
3194
3195        if this and self._match(TokenType.DOT):
3196            kind = this.name
3197            this = self._parse_var() or self._parse_primary()
3198
3199        return self.expression(exp.SessionParameter, this=this, kind=kind)
3200
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda expression, e.g. `x -> x + 1` or `(a, b) -> a + b`.

        If no lambda operator follows the candidate parameter list, the parser
        backtracks and parses a regular (possibly DISTINCT) expression instead.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all - rewind
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator found: rewind and parse as an ordinary expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # `name = value` style arguments: the LHS is a parameter
                    # name, not a column reference
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3232
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column / constraint list) attached to `this`.

        If what follows parses as a SELECT, `this` is returned unchanged; the
        `finally` clause always rewinds so the caller can re-parse the query.
        """
        index = self._index

        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            # NOTE(review): broad catch - presumably only a ParseError from the
            # speculative SELECT parse is expected here; verify
            pass
        finally:
            # Always rewind: the speculative parse must not consume tokens
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3253
3254    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3255        # column defs are not really columns, they're identifiers
3256        if isinstance(this, exp.Column):
3257            this = this.this
3258        kind = self._parse_types()
3259
3260        if self._match_text_seq("FOR", "ORDINALITY"):
3261            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3262
3263        constraints = []
3264        while True:
3265            constraint = self._parse_column_constraint()
3266            if not constraint:
3267                break
3268            constraints.append(constraint)
3269
3270        if not kind and not constraints:
3271            return this
3272
3273        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3274
3275    def _parse_auto_increment(self) -> exp.Expression:
3276        start = None
3277        increment = None
3278
3279        if self._match(TokenType.L_PAREN, advance=False):
3280            args = self._parse_wrapped_csv(self._parse_bitwise)
3281            start = seq_get(args, 0)
3282            increment = seq_get(args, 1)
3283        elif self._match_text_seq("START"):
3284            start = self._parse_bitwise()
3285            self._match_text_seq("INCREMENT")
3286            increment = self._parse_bitwise()
3287
3288        if start and increment:
3289            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3290
3291        return exp.AutoIncrementColumnConstraint()
3292
3293    def _parse_compress(self) -> exp.Expression:
3294        if self._match(TokenType.L_PAREN, advance=False):
3295            return self.expression(
3296                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3297            )
3298
3299        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3300
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [(sequence options)].

        The constraint's `this` is True for ALWAYS and False for BY DEFAULT.
        """
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        # Optional sequence options, e.g. (START WITH 1 INCREMENT BY 1 NO CYCLE)
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3330
3331    def _parse_inline(self) -> t.Optional[exp.Expression]:
3332        self._match_text_seq("LENGTH")
3333        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3334
3335    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3336        if self._match_text_seq("NULL"):
3337            return self.expression(exp.NotNullColumnConstraint)
3338        if self._match_text_seq("CASESPECIFIC"):
3339            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3340        return None
3341
3342    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3343        if self._match(TokenType.CONSTRAINT):
3344            this = self._parse_id_var()
3345        else:
3346            this = None
3347
3348        if self._match_texts(self.CONSTRAINT_PARSERS):
3349            return self.expression(
3350                exp.ColumnConstraint,
3351                this=this,
3352                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3353            )
3354
3355        return this
3356
3357    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3358        if not self._match(TokenType.CONSTRAINT):
3359            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3360
3361        this = self._parse_id_var()
3362        expressions = []
3363
3364        while True:
3365            constraint = self._parse_unnamed_constraint() or self._parse_function()
3366            if not constraint:
3367                break
3368            expressions.append(constraint)
3369
3370        return self.expression(exp.Constraint, this=this, expressions=expressions)
3371
    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint not introduced by the CONSTRAINT keyword.

        `constraints` optionally restricts which keywords may start the
        constraint; dispatch itself always goes through CONSTRAINT_PARSERS.
        """
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            # Reachable when `constraints` contains a key CONSTRAINT_PARSERS lacks
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)
3383
3384    def _parse_unique(self) -> exp.Expression:
3385        if not self._match(TokenType.L_PAREN, advance=False):
3386            return self.expression(exp.UniqueColumnConstraint)
3387        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3388
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings, in source order.

        Recognizes ON <event> <action>, NOT ENFORCED, DEFERRABLE,
        INITIALLY DEFERRED, NORELY and MATCH FULL; stops at the first
        unrecognized token.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The event (e.g. DELETE/UPDATE) is taken verbatim from the next token
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
3425
3426    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3427        if match and not self._match(TokenType.REFERENCES):
3428            return None
3429
3430        expressions = None
3431        this = self._parse_id_var()
3432
3433        if self._match(TokenType.L_PAREN, advance=False):
3434            expressions = self._parse_wrapped_id_vars()
3435
3436        options = self._parse_key_constraint_options()
3437        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3438
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE|UPDATE <action>]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        # Maps "delete"/"update" to the action string, passed as kwargs below
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                # NOTE(review): the match result is ignored - if neither NULL nor
                # DEFAULT follows, _prev is still the SET token; verify handling
                # of malformed input here
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE, RESTRICT) is the action
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3464
3465    def _parse_primary_key(self) -> exp.Expression:
3466        desc = (
3467            self._match_set((TokenType.ASC, TokenType.DESC))
3468            and self._prev.token_type == TokenType.DESC
3469        )
3470
3471        if not self._match(TokenType.L_PAREN, advance=False):
3472            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3473
3474        expressions = self._parse_wrapped_csv(self._parse_field)
3475        options = self._parse_key_constraint_options()
3476        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3477
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracketed/braced syntax after `this`: subscripts, arrays, structs.

        Recurses at the end to support chained subscripts such as x[0][1].
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # A leading colon is an open-start slice, e.g. x[:2]
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Adjust subscript indices for dialects with a different index base
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
3506
3507    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3508        if self._match(TokenType.COLON):
3509            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3510        return this
3511
3512    def _parse_case(self) -> t.Optional[exp.Expression]:
3513        ifs = []
3514        default = None
3515
3516        expression = self._parse_conjunction()
3517
3518        while self._match(TokenType.WHEN):
3519            this = self._parse_conjunction()
3520            self._match(TokenType.THEN)
3521            then = self._parse_conjunction()
3522            ifs.append(self.expression(exp.If, this=this, true=then))
3523
3524        if self._match(TokenType.ELSE):
3525            default = self._parse_conjunction()
3526
3527        if not self._match(TokenType.END):
3528            self.raise_error("Expected END after CASE", self._prev)
3529
3530        return self._parse_window(
3531            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3532        )
3533
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as a function call IF(...) or the IF ... THEN ... [ELSE ...] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # -1 so the rewind lands before the already-consumed IF token
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Not an IF expression after all - backtrack and give up
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3555
3556    def _parse_extract(self) -> exp.Expression:
3557        this = self._parse_function() or self._parse_var() or self._parse_type()
3558
3559        if self._match(TokenType.FROM):
3560            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3561
3562        if not self._match(TokenType.COMMA):
3563            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3564
3565        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3566
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the arguments of CAST/TRY_CAST: <expr> AS <type>.

        A comma instead of AS yields a CastToStrType node. `strict` chooses
        between Cast and TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                # Execution may continue past here when the error level doesn't raise
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CAST(x AS CHAR CHARACTER SET cs): the charset becomes the target
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3587
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments into an exp.GroupConcat.

        Handles the DISTINCT modifier, a trailing ORDER BY inside the call,
        and the WITHIN GROUP (ORDER BY ...) form.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            # Rewind over the R_PAREN consumed above; the caller closes the call
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3616
3617    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3618        to: t.Optional[exp.Expression]
3619        this = self._parse_bitwise()
3620
3621        if self._match(TokenType.USING):
3622            to = self.expression(exp.CharacterSet, this=self._parse_var())
3623        elif self._match(TokenType.COMMA):
3624            to = self._parse_bitwise()
3625        else:
3626            to = None
3627
3628        # Swap the argument order if needed to produce the correct AST
3629        if self.CONVERT_TYPE_FIRST:
3630            this, to = to, this
3631
3632        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3633
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        # Fewer than three args can only be the (bin, charset) variant
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk (search, result) pairs; an unpaired trailing arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # Equality never matches NULL, so use IS NULL explicitly
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: equal, OR both sides NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3680
3681    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3682        self._match_text_seq("KEY")
3683        key = self._parse_field()
3684        self._match(TokenType.COLON)
3685        self._match_text_seq("VALUE")
3686        value = self._parse_field()
3687        if not key and not value:
3688            return None
3689        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3690
    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments: key/value pairs plus optional modifiers.

        Recognized modifiers, in order: {NULL | ABSENT} ON NULL,
        WITH/WITHOUT UNIQUE [KEYS], RETURNING <type>, FORMAT JSON,
        ENCODING <var>.
        """
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # Optional trailing KEYS keyword of WITH/WITHOUT UNIQUE KEYS
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
3721
3722    def _parse_logarithm(self) -> exp.Expression:
3723        # Default argument order is base, expression
3724        args = self._parse_csv(self._parse_range)
3725
3726        if len(args) > 1:
3727            if not self.LOG_BASE_FIRST:
3728                args.reverse()
3729            return exp.Log.from_arg_list(args)
3730
3731        return self.expression(
3732            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3733        )
3734
    def _parse_match_against(self) -> exp.Expression:
        """Parse a MATCH (cols) AGAINST (expr [modifier]) full-text predicate."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        # Modifier order matters: the longer/more specific sequences come first
        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
3756
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse OPENJSON(expr [, path]) [WITH (column definitions)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause entry: name type [path] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3777
3778    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
3779        args = self._parse_csv(self._parse_bitwise)
3780
3781        if self._match(TokenType.IN):
3782            return self.expression(
3783                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
3784            )
3785
3786        if haystack_first:
3787            haystack = seq_get(args, 0)
3788            needle = seq_get(args, 1)
3789        else:
3790            needle = seq_get(args, 0)
3791            haystack = seq_get(args, 1)
3792
3793        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
3794
3795        self.validate_expression(this, args)
3796
3797        return this
3798
3799    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3800        args = self._parse_csv(self._parse_table)
3801        return exp.JoinHint(this=func_name.upper(), expressions=args)
3802
3803    def _parse_substring(self) -> exp.Expression:
3804        # Postgres supports the form: substring(string [from int] [for int])
3805        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3806
3807        args = self._parse_csv(self._parse_bitwise)
3808
3809        if self._match(TokenType.FROM):
3810            args.append(self._parse_bitwise())
3811            if self._match(TokenType.FOR):
3812                args.append(self._parse_bitwise())
3813
3814        this = exp.Substring.from_arg_list(args)
3815        self.validate_expression(this, args)
3816
3817        return this
3818
3819    def _parse_trim(self) -> exp.Expression:
3820        # https://www.w3resource.com/sql/character-functions/trim.php
3821        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
3822
3823        position = None
3824        collation = None
3825
3826        if self._match_set(self.TRIM_TYPES):
3827            position = self._prev.text.upper()
3828
3829        expression = self._parse_bitwise()
3830        if self._match_set((TokenType.FROM, TokenType.COMMA)):
3831            this = self._parse_bitwise()
3832        else:
3833            this = expression
3834            expression = None
3835
3836        if self._match(TokenType.COLLATE):
3837            collation = self._parse_bitwise()
3838
3839        return self.expression(
3840            exp.Trim,
3841            this=this,
3842            position=position,
3843            expression=expression,
3844            collation=collation,
3845        )
3846
3847    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3848        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3849
3850    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3851        return self._parse_window(self._parse_id_var(), alias=True)
3852
3853    def _parse_respect_or_ignore_nulls(
3854        self, this: t.Optional[exp.Expression]
3855    ) -> t.Optional[exp.Expression]:
3856        if self._match(TokenType.IGNORE_NULLS):
3857            return self.expression(exp.IgnoreNulls, this=this)
3858        if self._match(TokenType.RESPECT_NULLS):
3859            return self.expression(exp.RespectNulls, this=this)
3860        return this
3861
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related syntax following a function call.

        Handles FILTER (WHERE ...), WITHIN GROUP (ORDER BY ...), trailing
        IGNORE/RESPECT NULLS, and the OVER (...) window specification. With
        `alias=True`, parses a named window definition (name AS (...)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> form (no parentheses)
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: <kind> BETWEEN <start spec> AND <end spec>
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3944
3945    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3946        self._match(TokenType.BETWEEN)
3947
3948        return {
3949            "value": (
3950                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3951            )
3952            or self._parse_bitwise(),
3953            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3954        }
3955
3956    def _parse_alias(
3957        self, this: t.Optional[exp.Expression], explicit: bool = False
3958    ) -> t.Optional[exp.Expression]:
3959        any_token = self._match(TokenType.ALIAS)
3960
3961        if explicit and not any_token:
3962            return this
3963
3964        if self._match(TokenType.L_PAREN):
3965            aliases = self.expression(
3966                exp.Aliases,
3967                this=this,
3968                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3969            )
3970            self._match_r_paren(aliases)
3971            return aliases
3972
3973        alias = self._parse_id_var(any_token)
3974
3975        if alias:
3976            return self.expression(exp.Alias, this=this, alias=alias)
3977
3978        return this
3979
3980    def _parse_id_var(
3981        self,
3982        any_token: bool = True,
3983        tokens: t.Optional[t.Collection[TokenType]] = None,
3984        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3985    ) -> t.Optional[exp.Expression]:
3986        identifier = self._parse_identifier()
3987
3988        if identifier:
3989            return identifier
3990
3991        prefix = ""
3992
3993        if prefix_tokens:
3994            while self._match_set(prefix_tokens):
3995                prefix += self._prev.text
3996
3997        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3998            quoted = self._prev.token_type == TokenType.STRING
3999            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
4000
4001        return None
4002
4003    def _parse_string(self) -> t.Optional[exp.Expression]:
4004        if self._match(TokenType.STRING):
4005            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
4006        return self._parse_placeholder()
4007
4008    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
4009        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
4010
4011    def _parse_number(self) -> t.Optional[exp.Expression]:
4012        if self._match(TokenType.NUMBER):
4013            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
4014        return self._parse_placeholder()
4015
4016    def _parse_identifier(self) -> t.Optional[exp.Expression]:
4017        if self._match(TokenType.IDENTIFIER):
4018            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
4019        return self._parse_placeholder()
4020
4021    def _parse_var(
4022        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
4023    ) -> t.Optional[exp.Expression]:
4024        if (
4025            (any_token and self._advance_any())
4026            or self._match(TokenType.VAR)
4027            or (self._match_set(tokens) if tokens else False)
4028        ):
4029            return self.expression(exp.Var, this=self._prev.text)
4030        return self._parse_placeholder()
4031
4032    def _advance_any(self) -> t.Optional[Token]:
4033        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4034            self._advance()
4035            return self._prev
4036        return None
4037
4038    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4039        return self._parse_var() or self._parse_string()
4040
4041    def _parse_null(self) -> t.Optional[exp.Expression]:
4042        if self._match(TokenType.NULL):
4043            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4044        return None
4045
4046    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4047        if self._match(TokenType.TRUE):
4048            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4049        if self._match(TokenType.FALSE):
4050            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4051        return None
4052
4053    def _parse_star(self) -> t.Optional[exp.Expression]:
4054        if self._match(TokenType.STAR):
4055            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4056        return None
4057
4058    def _parse_parameter(self) -> exp.Expression:
4059        wrapped = self._match(TokenType.L_BRACE)
4060        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4061        self._match(TokenType.R_BRACE)
4062        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4063
4064    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4065        if self._match_set(self.PLACEHOLDER_PARSERS):
4066            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4067            if placeholder:
4068                return placeholder
4069            self._advance(-1)
4070        return None
4071
4072    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4073        if not self._match(TokenType.EXCEPT):
4074            return None
4075        if self._match(TokenType.L_PAREN, advance=False):
4076            return self._parse_wrapped_csv(self._parse_column)
4077        return self._parse_csv(self._parse_column)
4078
4079    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4080        if not self._match(TokenType.REPLACE):
4081            return None
4082        if self._match(TokenType.L_PAREN, advance=False):
4083            return self._parse_wrapped_csv(self._parse_expression)
4084        return self._parse_csv(self._parse_expression)
4085
4086    def _parse_csv(
4087        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4088    ) -> t.List[t.Optional[exp.Expression]]:
4089        parse_result = parse_method()
4090        items = [parse_result] if parse_result is not None else []
4091
4092        while self._match(sep):
4093            self._add_comments(parse_result)
4094            parse_result = parse_method()
4095            if parse_result is not None:
4096                items.append(parse_result)
4097
4098        return items
4099
4100    def _parse_tokens(
4101        self, parse_method: t.Callable, expressions: t.Dict
4102    ) -> t.Optional[exp.Expression]:
4103        this = parse_method()
4104
4105        while self._match_set(expressions):
4106            this = self.expression(
4107                expressions[self._prev.token_type],
4108                this=this,
4109                comments=self._prev_comments,
4110                expression=parse_method(),
4111            )
4112
4113        return this
4114
4115    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
4116        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4117
4118    def _parse_wrapped_csv(
4119        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4120    ) -> t.List[t.Optional[exp.Expression]]:
4121        return self._parse_wrapped(
4122            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4123        )
4124
4125    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4126        wrapped = self._match(TokenType.L_PAREN)
4127        if not wrapped and not optional:
4128            self.raise_error("Expecting (")
4129        parse_result = parse_method()
4130        if wrapped:
4131            self._match_r_paren()
4132        return parse_result
4133
4134    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
4135        return self._parse_select() or self._parse_set_operations(self._parse_expression())
4136
4137    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4138        return self._parse_set_operations(
4139            self._parse_select(nested=True, parse_subquery_alias=False)
4140        )
4141
    def _parse_transaction(self) -> exp.Expression:
        """Parse the tail of a BEGIN/START statement into a Transaction node.

        Consumes an optional transaction-kind keyword, an optional
        TRANSACTION/WORK keyword, and a comma-separated list of transaction
        modes, each mode being one or more VAR tokens joined by spaces.
        """
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # A single mode may span several words, e.g. READ ONLY.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)
4161
    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse the tail of a COMMIT or ROLLBACK statement.

        Handles the optional TRANSACTION/WORK keyword, the TO [SAVEPOINT]
        <name> clause, and the AND [NO] CHAIN clause.
        """
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            # AND CHAIN -> True, AND NO CHAIN -> False.
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)
4180
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER <col>]."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4201
4202    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4203        drop = self._match(TokenType.DROP) and self._parse_drop()
4204        if drop and not isinstance(drop, exp.Command):
4205            drop.set("kind", drop.args.get("kind", "COLUMN"))
4206        return drop
4207
4208    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4209    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4210        return self.expression(
4211            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4212        )
4213
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one ADD CONSTRAINT / FOREIGN KEY / PRIMARY KEY clause.

        The introducing token was already consumed by the caller, so its
        type is read back from `self._prev`.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> [CHECK (...) [ENFORCED]]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4237
4238    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4239        index = self._index - 1
4240
4241        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4242            return self._parse_csv(self._parse_add_constraint)
4243
4244        self._retreat(index)
4245        return self._parse_csv(self._parse_add_column)
4246
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER [COLUMN] <col> followed by DROP DEFAULT, SET DEFAULT
        <expr>, or [SET DATA] TYPE <type> [COLLATE ...] [USING ...]."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        # Each kwarg only parses its value if the introducing keyword matched;
        # keyword-argument evaluation order fixes the token-consumption order.
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4264
4265    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4266        index = self._index - 1
4267
4268        partition_exists = self._parse_exists()
4269        if self._match(TokenType.PARTITION, advance=False):
4270            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4271
4272        self._retreat(index)
4273        return self._parse_csv(self._parse_drop_column)
4274
4275    def _parse_alter_table_rename(self) -> exp.Expression:
4276        self._match_text_seq("TO")
4277        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4278
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement.

        Dispatches on the keyword after the table name via ALTER_PARSERS.
        Anything unsupported - or a parse that leaves tokens unconsumed -
        falls back to a raw Command node.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if every token was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4303
    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO <target> USING <source> ON <cond> plus WHEN clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # BY SOURCE -> True, BY TARGET (or unspecified) -> False.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                # DELETE carries no arguments; keep it as a bare Var.
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4369
4370    def _parse_show(self) -> t.Optional[exp.Expression]:
4371        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4372        if parser:
4373            return parser(self)
4374        self._advance()
4375        return self.expression(exp.Show, this=self._prev.text.upper())
4376
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` (or `name TO value`) SET item.

        GLOBAL/SESSION TRANSACTION is routed to the transaction parser. When
        no '='/'TO' follows the name, the cursor is rewound and None returned.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4403
4404    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4405        self._match_text_seq("TRANSACTION")
4406        characteristics = self._parse_csv(
4407            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4408        )
4409        return self.expression(
4410            exp.SetItem,
4411            expressions=characteristics,
4412            kind="TRANSACTION",
4413            **{"global": global_},  # type: ignore
4414        )
4415
4416    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4417        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4418        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4419
4420    def _parse_set(self) -> exp.Expression:
4421        index = self._index
4422        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4423
4424        if self._curr:
4425            self._retreat(index)
4426            return self._parse_as_command(self._prev)
4427
4428        return set_
4429
4430    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4431        for option in options:
4432            if self._match_text_seq(*option.split(" ")):
4433                return exp.Var(this=option)
4434        return None
4435
4436    def _parse_as_command(self, start: Token) -> exp.Command:
4437        while self._curr:
4438            self._advance()
4439        text = self._find_sql(start, self._prev)
4440        size = len(start.text)
4441        return exp.Command(this=text[:size], expression=text[size:])
4442
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` along the upcoming token texts to locate a sub-parser.

        On success the matching callable from `parsers` is returned with the
        cursor left past the consumed keywords; on failure the cursor is
        restored and None is returned.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end: no registered parser starts with this sequence.
                break
            if result == 2:
                # Exact match: the accumulated words name a registered parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4465
4466    def _match(self, token_type, advance=True, expression=None):
4467        if not self._curr:
4468            return None
4469
4470        if self._curr.token_type == token_type:
4471            if advance:
4472                self._advance()
4473            self._add_comments(expression)
4474            return True
4475
4476        return None
4477
4478    def _match_set(self, types, advance=True):
4479        if not self._curr:
4480            return None
4481
4482        if self._curr.token_type in types:
4483            if advance:
4484                self._advance()
4485            return True
4486
4487        return None
4488
4489    def _match_pair(self, token_type_a, token_type_b, advance=True):
4490        if not self._curr or not self._next:
4491            return None
4492
4493        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4494            if advance:
4495                self._advance(2)
4496            return True
4497
4498        return None
4499
4500    def _match_l_paren(self, expression=None):
4501        if not self._match(TokenType.L_PAREN, expression=expression):
4502            self.raise_error("Expecting (")
4503
4504    def _match_r_paren(self, expression=None):
4505        if not self._match(TokenType.R_PAREN, expression=expression):
4506            self.raise_error("Expecting )")
4507
4508    def _match_texts(self, texts, advance=True):
4509        if self._curr and self._curr.text.upper() in texts:
4510            if advance:
4511                self._advance()
4512            return True
4513        return False
4514
    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of consecutive tokens by their upper-cased text.

        On any mismatch the cursor is restored and False is returned, so a
        failed call never consumes input. With ``advance=False`` the match is
        a pure lookahead: success is reported but the cursor is not moved.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True
4528
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        A table-qualified column becomes a Dot of its table and name; a bare
        column or identifier becomes a plain Var.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            # Rewrite children first, then replace the column node itself.
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4543
    def _replace_lambda(self, node, lambda_variables):
        """Replace references to lambda parameters inside `node`.

        Columns whose first name part is one of `lambda_variables` are turned
        into plain identifiers (or dotted paths), since they refer to lambda
        arguments rather than real table columns.
        """
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost enclosing Dot chain.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Not inside a Dot chain at all.
                    if column is node:
                        # The root node is the column itself; swap the root.
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
810    def __init__(
811        self,
812        error_level: t.Optional[ErrorLevel] = None,
813        error_message_context: int = 100,
814        index_offset: int = 0,
815        unnest_column_only: bool = False,
816        alias_post_tablesample: bool = False,
817        max_errors: int = 3,
818        null_ordering: t.Optional[str] = None,
819    ):
820        self.error_level = error_level or ErrorLevel.IMMEDIATE
821        self.error_message_context = error_message_context
822        self.index_offset = index_offset
823        self.unnest_column_only = unnest_column_only
824        self.alias_post_tablesample = alias_post_tablesample
825        self.max_errors = max_errors
826        self.null_ordering = null_ordering
827        self.reset()
def reset(self):
829    def reset(self):
830        self.sql = ""
831        self.errors = []
832        self._tokens = []
833        self._index = 0
834        self._curr = None
835        self._next = None
836        self._prev = None
837        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
839    def parse(
840        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
841    ) -> t.List[t.Optional[exp.Expression]]:
842        """
843        Parses a list of tokens and returns a list of syntax trees, one tree
844        per parsed SQL statement.
845
846        Args:
847            raw_tokens: the list of tokens.
848            sql: the original SQL string, used to produce helpful debug messages.
849
850        Returns:
851            The list of syntax trees.
852        """
853        return self._parse(
854            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
855        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
857    def parse_into(
858        self,
859        expression_types: exp.IntoType,
860        raw_tokens: t.List[Token],
861        sql: t.Optional[str] = None,
862    ) -> t.List[t.Optional[exp.Expression]]:
863        """
864        Parses a list of tokens into a given Expression type. If a collection of Expression
865        types is given instead, this method will try to parse the token list into each one
866        of them, stopping at the first for which the parsing succeeds.
867
868        Args:
869            expression_types: the expression type(s) to try and parse the token list into.
870            raw_tokens: the list of tokens.
871            sql: the original SQL string, used to produce helpful debug messages.
872
873        Returns:
874            The target Expression.
875        """
876        errors = []
877        for expression_type in ensure_collection(expression_types):
878            parser = self.EXPRESSION_PARSERS.get(expression_type)
879            if not parser:
880                raise TypeError(f"No parser registered for {expression_type}")
881            try:
882                return self._parse(parser, raw_tokens, sql)
883            except ParseError as e:
884                e.errors[0]["into_expression"] = expression_type
885                errors.append(e)
886        raise ParseError(
887            f"Failed to parse into {expression_types}",
888            errors=merge_errors(errors),
889        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
925    def check_errors(self) -> None:
926        """
927        Logs or raises any found errors, depending on the chosen error level setting.
928        """
929        if self.error_level == ErrorLevel.WARN:
930            for error in self.errors:
931                logger.error(str(error))
932        elif self.error_level == ErrorLevel.RAISE and self.errors:
933            raise ParseError(
934                concat_messages(self.errors, self.max_errors),
935                errors=merge_errors(self.errors),
936            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
938    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
939        """
940        Appends an error in the list of recorded errors or raises it, depending on the chosen
941        error level setting.
942        """
943        token = token or self._curr or self._prev or Token.string("")
944        start = token.start
945        end = token.end
946        start_context = self.sql[max(start - self.error_message_context, 0) : start]
947        highlight = self.sql[start:end]
948        end_context = self.sql[end : end + self.error_message_context]
949
950        error = ParseError.new(
951            f"{message}. Line {token.line}, Col: {token.col}.\n"
952            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
953            description=message,
954            line=token.line,
955            col=token.col,
956            start_context=start_context,
957            highlight=highlight,
958            end_context=end_context,
959        )
960
961        if self.error_level == ErrorLevel.IMMEDIATE:
962            raise error
963
964        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: the expression class to instantiate.
        comments: an optional list of comments to attach to the expression.
        kwargs: the arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)

    # Idiom fix: the original used a conditional *expression* as a statement
    # purely for its side effects, discarding both results. An explicit
    # if/else makes the intent clear: attach the caller-provided comments,
    # otherwise delegate to the parser's _add_comments hook.
    if comments:
        instance.add_comments(comments)
    else:
        self._add_comments(instance)

    self.validate_expression(instance)
    return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
 990    def validate_expression(
 991        self, expression: exp.Expression, args: t.Optional[t.List] = None
 992    ) -> None:
 993        """
 994        Validates an already instantiated expression, making sure that all its mandatory arguments
 995        are set.
 996
 997        Args:
 998            expression: the expression to validate.
 999            args: an optional list of items that was used to instantiate the expression, if it's a Func.
1000        """
1001        if self.error_level == ErrorLevel.IGNORE:
1002            return
1003
1004        for error_message in expression.error_messages(args):
1005            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.