sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 return lambda self, this: self._parse_escape( 47 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 48 ) 49 50 51def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 52 # Default argument order is base, expression 53 this = seq_get(args, 0) 54 expression = seq_get(args, 1) 55 56 if expression: 57 if not dialect.LOG_BASE_FIRST: 58 this, expression = expression, this 59 return exp.Log(this=this, expression=expression) 60 61 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 62 63 64def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 65 def _builder(args: t.List, dialect: Dialect) -> E: 66 expression = expr_type( 67 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 68 ) 69 if len(args) > 2 and expr_type is exp.JSONExtract: 70 expression.set("expressions", args[2:]) 71 72 return expression 73 74 return _builder 75 76 77class _Parser(type): 78 def __new__(cls, clsname, bases, attrs): 79 klass = super().__new__(cls, clsname, bases, attrs) 80 81 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 82 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 83 84 return klass 85 86 87class Parser(metaclass=_Parser): 88 """ 89 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 90 91 Args: 92 error_level: The desired error level. 93 Default: ErrorLevel.IMMEDIATE 94 error_message_context: The amount of context to capture from a query string when displaying 95 the error message (in number of characters). 96 Default: 100 97 max_errors: Maximum number of error messages to include in a raised ParseError. 98 This is only relevant if error_level is ErrorLevel.RAISE. 99 Default: 3 100 """ 101 102 FUNCTIONS: t.Dict[str, t.Callable] = { 103 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 104 "CONCAT": lambda args, dialect: exp.Concat( 105 expressions=args, 106 safe=not dialect.STRICT_STRING_CONCAT, 107 coalesce=dialect.CONCAT_COALESCE, 108 ), 109 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 110 expressions=args, 111 safe=not dialect.STRICT_STRING_CONCAT, 112 coalesce=dialect.CONCAT_COALESCE, 113 ), 114 "DATE_TO_DATE_STR": lambda args: exp.Cast( 115 this=seq_get(args, 0), 116 to=exp.DataType(this=exp.DataType.Type.TEXT), 117 ), 118 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 119 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 120 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 121 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 122 "LIKE": build_like, 123 "LOG": build_logarithm, 124 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 125 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 126 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 127 "TIME_TO_TIME_STR": lambda args: exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 132 this=exp.Cast( 133 this=seq_get(args, 0), 134 to=exp.DataType(this=exp.DataType.Type.TEXT), 135 ), 136 start=exp.Literal.number(1), 137 length=exp.Literal.number(10), 138 ), 139 "VAR_MAP": build_var_map, 140 } 141 142 NO_PAREN_FUNCTIONS = { 143 TokenType.CURRENT_DATE: exp.CurrentDate, 144 TokenType.CURRENT_DATETIME: exp.CurrentDate, 145 TokenType.CURRENT_TIME: exp.CurrentTime, 146 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 147 TokenType.CURRENT_USER: exp.CurrentUser, 148 } 149 150 STRUCT_TYPE_TOKENS = { 151 TokenType.NESTED, 152 TokenType.OBJECT, 153 TokenType.STRUCT, 154 } 155 156 NESTED_TYPE_TOKENS = { 157 TokenType.ARRAY, 158 TokenType.LOWCARDINALITY, 159 TokenType.MAP, 160 TokenType.NULLABLE, 161 *STRUCT_TYPE_TOKENS, 162 } 163 164 ENUM_TYPE_TOKENS = { 165 TokenType.ENUM, 166 TokenType.ENUM8, 167 TokenType.ENUM16, 168 } 169 170 AGGREGATE_TYPE_TOKENS = { 171 TokenType.AGGREGATEFUNCTION, 172 TokenType.SIMPLEAGGREGATEFUNCTION, 173 } 174 175 TYPE_TOKENS = { 176 TokenType.BIT, 177 TokenType.BOOLEAN, 178 TokenType.TINYINT, 179 TokenType.UTINYINT, 180 TokenType.SMALLINT, 181 TokenType.USMALLINT, 182 TokenType.INT, 183 TokenType.UINT, 184 TokenType.BIGINT, 185 TokenType.UBIGINT, 186 TokenType.INT128, 187 TokenType.UINT128, 188 TokenType.INT256, 189 TokenType.UINT256, 190 TokenType.MEDIUMINT, 191 TokenType.UMEDIUMINT, 192 TokenType.FIXEDSTRING, 193 TokenType.FLOAT, 194 TokenType.DOUBLE, 195 TokenType.CHAR, 196 TokenType.NCHAR, 197 TokenType.VARCHAR, 198 TokenType.NVARCHAR, 199 TokenType.BPCHAR, 200 TokenType.TEXT, 201 TokenType.MEDIUMTEXT, 202 TokenType.LONGTEXT, 203 TokenType.MEDIUMBLOB, 204 TokenType.LONGBLOB, 205 TokenType.BINARY, 206 TokenType.VARBINARY, 207 TokenType.JSON, 208 TokenType.JSONB, 209 TokenType.INTERVAL, 210 TokenType.TINYBLOB, 211 TokenType.TINYTEXT, 212 TokenType.TIME, 213 TokenType.TIMETZ, 214 TokenType.TIMESTAMP, 215 TokenType.TIMESTAMP_S, 216 TokenType.TIMESTAMP_MS, 217 TokenType.TIMESTAMP_NS, 218 TokenType.TIMESTAMPTZ, 219 TokenType.TIMESTAMPLTZ, 220 TokenType.DATETIME, 221 TokenType.DATETIME64, 222 TokenType.DATE, 223 TokenType.DATE32, 224 TokenType.INT4RANGE, 225 TokenType.INT4MULTIRANGE, 226 TokenType.INT8RANGE, 227 TokenType.INT8MULTIRANGE, 228 TokenType.NUMRANGE, 229 TokenType.NUMMULTIRANGE, 230 TokenType.TSRANGE, 231 TokenType.TSMULTIRANGE, 232 TokenType.TSTZRANGE, 233 TokenType.TSTZMULTIRANGE, 234 TokenType.DATERANGE, 235 TokenType.DATEMULTIRANGE, 236 TokenType.DECIMAL, 237 TokenType.UDECIMAL, 238 TokenType.BIGDECIMAL, 239 TokenType.UUID, 240 TokenType.GEOGRAPHY, 241 TokenType.GEOMETRY, 242 TokenType.HLLSKETCH, 243 TokenType.HSTORE, 244 TokenType.PSEUDO_TYPE, 245 TokenType.SUPER, 246 TokenType.SERIAL, 247 TokenType.SMALLSERIAL, 248 TokenType.BIGSERIAL, 249 TokenType.XML, 250 TokenType.YEAR, 251 TokenType.UNIQUEIDENTIFIER, 252 TokenType.USERDEFINED, 253 TokenType.MONEY, 254 TokenType.SMALLMONEY, 255 TokenType.ROWVERSION, 256 TokenType.IMAGE, 257 TokenType.VARIANT, 258 TokenType.OBJECT, 259 TokenType.OBJECT_IDENTIFIER, 260 TokenType.INET, 261 TokenType.IPADDRESS, 262 TokenType.IPPREFIX, 263 TokenType.IPV4, 264 TokenType.IPV6, 265 TokenType.UNKNOWN, 266 TokenType.NULL, 267 TokenType.NAME, 268 *ENUM_TYPE_TOKENS, 269 *NESTED_TYPE_TOKENS, 270 *AGGREGATE_TYPE_TOKENS, 271 } 272 273 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 274 TokenType.BIGINT: TokenType.UBIGINT, 275 TokenType.INT: TokenType.UINT, 276 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 277 TokenType.SMALLINT: TokenType.USMALLINT, 278 TokenType.TINYINT: TokenType.UTINYINT, 279 TokenType.DECIMAL: TokenType.UDECIMAL, 280 } 281 282 SUBQUERY_PREDICATES = { 283 TokenType.ANY: exp.Any, 284 TokenType.ALL: exp.All, 285 TokenType.EXISTS: exp.Exists, 286 TokenType.SOME: exp.Any, 287 } 288 289 RESERVED_TOKENS = { 290 *Tokenizer.SINGLE_TOKENS.values(), 291 TokenType.SELECT, 292 } 293 294 DB_CREATABLES = { 295 TokenType.DATABASE, 296 TokenType.SCHEMA, 297 TokenType.TABLE, 298 TokenType.VIEW, 299 TokenType.MODEL, 300 TokenType.DICTIONARY, 301 TokenType.SEQUENCE, 302 TokenType.STORAGE_INTEGRATION, 303 } 304 305 CREATABLES = { 306 TokenType.COLUMN, 307 TokenType.CONSTRAINT, 308 TokenType.FUNCTION, 309 TokenType.INDEX, 310 TokenType.PROCEDURE, 311 TokenType.FOREIGN_KEY, 312 *DB_CREATABLES, 313 } 314 315 # Tokens that can represent identifiers 316 ID_VAR_TOKENS = { 317 TokenType.VAR, 318 TokenType.ANTI, 319 TokenType.APPLY, 320 TokenType.ASC, 321 TokenType.ASOF, 322 TokenType.AUTO_INCREMENT, 323 TokenType.BEGIN, 324 TokenType.BPCHAR, 325 TokenType.CACHE, 326 TokenType.CASE, 327 TokenType.COLLATE, 328 TokenType.COMMAND, 329 TokenType.COMMENT, 330 TokenType.COMMIT, 331 TokenType.CONSTRAINT, 332 TokenType.DEFAULT, 333 TokenType.DELETE, 334 TokenType.DESC, 335 TokenType.DESCRIBE, 336 TokenType.DICTIONARY, 337 TokenType.DIV, 338 TokenType.END, 339 TokenType.EXECUTE, 340 TokenType.ESCAPE, 341 TokenType.FALSE, 342 TokenType.FIRST, 343 TokenType.FILTER, 344 TokenType.FINAL, 345 TokenType.FORMAT, 346 TokenType.FULL, 347 TokenType.IS, 348 TokenType.ISNULL, 349 TokenType.INTERVAL, 350 TokenType.KEEP, 351 TokenType.KILL, 352 TokenType.LEFT, 353 TokenType.LOAD, 354 TokenType.MERGE, 355 TokenType.NATURAL, 356 TokenType.NEXT, 357 TokenType.OFFSET, 358 TokenType.OPERATOR, 359 TokenType.ORDINALITY, 360 TokenType.OVERLAPS, 361 TokenType.OVERWRITE, 362 TokenType.PARTITION, 363 TokenType.PERCENT, 364 TokenType.PIVOT, 365 TokenType.PRAGMA, 366 TokenType.RANGE, 367 TokenType.RECURSIVE, 368 TokenType.REFERENCES, 369 TokenType.REFRESH, 370 TokenType.REPLACE, 371 TokenType.RIGHT, 372 TokenType.ROW, 373 TokenType.ROWS, 374 TokenType.SEMI, 375 TokenType.SET, 376 TokenType.SETTINGS, 377 TokenType.SHOW, 378 TokenType.TEMPORARY, 379 TokenType.TOP, 380 TokenType.TRUE, 381 TokenType.TRUNCATE, 382 TokenType.UNIQUE, 383 TokenType.UNPIVOT, 384 TokenType.UPDATE, 385 TokenType.USE, 386 TokenType.VOLATILE, 387 TokenType.WINDOW, 388 *CREATABLES, 389 *SUBQUERY_PREDICATES, 390 *TYPE_TOKENS, 391 *NO_PAREN_FUNCTIONS, 392 } 393 394 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 395 396 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 397 TokenType.ANTI, 398 TokenType.APPLY, 399 TokenType.ASOF, 400 TokenType.FULL, 401 TokenType.LEFT, 402 TokenType.LOCK, 403 TokenType.NATURAL, 404 TokenType.OFFSET, 405 TokenType.RIGHT, 406 TokenType.SEMI, 407 TokenType.WINDOW, 408 } 409 410 ALIAS_TOKENS = ID_VAR_TOKENS 411 412 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 413 414 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 415 416 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 417 418 FUNC_TOKENS = { 419 TokenType.COLLATE, 420 TokenType.COMMAND, 421 TokenType.CURRENT_DATE, 422 TokenType.CURRENT_DATETIME, 423 TokenType.CURRENT_TIMESTAMP, 424 TokenType.CURRENT_TIME, 425 TokenType.CURRENT_USER, 426 TokenType.FILTER, 427 TokenType.FIRST, 428 TokenType.FORMAT, 429 TokenType.GLOB, 430 TokenType.IDENTIFIER, 431 TokenType.INDEX, 432 TokenType.ISNULL, 433 TokenType.ILIKE, 434 TokenType.INSERT, 435 TokenType.LIKE, 436 TokenType.MERGE, 437 TokenType.OFFSET, 438 TokenType.PRIMARY_KEY, 439 TokenType.RANGE, 440 TokenType.REPLACE, 441 TokenType.RLIKE, 442 TokenType.ROW, 443 TokenType.UNNEST, 444 TokenType.VAR, 445 TokenType.LEFT, 446 TokenType.RIGHT, 447 TokenType.SEQUENCE, 448 TokenType.DATE, 449 TokenType.DATETIME, 450 TokenType.TABLE, 451 TokenType.TIMESTAMP, 452 TokenType.TIMESTAMPTZ, 453 TokenType.TRUNCATE, 454 TokenType.WINDOW, 455 TokenType.XOR, 456 *TYPE_TOKENS, 457 *SUBQUERY_PREDICATES, 458 } 459 460 CONJUNCTION = { 461 TokenType.AND: exp.And, 462 TokenType.OR: exp.Or, 463 } 464 465 EQUALITY = { 466 TokenType.COLON_EQ: exp.PropertyEQ, 467 TokenType.EQ: exp.EQ, 468 TokenType.NEQ: exp.NEQ, 469 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 470 } 471 472 COMPARISON = { 473 TokenType.GT: exp.GT, 474 TokenType.GTE: exp.GTE, 475 TokenType.LT: exp.LT, 476 TokenType.LTE: exp.LTE, 477 } 478 479 BITWISE = { 480 TokenType.AMP: exp.BitwiseAnd, 481 TokenType.CARET: exp.BitwiseXor, 482 TokenType.PIPE: exp.BitwiseOr, 483 } 484 485 TERM = { 486 TokenType.DASH: exp.Sub, 487 TokenType.PLUS: exp.Add, 488 TokenType.MOD: exp.Mod, 489 TokenType.COLLATE: exp.Collate, 490 } 491 492 FACTOR = { 493 TokenType.DIV: exp.IntDiv, 494 TokenType.LR_ARROW: exp.Distance, 495 TokenType.SLASH: exp.Div, 496 TokenType.STAR: exp.Mul, 497 } 498 499 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 500 501 TIMES = { 502 TokenType.TIME, 503 TokenType.TIMETZ, 504 } 505 506 TIMESTAMPS = { 507 TokenType.TIMESTAMP, 508 TokenType.TIMESTAMPTZ, 509 TokenType.TIMESTAMPLTZ, 510 *TIMES, 511 } 512 513 SET_OPERATIONS = { 514 TokenType.UNION, 515 TokenType.INTERSECT, 516 TokenType.EXCEPT, 517 } 518 519 JOIN_METHODS = { 520 TokenType.ASOF, 521 TokenType.NATURAL, 522 TokenType.POSITIONAL, 523 } 524 525 JOIN_SIDES = { 526 TokenType.LEFT, 527 TokenType.RIGHT, 528 TokenType.FULL, 529 } 530 531 JOIN_KINDS = { 532 TokenType.INNER, 533 TokenType.OUTER, 534 TokenType.CROSS, 535 TokenType.SEMI, 536 TokenType.ANTI, 537 } 538 539 JOIN_HINTS: t.Set[str] = set() 540 541 LAMBDAS = { 542 TokenType.ARROW: lambda self, expressions: self.expression( 543 exp.Lambda, 544 this=self._replace_lambda( 545 self._parse_conjunction(), 546 {node.name for node in expressions}, 547 ), 548 expressions=expressions, 549 ), 550 TokenType.FARROW: lambda self, expressions: self.expression( 551 exp.Kwarg, 552 this=exp.var(expressions[0].name), 553 expression=self._parse_conjunction(), 554 ), 555 } 556 557 COLUMN_OPERATORS = { 558 TokenType.DOT: None, 559 TokenType.DCOLON: lambda self, this, to: self.expression( 560 exp.Cast if self.STRICT_CAST else exp.TryCast, 561 this=this, 562 to=to, 563 ), 564 TokenType.ARROW: lambda self, this, path: self.expression( 565 exp.JSONExtract, 566 this=this, 567 expression=self.dialect.to_json_path(path), 568 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 569 ), 570 TokenType.DARROW: lambda self, this, path: self.expression( 571 exp.JSONExtractScalar, 572 this=this, 573 expression=self.dialect.to_json_path(path), 574 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 575 ), 576 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 577 exp.JSONBExtract, 578 this=this, 579 expression=path, 580 ), 581 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 582 exp.JSONBExtractScalar, 583 this=this, 584 expression=path, 585 ), 586 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 587 exp.JSONBContains, 588 this=this, 589 expression=key, 590 ), 591 } 592 593 EXPRESSION_PARSERS = { 594 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 595 exp.Column: lambda self: self._parse_column(), 596 exp.Condition: lambda self: self._parse_conjunction(), 597 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 598 exp.Expression: lambda self: self._parse_expression(), 599 exp.From: lambda self: self._parse_from(), 600 exp.Group: lambda self: self._parse_group(), 601 exp.Having: lambda self: self._parse_having(), 602 exp.Identifier: lambda self: self._parse_id_var(), 603 exp.Join: lambda self: self._parse_join(), 604 exp.Lambda: lambda self: self._parse_lambda(), 605 exp.Lateral: lambda self: self._parse_lateral(), 606 exp.Limit: lambda self: self._parse_limit(), 607 exp.Offset: lambda self: self._parse_offset(), 608 exp.Order: lambda self: self._parse_order(), 609 exp.Ordered: lambda self: self._parse_ordered(), 610 exp.Properties: lambda self: self._parse_properties(), 611 exp.Qualify: lambda self: self._parse_qualify(), 612 exp.Returning: lambda self: self._parse_returning(), 613 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 614 exp.Table: lambda self: self._parse_table_parts(), 615 exp.TableAlias: lambda self: self._parse_table_alias(), 616 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 617 exp.Where: lambda self: self._parse_where(), 618 exp.Window: lambda self: self._parse_named_window(), 619 exp.With: lambda self: self._parse_with(), 620 "JOIN_TYPE": lambda self: self._parse_join_parts(), 621 } 622 623 STATEMENT_PARSERS = { 624 TokenType.ALTER: lambda self: self._parse_alter(), 625 TokenType.BEGIN: lambda self: self._parse_transaction(), 626 TokenType.CACHE: lambda self: self._parse_cache(), 627 TokenType.COMMENT: lambda self: self._parse_comment(), 628 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 629 TokenType.CREATE: lambda self: self._parse_create(), 630 TokenType.DELETE: lambda self: self._parse_delete(), 631 TokenType.DESC: lambda self: self._parse_describe(), 632 TokenType.DESCRIBE: lambda self: self._parse_describe(), 633 TokenType.DROP: lambda self: self._parse_drop(), 634 TokenType.INSERT: lambda self: self._parse_insert(), 635 TokenType.KILL: lambda self: self._parse_kill(), 636 TokenType.LOAD: lambda self: self._parse_load(), 637 TokenType.MERGE: lambda self: self._parse_merge(), 638 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 639 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 640 TokenType.REFRESH: lambda self: self._parse_refresh(), 641 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 642 TokenType.SET: lambda self: self._parse_set(), 643 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 644 TokenType.UNCACHE: lambda self: self._parse_uncache(), 645 TokenType.UPDATE: lambda self: self._parse_update(), 646 TokenType.USE: lambda self: self.expression( 647 exp.Use, 648 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 649 this=self._parse_table(schema=False), 650 ), 651 } 652 653 UNARY_PARSERS = { 654 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 655 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 656 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 657 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 658 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 659 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 660 } 661 662 STRING_PARSERS = { 663 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 664 exp.RawString, this=token.text 665 ), 666 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 667 exp.National, this=token.text 668 ), 669 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 670 TokenType.STRING: lambda self, token: self.expression( 671 exp.Literal, this=token.text, is_string=True 672 ), 673 TokenType.UNICODE_STRING: lambda self, token: self.expression( 674 exp.UnicodeString, 675 this=token.text, 676 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 677 ), 678 } 679 680 NUMERIC_PARSERS = { 681 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 682 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 683 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 684 TokenType.NUMBER: lambda self, token: self.expression( 685 exp.Literal, this=token.text, is_string=False 686 ), 687 } 688 689 PRIMARY_PARSERS = { 690 **STRING_PARSERS, 691 **NUMERIC_PARSERS, 692 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 693 TokenType.NULL: lambda self, _: self.expression(exp.Null), 694 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 695 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 696 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 697 TokenType.STAR: lambda self, _: self.expression( 698 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 699 ), 700 } 701 702 PLACEHOLDER_PARSERS = { 703 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 704 TokenType.PARAMETER: lambda self: self._parse_parameter(), 705 TokenType.COLON: lambda self: ( 706 self.expression(exp.Placeholder, this=self._prev.text) 707 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 708 else None 709 ), 710 } 711 712 RANGE_PARSERS = { 713 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 714 TokenType.GLOB: binary_range_parser(exp.Glob), 715 TokenType.ILIKE: binary_range_parser(exp.ILike), 716 TokenType.IN: lambda self, this: self._parse_in(this), 717 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 718 TokenType.IS: lambda self, this: self._parse_is(this), 719 TokenType.LIKE: binary_range_parser(exp.Like), 720 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 721 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 722 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 723 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 724 } 725 726 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 727 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 728 "AUTO": lambda self: self._parse_auto_property(), 729 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 730 "BACKUP": lambda self: self.expression( 731 exp.BackupProperty, this=self._parse_var(any_token=True) 732 ), 733 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 734 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 735 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHECKSUM": lambda self: self._parse_checksum(), 737 "CLUSTER BY": lambda self: self._parse_cluster(), 738 "CLUSTERED": lambda self: self._parse_clustered_by(), 739 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 740 exp.CollateProperty, **kwargs 741 ), 742 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 743 "CONTAINS": lambda self: self._parse_contains_property(), 744 "COPY": lambda self: self._parse_copy_property(), 745 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 746 "DEFINER": lambda self: self._parse_definer(), 747 "DETERMINISTIC": lambda self: self.expression( 748 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 749 ), 750 "DISTKEY": lambda self: self._parse_distkey(), 751 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 752 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 753 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 754 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 755 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 756 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 757 "FREESPACE": lambda self: self._parse_freespace(), 758 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 759 "HEAP": lambda self: self.expression(exp.HeapProperty), 760 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 761 "IMMUTABLE": lambda self: self.expression( 762 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 763 ), 764 "INHERITS": lambda self: self.expression( 765 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 766 ), 767 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 768 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 769 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 770 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 771 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 772 "LIKE": lambda self: self._parse_create_like(), 773 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 774 "LOCK": lambda self: self._parse_locking(), 775 "LOCKING": lambda self: self._parse_locking(), 776 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 777 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 778 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 779 "MODIFIES": lambda self: self._parse_modifies_property(), 780 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 781 "NO": lambda self: self._parse_no_property(), 782 "ON": lambda self: self._parse_on_property(), 783 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 784 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 785 "PARTITION": lambda self: self._parse_partitioned_of(), 786 "PARTITION BY": lambda self: self._parse_partitioned_by(), 787 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 788 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 789 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 790 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 791 "READS": lambda self: self._parse_reads_property(), 792 "REMOTE": lambda self: self._parse_remote_with_connection(), 793 "RETURNS": lambda self: self._parse_returns(), 794 "ROW": lambda self: self._parse_row(), 795 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 796 "SAMPLE": lambda self: self.expression( 797 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 798 ), 799 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 800 "SETTINGS": lambda self: self.expression( 801 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 802 ), 803 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 804 "SORTKEY": lambda self: self._parse_sortkey(), 805 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 806 "STABLE": lambda self: self.expression( 807 exp.StabilityProperty, this=exp.Literal.string("STABLE") 808 ), 809 "STORED": lambda self: self._parse_stored(), 810 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 811 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 812 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 813 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 814 "TO": lambda self: self._parse_to_table(), 815 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 816 "TRANSFORM": lambda self: self.expression( 817 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 818 ), 819 "TTL": lambda self: self._parse_ttl(), 820 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 821 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 822 "VOLATILE": lambda self: self._parse_volatile_property(), 823 "WITH": lambda self: self._parse_with_property(), 824 } 825 826 CONSTRAINT_PARSERS = { 827 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 828 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 829 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 830 "CHARACTER SET": lambda self: self.expression( 831 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 832 ), 833 "CHECK": lambda self: self.expression( 834 exp.CheckColumnConstraint, 835 this=self._parse_wrapped(self._parse_conjunction), 836 enforced=self._match_text_seq("ENFORCED"), 837 ), 838 "COLLATE": lambda self: self.expression( 839 exp.CollateColumnConstraint, this=self._parse_var() 840 ), 841 "COMMENT": lambda self: self.expression( 842 exp.CommentColumnConstraint, this=self._parse_string() 843 ), 844 "COMPRESS": lambda self: self._parse_compress(), 845 "CLUSTERED": lambda self: self.expression( 846 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 847 ), 848 "NONCLUSTERED": lambda self: self.expression( 849 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 850 ), 851 "DEFAULT": lambda self: self.expression( 852 exp.DefaultColumnConstraint, this=self._parse_bitwise() 853 ), 854 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 855 "EXCLUDE": lambda self: self.expression( 856 exp.ExcludeColumnConstraint, this=self._parse_index_params() 857 ), 858 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 859 "FORMAT": lambda self: self.expression( 860 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 861 ), 862 "GENERATED": lambda self: self._parse_generated_as_identity(), 863 "IDENTITY": lambda self: self._parse_auto_increment(), 864 "INLINE": lambda self: self._parse_inline(), 865 "LIKE": lambda self: self._parse_create_like(), 866 "NOT": lambda self: self._parse_not_constraint(), 867 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 868 "ON": lambda self: ( 869 self._match(TokenType.UPDATE) 870 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 871 ) 872 or self.expression(exp.OnProperty, this=self._parse_id_var()), 873 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 874 "PERIOD": lambda self: self._parse_period_for_system_time(), 875 "PRIMARY KEY": lambda self: self._parse_primary_key(), 876 "REFERENCES": lambda self: self._parse_references(match=False), 877 "TITLE": lambda self: self.expression( 878 exp.TitleColumnConstraint, this=self._parse_var_or_string() 879 ), 880 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 881 "UNIQUE": lambda self: self._parse_unique(), 882 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 883 "WITH": lambda self: self.expression( 884 exp.Properties, expressions=self._parse_wrapped_properties() 885 ), 886 } 887 888 ALTER_PARSERS = { 889 "ADD": lambda self: self._parse_alter_table_add(), 890 "ALTER": lambda self: self._parse_alter_table_alter(), 891 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 892 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 893 "DROP": lambda self: self._parse_alter_table_drop(), 894 "RENAME": lambda self: self._parse_alter_table_rename(), 895 } 896 897 SCHEMA_UNNAMED_CONSTRAINTS = { 898 "CHECK", 899 "EXCLUDE", 900 "FOREIGN KEY", 901 "LIKE", 902 "PERIOD", 903 "PRIMARY KEY", 904 "UNIQUE", 905 } 906 907 NO_PAREN_FUNCTION_PARSERS = { 908 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 909 "CASE": lambda self: self._parse_case(), 910 "IF": lambda self: self._parse_if(), 911 "NEXT": lambda self: self._parse_next_value_for(), 912 } 913 914 INVALID_FUNC_NAME_TOKENS = { 915 TokenType.IDENTIFIER, 916 TokenType.STRING, 917 } 918 919 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 920 921 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 922 923 FUNCTION_PARSERS = { 924 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 925 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 926 "DECODE": lambda self: self._parse_decode(), 927 "EXTRACT": lambda self: self._parse_extract(), 928 "JSON_OBJECT": lambda self: self._parse_json_object(), 929 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 930 "JSON_TABLE": lambda self: self._parse_json_table(), 931 "MATCH": lambda self: self._parse_match_against(), 932 "OPENJSON": lambda self: self._parse_open_json(), 933 "POSITION": lambda self: self._parse_position(), 934 "PREDICT": lambda self: self._parse_predict(), 935 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 936 "STRING_AGG": lambda self: self._parse_string_agg(), 937 "SUBSTRING": lambda self: self._parse_substring(), 938 "TRIM": lambda self: self._parse_trim(), 939 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 940 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 941 } 942 943 QUERY_MODIFIER_PARSERS = { 944 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 945 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 946 TokenType.WHERE: lambda self: ("where", self._parse_where()), 947 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 948 TokenType.HAVING: lambda self: ("having", self._parse_having()), 949 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 950 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 951 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 952 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 953 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 954 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 955 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 956 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 957 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 958 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 959 TokenType.CLUSTER_BY: lambda self: ( 960 "cluster", 961 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 962 ), 963 TokenType.DISTRIBUTE_BY: lambda self: ( 964 "distribute", 965 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 966 ), 967 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 968 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 969 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 970 } 971 972 SET_PARSERS = { 973 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 974 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 975 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 976 "TRANSACTION": lambda self: self._parse_set_transaction(), 977 } 978 979 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 980 981 TYPE_LITERAL_PARSERS = { 982 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 983 } 984 985 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 986 987 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 988 989 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 990 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 991 "ISOLATION": ( 992 ("LEVEL", "REPEATABLE", "READ"), 993 ("LEVEL", "READ", "COMMITTED"), 994 ("LEVEL", "READ", "UNCOMITTED"), 995 ("LEVEL", "SERIALIZABLE"), 996 ), 997 "READ": ("WRITE", "ONLY"), 998 } 999 1000 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1001 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1002 ) 1003 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1004 1005 CREATE_SEQUENCE: OPTIONS_TYPE = { 1006 "SCALE": ("EXTEND", "NOEXTEND"), 1007 "SHARD": ("EXTEND", "NOEXTEND"), 1008 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1009 **dict.fromkeys( 1010 ( 1011 "SESSION", 1012 "GLOBAL", 1013 "KEEP", 1014 "NOKEEP", 1015 "ORDER", 1016 "NOORDER", 1017 "NOCACHE", 1018 "CYCLE", 1019 "NOCYCLE", 1020 "NOMINVALUE", 1021 "NOMAXVALUE", 1022 "NOSCALE", 1023 "NOSHARD", 1024 ), 1025 tuple(), 1026 ), 1027 } 1028 1029 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1030 1031 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1032 1033 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1034 1035 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1036 1037 CLONE_KEYWORDS = {"CLONE", "COPY"} 1038 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1039 1040 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1041 1042 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1043 1044 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1045 1046 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1047 1048 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1049 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1050 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1051 1052 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1053 1054 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1055 1056 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1057 1058 DISTINCT_TOKENS = {TokenType.DISTINCT} 1059 1060 NULL_TOKENS = {TokenType.NULL} 1061 1062 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1063 1064 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1065 1066 STRICT_CAST = True 1067 1068 PREFIXED_PIVOT_COLUMNS = False 1069 IDENTIFY_PIVOT_STRINGS = False 1070 1071 LOG_DEFAULTS_TO_LN = False 1072 1073 # Whether ADD is present for each column added by ALTER TABLE 1074 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1075 1076 # Whether the table sample clause expects CSV syntax 1077 TABLESAMPLE_CSV = False 1078 1079 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1080 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1081 1082 # Whether the TRIM function expects the characters to trim as its first argument 1083 TRIM_PATTERN_FIRST = False 1084 1085 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1086 STRING_ALIASES = False 1087 1088 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1089 MODIFIERS_ATTACHED_TO_UNION = True 1090 UNION_MODIFIERS = {"order", "limit", "offset"} 1091 1092 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1093 NO_PAREN_IF_COMMANDS = True 1094 1095 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1096 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1097 1098 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1099 # If this is True and '(' is not found, the keyword will be treated as an identifier 1100 VALUES_FOLLOWED_BY_PAREN = True 1101 1102 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1103 SUPPORTS_IMPLICIT_UNNEST = False 1104 1105 __slots__ = ( 1106 "error_level", 1107 "error_message_context", 1108 "max_errors", 1109 "dialect", 1110 "sql", 1111 "errors", 1112 "_tokens", 1113 "_index", 1114 "_curr", 1115 "_next", 1116 "_prev", 1117 "_prev_comments", 1118 ) 1119 1120 # Autofilled 1121 SHOW_TRIE: t.Dict = {} 1122 SET_TRIE: t.Dict = {} 1123 1124 def __init__( 1125 self, 1126 error_level: t.Optional[ErrorLevel] = None, 1127 error_message_context: int = 100, 1128 max_errors: int = 3, 1129 dialect: DialectType = None, 1130 ): 1131 from sqlglot.dialects import Dialect 1132 1133 self.error_level = error_level or ErrorLevel.IMMEDIATE 1134 self.error_message_context = error_message_context 1135 self.max_errors = max_errors 1136 self.dialect = Dialect.get_or_raise(dialect) 1137 self.reset() 1138 1139 def reset(self): 1140 self.sql = "" 1141 self.errors = [] 1142 self._tokens = [] 1143 self._index = 0 1144 self._curr = None 1145 self._next = None 1146 self._prev = None 1147 self._prev_comments = None 1148 1149 def parse( 1150 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1151 ) -> t.List[t.Optional[exp.Expression]]: 1152 """ 1153 Parses a list of tokens and returns a list of syntax trees, one tree 1154 per parsed SQL statement. 1155 1156 Args: 1157 raw_tokens: The list of tokens. 1158 sql: The original SQL string, used to produce helpful debug messages. 1159 1160 Returns: 1161 The list of the produced syntax trees. 1162 """ 1163 return self._parse( 1164 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1165 ) 1166 1167 def parse_into( 1168 self, 1169 expression_types: exp.IntoType, 1170 raw_tokens: t.List[Token], 1171 sql: t.Optional[str] = None, 1172 ) -> t.List[t.Optional[exp.Expression]]: 1173 """ 1174 Parses a list of tokens into a given Expression type. If a collection of Expression 1175 types is given instead, this method will try to parse the token list into each one 1176 of them, stopping at the first for which the parsing succeeds. 1177 1178 Args: 1179 expression_types: The expression type(s) to try and parse the token list into. 1180 raw_tokens: The list of tokens. 1181 sql: The original SQL string, used to produce helpful debug messages. 1182 1183 Returns: 1184 The target Expression. 1185 """ 1186 errors = [] 1187 for expression_type in ensure_list(expression_types): 1188 parser = self.EXPRESSION_PARSERS.get(expression_type) 1189 if not parser: 1190 raise TypeError(f"No parser registered for {expression_type}") 1191 1192 try: 1193 return self._parse(parser, raw_tokens, sql) 1194 except ParseError as e: 1195 e.errors[0]["into_expression"] = expression_type 1196 errors.append(e) 1197 1198 raise ParseError( 1199 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1200 errors=merge_errors(errors), 1201 ) from errors[-1] 1202 1203 def _parse( 1204 self, 1205 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1206 raw_tokens: t.List[Token], 1207 sql: t.Optional[str] = None, 1208 ) -> t.List[t.Optional[exp.Expression]]: 1209 self.reset() 1210 self.sql = sql or "" 1211 1212 total = len(raw_tokens) 1213 chunks: t.List[t.List[Token]] = [[]] 1214 1215 for i, token in enumerate(raw_tokens): 1216 if token.token_type == TokenType.SEMICOLON: 1217 if i < total - 1: 1218 chunks.append([]) 1219 else: 1220 chunks[-1].append(token) 1221 1222 expressions = [] 1223 1224 for tokens in chunks: 1225 self._index = -1 1226 self._tokens = tokens 1227 self._advance() 1228 1229 expressions.append(parse_method(self)) 1230 1231 if self._index < len(self._tokens): 1232 self.raise_error("Invalid expression / Unexpected token") 1233 1234 self.check_errors() 1235 1236 return expressions 1237 1238 def check_errors(self) -> None: 1239 """Logs or raises any found errors, depending on the chosen error level setting.""" 1240 if self.error_level == ErrorLevel.WARN: 1241 for error in self.errors: 1242 logger.error(str(error)) 1243 elif self.error_level == ErrorLevel.RAISE and self.errors: 1244 raise ParseError( 1245 concat_messages(self.errors, self.max_errors), 1246 errors=merge_errors(self.errors), 1247 ) 1248 1249 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1250 """ 1251 Appends an error in the list of recorded errors or raises it, depending on the chosen 1252 error level setting. 1253 """ 1254 token = token or self._curr or self._prev or Token.string("") 1255 start = token.start 1256 end = token.end + 1 1257 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1258 highlight = self.sql[start:end] 1259 end_context = self.sql[end : end + self.error_message_context] 1260 1261 error = ParseError.new( 1262 f"{message}. Line {token.line}, Col: {token.col}.\n" 1263 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1264 description=message, 1265 line=token.line, 1266 col=token.col, 1267 start_context=start_context, 1268 highlight=highlight, 1269 end_context=end_context, 1270 ) 1271 1272 if self.error_level == ErrorLevel.IMMEDIATE: 1273 raise error 1274 1275 self.errors.append(error) 1276 1277 def expression( 1278 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1279 ) -> E: 1280 """ 1281 Creates a new, validated Expression. 1282 1283 Args: 1284 exp_class: The expression class to instantiate. 1285 comments: An optional list of comments to attach to the expression. 1286 kwargs: The arguments to set for the expression along with their respective values. 1287 1288 Returns: 1289 The target expression. 1290 """ 1291 instance = exp_class(**kwargs) 1292 instance.add_comments(comments) if comments else self._add_comments(instance) 1293 return self.validate_expression(instance) 1294 1295 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1296 if expression and self._prev_comments: 1297 expression.add_comments(self._prev_comments) 1298 self._prev_comments = None 1299 1300 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1301 """ 1302 Validates an Expression, making sure that all its mandatory arguments are set. 1303 1304 Args: 1305 expression: The expression to validate. 1306 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1307 1308 Returns: 1309 The validated expression. 1310 """ 1311 if self.error_level != ErrorLevel.IGNORE: 1312 for error_message in expression.error_messages(args): 1313 self.raise_error(error_message) 1314 1315 return expression 1316 1317 def _find_sql(self, start: Token, end: Token) -> str: 1318 return self.sql[start.start : end.end + 1] 1319 1320 def _is_connected(self) -> bool: 1321 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1322 1323 def _advance(self, times: int = 1) -> None: 1324 self._index += times 1325 self._curr = seq_get(self._tokens, self._index) 1326 self._next = seq_get(self._tokens, self._index + 1) 1327 1328 if self._index > 0: 1329 self._prev = self._tokens[self._index - 1] 1330 self._prev_comments = self._prev.comments 1331 else: 1332 self._prev = None 1333 self._prev_comments = None 1334 1335 def _retreat(self, index: int) -> None: 1336 if index != self._index: 1337 self._advance(index - self._index) 1338 1339 def _warn_unsupported(self) -> None: 1340 if len(self._tokens) <= 1: 1341 return 1342 1343 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1344 # interested in emitting a warning for the one being currently processed. 1345 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1346 1347 logger.warning( 1348 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1349 ) 1350 1351 def _parse_command(self) -> exp.Command: 1352 self._warn_unsupported() 1353 return self.expression( 1354 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1355 ) 1356 1357 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1358 """ 1359 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1360 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1361 the parser state accordingly 1362 """ 1363 index = self._index 1364 error_level = self.error_level 1365 1366 self.error_level = ErrorLevel.IMMEDIATE 1367 try: 1368 this = parse_method() 1369 except ParseError: 1370 this = None 1371 finally: 1372 if not this or retreat: 1373 self._retreat(index) 1374 self.error_level = error_level 1375 1376 return this 1377 1378 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1379 start = self._prev 1380 exists = self._parse_exists() if allow_exists else None 1381 1382 self._match(TokenType.ON) 1383 1384 kind = self._match_set(self.CREATABLES) and self._prev 1385 if not kind: 1386 return self._parse_as_command(start) 1387 1388 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1389 this = self._parse_user_defined_function(kind=kind.token_type) 1390 elif kind.token_type == TokenType.TABLE: 1391 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1392 elif kind.token_type == TokenType.COLUMN: 1393 this = self._parse_column() 1394 else: 1395 this = self._parse_id_var() 1396 1397 self._match(TokenType.IS) 1398 1399 return self.expression( 1400 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1401 ) 1402 1403 def _parse_to_table( 1404 self, 1405 ) -> exp.ToTableProperty: 1406 table = self._parse_table_parts(schema=True) 1407 return self.expression(exp.ToTableProperty, this=table) 1408 1409 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1410 def _parse_ttl(self) -> exp.Expression: 1411 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1412 this = self._parse_bitwise() 1413 1414 if self._match_text_seq("DELETE"): 1415 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1416 if self._match_text_seq("RECOMPRESS"): 1417 return self.expression( 1418 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1419 ) 1420 if self._match_text_seq("TO", "DISK"): 1421 return self.expression( 1422 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1423 ) 1424 if self._match_text_seq("TO", "VOLUME"): 1425 return self.expression( 1426 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1427 ) 1428 1429 return this 1430 1431 expressions = self._parse_csv(_parse_ttl_action) 1432 where = self._parse_where() 1433 group = self._parse_group() 1434 1435 aggregates = None 1436 if group and self._match(TokenType.SET): 1437 aggregates = self._parse_csv(self._parse_set_item) 1438 1439 return self.expression( 1440 exp.MergeTreeTTL, 1441 expressions=expressions, 1442 where=where, 1443 group=group, 1444 aggregates=aggregates, 1445 ) 1446 1447 def _parse_statement(self) -> t.Optional[exp.Expression]: 1448 if self._curr is None: 1449 return None 1450 1451 if self._match_set(self.STATEMENT_PARSERS): 1452 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1453 1454 if self._match_set(Tokenizer.COMMANDS): 1455 return self._parse_command() 1456 1457 expression = self._parse_expression() 1458 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1459 return self._parse_query_modifiers(expression) 1460 1461 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1462 start = self._prev 1463 temporary = self._match(TokenType.TEMPORARY) 1464 materialized = self._match_text_seq("MATERIALIZED") 1465 1466 kind = self._match_set(self.CREATABLES) and self._prev.text 1467 if not kind: 1468 return self._parse_as_command(start) 1469 1470 if_exists = exists or self._parse_exists() 1471 table = self._parse_table_parts( 1472 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1473 ) 1474 1475 if self._match(TokenType.L_PAREN, advance=False): 1476 expressions = self._parse_wrapped_csv(self._parse_types) 1477 else: 1478 expressions = None 1479 1480 return self.expression( 1481 exp.Drop, 1482 comments=start.comments, 1483 exists=if_exists, 1484 this=table, 1485 expressions=expressions, 1486 kind=kind, 1487 temporary=temporary, 1488 materialized=materialized, 1489 cascade=self._match_text_seq("CASCADE"), 1490 constraints=self._match_text_seq("CONSTRAINTS"), 1491 purge=self._match_text_seq("PURGE"), 1492 ) 1493 1494 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1495 return ( 1496 self._match_text_seq("IF") 1497 and (not not_ or self._match(TokenType.NOT)) 1498 and self._match(TokenType.EXISTS) 1499 ) 1500 1501 def _parse_create(self) -> exp.Create | exp.Command: 1502 # Note: this can't be None because we've matched a statement parser 1503 start = self._prev 1504 comments = self._prev_comments 1505 1506 replace = ( 1507 start.token_type == TokenType.REPLACE 1508 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1509 or self._match_pair(TokenType.OR, TokenType.ALTER) 1510 ) 1511 1512 unique = self._match(TokenType.UNIQUE) 1513 1514 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1515 self._advance() 1516 1517 properties = None 1518 create_token = self._match_set(self.CREATABLES) and self._prev 1519 1520 if not create_token: 1521 # exp.Properties.Location.POST_CREATE 1522 properties = self._parse_properties() 1523 create_token = self._match_set(self.CREATABLES) and self._prev 1524 1525 if not properties or not create_token: 1526 return self._parse_as_command(start) 1527 1528 exists = self._parse_exists(not_=True) 1529 this = None 1530 expression: t.Optional[exp.Expression] = None 1531 indexes = None 1532 no_schema_binding = None 1533 begin = None 1534 end = None 1535 clone = None 1536 1537 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1538 nonlocal properties 1539 if properties and temp_props: 1540 properties.expressions.extend(temp_props.expressions) 1541 elif temp_props: 1542 properties = temp_props 1543 1544 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1545 this = self._parse_user_defined_function(kind=create_token.token_type) 1546 1547 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1548 extend_props(self._parse_properties()) 1549 1550 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1551 1552 if not expression: 1553 if self._match(TokenType.COMMAND): 1554 expression = self._parse_as_command(self._prev) 1555 else: 1556 begin = self._match(TokenType.BEGIN) 1557 return_ = self._match_text_seq("RETURN") 1558 1559 if self._match(TokenType.STRING, advance=False): 1560 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1561 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1562 expression = self._parse_string() 1563 extend_props(self._parse_properties()) 1564 else: 1565 expression = self._parse_statement() 1566 1567 end = self._match_text_seq("END") 1568 1569 if return_: 1570 expression = self.expression(exp.Return, this=expression) 1571 elif create_token.token_type == TokenType.INDEX: 1572 this = self._parse_index(index=self._parse_id_var()) 1573 elif create_token.token_type in self.DB_CREATABLES: 1574 table_parts = self._parse_table_parts( 1575 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1576 ) 1577 1578 # exp.Properties.Location.POST_NAME 1579 self._match(TokenType.COMMA) 1580 extend_props(self._parse_properties(before=True)) 1581 1582 this = self._parse_schema(this=table_parts) 1583 1584 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1585 extend_props(self._parse_properties()) 1586 1587 self._match(TokenType.ALIAS) 1588 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1589 # exp.Properties.Location.POST_ALIAS 1590 extend_props(self._parse_properties()) 1591 1592 if create_token.token_type == TokenType.SEQUENCE: 1593 expression = self._parse_types() 1594 extend_props(self._parse_properties()) 1595 else: 1596 expression = self._parse_ddl_select() 1597 1598 if create_token.token_type == TokenType.TABLE: 1599 # exp.Properties.Location.POST_EXPRESSION 1600 extend_props(self._parse_properties()) 1601 1602 indexes = [] 1603 while True: 1604 index = self._parse_index() 1605 1606 # exp.Properties.Location.POST_INDEX 1607 extend_props(self._parse_properties()) 1608 1609 if not index: 1610 break 1611 else: 1612 self._match(TokenType.COMMA) 1613 indexes.append(index) 1614 elif create_token.token_type == TokenType.VIEW: 1615 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1616 no_schema_binding = True 1617 1618 shallow = self._match_text_seq("SHALLOW") 1619 1620 if self._match_texts(self.CLONE_KEYWORDS): 1621 copy = self._prev.text.lower() == "copy" 1622 clone = self.expression( 1623 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1624 ) 1625 1626 if self._curr: 1627 return self._parse_as_command(start) 1628 1629 return self.expression( 1630 exp.Create, 1631 comments=comments, 1632 this=this, 1633 kind=create_token.text.upper(), 1634 replace=replace, 1635 unique=unique, 1636 expression=expression, 1637 exists=exists, 1638 properties=properties, 1639 indexes=indexes, 1640 no_schema_binding=no_schema_binding, 1641 begin=begin, 1642 end=end, 1643 clone=clone, 1644 ) 1645 1646 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1647 seq = exp.SequenceProperties() 1648 1649 options = [] 1650 index = self._index 1651 1652 while self._curr: 1653 if self._match_text_seq("INCREMENT"): 1654 self._match_text_seq("BY") 1655 self._match_text_seq("=") 1656 seq.set("increment", self._parse_term()) 1657 elif self._match_text_seq("MINVALUE"): 1658 seq.set("minvalue", self._parse_term()) 1659 elif self._match_text_seq("MAXVALUE"): 1660 seq.set("maxvalue", self._parse_term()) 1661 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1662 self._match_text_seq("=") 1663 seq.set("start", self._parse_term()) 1664 elif self._match_text_seq("CACHE"): 1665 # T-SQL allows empty CACHE which is initialized dynamically 1666 seq.set("cache", self._parse_number() or True) 1667 elif self._match_text_seq("OWNED", "BY"): 1668 # "OWNED BY NONE" is the default 1669 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1670 else: 1671 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1672 if opt: 1673 options.append(opt) 1674 else: 1675 break 1676 1677 seq.set("options", options if options else None) 1678 return None if self._index == index else seq 1679 1680 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1681 # only used for teradata currently 1682 self._match(TokenType.COMMA) 1683 1684 kwargs = { 1685 "no": self._match_text_seq("NO"), 1686 "dual": self._match_text_seq("DUAL"), 1687 "before": self._match_text_seq("BEFORE"), 1688 "default": self._match_text_seq("DEFAULT"), 1689 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1690 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1691 "after": self._match_text_seq("AFTER"), 1692 "minimum": self._match_texts(("MIN", "MINIMUM")), 1693 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1694 } 1695 1696 if self._match_texts(self.PROPERTY_PARSERS): 1697 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1698 try: 1699 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1700 except TypeError: 1701 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1702 1703 return None 1704 1705 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1706 return self._parse_wrapped_csv(self._parse_property) 1707 1708 def _parse_property(self) -> t.Optional[exp.Expression]: 1709 if self._match_texts(self.PROPERTY_PARSERS): 1710 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1711 1712 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1713 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1714 1715 if self._match_text_seq("COMPOUND", "SORTKEY"): 1716 return self._parse_sortkey(compound=True) 1717 1718 if self._match_text_seq("SQL", "SECURITY"): 1719 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1720 1721 index = self._index 1722 key = self._parse_column() 1723 1724 if not self._match(TokenType.EQ): 1725 self._retreat(index) 1726 return self._parse_sequence_properties() 1727 1728 return self.expression( 1729 exp.Property, 1730 this=key.to_dot() if isinstance(key, exp.Column) else key, 1731 value=self._parse_bitwise() or self._parse_var(any_token=True), 1732 ) 1733 1734 def _parse_stored(self) -> exp.FileFormatProperty: 1735 self._match(TokenType.ALIAS) 1736 1737 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1738 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1739 1740 return self.expression( 1741 exp.FileFormatProperty, 1742 this=( 1743 self.expression( 1744 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1745 ) 1746 if input_format or output_format 1747 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1748 ), 1749 ) 1750 1751 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1752 self._match(TokenType.EQ) 1753 self._match(TokenType.ALIAS) 1754 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1755 1756 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1757 properties = [] 1758 while True: 1759 if before: 1760 prop = self._parse_property_before() 1761 else: 1762 prop = self._parse_property() 1763 if not prop: 1764 break 1765 for p in ensure_list(prop): 1766 properties.append(p) 1767 1768 if properties: 1769 return self.expression(exp.Properties, expressions=properties) 1770 1771 return None 1772 1773 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1774 return self.expression( 1775 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1776 ) 1777 1778 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1779 if self._index >= 2: 1780 pre_volatile_token = self._tokens[self._index - 2] 1781 else: 1782 pre_volatile_token = None 1783 1784 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1785 return exp.VolatileProperty() 1786 1787 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1788 1789 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1790 self._match_pair(TokenType.EQ, TokenType.ON) 1791 1792 prop = self.expression(exp.WithSystemVersioningProperty) 1793 if self._match(TokenType.L_PAREN): 1794 self._match_text_seq("HISTORY_TABLE", "=") 1795 prop.set("this", self._parse_table_parts()) 1796 1797 if self._match(TokenType.COMMA): 1798 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1799 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1800 1801 self._match_r_paren() 1802 1803 return prop 1804 1805 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1806 if self._match(TokenType.L_PAREN, advance=False): 1807 return self._parse_wrapped_properties() 1808 1809 if self._match_text_seq("JOURNAL"): 1810 return self._parse_withjournaltable() 1811 1812 if self._match_texts(self.VIEW_ATTRIBUTES): 1813 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1814 1815 if self._match_text_seq("DATA"): 1816 return self._parse_withdata(no=False) 1817 elif self._match_text_seq("NO", "DATA"): 1818 return self._parse_withdata(no=True) 1819 1820 if not self._next: 1821 return None 1822 1823 return self._parse_withisolatedloading() 1824 1825 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1826 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1827 self._match(TokenType.EQ) 1828 1829 user = self._parse_id_var() 1830 self._match(TokenType.PARAMETER) 1831 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1832 1833 if not user or not host: 1834 return None 1835 1836 return exp.DefinerProperty(this=f"{user}@{host}") 1837 1838 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1839 self._match(TokenType.TABLE) 1840 self._match(TokenType.EQ) 1841 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1842 1843 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1844 return self.expression(exp.LogProperty, no=no) 1845 1846 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1847 return self.expression(exp.JournalProperty, **kwargs) 1848 1849 def _parse_checksum(self) -> exp.ChecksumProperty: 1850 self._match(TokenType.EQ) 1851 1852 on = None 1853 if self._match(TokenType.ON): 1854 on = True 1855 elif self._match_text_seq("OFF"): 1856 on = False 1857 1858 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1859 1860 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1861 return self.expression( 1862 exp.Cluster, 1863 expressions=( 1864 self._parse_wrapped_csv(self._parse_ordered) 1865 if wrapped 1866 else self._parse_csv(self._parse_ordered) 1867 ), 1868 ) 1869 1870 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1871 self._match_text_seq("BY") 1872 1873 self._match_l_paren() 1874 expressions = self._parse_csv(self._parse_column) 1875 self._match_r_paren() 1876 1877 if self._match_text_seq("SORTED", "BY"): 1878 self._match_l_paren() 1879 sorted_by = self._parse_csv(self._parse_ordered) 1880 self._match_r_paren() 1881 else: 1882 sorted_by = None 1883 1884 self._match(TokenType.INTO) 1885 buckets = self._parse_number() 1886 self._match_text_seq("BUCKETS") 1887 1888 return self.expression( 1889 exp.ClusteredByProperty, 1890 expressions=expressions, 1891 sorted_by=sorted_by, 1892 buckets=buckets, 1893 ) 1894 1895 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1896 if not self._match_text_seq("GRANTS"): 1897 self._retreat(self._index - 1) 1898 return None 1899 1900 return self.expression(exp.CopyGrantsProperty) 1901 1902 def _parse_freespace(self) -> exp.FreespaceProperty: 1903 self._match(TokenType.EQ) 1904 return self.expression( 1905 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1906 ) 1907 1908 def _parse_mergeblockratio( 1909 self, no: bool = False, default: bool = False 1910 ) -> exp.MergeBlockRatioProperty: 1911 if self._match(TokenType.EQ): 1912 return self.expression( 1913 exp.MergeBlockRatioProperty, 1914 this=self._parse_number(), 1915 percent=self._match(TokenType.PERCENT), 1916 ) 1917 1918 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1919 1920 def _parse_datablocksize( 1921 self, 1922 default: t.Optional[bool] = None, 1923 minimum: t.Optional[bool] = None, 1924 maximum: t.Optional[bool] = None, 1925 ) -> exp.DataBlocksizeProperty: 1926 self._match(TokenType.EQ) 1927 size = self._parse_number() 1928 1929 units = None 1930 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1931 units = self._prev.text 1932 1933 return self.expression( 1934 exp.DataBlocksizeProperty, 1935 size=size, 1936 units=units, 1937 default=default, 1938 minimum=minimum, 1939 maximum=maximum, 1940 ) 1941 1942 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1943 self._match(TokenType.EQ) 1944 always = self._match_text_seq("ALWAYS") 1945 manual = self._match_text_seq("MANUAL") 1946 never = self._match_text_seq("NEVER") 1947 default = self._match_text_seq("DEFAULT") 1948 1949 autotemp = None 1950 if self._match_text_seq("AUTOTEMP"): 1951 autotemp = self._parse_schema() 1952 1953 return self.expression( 1954 exp.BlockCompressionProperty, 1955 always=always, 1956 manual=manual, 1957 never=never, 1958 default=default, 1959 autotemp=autotemp, 1960 ) 1961 1962 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1963 index = self._index 1964 no = self._match_text_seq("NO") 1965 concurrent = self._match_text_seq("CONCURRENT") 1966 1967 if not self._match_text_seq("ISOLATED", "LOADING"): 1968 self._retreat(index) 1969 return None 1970 1971 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1972 return self.expression( 1973 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1974 ) 1975 1976 def _parse_locking(self) -> exp.LockingProperty: 1977 if self._match(TokenType.TABLE): 1978 kind = "TABLE" 1979 elif self._match(TokenType.VIEW): 1980 kind = "VIEW" 1981 elif self._match(TokenType.ROW): 1982 kind = "ROW" 1983 elif self._match_text_seq("DATABASE"): 1984 kind = "DATABASE" 1985 else: 1986 kind = None 1987 1988 if kind in ("DATABASE", "TABLE", "VIEW"): 1989 this = self._parse_table_parts() 1990 else: 1991 this = None 1992 1993 if self._match(TokenType.FOR): 1994 for_or_in = "FOR" 1995 elif self._match(TokenType.IN): 1996 for_or_in = "IN" 1997 else: 1998 for_or_in = None 1999 2000 if self._match_text_seq("ACCESS"): 2001 lock_type = "ACCESS" 2002 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2003 lock_type = "EXCLUSIVE" 2004 elif self._match_text_seq("SHARE"): 2005 lock_type = "SHARE" 2006 elif self._match_text_seq("READ"): 2007 lock_type = "READ" 2008 elif self._match_text_seq("WRITE"): 2009 lock_type = "WRITE" 2010 elif self._match_text_seq("CHECKSUM"): 2011 lock_type = "CHECKSUM" 2012 else: 2013 lock_type = None 2014 2015 override = self._match_text_seq("OVERRIDE") 2016 2017 return self.expression( 2018 exp.LockingProperty, 2019 this=this, 2020 kind=kind, 2021 for_or_in=for_or_in, 2022 lock_type=lock_type, 2023 override=override, 2024 ) 2025 2026 def _parse_partition_by(self) -> t.List[exp.Expression]: 2027 if self._match(TokenType.PARTITION_BY): 2028 return self._parse_csv(self._parse_conjunction) 2029 return [] 2030 2031 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2032 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2033 if self._match_text_seq("MINVALUE"): 2034 return exp.var("MINVALUE") 2035 if self._match_text_seq("MAXVALUE"): 2036 return exp.var("MAXVALUE") 2037 return self._parse_bitwise() 2038 2039 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2040 expression = None 2041 from_expressions = None 2042 to_expressions = None 2043 2044 if self._match(TokenType.IN): 2045 this = self._parse_wrapped_csv(self._parse_bitwise) 2046 elif self._match(TokenType.FROM): 2047 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2048 self._match_text_seq("TO") 2049 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2050 elif self._match_text_seq("WITH", "(", "MODULUS"): 2051 this = self._parse_number() 2052 self._match_text_seq(",", "REMAINDER") 2053 expression = self._parse_number() 2054 self._match_r_paren() 2055 else: 2056 self.raise_error("Failed to parse partition bound spec.") 2057 2058 return self.expression( 2059 exp.PartitionBoundSpec, 2060 this=this, 2061 expression=expression, 2062 from_expressions=from_expressions, 2063 to_expressions=to_expressions, 2064 ) 2065 2066 # https://www.postgresql.org/docs/current/sql-createtable.html 2067 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2068 if not self._match_text_seq("OF"): 2069 self._retreat(self._index - 1) 2070 return None 2071 2072 this = self._parse_table(schema=True) 2073 2074 if self._match(TokenType.DEFAULT): 2075 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2076 elif self._match_text_seq("FOR", "VALUES"): 2077 expression = self._parse_partition_bound_spec() 2078 else: 2079 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2080 2081 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2082 2083 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2084 self._match(TokenType.EQ) 2085 return self.expression( 2086 exp.PartitionedByProperty, 2087 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2088 ) 2089 2090 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2091 if self._match_text_seq("AND", "STATISTICS"): 2092 statistics = True 2093 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2094 statistics = False 2095 else: 2096 statistics = None 2097 2098 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2099 2100 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2101 if self._match_text_seq("SQL"): 2102 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2103 return None 2104 2105 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2106 if self._match_text_seq("SQL", "DATA"): 2107 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2108 return None 2109 2110 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2111 if self._match_text_seq("PRIMARY", "INDEX"): 2112 return exp.NoPrimaryIndexProperty() 2113 if self._match_text_seq("SQL"): 2114 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2115 return None 2116 2117 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2118 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2119 return exp.OnCommitProperty() 2120 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2121 return exp.OnCommitProperty(delete=True) 2122 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2123 2124 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2125 if self._match_text_seq("SQL", "DATA"): 2126 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2127 return None 2128 2129 def _parse_distkey(self) -> exp.DistKeyProperty: 2130 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2131 2132 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2133 table = self._parse_table(schema=True) 2134 2135 options = [] 2136 while self._match_texts(("INCLUDING", "EXCLUDING")): 2137 this = self._prev.text.upper() 2138 2139 id_var = self._parse_id_var() 2140 if not id_var: 2141 return None 2142 2143 options.append( 2144 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2145 ) 2146 2147 return self.expression(exp.LikeProperty, this=table, expressions=options) 2148 2149 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2150 return self.expression( 2151 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2152 ) 2153 2154 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2155 self._match(TokenType.EQ) 2156 return self.expression( 2157 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2158 ) 2159 2160 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2161 self._match_text_seq("WITH", "CONNECTION") 2162 return self.expression( 2163 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2164 ) 2165 2166 def _parse_returns(self) -> exp.ReturnsProperty: 2167 value: t.Optional[exp.Expression] 2168 is_table = self._match(TokenType.TABLE) 2169 2170 if is_table: 2171 if self._match(TokenType.LT): 2172 value = self.expression( 2173 exp.Schema, 2174 this="TABLE", 2175 expressions=self._parse_csv(self._parse_struct_types), 2176 ) 2177 if not self._match(TokenType.GT): 2178 self.raise_error("Expecting >") 2179 else: 2180 value = self._parse_schema(exp.var("TABLE")) 2181 else: 2182 value = self._parse_types() 2183 2184 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2185 2186 def _parse_describe(self) -> exp.Describe: 2187 kind = self._match_set(self.CREATABLES) and self._prev.text 2188 extended = self._match_text_seq("EXTENDED") 2189 this = self._parse_table(schema=True) 2190 properties = self._parse_properties() 2191 expressions = properties.expressions if properties else None 2192 return self.expression( 2193 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2194 ) 2195 2196 def _parse_insert(self) -> exp.Insert: 2197 comments = ensure_list(self._prev_comments) 2198 hint = self._parse_hint() 2199 overwrite = self._match(TokenType.OVERWRITE) 2200 ignore = self._match(TokenType.IGNORE) 2201 local = self._match_text_seq("LOCAL") 2202 alternative = None 2203 is_function = None 2204 2205 if self._match_text_seq("DIRECTORY"): 2206 this: t.Optional[exp.Expression] = self.expression( 2207 exp.Directory, 2208 this=self._parse_var_or_string(), 2209 local=local, 2210 row_format=self._parse_row_format(match_row=True), 2211 ) 2212 else: 2213 if self._match(TokenType.OR): 2214 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2215 2216 self._match(TokenType.INTO) 2217 comments += ensure_list(self._prev_comments) 2218 self._match(TokenType.TABLE) 2219 is_function = self._match(TokenType.FUNCTION) 2220 2221 this = self._parse_table(schema=True) if not is_function else self._parse_function() 2222 2223 returning = self._parse_returning() 2224 2225 return self.expression( 2226 exp.Insert, 2227 comments=comments, 2228 hint=hint, 2229 is_function=is_function, 2230 this=this, 2231 by_name=self._match_text_seq("BY", "NAME"), 2232 exists=self._parse_exists(), 2233 partition=self._parse_partition(), 2234 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2235 and self._parse_conjunction(), 2236 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2237 conflict=self._parse_on_conflict(), 2238 returning=returning or self._parse_returning(), 2239 overwrite=overwrite, 2240 alternative=alternative, 2241 ignore=ignore, 2242 ) 2243 2244 def _parse_kill(self) -> exp.Kill: 2245 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2246 2247 return self.expression( 2248 exp.Kill, 2249 this=self._parse_primary(), 2250 kind=kind, 2251 ) 2252 2253 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2254 conflict = self._match_text_seq("ON", "CONFLICT") 2255 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2256 2257 if not conflict and not duplicate: 2258 return None 2259 2260 conflict_keys = None 2261 constraint = None 2262 2263 if conflict: 2264 if self._match_text_seq("ON", "CONSTRAINT"): 2265 constraint = self._parse_id_var() 2266 elif self._match(TokenType.L_PAREN): 2267 conflict_keys = self._parse_csv(self._parse_id_var) 2268 self._match_r_paren() 2269 2270 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2271 if self._prev.token_type == TokenType.UPDATE: 2272 self._match(TokenType.SET) 2273 expressions = self._parse_csv(self._parse_equality) 2274 else: 2275 expressions = None 2276 2277 return self.expression( 2278 exp.OnConflict, 2279 duplicate=duplicate, 2280 expressions=expressions, 2281 action=action, 2282 conflict_keys=conflict_keys, 2283 constraint=constraint, 2284 ) 2285 2286 def _parse_returning(self) -> t.Optional[exp.Returning]: 2287 if not self._match(TokenType.RETURNING): 2288 return None 2289 return self.expression( 2290 exp.Returning, 2291 expressions=self._parse_csv(self._parse_expression), 2292 into=self._match(TokenType.INTO) and self._parse_table_part(), 2293 ) 2294 2295 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2296 if not self._match(TokenType.FORMAT): 2297 return None 2298 return self._parse_row_format() 2299 2300 def _parse_row_format( 2301 self, match_row: bool = False 2302 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2303 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2304 return None 2305 2306 if self._match_text_seq("SERDE"): 2307 this = self._parse_string() 2308 2309 serde_properties = None 2310 if self._match(TokenType.SERDE_PROPERTIES): 2311 serde_properties = self.expression( 2312 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2313 ) 2314 2315 return self.expression( 2316 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2317 ) 2318 2319 self._match_text_seq("DELIMITED") 2320 2321 kwargs = {} 2322 2323 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2324 kwargs["fields"] = self._parse_string() 2325 if self._match_text_seq("ESCAPED", "BY"): 2326 kwargs["escaped"] = self._parse_string() 2327 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2328 kwargs["collection_items"] = self._parse_string() 2329 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2330 kwargs["map_keys"] = self._parse_string() 2331 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2332 kwargs["lines"] = self._parse_string() 2333 if self._match_text_seq("NULL", "DEFINED", "AS"): 2334 kwargs["null"] = self._parse_string() 2335 2336 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2337 2338 def _parse_load(self) -> exp.LoadData | exp.Command: 2339 if self._match_text_seq("DATA"): 2340 local = self._match_text_seq("LOCAL") 2341 self._match_text_seq("INPATH") 2342 inpath = self._parse_string() 2343 overwrite = self._match(TokenType.OVERWRITE) 2344 self._match_pair(TokenType.INTO, TokenType.TABLE) 2345 2346 return self.expression( 2347 exp.LoadData, 2348 this=self._parse_table(schema=True), 2349 local=local, 2350 overwrite=overwrite, 2351 inpath=inpath, 2352 partition=self._parse_partition(), 2353 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2354 serde=self._match_text_seq("SERDE") and self._parse_string(), 2355 ) 2356 return self._parse_as_command(self._prev) 2357 2358 def _parse_delete(self) -> exp.Delete: 2359 # This handles MySQL's "Multiple-Table Syntax" 2360 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2361 tables = None 2362 comments = self._prev_comments 2363 if not self._match(TokenType.FROM, advance=False): 2364 tables = self._parse_csv(self._parse_table) or None 2365 2366 returning = self._parse_returning() 2367 2368 return self.expression( 2369 exp.Delete, 2370 comments=comments, 2371 tables=tables, 2372 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2373 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2374 where=self._parse_where(), 2375 returning=returning or self._parse_returning(), 2376 limit=self._parse_limit(), 2377 ) 2378 2379 def _parse_update(self) -> exp.Update: 2380 comments = self._prev_comments 2381 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2382 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2383 returning = self._parse_returning() 2384 return self.expression( 2385 exp.Update, 2386 comments=comments, 2387 **{ # type: ignore 2388 "this": this, 2389 "expressions": expressions, 2390 "from": self._parse_from(joins=True), 2391 "where": self._parse_where(), 2392 "returning": returning or self._parse_returning(), 2393 "order": self._parse_order(), 2394 "limit": self._parse_limit(), 2395 }, 2396 ) 2397 2398 def _parse_uncache(self) -> exp.Uncache: 2399 if not self._match(TokenType.TABLE): 2400 self.raise_error("Expecting TABLE after UNCACHE") 2401 2402 return self.expression( 2403 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2404 ) 2405 2406 def _parse_cache(self) -> exp.Cache: 2407 lazy = self._match_text_seq("LAZY") 2408 self._match(TokenType.TABLE) 2409 table = self._parse_table(schema=True) 2410 2411 options = [] 2412 if self._match_text_seq("OPTIONS"): 2413 self._match_l_paren() 2414 k = self._parse_string() 2415 self._match(TokenType.EQ) 2416 v = self._parse_string() 2417 options = [k, v] 2418 self._match_r_paren() 2419 2420 self._match(TokenType.ALIAS) 2421 return self.expression( 2422 exp.Cache, 2423 this=table, 2424 lazy=lazy, 2425 options=options, 2426 expression=self._parse_select(nested=True), 2427 ) 2428 2429 def _parse_partition(self) -> t.Optional[exp.Partition]: 2430 if not self._match(TokenType.PARTITION): 2431 return None 2432 2433 return self.expression( 2434 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2435 ) 2436 2437 def _parse_value(self) -> exp.Tuple: 2438 if self._match(TokenType.L_PAREN): 2439 expressions = self._parse_csv(self._parse_expression) 2440 self._match_r_paren() 2441 return self.expression(exp.Tuple, expressions=expressions) 2442 2443 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2444 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2445 2446 def _parse_projections(self) -> t.List[exp.Expression]: 2447 return self._parse_expressions() 2448 2449 def _parse_select( 2450 self, 2451 nested: bool = False, 2452 table: bool = False, 2453 parse_subquery_alias: bool = True, 2454 parse_set_operation: bool = True, 2455 ) -> t.Optional[exp.Expression]: 2456 cte = self._parse_with() 2457 2458 if cte: 2459 this = self._parse_statement() 2460 2461 if not this: 2462 self.raise_error("Failed to parse any statement following CTE") 2463 return cte 2464 2465 if "with" in this.arg_types: 2466 this.set("with", cte) 2467 else: 2468 self.raise_error(f"{this.key} does not support CTE") 2469 this = cte 2470 2471 return this 2472 2473 # duckdb supports leading with FROM x 2474 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2475 2476 if self._match(TokenType.SELECT): 2477 comments = self._prev_comments 2478 2479 hint = self._parse_hint() 2480 all_ = self._match(TokenType.ALL) 2481 distinct = self._match_set(self.DISTINCT_TOKENS) 2482 2483 kind = ( 2484 self._match(TokenType.ALIAS) 2485 and self._match_texts(("STRUCT", "VALUE")) 2486 and self._prev.text.upper() 2487 ) 2488 2489 if distinct: 2490 distinct = self.expression( 2491 exp.Distinct, 2492 on=self._parse_value() if self._match(TokenType.ON) else None, 2493 ) 2494 2495 if all_ and distinct: 2496 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2497 2498 limit = self._parse_limit(top=True) 2499 projections = self._parse_projections() 2500 2501 this = self.expression( 2502 exp.Select, 2503 kind=kind, 2504 hint=hint, 2505 distinct=distinct, 2506 expressions=projections, 2507 limit=limit, 2508 ) 2509 this.comments = comments 2510 2511 into = self._parse_into() 2512 if into: 2513 this.set("into", into) 2514 2515 if not from_: 2516 from_ = self._parse_from() 2517 2518 if from_: 2519 this.set("from", from_) 2520 2521 this = self._parse_query_modifiers(this) 2522 elif (table or nested) and self._match(TokenType.L_PAREN): 2523 if self._match(TokenType.PIVOT): 2524 this = self._parse_simplified_pivot() 2525 elif self._match(TokenType.FROM): 2526 this = exp.select("*").from_( 2527 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2528 ) 2529 else: 2530 this = ( 2531 self._parse_table() 2532 if table 2533 else self._parse_select(nested=True, parse_set_operation=False) 2534 ) 2535 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2536 2537 self._match_r_paren() 2538 2539 # We return early here so that the UNION isn't attached to the subquery by the 2540 # following call to _parse_set_operations, but instead becomes the parent node 2541 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2542 elif self._match(TokenType.VALUES, advance=False): 2543 this = self._parse_derived_table_values() 2544 elif from_: 2545 this = exp.select("*").from_(from_.this, copy=False) 2546 else: 2547 this = None 2548 2549 if parse_set_operation: 2550 return self._parse_set_operations(this) 2551 return this 2552 2553 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2554 if not skip_with_token and not self._match(TokenType.WITH): 2555 return None 2556 2557 comments = self._prev_comments 2558 recursive = self._match(TokenType.RECURSIVE) 2559 2560 expressions = [] 2561 while True: 2562 expressions.append(self._parse_cte()) 2563 2564 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2565 break 2566 else: 2567 self._match(TokenType.WITH) 2568 2569 return self.expression( 2570 exp.With, comments=comments, expressions=expressions, recursive=recursive 2571 ) 2572 2573 def _parse_cte(self) -> exp.CTE: 2574 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2575 if not alias or not alias.this: 2576 self.raise_error("Expected CTE to have alias") 2577 2578 self._match(TokenType.ALIAS) 2579 2580 if self._match_text_seq("NOT", "MATERIALIZED"): 2581 materialized = False 2582 elif self._match_text_seq("MATERIALIZED"): 2583 materialized = True 2584 else: 2585 materialized = None 2586 2587 return self.expression( 2588 exp.CTE, 2589 this=self._parse_wrapped(self._parse_statement), 2590 alias=alias, 2591 materialized=materialized, 2592 ) 2593 2594 def _parse_table_alias( 2595 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2596 ) -> t.Optional[exp.TableAlias]: 2597 any_token = self._match(TokenType.ALIAS) 2598 alias = ( 2599 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2600 or self._parse_string_as_identifier() 2601 ) 2602 2603 index = self._index 2604 if self._match(TokenType.L_PAREN): 2605 columns = self._parse_csv(self._parse_function_parameter) 2606 self._match_r_paren() if columns else self._retreat(index) 2607 else: 2608 columns = None 2609 2610 if not alias and not columns: 2611 return None 2612 2613 return self.expression(exp.TableAlias, this=alias, columns=columns) 2614 2615 def _parse_subquery( 2616 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2617 ) -> t.Optional[exp.Subquery]: 2618 if not this: 2619 return None 2620 2621 return self.expression( 2622 exp.Subquery, 2623 this=this, 2624 pivots=self._parse_pivots(), 2625 alias=self._parse_table_alias() if parse_alias else None, 2626 ) 2627 2628 def _implicit_unnests_to_explicit(self, this: E) -> E: 2629 from sqlglot.optimizer.normalize_identifiers import ( 2630 normalize_identifiers as _norm, 2631 ) 2632 2633 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2634 for i, join in enumerate(this.args.get("joins") or []): 2635 table = join.this 2636 normalized_table = table.copy() 2637 normalized_table.meta["maybe_column"] = True 2638 normalized_table = _norm(normalized_table, dialect=self.dialect) 2639 2640 if isinstance(table, exp.Table) and not join.args.get("on"): 2641 if normalized_table.parts[0].name in refs: 2642 table_as_column = table.to_column() 2643 unnest = exp.Unnest(expressions=[table_as_column]) 2644 2645 # Table.to_column creates a parent Alias node that we want to convert to 2646 # a TableAlias and attach to the Unnest, so it matches the parser's output 2647 if isinstance(table.args.get("alias"), exp.TableAlias): 2648 table_as_column.replace(table_as_column.this) 2649 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2650 2651 table.replace(unnest) 2652 2653 refs.add(normalized_table.alias_or_name) 2654 2655 return this 2656 2657 def _parse_query_modifiers( 2658 self, this: t.Optional[exp.Expression] 2659 ) -> t.Optional[exp.Expression]: 2660 if isinstance(this, (exp.Query, exp.Table)): 2661 for join in self._parse_joins(): 2662 this.append("joins", join) 2663 for lateral in iter(self._parse_lateral, None): 2664 this.append("laterals", lateral) 2665 2666 while True: 2667 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2668 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2669 key, expression = parser(self) 2670 2671 if expression: 2672 this.set(key, expression) 2673 if key == "limit": 2674 offset = expression.args.pop("offset", None) 2675 2676 if offset: 2677 offset = exp.Offset(expression=offset) 2678 this.set("offset", offset) 2679 2680 limit_by_expressions = expression.expressions 2681 expression.set("expressions", None) 2682 offset.set("expressions", limit_by_expressions) 2683 continue 2684 break 2685 2686 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2687 this = self._implicit_unnests_to_explicit(this) 2688 2689 return this 2690 2691 def _parse_hint(self) -> t.Optional[exp.Hint]: 2692 if self._match(TokenType.HINT): 2693 hints = [] 2694 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2695 hints.extend(hint) 2696 2697 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2698 self.raise_error("Expected */ after HINT") 2699 2700 return self.expression(exp.Hint, expressions=hints) 2701 2702 return None 2703 2704 def _parse_into(self) -> t.Optional[exp.Into]: 2705 if not self._match(TokenType.INTO): 2706 return None 2707 2708 temp = self._match(TokenType.TEMPORARY) 2709 unlogged = self._match_text_seq("UNLOGGED") 2710 self._match(TokenType.TABLE) 2711 2712 return self.expression( 2713 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2714 ) 2715 2716 def _parse_from( 2717 self, joins: bool = False, skip_from_token: bool = False 2718 ) -> t.Optional[exp.From]: 2719 if not skip_from_token and not self._match(TokenType.FROM): 2720 return None 2721 2722 return self.expression( 2723 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2724 ) 2725 2726 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2727 if not self._match(TokenType.MATCH_RECOGNIZE): 2728 return None 2729 2730 self._match_l_paren() 2731 2732 partition = self._parse_partition_by() 2733 order = self._parse_order() 2734 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2735 2736 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2737 rows = exp.var("ONE ROW PER MATCH") 2738 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2739 text = "ALL ROWS PER MATCH" 2740 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2741 text += " SHOW EMPTY MATCHES" 2742 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2743 text += " OMIT EMPTY MATCHES" 2744 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2745 text += " WITH UNMATCHED ROWS" 2746 rows = exp.var(text) 2747 else: 2748 rows = None 2749 2750 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2751 text = "AFTER MATCH SKIP" 2752 if self._match_text_seq("PAST", "LAST", "ROW"): 2753 text += " PAST LAST ROW" 2754 elif self._match_text_seq("TO", "NEXT", "ROW"): 2755 text += " TO NEXT ROW" 2756 elif self._match_text_seq("TO", "FIRST"): 2757 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2758 elif self._match_text_seq("TO", "LAST"): 2759 text += f" TO LAST {self._advance_any().text}" # type: ignore 2760 after = exp.var(text) 2761 else: 2762 after = None 2763 2764 if self._match_text_seq("PATTERN"): 2765 self._match_l_paren() 2766 2767 if not self._curr: 2768 self.raise_error("Expecting )", self._curr) 2769 2770 paren = 1 2771 start = self._curr 2772 2773 while self._curr and paren > 0: 2774 if self._curr.token_type == TokenType.L_PAREN: 2775 paren += 1 2776 if self._curr.token_type == TokenType.R_PAREN: 2777 paren -= 1 2778 2779 end = self._prev 2780 self._advance() 2781 2782 if paren > 0: 2783 self.raise_error("Expecting )", self._curr) 2784 2785 pattern = exp.var(self._find_sql(start, end)) 2786 else: 2787 pattern = None 2788 2789 define = ( 2790 self._parse_csv(self._parse_name_as_expression) 2791 if self._match_text_seq("DEFINE") 2792 else None 2793 ) 2794 2795 self._match_r_paren() 2796 2797 return self.expression( 2798 exp.MatchRecognize, 2799 partition_by=partition, 2800 order=order, 2801 measures=measures, 2802 rows=rows, 2803 after=after, 2804 pattern=pattern, 2805 define=define, 2806 alias=self._parse_table_alias(), 2807 ) 2808 2809 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2810 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2811 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2812 cross_apply = False 2813 2814 if cross_apply is not None: 2815 this = self._parse_select(table=True) 2816 view = None 2817 outer = None 2818 elif self._match(TokenType.LATERAL): 2819 this = self._parse_select(table=True) 2820 view = self._match(TokenType.VIEW) 2821 outer = self._match(TokenType.OUTER) 2822 else: 2823 return None 2824 2825 if not this: 2826 this = ( 2827 self._parse_unnest() 2828 or self._parse_function() 2829 or self._parse_id_var(any_token=False) 2830 ) 2831 2832 while self._match(TokenType.DOT): 2833 this = exp.Dot( 2834 this=this, 2835 expression=self._parse_function() or self._parse_id_var(any_token=False), 2836 ) 2837 2838 if view: 2839 table = self._parse_id_var(any_token=False) 2840 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2841 table_alias: t.Optional[exp.TableAlias] = self.expression( 2842 exp.TableAlias, this=table, columns=columns 2843 ) 2844 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2845 # We move the alias from the lateral's child node to the lateral itself 2846 table_alias = this.args["alias"].pop() 2847 else: 2848 table_alias = self._parse_table_alias() 2849 2850 return self.expression( 2851 exp.Lateral, 2852 this=this, 2853 view=view, 2854 outer=outer, 2855 alias=table_alias, 2856 cross_apply=cross_apply, 2857 ) 2858 2859 def _parse_join_parts( 2860 self, 2861 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2862 return ( 2863 self._match_set(self.JOIN_METHODS) and self._prev, 2864 self._match_set(self.JOIN_SIDES) and self._prev, 2865 self._match_set(self.JOIN_KINDS) and self._prev, 2866 ) 2867 2868 def _parse_join( 2869 self, skip_join_token: bool = False, parse_bracket: bool = False 2870 ) -> t.Optional[exp.Join]: 2871 if self._match(TokenType.COMMA): 2872 return self.expression(exp.Join, this=self._parse_table()) 2873 2874 index = self._index 2875 method, side, kind = self._parse_join_parts() 2876 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2877 join = self._match(TokenType.JOIN) 2878 2879 if not skip_join_token and not join: 2880 self._retreat(index) 2881 kind = None 2882 method = None 2883 side = None 2884 2885 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2886 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2887 2888 if not skip_join_token and not join and not outer_apply and not cross_apply: 2889 return None 2890 2891 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2892 2893 if method: 2894 kwargs["method"] = method.text 2895 if side: 2896 kwargs["side"] = side.text 2897 if kind: 2898 kwargs["kind"] = kind.text 2899 if hint: 2900 kwargs["hint"] = hint 2901 2902 if self._match(TokenType.ON): 2903 kwargs["on"] = self._parse_conjunction() 2904 elif self._match(TokenType.USING): 2905 kwargs["using"] = self._parse_wrapped_id_vars() 2906 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2907 kind and kind.token_type == TokenType.CROSS 2908 ): 2909 index = self._index 2910 joins: t.Optional[list] = list(self._parse_joins()) 2911 2912 if joins and self._match(TokenType.ON): 2913 kwargs["on"] = self._parse_conjunction() 2914 elif joins and self._match(TokenType.USING): 2915 kwargs["using"] = self._parse_wrapped_id_vars() 2916 else: 2917 joins = None 2918 self._retreat(index) 2919 2920 kwargs["this"].set("joins", joins if joins else None) 2921 2922 comments = [c for token in (method, side, kind) if token for c in token.comments] 2923 return self.expression(exp.Join, comments=comments, **kwargs) 2924 2925 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2926 this = self._parse_conjunction() 2927 2928 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2929 return this 2930 2931 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2932 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2933 2934 return this 2935 2936 def _parse_index_params(self) -> exp.IndexParameters: 2937 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2938 2939 if self._match(TokenType.L_PAREN, advance=False): 2940 columns = self._parse_wrapped_csv(self._parse_with_operator) 2941 else: 2942 columns = None 2943 2944 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2945 partition_by = self._parse_partition_by() 2946 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2947 tablespace = ( 2948 self._parse_var(any_token=True) 2949 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2950 else None 2951 ) 2952 where = self._parse_where() 2953 2954 return self.expression( 2955 exp.IndexParameters, 2956 using=using, 2957 columns=columns, 2958 include=include, 2959 partition_by=partition_by, 2960 where=where, 2961 with_storage=with_storage, 2962 tablespace=tablespace, 2963 ) 2964 2965 def _parse_index( 2966 self, 2967 index: t.Optional[exp.Expression] = None, 2968 ) -> t.Optional[exp.Index]: 2969 if index: 2970 unique = None 2971 primary = None 2972 amp = None 2973 2974 self._match(TokenType.ON) 2975 self._match(TokenType.TABLE) # hive 2976 table = self._parse_table_parts(schema=True) 2977 else: 2978 unique = self._match(TokenType.UNIQUE) 2979 primary = self._match_text_seq("PRIMARY") 2980 amp = self._match_text_seq("AMP") 2981 2982 if not self._match(TokenType.INDEX): 2983 return None 2984 2985 index = self._parse_id_var() 2986 table = None 2987 2988 params = self._parse_index_params() 2989 2990 return self.expression( 2991 exp.Index, 2992 this=index, 2993 table=table, 2994 unique=unique, 2995 primary=primary, 2996 amp=amp, 2997 params=params, 2998 ) 2999 3000 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3001 hints: t.List[exp.Expression] = [] 3002 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3003 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3004 hints.append( 3005 self.expression( 3006 exp.WithTableHint, 3007 expressions=self._parse_csv( 3008 lambda: self._parse_function() or self._parse_var(any_token=True) 3009 ), 3010 ) 3011 ) 3012 self._match_r_paren() 3013 else: 3014 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3015 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3016 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3017 3018 self._match_texts(("INDEX", "KEY")) 3019 if self._match(TokenType.FOR): 3020 hint.set("target", self._advance_any() and self._prev.text.upper()) 3021 3022 hint.set("expressions", self._parse_wrapped_id_vars()) 3023 hints.append(hint) 3024 3025 return hints or None 3026 3027 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3028 return ( 3029 (not schema and self._parse_function(optional_parens=False)) 3030 or self._parse_id_var(any_token=False) 3031 or self._parse_string_as_identifier() 3032 or self._parse_placeholder() 3033 ) 3034 3035 def _parse_table_parts( 3036 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3037 ) -> exp.Table: 3038 catalog = None 3039 db = None 3040 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3041 3042 while self._match(TokenType.DOT): 3043 if catalog: 3044 # This allows nesting the table in arbitrarily many dot expressions if needed 3045 table = self.expression( 3046 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3047 ) 3048 else: 3049 catalog = db 3050 db = table 3051 # "" used for tsql FROM a..b case 3052 table = self._parse_table_part(schema=schema) or "" 3053 3054 if ( 3055 wildcard 3056 and self._is_connected() 3057 and (isinstance(table, exp.Identifier) or not table) 3058 and self._match(TokenType.STAR) 3059 ): 3060 if isinstance(table, exp.Identifier): 3061 table.args["this"] += "*" 3062 else: 3063 table = exp.Identifier(this="*") 3064 3065 if is_db_reference: 3066 catalog = db 3067 db = table 3068 table = None 3069 3070 if not table and not is_db_reference: 3071 self.raise_error(f"Expected table name but got {self._curr}") 3072 if not db and is_db_reference: 3073 self.raise_error(f"Expected database name but got {self._curr}") 3074 3075 return self.expression( 3076 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3077 ) 3078 3079 def _parse_table( 3080 self, 3081 schema: bool = False, 3082 joins: bool = False, 3083 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3084 parse_bracket: bool = False, 3085 is_db_reference: bool = False, 3086 ) -> t.Optional[exp.Expression]: 3087 lateral = self._parse_lateral() 3088 if lateral: 3089 return lateral 3090 3091 unnest = self._parse_unnest() 3092 if unnest: 3093 return unnest 3094 3095 values = self._parse_derived_table_values() 3096 if values: 3097 return values 3098 3099 subquery = self._parse_select(table=True) 3100 if subquery: 3101 if not subquery.args.get("pivots"): 3102 subquery.set("pivots", self._parse_pivots()) 3103 return subquery 3104 3105 bracket = parse_bracket and self._parse_bracket(None) 3106 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3107 3108 only = self._match(TokenType.ONLY) 3109 3110 this = t.cast( 3111 exp.Expression, 3112 bracket 3113 or self._parse_bracket( 3114 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3115 ), 3116 ) 3117 3118 if only: 3119 this.set("only", only) 3120 3121 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3122 self._match_text_seq("*") 3123 3124 if schema: 3125 return self._parse_schema(this=this) 3126 3127 version = self._parse_version() 3128 3129 if version: 3130 this.set("version", version) 3131 3132 if self.dialect.ALIAS_POST_TABLESAMPLE: 3133 table_sample = self._parse_table_sample() 3134 3135 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3136 if alias: 3137 this.set("alias", alias) 3138 3139 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3140 return self.expression( 3141 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3142 ) 3143 3144 this.set("hints", self._parse_table_hints()) 3145 3146 if not this.args.get("pivots"): 3147 this.set("pivots", self._parse_pivots()) 3148 3149 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3150 table_sample = self._parse_table_sample() 3151 3152 if table_sample: 3153 table_sample.set("this", this) 3154 this = table_sample 3155 3156 if joins: 3157 for join in self._parse_joins(): 3158 this.append("joins", join) 3159 3160 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3161 this.set("ordinality", True) 3162 this.set("alias", self._parse_table_alias()) 3163 3164 return this 3165 3166 def _parse_version(self) -> t.Optional[exp.Version]: 3167 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3168 this = "TIMESTAMP" 3169 elif self._match(TokenType.VERSION_SNAPSHOT): 3170 this = "VERSION" 3171 else: 3172 return None 3173 3174 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3175 kind = self._prev.text.upper() 3176 start = self._parse_bitwise() 3177 self._match_texts(("TO", "AND")) 3178 end = self._parse_bitwise() 3179 expression: t.Optional[exp.Expression] = self.expression( 3180 exp.Tuple, expressions=[start, end] 3181 ) 3182 elif self._match_text_seq("CONTAINED", "IN"): 3183 kind = "CONTAINED IN" 3184 expression = self.expression( 3185 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3186 ) 3187 elif self._match(TokenType.ALL): 3188 kind = "ALL" 3189 expression = None 3190 else: 3191 self._match_text_seq("AS", "OF") 3192 kind = "AS OF" 3193 expression = self._parse_type() 3194 3195 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3196 3197 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3198 if not self._match(TokenType.UNNEST): 3199 return None 3200 3201 expressions = self._parse_wrapped_csv(self._parse_equality) 3202 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3203 3204 alias = self._parse_table_alias() if with_alias else None 3205 3206 if alias: 3207 if self.dialect.UNNEST_COLUMN_ONLY: 3208 if alias.args.get("columns"): 3209 self.raise_error("Unexpected extra column alias in unnest.") 3210 3211 alias.set("columns", [alias.this]) 3212 alias.set("this", None) 3213 3214 columns = alias.args.get("columns") or [] 3215 if offset and len(expressions) < len(columns): 3216 offset = columns.pop() 3217 3218 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3219 self._match(TokenType.ALIAS) 3220 offset = self._parse_id_var( 3221 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3222 ) or exp.to_identifier("offset") 3223 3224 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3225 3226 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3227 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3228 if not is_derived and not self._match_text_seq("VALUES"): 3229 return None 3230 3231 expressions = self._parse_csv(self._parse_value) 3232 alias = self._parse_table_alias() 3233 3234 if is_derived: 3235 self._match_r_paren() 3236 3237 return self.expression( 3238 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3239 ) 3240 3241 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3242 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3243 as_modifier and self._match_text_seq("USING", "SAMPLE") 3244 ): 3245 return None 3246 3247 bucket_numerator = None 3248 bucket_denominator = None 3249 bucket_field = None 3250 percent = None 3251 size = None 3252 seed = None 3253 3254 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3255 matched_l_paren = self._match(TokenType.L_PAREN) 3256 3257 if self.TABLESAMPLE_CSV: 3258 num = None 3259 expressions = self._parse_csv(self._parse_primary) 3260 else: 3261 expressions = None 3262 num = ( 3263 self._parse_factor() 3264 if self._match(TokenType.NUMBER, advance=False) 3265 else self._parse_primary() or self._parse_placeholder() 3266 ) 3267 3268 if self._match_text_seq("BUCKET"): 3269 bucket_numerator = self._parse_number() 3270 self._match_text_seq("OUT", "OF") 3271 bucket_denominator = bucket_denominator = self._parse_number() 3272 self._match(TokenType.ON) 3273 bucket_field = self._parse_field() 3274 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3275 percent = num 3276 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3277 size = num 3278 else: 3279 percent = num 3280 3281 if matched_l_paren: 3282 self._match_r_paren() 3283 3284 if self._match(TokenType.L_PAREN): 3285 method = self._parse_var(upper=True) 3286 seed = self._match(TokenType.COMMA) and self._parse_number() 3287 self._match_r_paren() 3288 elif self._match_texts(("SEED", "REPEATABLE")): 3289 seed = self._parse_wrapped(self._parse_number) 3290 3291 return self.expression( 3292 exp.TableSample, 3293 expressions=expressions, 3294 method=method, 3295 bucket_numerator=bucket_numerator, 3296 bucket_denominator=bucket_denominator, 3297 bucket_field=bucket_field, 3298 percent=percent, 3299 size=size, 3300 seed=seed, 3301 ) 3302 3303 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3304 return list(iter(self._parse_pivot, None)) or None 3305 3306 def _parse_joins(self) -> t.Iterator[exp.Join]: 3307 return iter(self._parse_join, None) 3308 3309 # https://duckdb.org/docs/sql/statements/pivot 3310 def _parse_simplified_pivot(self) -> exp.Pivot: 3311 def _parse_on() -> t.Optional[exp.Expression]: 3312 this = self._parse_bitwise() 3313 return self._parse_in(this) if self._match(TokenType.IN) else this 3314 3315 this = self._parse_table() 3316 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3317 using = self._match(TokenType.USING) and self._parse_csv( 3318 lambda: self._parse_alias(self._parse_function()) 3319 ) 3320 group = self._parse_group() 3321 return self.expression( 3322 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3323 ) 3324 3325 def _parse_pivot_in(self) -> exp.In: 3326 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3327 this = self._parse_conjunction() 3328 3329 self._match(TokenType.ALIAS) 3330 alias = self._parse_field() 3331 if alias: 3332 return self.expression(exp.PivotAlias, this=this, alias=alias) 3333 3334 return this 3335 3336 value = self._parse_column() 3337 3338 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3339 self.raise_error("Expecting IN (") 3340 3341 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3342 3343 self._match_r_paren() 3344 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3345 3346 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3347 index = self._index 3348 include_nulls = None 3349 3350 if self._match(TokenType.PIVOT): 3351 unpivot = False 3352 elif self._match(TokenType.UNPIVOT): 3353 unpivot = True 3354 3355 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3356 if self._match_text_seq("INCLUDE", "NULLS"): 3357 include_nulls = True 3358 elif self._match_text_seq("EXCLUDE", "NULLS"): 3359 include_nulls = False 3360 else: 3361 return None 3362 3363 expressions = [] 3364 3365 if not self._match(TokenType.L_PAREN): 3366 self._retreat(index) 3367 return None 3368 3369 if unpivot: 3370 expressions = self._parse_csv(self._parse_column) 3371 else: 3372 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3373 3374 if not expressions: 3375 self.raise_error("Failed to parse PIVOT's aggregation list") 3376 3377 if not self._match(TokenType.FOR): 3378 self.raise_error("Expecting FOR") 3379 3380 field = self._parse_pivot_in() 3381 3382 self._match_r_paren() 3383 3384 pivot = self.expression( 3385 exp.Pivot, 3386 expressions=expressions, 3387 field=field, 3388 unpivot=unpivot, 3389 include_nulls=include_nulls, 3390 ) 3391 3392 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3393 pivot.set("alias", self._parse_table_alias()) 3394 3395 if not unpivot: 3396 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3397 3398 columns: t.List[exp.Expression] = [] 3399 for fld in pivot.args["field"].expressions: 3400 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3401 for name in names: 3402 if self.PREFIXED_PIVOT_COLUMNS: 3403 name = f"{name}_{field_name}" if name else field_name 3404 else: 3405 name = f"{field_name}_{name}" if name else field_name 3406 3407 columns.append(exp.to_identifier(name)) 3408 3409 pivot.set("columns", columns) 3410 3411 return pivot 3412 3413 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3414 return [agg.alias for agg in aggregations] 3415 3416 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3417 if not skip_where_token and not self._match(TokenType.PREWHERE): 3418 return None 3419 3420 return self.expression( 3421 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3422 ) 3423 3424 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3425 if not skip_where_token and not self._match(TokenType.WHERE): 3426 return None 3427 3428 return self.expression( 3429 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3430 ) 3431 3432 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3433 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3434 return None 3435 3436 elements = defaultdict(list) 3437 3438 if self._match(TokenType.ALL): 3439 return self.expression(exp.Group, all=True) 3440 3441 while True: 3442 expressions = self._parse_csv(self._parse_conjunction) 3443 if expressions: 3444 elements["expressions"].extend(expressions) 3445 3446 grouping_sets = self._parse_grouping_sets() 3447 if grouping_sets: 3448 elements["grouping_sets"].extend(grouping_sets) 3449 3450 rollup = None 3451 cube = None 3452 totals = None 3453 3454 index = self._index 3455 with_ = self._match(TokenType.WITH) 3456 if self._match(TokenType.ROLLUP): 3457 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3458 elements["rollup"].extend(ensure_list(rollup)) 3459 3460 if self._match(TokenType.CUBE): 3461 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3462 elements["cube"].extend(ensure_list(cube)) 3463 3464 if self._match_text_seq("TOTALS"): 3465 totals = True 3466 elements["totals"] = True # type: ignore 3467 3468 if not (grouping_sets or rollup or cube or totals): 3469 if with_: 3470 self._retreat(index) 3471 break 3472 3473 return self.expression(exp.Group, **elements) # type: ignore 3474 3475 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3476 if not self._match(TokenType.GROUPING_SETS): 3477 return None 3478 3479 return self._parse_wrapped_csv(self._parse_grouping_set) 3480 3481 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3482 if self._match(TokenType.L_PAREN): 3483 grouping_set = self._parse_csv(self._parse_column) 3484 self._match_r_paren() 3485 return self.expression(exp.Tuple, expressions=grouping_set) 3486 3487 return self._parse_column() 3488 3489 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3490 if not skip_having_token and not self._match(TokenType.HAVING): 3491 return None 3492 return self.expression(exp.Having, this=self._parse_conjunction()) 3493 3494 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3495 if not self._match(TokenType.QUALIFY): 3496 return None 3497 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3498 3499 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3500 if skip_start_token: 3501 start = None 3502 elif self._match(TokenType.START_WITH): 3503 start = self._parse_conjunction() 3504 else: 3505 return None 3506 3507 self._match(TokenType.CONNECT_BY) 3508 nocycle = self._match_text_seq("NOCYCLE") 3509 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3510 exp.Prior, this=self._parse_bitwise() 3511 ) 3512 connect = self._parse_conjunction() 3513 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3514 3515 if not start and self._match(TokenType.START_WITH): 3516 start = self._parse_conjunction() 3517 3518 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3519 3520 def _parse_name_as_expression(self) -> exp.Alias: 3521 return self.expression( 3522 exp.Alias, 3523 alias=self._parse_id_var(any_token=True), 3524 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3525 ) 3526 3527 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3528 if self._match_text_seq("INTERPOLATE"): 3529 return self._parse_wrapped_csv(self._parse_name_as_expression) 3530 return None 3531 3532 def _parse_order( 3533 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3534 ) -> t.Optional[exp.Expression]: 3535 siblings = None 3536 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3537 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3538 return this 3539 3540 siblings = True 3541 3542 return self.expression( 3543 exp.Order, 3544 this=this, 3545 expressions=self._parse_csv(self._parse_ordered), 3546 interpolate=self._parse_interpolate(), 3547 siblings=siblings, 3548 ) 3549 3550 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3551 if not self._match(token): 3552 return None 3553 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3554 3555 def _parse_ordered( 3556 self, parse_method: t.Optional[t.Callable] = None 3557 ) -> t.Optional[exp.Ordered]: 3558 this = parse_method() if parse_method else self._parse_conjunction() 3559 if not this: 3560 return None 3561 3562 asc = self._match(TokenType.ASC) 3563 desc = self._match(TokenType.DESC) or (asc and False) 3564 3565 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3566 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3567 3568 nulls_first = is_nulls_first or False 3569 explicitly_null_ordered = is_nulls_first or is_nulls_last 3570 3571 if ( 3572 not explicitly_null_ordered 3573 and ( 3574 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3575 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3576 ) 3577 and self.dialect.NULL_ORDERING != "nulls_are_last" 3578 ): 3579 nulls_first = True 3580 3581 if self._match_text_seq("WITH", "FILL"): 3582 with_fill = self.expression( 3583 exp.WithFill, 3584 **{ # type: ignore 3585 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3586 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3587 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3588 }, 3589 ) 3590 else: 3591 with_fill = None 3592 3593 return self.expression( 3594 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3595 ) 3596 3597 def _parse_limit( 3598 self, 3599 this: t.Optional[exp.Expression] = None, 3600 top: bool = False, 3601 skip_limit_token: bool = False, 3602 ) -> t.Optional[exp.Expression]: 3603 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3604 comments = self._prev_comments 3605 if top: 3606 limit_paren = self._match(TokenType.L_PAREN) 3607 expression = self._parse_term() if limit_paren else self._parse_number() 3608 3609 if limit_paren: 3610 self._match_r_paren() 3611 else: 3612 expression = self._parse_term() 3613 3614 if self._match(TokenType.COMMA): 3615 offset = expression 3616 expression = self._parse_term() 3617 else: 3618 offset = None 3619 3620 limit_exp = self.expression( 3621 exp.Limit, 3622 this=this, 3623 expression=expression, 3624 offset=offset, 3625 comments=comments, 3626 expressions=self._parse_limit_by(), 3627 ) 3628 3629 return limit_exp 3630 3631 if self._match(TokenType.FETCH): 3632 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3633 direction = self._prev.text.upper() if direction else "FIRST" 3634 3635 count = self._parse_field(tokens=self.FETCH_TOKENS) 3636 percent = self._match(TokenType.PERCENT) 3637 3638 self._match_set((TokenType.ROW, TokenType.ROWS)) 3639 3640 only = self._match_text_seq("ONLY") 3641 with_ties = self._match_text_seq("WITH", "TIES") 3642 3643 if only and with_ties: 3644 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3645 3646 return self.expression( 3647 exp.Fetch, 3648 direction=direction, 3649 count=count, 3650 percent=percent, 3651 with_ties=with_ties, 3652 ) 3653 3654 return this 3655 3656 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3657 if not self._match(TokenType.OFFSET): 3658 return this 3659 3660 count = self._parse_term() 3661 self._match_set((TokenType.ROW, TokenType.ROWS)) 3662 3663 return self.expression( 3664 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3665 ) 3666 3667 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3668 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3669 3670 def _parse_locks(self) -> t.List[exp.Lock]: 3671 locks = [] 3672 while True: 3673 if self._match_text_seq("FOR", "UPDATE"): 3674 update = True 3675 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3676 "LOCK", "IN", "SHARE", "MODE" 3677 ): 3678 update = False 3679 else: 3680 break 3681 3682 expressions = None 3683 if self._match_text_seq("OF"): 3684 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3685 3686 wait: t.Optional[bool | exp.Expression] = None 3687 if self._match_text_seq("NOWAIT"): 3688 wait = True 3689 elif self._match_text_seq("WAIT"): 3690 wait = self._parse_primary() 3691 elif self._match_text_seq("SKIP", "LOCKED"): 3692 wait = False 3693 3694 locks.append( 3695 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3696 ) 3697 3698 return locks 3699 3700 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3701 while this and self._match_set(self.SET_OPERATIONS): 3702 token_type = self._prev.token_type 3703 3704 if token_type == TokenType.UNION: 3705 operation = exp.Union 3706 elif token_type == TokenType.EXCEPT: 3707 operation = exp.Except 3708 else: 3709 operation = exp.Intersect 3710 3711 comments = self._prev.comments 3712 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3713 by_name = self._match_text_seq("BY", "NAME") 3714 expression = self._parse_select(nested=True, parse_set_operation=False) 3715 3716 this = self.expression( 3717 operation, 3718 comments=comments, 3719 this=this, 3720 distinct=distinct, 3721 by_name=by_name, 3722 expression=expression, 3723 ) 3724 3725 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3726 expression = this.expression 3727 3728 if expression: 3729 for arg in self.UNION_MODIFIERS: 3730 expr = expression.args.get(arg) 3731 if expr: 3732 this.set(arg, expr.pop()) 3733 3734 return this 3735 3736 def _parse_expression(self) -> t.Optional[exp.Expression]: 3737 return self._parse_alias(self._parse_conjunction()) 3738 3739 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3740 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3741 3742 def _parse_equality(self) -> t.Optional[exp.Expression]: 3743 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3744 3745 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3746 return self._parse_tokens(self._parse_range, self.COMPARISON) 3747 3748 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3749 this = this or self._parse_bitwise() 3750 negate = self._match(TokenType.NOT) 3751 3752 if self._match_set(self.RANGE_PARSERS): 3753 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3754 if not expression: 3755 return this 3756 3757 this = expression 3758 elif self._match(TokenType.ISNULL): 3759 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3760 3761 # Postgres supports ISNULL and NOTNULL for conditions. 3762 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3763 if self._match(TokenType.NOTNULL): 3764 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3765 this = self.expression(exp.Not, this=this) 3766 3767 if negate: 3768 this = self.expression(exp.Not, this=this) 3769 3770 if self._match(TokenType.IS): 3771 this = self._parse_is(this) 3772 3773 return this 3774 3775 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3776 index = self._index - 1 3777 negate = self._match(TokenType.NOT) 3778 3779 if self._match_text_seq("DISTINCT", "FROM"): 3780 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3781 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3782 3783 expression = self._parse_null() or self._parse_boolean() 3784 if not expression: 3785 self._retreat(index) 3786 return None 3787 3788 this = self.expression(exp.Is, this=this, expression=expression) 3789 return self.expression(exp.Not, this=this) if negate else this 3790 3791 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3792 unnest = self._parse_unnest(with_alias=False) 3793 if unnest: 3794 this = self.expression(exp.In, this=this, unnest=unnest) 3795 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3796 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3797 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3798 3799 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3800 this = self.expression(exp.In, this=this, query=expressions[0]) 3801 else: 3802 this = self.expression(exp.In, this=this, expressions=expressions) 3803 3804 if matched_l_paren: 3805 self._match_r_paren(this) 3806 elif not self._match(TokenType.R_BRACKET, expression=this): 3807 self.raise_error("Expecting ]") 3808 else: 3809 this = self.expression(exp.In, this=this, field=self._parse_field()) 3810 3811 return this 3812 3813 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3814 low = self._parse_bitwise() 3815 self._match(TokenType.AND) 3816 high = self._parse_bitwise() 3817 return self.expression(exp.Between, this=this, low=low, high=high) 3818 3819 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3820 if not self._match(TokenType.ESCAPE): 3821 return this 3822 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3823 3824 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3825 index = self._index 3826 3827 if not self._match(TokenType.INTERVAL) and match_interval: 3828 return None 3829 3830 if self._match(TokenType.STRING, advance=False): 3831 this = self._parse_primary() 3832 else: 3833 this = self._parse_term() 3834 3835 if not this or ( 3836 isinstance(this, exp.Column) 3837 and not this.table 3838 and not this.this.quoted 3839 and this.name.upper() == "IS" 3840 ): 3841 self._retreat(index) 3842 return None 3843 3844 unit = self._parse_function() or ( 3845 not self._match(TokenType.ALIAS, advance=False) 3846 and self._parse_var(any_token=True, upper=True) 3847 ) 3848 3849 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3850 # each INTERVAL expression into this canonical form so it's easy to transpile 3851 if this and this.is_number: 3852 this = exp.Literal.string(this.name) 3853 elif this and this.is_string: 3854 parts = this.name.split() 3855 3856 if len(parts) == 2: 3857 if unit: 3858 # This is not actually a unit, it's something else (e.g. a "window side") 3859 unit = None 3860 self._retreat(self._index - 1) 3861 3862 this = exp.Literal.string(parts[0]) 3863 unit = self.expression(exp.Var, this=parts[1].upper()) 3864 3865 return self.expression(exp.Interval, this=this, unit=unit) 3866 3867 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3868 this = self._parse_term() 3869 3870 while True: 3871 if self._match_set(self.BITWISE): 3872 this = self.expression( 3873 self.BITWISE[self._prev.token_type], 3874 this=this, 3875 expression=self._parse_term(), 3876 ) 3877 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3878 this = self.expression( 3879 exp.DPipe, 3880 this=this, 3881 expression=self._parse_term(), 3882 safe=not self.dialect.STRICT_STRING_CONCAT, 3883 ) 3884 elif self._match(TokenType.DQMARK): 3885 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3886 elif self._match_pair(TokenType.LT, TokenType.LT): 3887 this = self.expression( 3888 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3889 ) 3890 elif self._match_pair(TokenType.GT, TokenType.GT): 3891 this = self.expression( 3892 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3893 ) 3894 else: 3895 break 3896 3897 return this 3898 3899 def _parse_term(self) -> t.Optional[exp.Expression]: 3900 return self._parse_tokens(self._parse_factor, self.TERM) 3901 3902 def _parse_factor(self) -> t.Optional[exp.Expression]: 3903 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3904 this = parse_method() 3905 3906 while self._match_set(self.FACTOR): 3907 this = self.expression( 3908 self.FACTOR[self._prev.token_type], 3909 this=this, 3910 comments=self._prev_comments, 3911 expression=parse_method(), 3912 ) 3913 if isinstance(this, exp.Div): 3914 this.args["typed"] = self.dialect.TYPED_DIVISION 3915 this.args["safe"] = self.dialect.SAFE_DIVISION 3916 3917 return this 3918 3919 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3920 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3921 3922 def _parse_unary(self) -> t.Optional[exp.Expression]: 3923 if self._match_set(self.UNARY_PARSERS): 3924 return self.UNARY_PARSERS[self._prev.token_type](self) 3925 return self._parse_at_time_zone(self._parse_type()) 3926 3927 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3928 interval = parse_interval and self._parse_interval() 3929 if interval: 3930 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3931 while True: 3932 index = self._index 3933 self._match(TokenType.PLUS) 3934 3935 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3936 self._retreat(index) 3937 break 3938 3939 interval = self.expression( # type: ignore 3940 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3941 ) 3942 3943 return interval 3944 3945 index = self._index 3946 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3947 this = self._parse_column() 3948 3949 if data_type: 3950 if isinstance(this, exp.Literal): 3951 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3952 if parser: 3953 return parser(self, this, data_type) 3954 return self.expression(exp.Cast, this=this, to=data_type) 3955 if not data_type.expressions: 3956 self._retreat(index) 3957 return self._parse_column() 3958 return self._parse_column_ops(data_type) 3959 3960 return this and self._parse_column_ops(this) 3961 3962 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3963 this = self._parse_type() 3964 if not this: 3965 return None 3966 3967 if isinstance(this, exp.Column) and not this.table: 3968 this = exp.var(this.name.upper()) 3969 3970 return self.expression( 3971 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3972 ) 3973 3974 def _parse_types( 3975 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3976 ) -> t.Optional[exp.Expression]: 3977 index = self._index 3978 3979 prefix = self._match_text_seq("SYSUDTLIB", ".") 3980 3981 if not self._match_set(self.TYPE_TOKENS): 3982 identifier = allow_identifiers and self._parse_id_var( 3983 any_token=False, tokens=(TokenType.VAR,) 3984 ) 3985 if identifier: 3986 tokens = self.dialect.tokenize(identifier.name) 3987 3988 if len(tokens) != 1: 3989 self.raise_error("Unexpected identifier", self._prev) 3990 3991 if tokens[0].token_type in self.TYPE_TOKENS: 3992 self._prev = tokens[0] 3993 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3994 type_name = identifier.name 3995 3996 while self._match(TokenType.DOT): 3997 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3998 3999 return exp.DataType.build(type_name, udt=True) 4000 else: 4001 self._retreat(self._index - 1) 4002 return None 4003 else: 4004 return None 4005 4006 type_token = self._prev.token_type 4007 4008 if type_token == TokenType.PSEUDO_TYPE: 4009 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4010 4011 if type_token == TokenType.OBJECT_IDENTIFIER: 4012 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4013 4014 nested = type_token in self.NESTED_TYPE_TOKENS 4015 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4016 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4017 expressions = None 4018 maybe_func = False 4019 4020 if self._match(TokenType.L_PAREN): 4021 if is_struct: 4022 expressions = self._parse_csv(self._parse_struct_types) 4023 elif nested: 4024 expressions = self._parse_csv( 4025 lambda: self._parse_types( 4026 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4027 ) 4028 ) 4029 elif type_token in self.ENUM_TYPE_TOKENS: 4030 expressions = self._parse_csv(self._parse_equality) 4031 elif is_aggregate: 4032 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4033 any_token=False, tokens=(TokenType.VAR,) 4034 ) 4035 if not func_or_ident or not self._match(TokenType.COMMA): 4036 return None 4037 expressions = self._parse_csv( 4038 lambda: self._parse_types( 4039 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4040 ) 4041 ) 4042 expressions.insert(0, func_or_ident) 4043 else: 4044 expressions = self._parse_csv(self._parse_type_size) 4045 4046 if not expressions or not self._match(TokenType.R_PAREN): 4047 self._retreat(index) 4048 return None 4049 4050 maybe_func = True 4051 4052 this: t.Optional[exp.Expression] = None 4053 values: t.Optional[t.List[exp.Expression]] = None 4054 4055 if nested and self._match(TokenType.LT): 4056 if is_struct: 4057 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4058 else: 4059 expressions = self._parse_csv( 4060 lambda: self._parse_types( 4061 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4062 ) 4063 ) 4064 4065 if not self._match(TokenType.GT): 4066 self.raise_error("Expecting >") 4067 4068 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4069 values = self._parse_csv(self._parse_conjunction) 4070 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4071 4072 if type_token in self.TIMESTAMPS: 4073 if self._match_text_seq("WITH", "TIME", "ZONE"): 4074 maybe_func = False 4075 tz_type = ( 4076 exp.DataType.Type.TIMETZ 4077 if type_token in self.TIMES 4078 else exp.DataType.Type.TIMESTAMPTZ 4079 ) 4080 this = exp.DataType(this=tz_type, expressions=expressions) 4081 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4082 maybe_func = False 4083 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4084 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4085 maybe_func = False 4086 elif type_token == TokenType.INTERVAL: 4087 unit = self._parse_var() 4088 4089 if self._match_text_seq("TO"): 4090 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 4091 else: 4092 span = None 4093 4094 if span or not unit: 4095 this = self.expression( 4096 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 4097 ) 4098 else: 4099 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4100 4101 if maybe_func and check_func: 4102 index2 = self._index 4103 peek = self._parse_string() 4104 4105 if not peek: 4106 self._retreat(index) 4107 return None 4108 4109 self._retreat(index2) 4110 4111 if not this: 4112 if self._match_text_seq("UNSIGNED"): 4113 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4114 if not unsigned_type_token: 4115 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4116 4117 type_token = unsigned_type_token or type_token 4118 4119 this = exp.DataType( 4120 this=exp.DataType.Type[type_token.value], 4121 expressions=expressions, 4122 nested=nested, 4123 values=values, 4124 prefix=prefix, 4125 ) 4126 4127 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4128 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4129 4130 return this 4131 4132 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4133 index = self._index 4134 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4135 self._match(TokenType.COLON) 4136 column_def = self._parse_column_def(this) 4137 4138 if type_required and ( 4139 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4140 ): 4141 self._retreat(index) 4142 return self._parse_types() 4143 4144 return column_def 4145 4146 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4147 if not self._match_text_seq("AT", "TIME", "ZONE"): 4148 return this 4149 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4150 4151 def _parse_column(self) -> t.Optional[exp.Expression]: 4152 this = self._parse_column_reference() 4153 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4154 4155 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4156 this = self._parse_field() 4157 if ( 4158 not this 4159 and self._match(TokenType.VALUES, advance=False) 4160 and self.VALUES_FOLLOWED_BY_PAREN 4161 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4162 ): 4163 this = self._parse_id_var() 4164 4165 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4166 4167 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4168 this = self._parse_bracket(this) 4169 4170 while self._match_set(self.COLUMN_OPERATORS): 4171 op_token = self._prev.token_type 4172 op = self.COLUMN_OPERATORS.get(op_token) 4173 4174 if op_token == TokenType.DCOLON: 4175 field = self._parse_types() 4176 if not field: 4177 self.raise_error("Expected type") 4178 elif op and self._curr: 4179 field = self._parse_column_reference() 4180 else: 4181 field = self._parse_field(anonymous_func=True, any_token=True) 4182 4183 if isinstance(field, exp.Func) and this: 4184 # bigquery allows function calls like x.y.count(...) 4185 # SAFE.SUBSTR(...) 4186 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4187 this = exp.replace_tree( 4188 this, 4189 lambda n: ( 4190 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4191 if n.table 4192 else n.this 4193 ) 4194 if isinstance(n, exp.Column) 4195 else n, 4196 ) 4197 4198 if op: 4199 this = op(self, this, field) 4200 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4201 this = self.expression( 4202 exp.Column, 4203 this=field, 4204 table=this.this, 4205 db=this.args.get("table"), 4206 catalog=this.args.get("db"), 4207 ) 4208 else: 4209 this = self.expression(exp.Dot, this=this, expression=field) 4210 this = self._parse_bracket(this) 4211 return this 4212 4213 def _parse_primary(self) -> t.Optional[exp.Expression]: 4214 if self._match_set(self.PRIMARY_PARSERS): 4215 token_type = self._prev.token_type 4216 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4217 4218 if token_type == TokenType.STRING: 4219 expressions = [primary] 4220 while self._match(TokenType.STRING): 4221 expressions.append(exp.Literal.string(self._prev.text)) 4222 4223 if len(expressions) > 1: 4224 return self.expression(exp.Concat, expressions=expressions) 4225 4226 return primary 4227 4228 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4229 return exp.Literal.number(f"0.{self._prev.text}") 4230 4231 if self._match(TokenType.L_PAREN): 4232 comments = self._prev_comments 4233 query = self._parse_select() 4234 4235 if query: 4236 expressions = [query] 4237 else: 4238 expressions = self._parse_expressions() 4239 4240 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4241 4242 if isinstance(this, exp.UNWRAPPED_QUERIES): 4243 this = self._parse_set_operations( 4244 self._parse_subquery(this=this, parse_alias=False) 4245 ) 4246 elif isinstance(this, exp.Subquery): 4247 this = self._parse_subquery( 4248 this=self._parse_set_operations(this), parse_alias=False 4249 ) 4250 elif len(expressions) > 1: 4251 this = self.expression(exp.Tuple, expressions=expressions) 4252 else: 4253 this = self.expression(exp.Paren, this=this) 4254 4255 if this: 4256 this.add_comments(comments) 4257 4258 self._match_r_paren(expression=this) 4259 return this 4260 4261 return None 4262 4263 def _parse_field( 4264 self, 4265 any_token: bool = False, 4266 tokens: t.Optional[t.Collection[TokenType]] = None, 4267 anonymous_func: bool = False, 4268 ) -> t.Optional[exp.Expression]: 4269 return ( 4270 self._parse_primary() 4271 or self._parse_function(anonymous=anonymous_func) 4272 or self._parse_id_var(any_token=any_token, tokens=tokens) 4273 ) 4274 4275 def _parse_function( 4276 self, 4277 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4278 anonymous: bool = False, 4279 optional_parens: bool = True, 4280 ) -> t.Optional[exp.Expression]: 4281 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4282 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4283 fn_syntax = False 4284 if ( 4285 self._match(TokenType.L_BRACE, advance=False) 4286 and self._next 4287 and self._next.text.upper() == "FN" 4288 ): 4289 self._advance(2) 4290 fn_syntax = True 4291 4292 func = self._parse_function_call( 4293 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4294 ) 4295 4296 if fn_syntax: 4297 self._match(TokenType.R_BRACE) 4298 4299 return func 4300 4301 def _parse_function_call( 4302 self, 4303 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4304 anonymous: bool = False, 4305 optional_parens: bool = True, 4306 ) -> t.Optional[exp.Expression]: 4307 if not self._curr: 4308 return None 4309 4310 comments = self._curr.comments 4311 token_type = self._curr.token_type 4312 this = self._curr.text 4313 upper = this.upper() 4314 4315 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4316 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4317 self._advance() 4318 return self._parse_window(parser(self)) 4319 4320 if not self._next or self._next.token_type != TokenType.L_PAREN: 4321 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4322 self._advance() 4323 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4324 4325 return None 4326 4327 if token_type not in self.FUNC_TOKENS: 4328 return None 4329 4330 self._advance(2) 4331 4332 parser = self.FUNCTION_PARSERS.get(upper) 4333 if parser and not anonymous: 4334 this = parser(self) 4335 else: 4336 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4337 4338 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4339 this = self.expression(subquery_predicate, this=self._parse_select()) 4340 self._match_r_paren() 4341 return this 4342 4343 if functions is None: 4344 functions = self.FUNCTIONS 4345 4346 function = functions.get(upper) 4347 4348 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4349 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4350 4351 if alias: 4352 args = self._kv_to_prop_eq(args) 4353 4354 if function and not anonymous: 4355 if "dialect" in function.__code__.co_varnames: 4356 func = function(args, dialect=self.dialect) 4357 else: 4358 func = function(args) 4359 4360 func = self.validate_expression(func, args) 4361 if not self.dialect.NORMALIZE_FUNCTIONS: 4362 func.meta["name"] = this 4363 4364 this = func 4365 else: 4366 if token_type == TokenType.IDENTIFIER: 4367 this = exp.Identifier(this=this, quoted=True) 4368 this = self.expression(exp.Anonymous, this=this, expressions=args) 4369 4370 if isinstance(this, exp.Expression): 4371 this.add_comments(comments) 4372 4373 self._match_r_paren(this) 4374 return self._parse_window(this) 4375 4376 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4377 transformed = [] 4378 4379 for e in expressions: 4380 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4381 if isinstance(e, exp.Alias): 4382 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4383 4384 if not isinstance(e, exp.PropertyEQ): 4385 e = self.expression( 4386 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4387 ) 4388 4389 if isinstance(e.this, exp.Column): 4390 e.this.replace(e.this.this) 4391 4392 transformed.append(e) 4393 4394 return transformed 4395 4396 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4397 return self._parse_column_def(self._parse_id_var()) 4398 4399 def _parse_user_defined_function( 4400 self, kind: t.Optional[TokenType] = None 4401 ) -> t.Optional[exp.Expression]: 4402 this = self._parse_id_var() 4403 4404 while self._match(TokenType.DOT): 4405 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4406 4407 if not self._match(TokenType.L_PAREN): 4408 return this 4409 4410 expressions = self._parse_csv(self._parse_function_parameter) 4411 self._match_r_paren() 4412 return self.expression( 4413 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4414 ) 4415 4416 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4417 literal = self._parse_primary() 4418 if literal: 4419 return self.expression(exp.Introducer, this=token.text, expression=literal) 4420 4421 return self.expression(exp.Identifier, this=token.text) 4422 4423 def _parse_session_parameter(self) -> exp.SessionParameter: 4424 kind = None 4425 this = self._parse_id_var() or self._parse_primary() 4426 4427 if this and self._match(TokenType.DOT): 4428 kind = this.name 4429 this = self._parse_var() or self._parse_primary() 4430 4431 return self.expression(exp.SessionParameter, this=this, kind=kind) 4432 4433 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4434 index = self._index 4435 4436 if self._match(TokenType.L_PAREN): 4437 expressions = t.cast( 4438 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4439 ) 4440 4441 if not self._match(TokenType.R_PAREN): 4442 self._retreat(index) 4443 else: 4444 expressions = [self._parse_id_var()] 4445 4446 if self._match_set(self.LAMBDAS): 4447 return self.LAMBDAS[self._prev.token_type](self, expressions) 4448 4449 self._retreat(index) 4450 4451 this: t.Optional[exp.Expression] 4452 4453 if self._match(TokenType.DISTINCT): 4454 this = self.expression( 4455 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4456 ) 4457 else: 4458 this = self._parse_select_or_expression(alias=alias) 4459 4460 return self._parse_limit( 4461 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4462 ) 4463 4464 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4465 index = self._index 4466 4467 if not self._match(TokenType.L_PAREN): 4468 return this 4469 4470 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4471 # expr can be of both types 4472 if self._match_set(self.SELECT_START_TOKENS): 4473 self._retreat(index) 4474 return this 4475 4476 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4477 4478 self._match_r_paren() 4479 return self.expression(exp.Schema, this=this, expressions=args) 4480 4481 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4482 return self._parse_column_def(self._parse_field(any_token=True)) 4483 4484 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4485 # column defs are not really columns, they're identifiers 4486 if isinstance(this, exp.Column): 4487 this = this.this 4488 4489 kind = self._parse_types(schema=True) 4490 4491 if self._match_text_seq("FOR", "ORDINALITY"): 4492 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4493 4494 constraints: t.List[exp.Expression] = [] 4495 4496 if not kind and self._match(TokenType.ALIAS): 4497 constraints.append( 4498 self.expression( 4499 exp.ComputedColumnConstraint, 4500 this=self._parse_conjunction(), 4501 persisted=self._match_text_seq("PERSISTED"), 4502 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4503 ) 4504 ) 4505 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4506 self._match(TokenType.ALIAS) 4507 constraints.append( 4508 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4509 ) 4510 4511 while True: 4512 constraint = self._parse_column_constraint() 4513 if not constraint: 4514 break 4515 constraints.append(constraint) 4516 4517 if not kind and not constraints: 4518 return this 4519 4520 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4521 4522 def _parse_auto_increment( 4523 self, 4524 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4525 start = None 4526 increment = None 4527 4528 if self._match(TokenType.L_PAREN, advance=False): 4529 args = self._parse_wrapped_csv(self._parse_bitwise) 4530 start = seq_get(args, 0) 4531 increment = seq_get(args, 1) 4532 elif self._match_text_seq("START"): 4533 start = self._parse_bitwise() 4534 self._match_text_seq("INCREMENT") 4535 increment = self._parse_bitwise() 4536 4537 if start and increment: 4538 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4539 4540 return exp.AutoIncrementColumnConstraint() 4541 4542 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4543 if not self._match_text_seq("REFRESH"): 4544 self._retreat(self._index - 1) 4545 return None 4546 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4547 4548 def _parse_compress(self) -> exp.CompressColumnConstraint: 4549 if self._match(TokenType.L_PAREN, advance=False): 4550 return self.expression( 4551 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4552 ) 4553 4554 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4555 4556 def _parse_generated_as_identity( 4557 self, 4558 ) -> ( 4559 exp.GeneratedAsIdentityColumnConstraint 4560 | exp.ComputedColumnConstraint 4561 | exp.GeneratedAsRowColumnConstraint 4562 ): 4563 if self._match_text_seq("BY", "DEFAULT"): 4564 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4565 this = self.expression( 4566 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4567 ) 4568 else: 4569 self._match_text_seq("ALWAYS") 4570 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4571 4572 self._match(TokenType.ALIAS) 4573 4574 if self._match_text_seq("ROW"): 4575 start = self._match_text_seq("START") 4576 if not start: 4577 self._match(TokenType.END) 4578 hidden = self._match_text_seq("HIDDEN") 4579 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4580 4581 identity = self._match_text_seq("IDENTITY") 4582 4583 if self._match(TokenType.L_PAREN): 4584 if self._match(TokenType.START_WITH): 4585 this.set("start", self._parse_bitwise()) 4586 if self._match_text_seq("INCREMENT", "BY"): 4587 this.set("increment", self._parse_bitwise()) 4588 if self._match_text_seq("MINVALUE"): 4589 this.set("minvalue", self._parse_bitwise()) 4590 if self._match_text_seq("MAXVALUE"): 4591 this.set("maxvalue", self._parse_bitwise()) 4592 4593 if self._match_text_seq("CYCLE"): 4594 this.set("cycle", True) 4595 elif self._match_text_seq("NO", "CYCLE"): 4596 this.set("cycle", False) 4597 4598 if not identity: 4599 this.set("expression", self._parse_bitwise()) 4600 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4601 args = self._parse_csv(self._parse_bitwise) 4602 this.set("start", seq_get(args, 0)) 4603 this.set("increment", seq_get(args, 1)) 4604 4605 self._match_r_paren() 4606 4607 return this 4608 4609 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4610 self._match_text_seq("LENGTH") 4611 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4612 4613 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4614 if self._match_text_seq("NULL"): 4615 return self.expression(exp.NotNullColumnConstraint) 4616 if self._match_text_seq("CASESPECIFIC"): 4617 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4618 if self._match_text_seq("FOR", "REPLICATION"): 4619 return self.expression(exp.NotForReplicationColumnConstraint) 4620 return None 4621 4622 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4623 if self._match(TokenType.CONSTRAINT): 4624 this = self._parse_id_var() 4625 else: 4626 this = None 4627 4628 if self._match_texts(self.CONSTRAINT_PARSERS): 4629 return self.expression( 4630 exp.ColumnConstraint, 4631 this=this, 4632 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4633 ) 4634 4635 return this 4636 4637 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4638 if not self._match(TokenType.CONSTRAINT): 4639 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4640 4641 return self.expression( 4642 exp.Constraint, 4643 this=self._parse_id_var(), 4644 expressions=self._parse_unnamed_constraints(), 4645 ) 4646 4647 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4648 constraints = [] 4649 while True: 4650 constraint = self._parse_unnamed_constraint() or self._parse_function() 4651 if not constraint: 4652 break 4653 constraints.append(constraint) 4654 4655 return constraints 4656 4657 def _parse_unnamed_constraint( 4658 self, constraints: t.Optional[t.Collection[str]] = None 4659 ) -> t.Optional[exp.Expression]: 4660 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4661 constraints or self.CONSTRAINT_PARSERS 4662 ): 4663 return None 4664 4665 constraint = self._prev.text.upper() 4666 if constraint not in self.CONSTRAINT_PARSERS: 4667 self.raise_error(f"No parser found for schema constraint {constraint}.") 4668 4669 return self.CONSTRAINT_PARSERS[constraint](self) 4670 4671 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4672 self._match_text_seq("KEY") 4673 return self.expression( 4674 exp.UniqueColumnConstraint, 4675 this=self._parse_schema(self._parse_id_var(any_token=False)), 4676 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4677 on_conflict=self._parse_on_conflict(), 4678 ) 4679 4680 def _parse_key_constraint_options(self) -> t.List[str]: 4681 options = [] 4682 while True: 4683 if not self._curr: 4684 break 4685 4686 if self._match(TokenType.ON): 4687 action = None 4688 on = self._advance_any() and self._prev.text 4689 4690 if self._match_text_seq("NO", "ACTION"): 4691 action = "NO ACTION" 4692 elif self._match_text_seq("CASCADE"): 4693 action = "CASCADE" 4694 elif self._match_text_seq("RESTRICT"): 4695 action = "RESTRICT" 4696 elif self._match_pair(TokenType.SET, TokenType.NULL): 4697 action = "SET NULL" 4698 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4699 action = "SET DEFAULT" 4700 else: 4701 self.raise_error("Invalid key constraint") 4702 4703 options.append(f"ON {on} {action}") 4704 elif self._match_text_seq("NOT", "ENFORCED"): 4705 options.append("NOT ENFORCED") 4706 elif self._match_text_seq("DEFERRABLE"): 4707 options.append("DEFERRABLE") 4708 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4709 options.append("INITIALLY DEFERRED") 4710 elif self._match_text_seq("NORELY"): 4711 options.append("NORELY") 4712 elif self._match_text_seq("MATCH", "FULL"): 4713 options.append("MATCH FULL") 4714 else: 4715 break 4716 4717 return options 4718 4719 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4720 if match and not self._match(TokenType.REFERENCES): 4721 return None 4722 4723 expressions = None 4724 this = self._parse_table(schema=True) 4725 options = self._parse_key_constraint_options() 4726 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4727 4728 def _parse_foreign_key(self) -> exp.ForeignKey: 4729 expressions = self._parse_wrapped_id_vars() 4730 reference = self._parse_references() 4731 options = {} 4732 4733 while self._match(TokenType.ON): 4734 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4735 self.raise_error("Expected DELETE or UPDATE") 4736 4737 kind = self._prev.text.lower() 4738 4739 if self._match_text_seq("NO", "ACTION"): 4740 action = "NO ACTION" 4741 elif self._match(TokenType.SET): 4742 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4743 action = "SET " + self._prev.text.upper() 4744 else: 4745 self._advance() 4746 action = self._prev.text.upper() 4747 4748 options[kind] = action 4749 4750 return self.expression( 4751 exp.ForeignKey, 4752 expressions=expressions, 4753 reference=reference, 4754 **options, # type: ignore 4755 ) 4756 4757 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4758 return self._parse_field() 4759 4760 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4761 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4762 self._retreat(self._index - 1) 4763 return None 4764 4765 id_vars = self._parse_wrapped_id_vars() 4766 return self.expression( 4767 exp.PeriodForSystemTimeConstraint, 4768 this=seq_get(id_vars, 0), 4769 expression=seq_get(id_vars, 1), 4770 ) 4771 4772 def _parse_primary_key( 4773 self, wrapped_optional: bool = False, in_props: bool = False 4774 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4775 desc = ( 4776 self._match_set((TokenType.ASC, TokenType.DESC)) 4777 and self._prev.token_type == TokenType.DESC 4778 ) 4779 4780 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4781 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4782 4783 expressions = self._parse_wrapped_csv( 4784 self._parse_primary_key_part, optional=wrapped_optional 4785 ) 4786 options = self._parse_key_constraint_options() 4787 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4788 4789 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4790 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4791 4792 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4793 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4794 return this 4795 4796 bracket_kind = self._prev.token_type 4797 expressions = self._parse_csv( 4798 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4799 ) 4800 4801 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4802 self.raise_error("Expected ]") 4803 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4804 self.raise_error("Expected }") 4805 4806 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4807 if bracket_kind == TokenType.L_BRACE: 4808 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4809 elif not this or this.name.upper() == "ARRAY": 4810 this = self.expression(exp.Array, expressions=expressions) 4811 else: 4812 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4813 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4814 4815 self._add_comments(this) 4816 return self._parse_bracket(this) 4817 4818 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4819 if self._match(TokenType.COLON): 4820 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4821 return this 4822 4823 def _parse_case(self) -> t.Optional[exp.Expression]: 4824 ifs = [] 4825 default = None 4826 4827 comments = self._prev_comments 4828 expression = self._parse_conjunction() 4829 4830 while self._match(TokenType.WHEN): 4831 this = self._parse_conjunction() 4832 self._match(TokenType.THEN) 4833 then = self._parse_conjunction() 4834 ifs.append(self.expression(exp.If, this=this, true=then)) 4835 4836 if self._match(TokenType.ELSE): 4837 default = self._parse_conjunction() 4838 4839 if not self._match(TokenType.END): 4840 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4841 default = exp.column("interval") 4842 else: 4843 self.raise_error("Expected END after CASE", self._prev) 4844 4845 return self.expression( 4846 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4847 ) 4848 4849 def _parse_if(self) -> t.Optional[exp.Expression]: 4850 if self._match(TokenType.L_PAREN): 4851 args = self._parse_csv(self._parse_conjunction) 4852 this = self.validate_expression(exp.If.from_arg_list(args), args) 4853 self._match_r_paren() 4854 else: 4855 index = self._index - 1 4856 4857 if self.NO_PAREN_IF_COMMANDS and index == 0: 4858 return self._parse_as_command(self._prev) 4859 4860 condition = self._parse_conjunction() 4861 4862 if not condition: 4863 self._retreat(index) 4864 return None 4865 4866 self._match(TokenType.THEN) 4867 true = self._parse_conjunction() 4868 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4869 self._match(TokenType.END) 4870 this = self.expression(exp.If, this=condition, true=true, false=false) 4871 4872 return this 4873 4874 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4875 if not self._match_text_seq("VALUE", "FOR"): 4876 self._retreat(self._index - 1) 4877 return None 4878 4879 return self.expression( 4880 exp.NextValueFor, 4881 this=self._parse_column(), 4882 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4883 ) 4884 4885 def _parse_extract(self) -> exp.Extract: 4886 this = self._parse_function() or self._parse_var() or self._parse_type() 4887 4888 if self._match(TokenType.FROM): 4889 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4890 4891 if not self._match(TokenType.COMMA): 4892 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4893 4894 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4895 4896 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4897 this = self._parse_conjunction() 4898 4899 if not self._match(TokenType.ALIAS): 4900 if self._match(TokenType.COMMA): 4901 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4902 4903 self.raise_error("Expected AS after CAST") 4904 4905 fmt = None 4906 to = self._parse_types() 4907 4908 if self._match(TokenType.FORMAT): 4909 fmt_string = self._parse_string() 4910 fmt = self._parse_at_time_zone(fmt_string) 4911 4912 if not to: 4913 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4914 if to.this in exp.DataType.TEMPORAL_TYPES: 4915 this = self.expression( 4916 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4917 this=this, 4918 format=exp.Literal.string( 4919 format_time( 4920 fmt_string.this if fmt_string else "", 4921 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4922 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4923 ) 4924 ), 4925 ) 4926 4927 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4928 this.set("zone", fmt.args["zone"]) 4929 return this 4930 elif not to: 4931 self.raise_error("Expected TYPE after CAST") 4932 elif isinstance(to, exp.Identifier): 4933 to = exp.DataType.build(to.name, udt=True) 4934 elif to.this == exp.DataType.Type.CHAR: 4935 if self._match(TokenType.CHARACTER_SET): 4936 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4937 4938 return self.expression( 4939 exp.Cast if strict else exp.TryCast, 4940 this=this, 4941 to=to, 4942 format=fmt, 4943 safe=safe, 4944 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4945 ) 4946 4947 def _parse_string_agg(self) -> exp.Expression: 4948 if self._match(TokenType.DISTINCT): 4949 args: t.List[t.Optional[exp.Expression]] = [ 4950 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4951 ] 4952 if self._match(TokenType.COMMA): 4953 args.extend(self._parse_csv(self._parse_conjunction)) 4954 else: 4955 args = self._parse_csv(self._parse_conjunction) # type: ignore 4956 4957 index = self._index 4958 if not self._match(TokenType.R_PAREN) and args: 4959 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4960 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4961 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4962 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4963 4964 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4965 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4966 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4967 if not self._match_text_seq("WITHIN", "GROUP"): 4968 self._retreat(index) 4969 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4970 4971 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4972 order = self._parse_order(this=seq_get(args, 0)) 4973 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4974 4975 def _parse_convert( 4976 self, strict: bool, safe: t.Optional[bool] = None 4977 ) -> t.Optional[exp.Expression]: 4978 this = self._parse_bitwise() 4979 4980 if self._match(TokenType.USING): 4981 to: t.Optional[exp.Expression] = self.expression( 4982 exp.CharacterSet, this=self._parse_var() 4983 ) 4984 elif self._match(TokenType.COMMA): 4985 to = self._parse_types() 4986 else: 4987 to = None 4988 4989 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4990 4991 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4992 """ 4993 There are generally two variants of the DECODE function: 4994 4995 - DECODE(bin, charset) 4996 - DECODE(expression, search, result [, search, result] ... [, default]) 4997 4998 The second variant will always be parsed into a CASE expression. Note that NULL 4999 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5000 instead of relying on pattern matching. 5001 """ 5002 args = self._parse_csv(self._parse_conjunction) 5003 5004 if len(args) < 3: 5005 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5006 5007 expression, *expressions = args 5008 if not expression: 5009 return None 5010 5011 ifs = [] 5012 for search, result in zip(expressions[::2], expressions[1::2]): 5013 if not search or not result: 5014 return None 5015 5016 if isinstance(search, exp.Literal): 5017 ifs.append( 5018 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5019 ) 5020 elif isinstance(search, exp.Null): 5021 ifs.append( 5022 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5023 ) 5024 else: 5025 cond = exp.or_( 5026 exp.EQ(this=expression.copy(), expression=search), 5027 exp.and_( 5028 exp.Is(this=expression.copy(), expression=exp.Null()), 5029 exp.Is(this=search.copy(), expression=exp.Null()), 5030 copy=False, 5031 ), 5032 copy=False, 5033 ) 5034 ifs.append(exp.If(this=cond, true=result)) 5035 5036 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5037 5038 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5039 self._match_text_seq("KEY") 5040 key = self._parse_column() 5041 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5042 self._match_text_seq("VALUE") 5043 value = self._parse_bitwise() 5044 5045 if not key and not value: 5046 return None 5047 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5048 5049 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5050 if not this or not self._match_text_seq("FORMAT", "JSON"): 5051 return this 5052 5053 return self.expression(exp.FormatJson, this=this) 5054 5055 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5056 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5057 for value in values: 5058 if self._match_text_seq(value, "ON", on): 5059 return f"{value} ON {on}" 5060 5061 return None 5062 5063 @t.overload 5064 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5065 5066 @t.overload 5067 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5068 5069 def _parse_json_object(self, agg=False): 5070 star = self._parse_star() 5071 expressions = ( 5072 [star] 5073 if star 5074 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5075 ) 5076 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5077 5078 unique_keys = None 5079 if self._match_text_seq("WITH", "UNIQUE"): 5080 unique_keys = True 5081 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5082 unique_keys = False 5083 5084 self._match_text_seq("KEYS") 5085 5086 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5087 self._parse_type() 5088 ) 5089 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5090 5091 return self.expression( 5092 exp.JSONObjectAgg if agg else exp.JSONObject, 5093 expressions=expressions, 5094 null_handling=null_handling, 5095 unique_keys=unique_keys, 5096 return_type=return_type, 5097 encoding=encoding, 5098 ) 5099 5100 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5101 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5102 if not self._match_text_seq("NESTED"): 5103 this = self._parse_id_var() 5104 kind = self._parse_types(allow_identifiers=False) 5105 nested = None 5106 else: 5107 this = None 5108 kind = None 5109 nested = True 5110 5111 path = self._match_text_seq("PATH") and self._parse_string() 5112 nested_schema = nested and self._parse_json_schema() 5113 5114 return self.expression( 5115 exp.JSONColumnDef, 5116 this=this, 5117 kind=kind, 5118 path=path, 5119 nested_schema=nested_schema, 5120 ) 5121 5122 def _parse_json_schema(self) -> exp.JSONSchema: 5123 self._match_text_seq("COLUMNS") 5124 return self.expression( 5125 exp.JSONSchema, 5126 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5127 ) 5128 5129 def _parse_json_table(self) -> exp.JSONTable: 5130 this = self._parse_format_json(self._parse_bitwise()) 5131 path = self._match(TokenType.COMMA) and self._parse_string() 5132 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5133 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5134 schema = self._parse_json_schema() 5135 5136 return exp.JSONTable( 5137 this=this, 5138 schema=schema, 5139 path=path, 5140 error_handling=error_handling, 5141 empty_handling=empty_handling, 5142 ) 5143 5144 def _parse_match_against(self) -> exp.MatchAgainst: 5145 expressions = self._parse_csv(self._parse_column) 5146 5147 self._match_text_seq(")", "AGAINST", "(") 5148 5149 this = self._parse_string() 5150 5151 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5152 modifier = "IN NATURAL LANGUAGE MODE" 5153 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5154 modifier = f"{modifier} WITH QUERY EXPANSION" 5155 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5156 modifier = "IN BOOLEAN MODE" 5157 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5158 modifier = "WITH QUERY EXPANSION" 5159 else: 5160 modifier = None 5161 5162 return self.expression( 5163 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5164 ) 5165 5166 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5167 def _parse_open_json(self) -> exp.OpenJSON: 5168 this = self._parse_bitwise() 5169 path = self._match(TokenType.COMMA) and self._parse_string() 5170 5171 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5172 this = self._parse_field(any_token=True) 5173 kind = self._parse_types() 5174 path = self._parse_string() 5175 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5176 5177 return self.expression( 5178 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5179 ) 5180 5181 expressions = None 5182 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5183 self._match_l_paren() 5184 expressions = self._parse_csv(_parse_open_json_column_def) 5185 5186 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5187 5188 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5189 args = self._parse_csv(self._parse_bitwise) 5190 5191 if self._match(TokenType.IN): 5192 return self.expression( 5193 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5194 ) 5195 5196 if haystack_first: 5197 haystack = seq_get(args, 0) 5198 needle = seq_get(args, 1) 5199 else: 5200 needle = seq_get(args, 0) 5201 haystack = seq_get(args, 1) 5202 5203 return self.expression( 5204 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5205 ) 5206 5207 def _parse_predict(self) -> exp.Predict: 5208 self._match_text_seq("MODEL") 5209 this = self._parse_table() 5210 5211 self._match(TokenType.COMMA) 5212 self._match_text_seq("TABLE") 5213 5214 return self.expression( 5215 exp.Predict, 5216 this=this, 5217 expression=self._parse_table(), 5218 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5219 ) 5220 5221 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5222 args = self._parse_csv(self._parse_table) 5223 return exp.JoinHint(this=func_name.upper(), expressions=args) 5224 5225 def _parse_substring(self) -> exp.Substring: 5226 # Postgres supports the form: substring(string [from int] [for int]) 5227 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5228 5229 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5230 5231 if self._match(TokenType.FROM): 5232 args.append(self._parse_bitwise()) 5233 if self._match(TokenType.FOR): 5234 args.append(self._parse_bitwise()) 5235 5236 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5237 5238 def _parse_trim(self) -> exp.Trim: 5239 # https://www.w3resource.com/sql/character-functions/trim.php 5240 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5241 5242 position = None 5243 collation = None 5244 expression = None 5245 5246 if self._match_texts(self.TRIM_TYPES): 5247 position = self._prev.text.upper() 5248 5249 this = self._parse_bitwise() 5250 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5251 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5252 expression = self._parse_bitwise() 5253 5254 if invert_order: 5255 this, expression = expression, this 5256 5257 if self._match(TokenType.COLLATE): 5258 collation = self._parse_bitwise() 5259 5260 return self.expression( 5261 exp.Trim, this=this, position=position, expression=expression, collation=collation 5262 ) 5263 5264 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5265 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5266 5267 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5268 return self._parse_window(self._parse_id_var(), alias=True) 5269 5270 def _parse_respect_or_ignore_nulls( 5271 self, this: t.Optional[exp.Expression] 5272 ) -> t.Optional[exp.Expression]: 5273 if self._match_text_seq("IGNORE", "NULLS"): 5274 return self.expression(exp.IgnoreNulls, this=this) 5275 if self._match_text_seq("RESPECT", "NULLS"): 5276 return self.expression(exp.RespectNulls, this=this) 5277 return this 5278 5279 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5280 if self._match(TokenType.HAVING): 5281 self._match_texts(("MAX", "MIN")) 5282 max = self._prev.text.upper() != "MIN" 5283 return self.expression( 5284 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5285 ) 5286 5287 return this 5288 5289 def _parse_window( 5290 self, this: t.Optional[exp.Expression], alias: bool = False 5291 ) -> t.Optional[exp.Expression]: 5292 func = this 5293 comments = func.comments if isinstance(func, exp.Expression) else None 5294 5295 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5296 self._match(TokenType.WHERE) 5297 this = self.expression( 5298 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5299 ) 5300 self._match_r_paren() 5301 5302 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5303 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5304 if self._match_text_seq("WITHIN", "GROUP"): 5305 order = self._parse_wrapped(self._parse_order) 5306 this = self.expression(exp.WithinGroup, this=this, expression=order) 5307 5308 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5309 # Some dialects choose to implement and some do not. 5310 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5311 5312 # There is some code above in _parse_lambda that handles 5313 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5314 5315 # The below changes handle 5316 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5317 5318 # Oracle allows both formats 5319 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5320 # and Snowflake chose to do the same for familiarity 5321 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5322 if isinstance(this, exp.AggFunc): 5323 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5324 5325 if ignore_respect and ignore_respect is not this: 5326 ignore_respect.replace(ignore_respect.this) 5327 this = self.expression(ignore_respect.__class__, this=this) 5328 5329 this = self._parse_respect_or_ignore_nulls(this) 5330 5331 # bigquery select from window x AS (partition by ...) 5332 if alias: 5333 over = None 5334 self._match(TokenType.ALIAS) 5335 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5336 return this 5337 else: 5338 over = self._prev.text.upper() 5339 5340 if comments: 5341 func.comments = None # type: ignore 5342 5343 if not self._match(TokenType.L_PAREN): 5344 return self.expression( 5345 exp.Window, 5346 comments=comments, 5347 this=this, 5348 alias=self._parse_id_var(False), 5349 over=over, 5350 ) 5351 5352 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5353 5354 first = self._match(TokenType.FIRST) 5355 if self._match_text_seq("LAST"): 5356 first = False 5357 5358 partition, order = self._parse_partition_and_order() 5359 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5360 5361 if kind: 5362 self._match(TokenType.BETWEEN) 5363 start = self._parse_window_spec() 5364 self._match(TokenType.AND) 5365 end = self._parse_window_spec() 5366 5367 spec = self.expression( 5368 exp.WindowSpec, 5369 kind=kind, 5370 start=start["value"], 5371 start_side=start["side"], 5372 end=end["value"], 5373 end_side=end["side"], 5374 ) 5375 else: 5376 spec = None 5377 5378 self._match_r_paren() 5379 5380 window = self.expression( 5381 exp.Window, 5382 comments=comments, 5383 this=this, 5384 partition_by=partition, 5385 order=order, 5386 spec=spec, 5387 alias=window_alias, 5388 over=over, 5389 first=first, 5390 ) 5391 5392 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5393 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5394 return self._parse_window(window, alias=alias) 5395 5396 return window 5397 5398 def _parse_partition_and_order( 5399 self, 5400 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5401 return self._parse_partition_by(), self._parse_order() 5402 5403 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5404 self._match(TokenType.BETWEEN) 5405 5406 return { 5407 "value": ( 5408 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5409 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5410 or self._parse_bitwise() 5411 ), 5412 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5413 } 5414 5415 def _parse_alias( 5416 self, this: t.Optional[exp.Expression], explicit: bool = False 5417 ) -> t.Optional[exp.Expression]: 5418 any_token = self._match(TokenType.ALIAS) 5419 comments = self._prev_comments 5420 5421 if explicit and not any_token: 5422 return this 5423 5424 if self._match(TokenType.L_PAREN): 5425 aliases = self.expression( 5426 exp.Aliases, 5427 comments=comments, 5428 this=this, 5429 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5430 ) 5431 self._match_r_paren(aliases) 5432 return aliases 5433 5434 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5435 self.STRING_ALIASES and self._parse_string_as_identifier() 5436 ) 5437 5438 if alias: 5439 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5440 column = this.this 5441 5442 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5443 if not this.comments and column and column.comments: 5444 this.comments = column.comments 5445 column.comments = None 5446 5447 return this 5448 5449 def _parse_id_var( 5450 self, 5451 any_token: bool = True, 5452 tokens: t.Optional[t.Collection[TokenType]] = None, 5453 ) -> t.Optional[exp.Expression]: 5454 identifier = self._parse_identifier() 5455 5456 if identifier: 5457 return identifier 5458 5459 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5460 quoted = self._prev.token_type == TokenType.STRING 5461 return exp.Identifier(this=self._prev.text, quoted=quoted) 5462 5463 return None 5464 5465 def _parse_string(self) -> t.Optional[exp.Expression]: 5466 if self._match_set(self.STRING_PARSERS): 5467 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5468 return self._parse_placeholder() 5469 5470 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5471 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5472 5473 def _parse_number(self) -> t.Optional[exp.Expression]: 5474 if self._match_set(self.NUMERIC_PARSERS): 5475 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5476 return self._parse_placeholder() 5477 5478 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5479 if self._match(TokenType.IDENTIFIER): 5480 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5481 return self._parse_placeholder() 5482 5483 def _parse_var( 5484 self, 5485 any_token: bool = False, 5486 tokens: t.Optional[t.Collection[TokenType]] = None, 5487 upper: bool = False, 5488 ) -> t.Optional[exp.Expression]: 5489 if ( 5490 (any_token and self._advance_any()) 5491 or self._match(TokenType.VAR) 5492 or (self._match_set(tokens) if tokens else False) 5493 ): 5494 return self.expression( 5495 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5496 ) 5497 return self._parse_placeholder() 5498 5499 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5500 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5501 self._advance() 5502 return self._prev 5503 return None 5504 5505 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5506 return self._parse_var() or self._parse_string() 5507 5508 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5509 return self._parse_primary() or self._parse_var(any_token=True) 5510 5511 def _parse_null(self) -> t.Optional[exp.Expression]: 5512 if self._match_set(self.NULL_TOKENS): 5513 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5514 return self._parse_placeholder() 5515 5516 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5517 if self._match(TokenType.TRUE): 5518 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5519 if self._match(TokenType.FALSE): 5520 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5521 return self._parse_placeholder() 5522 5523 def _parse_star(self) -> t.Optional[exp.Expression]: 5524 if self._match(TokenType.STAR): 5525 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5526 return self._parse_placeholder() 5527 5528 def _parse_parameter(self) -> exp.Parameter: 5529 self._match(TokenType.L_BRACE) 5530 this = self._parse_identifier() or self._parse_primary_or_var() 5531 expression = self._match(TokenType.COLON) and ( 5532 self._parse_identifier() or self._parse_primary_or_var() 5533 ) 5534 self._match(TokenType.R_BRACE) 5535 return self.expression(exp.Parameter, this=this, expression=expression) 5536 5537 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5538 if self._match_set(self.PLACEHOLDER_PARSERS): 5539 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5540 if placeholder: 5541 return placeholder 5542 self._advance(-1) 5543 return None 5544 5545 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5546 if not self._match(TokenType.EXCEPT): 5547 return None 5548 if self._match(TokenType.L_PAREN, advance=False): 5549 return self._parse_wrapped_csv(self._parse_column) 5550 5551 except_column = self._parse_column() 5552 return [except_column] if except_column else None 5553 5554 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5555 if not self._match(TokenType.REPLACE): 5556 return None 5557 if self._match(TokenType.L_PAREN, advance=False): 5558 return self._parse_wrapped_csv(self._parse_expression) 5559 5560 replace_expression = self._parse_expression() 5561 return [replace_expression] if replace_expression else None 5562 5563 def _parse_csv( 5564 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5565 ) -> t.List[exp.Expression]: 5566 parse_result = parse_method() 5567 items = [parse_result] if parse_result is not None else [] 5568 5569 while self._match(sep): 5570 self._add_comments(parse_result) 5571 parse_result = parse_method() 5572 if parse_result is not None: 5573 items.append(parse_result) 5574 5575 return items 5576 5577 def _parse_tokens( 5578 self, parse_method: t.Callable, expressions: t.Dict 5579 ) -> t.Optional[exp.Expression]: 5580 this = parse_method() 5581 5582 while self._match_set(expressions): 5583 this = self.expression( 5584 expressions[self._prev.token_type], 5585 this=this, 5586 comments=self._prev_comments, 5587 expression=parse_method(), 5588 ) 5589 5590 return this 5591 5592 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5593 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5594 5595 def _parse_wrapped_csv( 5596 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5597 ) -> t.List[exp.Expression]: 5598 return self._parse_wrapped( 5599 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5600 ) 5601 5602 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5603 wrapped = self._match(TokenType.L_PAREN) 5604 if not wrapped and not optional: 5605 self.raise_error("Expecting (") 5606 parse_result = parse_method() 5607 if wrapped: 5608 self._match_r_paren() 5609 return parse_result 5610 5611 def _parse_expressions(self) -> t.List[exp.Expression]: 5612 return self._parse_csv(self._parse_expression) 5613 5614 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5615 return self._parse_select() or self._parse_set_operations( 5616 self._parse_expression() if alias else self._parse_conjunction() 5617 ) 5618 5619 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5620 return self._parse_query_modifiers( 5621 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5622 ) 5623 5624 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5625 this = None 5626 if self._match_texts(self.TRANSACTION_KIND): 5627 this = self._prev.text 5628 5629 self._match_texts(("TRANSACTION", "WORK")) 5630 5631 modes = [] 5632 while True: 5633 mode = [] 5634 while self._match(TokenType.VAR): 5635 mode.append(self._prev.text) 5636 5637 if mode: 5638 modes.append(" ".join(mode)) 5639 if not self._match(TokenType.COMMA): 5640 break 5641 5642 return self.expression(exp.Transaction, this=this, modes=modes) 5643 5644 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5645 chain = None 5646 savepoint = None 5647 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5648 5649 self._match_texts(("TRANSACTION", "WORK")) 5650 5651 if self._match_text_seq("TO"): 5652 self._match_text_seq("SAVEPOINT") 5653 savepoint = self._parse_id_var() 5654 5655 if self._match(TokenType.AND): 5656 chain = not self._match_text_seq("NO") 5657 self._match_text_seq("CHAIN") 5658 5659 if is_rollback: 5660 return self.expression(exp.Rollback, savepoint=savepoint) 5661 5662 return self.expression(exp.Commit, chain=chain) 5663 5664 def _parse_refresh(self) -> exp.Refresh: 5665 self._match(TokenType.TABLE) 5666 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5667 5668 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5669 if not self._match_text_seq("ADD"): 5670 return None 5671 5672 self._match(TokenType.COLUMN) 5673 exists_column = self._parse_exists(not_=True) 5674 expression = self._parse_field_def() 5675 5676 if expression: 5677 expression.set("exists", exists_column) 5678 5679 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5680 if self._match_texts(("FIRST", "AFTER")): 5681 position = self._prev.text 5682 column_position = self.expression( 5683 exp.ColumnPosition, this=self._parse_column(), position=position 5684 ) 5685 expression.set("position", column_position) 5686 5687 return expression 5688 5689 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5690 drop = self._match(TokenType.DROP) and self._parse_drop() 5691 if drop and not isinstance(drop, exp.Command): 5692 drop.set("kind", drop.args.get("kind", "COLUMN")) 5693 return drop 5694 5695 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5696 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5697 return self.expression( 5698 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5699 ) 5700 5701 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5702 index = self._index - 1 5703 5704 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5705 return self._parse_csv( 5706 lambda: self.expression( 5707 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5708 ) 5709 ) 5710 5711 self._retreat(index) 5712 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5713 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5714 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5715 5716 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5717 self._match(TokenType.COLUMN) 5718 column = self._parse_field(any_token=True) 5719 5720 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5721 return self.expression(exp.AlterColumn, this=column, drop=True) 5722 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5723 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5724 if self._match(TokenType.COMMENT): 5725 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5726 5727 self._match_text_seq("SET", "DATA") 5728 self._match_text_seq("TYPE") 5729 return self.expression( 5730 exp.AlterColumn, 5731 this=column, 5732 dtype=self._parse_types(), 5733 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5734 using=self._match(TokenType.USING) and self._parse_conjunction(), 5735 ) 5736 5737 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5738 index = self._index - 1 5739 5740 partition_exists = self._parse_exists() 5741 if self._match(TokenType.PARTITION, advance=False): 5742 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5743 5744 self._retreat(index) 5745 return self._parse_csv(self._parse_drop_column) 5746 5747 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5748 if self._match(TokenType.COLUMN): 5749 exists = self._parse_exists() 5750 old_column = self._parse_column() 5751 to = self._match_text_seq("TO") 5752 new_column = self._parse_column() 5753 5754 if old_column is None or to is None or new_column is None: 5755 return None 5756 5757 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5758 5759 self._match_text_seq("TO") 5760 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5761 5762 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5763 start = self._prev 5764 5765 if not self._match(TokenType.TABLE): 5766 return self._parse_as_command(start) 5767 5768 exists = self._parse_exists() 5769 only = self._match_text_seq("ONLY") 5770 this = self._parse_table(schema=True) 5771 5772 if self._next: 5773 self._advance() 5774 5775 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5776 if parser: 5777 actions = ensure_list(parser(self)) 5778 options = self._parse_csv(self._parse_property) 5779 5780 if not self._curr and actions: 5781 return self.expression( 5782 exp.AlterTable, 5783 this=this, 5784 exists=exists, 5785 actions=actions, 5786 only=only, 5787 options=options, 5788 ) 5789 5790 return self._parse_as_command(start) 5791 5792 def _parse_merge(self) -> exp.Merge: 5793 self._match(TokenType.INTO) 5794 target = self._parse_table() 5795 5796 if target and self._match(TokenType.ALIAS, advance=False): 5797 target.set("alias", self._parse_table_alias()) 5798 5799 self._match(TokenType.USING) 5800 using = self._parse_table() 5801 5802 self._match(TokenType.ON) 5803 on = self._parse_conjunction() 5804 5805 return self.expression( 5806 exp.Merge, 5807 this=target, 5808 using=using, 5809 on=on, 5810 expressions=self._parse_when_matched(), 5811 ) 5812 5813 def _parse_when_matched(self) -> t.List[exp.When]: 5814 whens = [] 5815 5816 while self._match(TokenType.WHEN): 5817 matched = not self._match(TokenType.NOT) 5818 self._match_text_seq("MATCHED") 5819 source = ( 5820 False 5821 if self._match_text_seq("BY", "TARGET") 5822 else self._match_text_seq("BY", "SOURCE") 5823 ) 5824 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5825 5826 self._match(TokenType.THEN) 5827 5828 if self._match(TokenType.INSERT): 5829 _this = self._parse_star() 5830 if _this: 5831 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5832 else: 5833 then = self.expression( 5834 exp.Insert, 5835 this=self._parse_value(), 5836 expression=self._match_text_seq("VALUES") and self._parse_value(), 5837 ) 5838 elif self._match(TokenType.UPDATE): 5839 expressions = self._parse_star() 5840 if expressions: 5841 then = self.expression(exp.Update, expressions=expressions) 5842 else: 5843 then = self.expression( 5844 exp.Update, 5845 expressions=self._match(TokenType.SET) 5846 and self._parse_csv(self._parse_equality), 5847 ) 5848 elif self._match(TokenType.DELETE): 5849 then = self.expression(exp.Var, this=self._prev.text) 5850 else: 5851 then = None 5852 5853 whens.append( 5854 self.expression( 5855 exp.When, 5856 matched=matched, 5857 source=source, 5858 condition=condition, 5859 then=then, 5860 ) 5861 ) 5862 return whens 5863 5864 def _parse_show(self) -> t.Optional[exp.Expression]: 5865 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5866 if parser: 5867 return parser(self) 5868 return self._parse_as_command(self._prev) 5869 5870 def _parse_set_item_assignment( 5871 self, kind: t.Optional[str] = None 5872 ) -> t.Optional[exp.Expression]: 5873 index = self._index 5874 5875 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5876 return self._parse_set_transaction(global_=kind == "GLOBAL") 5877 5878 left = self._parse_primary() or self._parse_id_var() 5879 assignment_delimiter = self._match_texts(("=", "TO")) 5880 5881 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5882 self._retreat(index) 5883 return None 5884 5885 right = self._parse_statement() or self._parse_id_var() 5886 this = self.expression(exp.EQ, this=left, expression=right) 5887 5888 return self.expression(exp.SetItem, this=this, kind=kind) 5889 5890 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5891 self._match_text_seq("TRANSACTION") 5892 characteristics = self._parse_csv( 5893 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5894 ) 5895 return self.expression( 5896 exp.SetItem, 5897 expressions=characteristics, 5898 kind="TRANSACTION", 5899 **{"global": global_}, # type: ignore 5900 ) 5901 5902 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5903 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5904 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5905 5906 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5907 index = self._index 5908 set_ = self.expression( 5909 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5910 ) 5911 5912 if self._curr: 5913 self._retreat(index) 5914 return self._parse_as_command(self._prev) 5915 5916 return set_ 5917 5918 def _parse_var_from_options( 5919 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5920 ) -> t.Optional[exp.Var]: 5921 start = self._curr 5922 if not start: 5923 return None 5924 5925 option = start.text.upper() 5926 continuations = options.get(option) 5927 5928 index = self._index 5929 self._advance() 5930 for keywords in continuations or []: 5931 if isinstance(keywords, str): 5932 keywords = (keywords,) 5933 5934 if self._match_text_seq(*keywords): 5935 option = f"{option} {' '.join(keywords)}" 5936 break 5937 else: 5938 if continuations or continuations is None: 5939 if raise_unmatched: 5940 self.raise_error(f"Unknown option {option}") 5941 5942 self._retreat(index) 5943 return None 5944 5945 return exp.var(option) 5946 5947 def _parse_as_command(self, start: Token) -> exp.Command: 5948 while self._curr: 5949 self._advance() 5950 text = self._find_sql(start, self._prev) 5951 size = len(start.text) 5952 self._warn_unsupported() 5953 return exp.Command(this=text[:size], expression=text[size:]) 5954 5955 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5956 settings = [] 5957 5958 self._match_l_paren() 5959 kind = self._parse_id_var() 5960 5961 if self._match(TokenType.L_PAREN): 5962 while True: 5963 key = self._parse_id_var() 5964 value = self._parse_primary() 5965 5966 if not key and value is None: 5967 break 5968 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5969 self._match(TokenType.R_PAREN) 5970 5971 self._match_r_paren() 5972 5973 return self.expression( 5974 exp.DictProperty, 5975 this=this, 5976 kind=kind.this if kind else None, 5977 settings=settings, 5978 ) 5979 5980 def _parse_dict_range(self, this: str) -> exp.DictRange: 5981 self._match_l_paren() 5982 has_min = self._match_text_seq("MIN") 5983 if has_min: 5984 min = self._parse_var() or self._parse_primary() 5985 self._match_text_seq("MAX") 5986 max = self._parse_var() or self._parse_primary() 5987 else: 5988 max = self._parse_var() or self._parse_primary() 5989 min = exp.Literal.number(0) 5990 self._match_r_paren() 5991 return self.expression(exp.DictRange, this=this, min=min, max=max) 5992 5993 def _parse_comprehension( 5994 self, this: t.Optional[exp.Expression] 5995 ) -> t.Optional[exp.Comprehension]: 5996 index = self._index 5997 expression = self._parse_column() 5998 if not self._match(TokenType.IN): 5999 self._retreat(index - 1) 6000 return None 6001 iterator = self._parse_column() 6002 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6003 return self.expression( 6004 exp.Comprehension, 6005 this=this, 6006 expression=expression, 6007 iterator=iterator, 6008 condition=condition, 6009 ) 6010 6011 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6012 if self._match(TokenType.HEREDOC_STRING): 6013 return self.expression(exp.Heredoc, this=self._prev.text) 6014 6015 if not self._match_text_seq("$"): 6016 return None 6017 6018 tags = ["$"] 6019 tag_text = None 6020 6021 if self._is_connected(): 6022 self._advance() 6023 tags.append(self._prev.text.upper()) 6024 else: 6025 self.raise_error("No closing $ found") 6026 6027 if tags[-1] != "$": 6028 if self._is_connected() and self._match_text_seq("$"): 6029 tag_text = tags[-1] 6030 tags.append("$") 6031 else: 6032 self.raise_error("No closing $ found") 6033 6034 heredoc_start = self._curr 6035 6036 while self._curr: 6037 if self._match_text_seq(*tags, advance=False): 6038 this = self._find_sql(heredoc_start, self._prev) 6039 self._advance(len(tags)) 6040 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6041 6042 self._advance() 6043 6044 self.raise_error(f"No closing {''.join(tags)} found") 6045 return None 6046 6047 def _find_parser( 6048 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6049 ) -> t.Optional[t.Callable]: 6050 if not self._curr: 6051 return None 6052 6053 index = self._index 6054 this = [] 6055 while True: 6056 # The current token might be multiple words 6057 curr = self._curr.text.upper() 6058 key = curr.split(" ") 6059 this.append(curr) 6060 6061 self._advance() 6062 result, trie = in_trie(trie, key) 6063 if result == TrieResult.FAILED: 6064 break 6065 6066 if result == TrieResult.EXISTS: 6067 subparser = parsers[" ".join(this)] 6068 return subparser 6069 6070 self._retreat(index) 6071 return None 6072 6073 def _match(self, token_type, advance=True, expression=None): 6074 if not self._curr: 6075 return None 6076 6077 if self._curr.token_type == token_type: 6078 if advance: 6079 self._advance() 6080 self._add_comments(expression) 6081 return True 6082 6083 return None 6084 6085 def _match_set(self, types, advance=True): 6086 if not self._curr: 6087 return None 6088 6089 if self._curr.token_type in types: 6090 if advance: 6091 self._advance() 6092 return True 6093 6094 return None 6095 6096 def _match_pair(self, token_type_a, token_type_b, advance=True): 6097 if not self._curr or not self._next: 6098 return None 6099 6100 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6101 if advance: 6102 self._advance(2) 6103 return True 6104 6105 return None 6106 6107 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6108 if not self._match(TokenType.L_PAREN, expression=expression): 6109 self.raise_error("Expecting (") 6110 6111 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6112 if not self._match(TokenType.R_PAREN, expression=expression): 6113 self.raise_error("Expecting )") 6114 6115 def _match_texts(self, texts, advance=True): 6116 if self._curr and self._curr.text.upper() in texts: 6117 if advance: 6118 self._advance() 6119 return True 6120 return None 6121 6122 def _match_text_seq(self, *texts, advance=True): 6123 index = self._index 6124 for text in texts: 6125 if self._curr and self._curr.text.upper() == text: 6126 self._advance() 6127 else: 6128 self._retreat(index) 6129 return None 6130 6131 if not advance: 6132 self._retreat(index) 6133 6134 return True 6135 6136 def _replace_lambda( 6137 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6138 ) -> t.Optional[exp.Expression]: 6139 if not node: 6140 return node 6141 6142 for column in node.find_all(exp.Column): 6143 if column.parts[0].name in lambda_variables: 6144 dot_or_id = column.to_dot() if column.table else column.this 6145 parent = column.parent 6146 6147 while isinstance(parent, exp.Dot): 6148 if not isinstance(parent.parent, exp.Dot): 6149 parent.replace(dot_or_id) 6150 break 6151 parent = parent.parent 6152 else: 6153 if column is node: 6154 node = dot_or_id 6155 else: 6156 column.replace(dot_or_id) 6157 return node 6158 6159 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6160 start = self._prev 6161 6162 # Not to be confused with TRUNCATE(number, decimals) function call 6163 if self._match(TokenType.L_PAREN): 6164 self._retreat(self._index - 2) 6165 return self._parse_function() 6166 6167 # Clickhouse supports TRUNCATE DATABASE as well 6168 is_database = self._match(TokenType.DATABASE) 6169 6170 self._match(TokenType.TABLE) 6171 6172 exists = self._parse_exists(not_=False) 6173 6174 expressions = self._parse_csv( 6175 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6176 ) 6177 6178 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6179 6180 if self._match_text_seq("RESTART", "IDENTITY"): 6181 identity = "RESTART" 6182 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6183 identity = "CONTINUE" 6184 else: 6185 identity = None 6186 6187 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6188 option = self._prev.text 6189 else: 6190 option = None 6191 6192 partition = self._parse_partition() 6193 6194 # Fallback case 6195 if self._curr: 6196 return self._parse_as_command(start) 6197 6198 return self.expression( 6199 exp.TruncateTable, 6200 expressions=expressions, 6201 is_database=is_database, 6202 exists=exists, 6203 cluster=cluster, 6204 identity=identity, 6205 option=option, 6206 partition=partition, 6207 ) 6208 6209 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6210 this = self._parse_ordered(self._parse_opclass) 6211 6212 if not self._match(TokenType.WITH): 6213 return this 6214 6215 op = self._parse_var(any_token=True) 6216 6217 return self.expression(exp.WithOperator, this=this, op=op)
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
52def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 53 # Default argument order is base, expression 54 this = seq_get(args, 0) 55 expression = seq_get(args, 1) 56 57 if expression: 58 if not dialect.LOG_BASE_FIRST: 59 this, expression = expression, this 60 return exp.Log(this=this, expression=expression) 61 62 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
65def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 66 def _builder(args: t.List, dialect: Dialect) -> E: 67 expression = expr_type( 68 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 69 ) 70 if len(args) > 2 and expr_type is exp.JSONExtract: 71 expression.set("expressions", args[2:]) 72 73 return expression 74 75 return _builder
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 TokenType.STORAGE_INTEGRATION, 304 } 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IS, 349 TokenType.ISNULL, 350 TokenType.INTERVAL, 351 TokenType.KEEP, 352 TokenType.KILL, 353 TokenType.LEFT, 354 TokenType.LOAD, 355 TokenType.MERGE, 356 TokenType.NATURAL, 357 TokenType.NEXT, 358 TokenType.OFFSET, 359 TokenType.OPERATOR, 360 TokenType.ORDINALITY, 361 TokenType.OVERLAPS, 362 TokenType.OVERWRITE, 363 TokenType.PARTITION, 364 TokenType.PERCENT, 365 TokenType.PIVOT, 366 TokenType.PRAGMA, 367 TokenType.RANGE, 368 TokenType.RECURSIVE, 369 TokenType.REFERENCES, 370 TokenType.REFRESH, 371 TokenType.REPLACE, 372 TokenType.RIGHT, 373 TokenType.ROW, 374 TokenType.ROWS, 375 TokenType.SEMI, 376 TokenType.SET, 377 TokenType.SETTINGS, 378 TokenType.SHOW, 379 TokenType.TEMPORARY, 380 TokenType.TOP, 381 TokenType.TRUE, 382 TokenType.TRUNCATE, 383 TokenType.UNIQUE, 384 TokenType.UNPIVOT, 385 TokenType.UPDATE, 386 TokenType.USE, 387 TokenType.VOLATILE, 388 TokenType.WINDOW, 389 *CREATABLES, 390 *SUBQUERY_PREDICATES, 391 *TYPE_TOKENS, 392 *NO_PAREN_FUNCTIONS, 393 } 394 395 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 396 397 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 398 TokenType.ANTI, 399 TokenType.APPLY, 400 TokenType.ASOF, 401 TokenType.FULL, 402 TokenType.LEFT, 403 TokenType.LOCK, 404 TokenType.NATURAL, 405 TokenType.OFFSET, 406 TokenType.RIGHT, 407 TokenType.SEMI, 408 TokenType.WINDOW, 409 } 410 411 ALIAS_TOKENS = ID_VAR_TOKENS 412 413 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 414 415 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 416 417 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 418 419 FUNC_TOKENS = { 420 TokenType.COLLATE, 421 TokenType.COMMAND, 422 TokenType.CURRENT_DATE, 423 TokenType.CURRENT_DATETIME, 424 TokenType.CURRENT_TIMESTAMP, 425 TokenType.CURRENT_TIME, 426 TokenType.CURRENT_USER, 427 TokenType.FILTER, 428 TokenType.FIRST, 429 TokenType.FORMAT, 430 TokenType.GLOB, 431 TokenType.IDENTIFIER, 432 TokenType.INDEX, 433 TokenType.ISNULL, 434 TokenType.ILIKE, 435 TokenType.INSERT, 436 TokenType.LIKE, 437 TokenType.MERGE, 438 TokenType.OFFSET, 439 TokenType.PRIMARY_KEY, 440 TokenType.RANGE, 441 TokenType.REPLACE, 442 TokenType.RLIKE, 443 TokenType.ROW, 444 TokenType.UNNEST, 445 TokenType.VAR, 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.SEQUENCE, 449 TokenType.DATE, 450 TokenType.DATETIME, 451 TokenType.TABLE, 452 TokenType.TIMESTAMP, 453 TokenType.TIMESTAMPTZ, 454 TokenType.TRUNCATE, 455 TokenType.WINDOW, 456 TokenType.XOR, 457 *TYPE_TOKENS, 458 *SUBQUERY_PREDICATES, 459 } 460 461 CONJUNCTION = { 462 TokenType.AND: exp.And, 463 TokenType.OR: exp.Or, 464 } 465 466 EQUALITY = { 467 TokenType.COLON_EQ: exp.PropertyEQ, 468 TokenType.EQ: exp.EQ, 469 TokenType.NEQ: exp.NEQ, 470 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 471 } 472 473 COMPARISON = { 474 TokenType.GT: exp.GT, 475 TokenType.GTE: exp.GTE, 476 TokenType.LT: exp.LT, 477 TokenType.LTE: exp.LTE, 478 } 479 480 BITWISE = { 481 TokenType.AMP: exp.BitwiseAnd, 482 TokenType.CARET: exp.BitwiseXor, 483 TokenType.PIPE: exp.BitwiseOr, 484 } 485 486 TERM = { 487 TokenType.DASH: exp.Sub, 488 TokenType.PLUS: exp.Add, 489 TokenType.MOD: exp.Mod, 490 TokenType.COLLATE: exp.Collate, 491 } 492 493 FACTOR = { 494 TokenType.DIV: exp.IntDiv, 495 TokenType.LR_ARROW: exp.Distance, 496 TokenType.SLASH: exp.Div, 497 TokenType.STAR: exp.Mul, 498 } 499 500 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 501 502 TIMES = { 503 TokenType.TIME, 504 TokenType.TIMETZ, 505 } 506 507 TIMESTAMPS = { 508 TokenType.TIMESTAMP, 509 TokenType.TIMESTAMPTZ, 510 TokenType.TIMESTAMPLTZ, 511 *TIMES, 512 } 513 514 SET_OPERATIONS = { 515 TokenType.UNION, 516 TokenType.INTERSECT, 517 TokenType.EXCEPT, 518 } 519 520 JOIN_METHODS = { 521 TokenType.ASOF, 522 TokenType.NATURAL, 523 TokenType.POSITIONAL, 524 } 525 526 JOIN_SIDES = { 527 TokenType.LEFT, 528 TokenType.RIGHT, 529 TokenType.FULL, 530 } 531 532 JOIN_KINDS = { 533 TokenType.INNER, 534 TokenType.OUTER, 535 TokenType.CROSS, 536 TokenType.SEMI, 537 TokenType.ANTI, 538 } 539 540 JOIN_HINTS: t.Set[str] = set() 541 542 LAMBDAS = { 543 TokenType.ARROW: lambda self, expressions: self.expression( 544 exp.Lambda, 545 this=self._replace_lambda( 546 self._parse_conjunction(), 547 {node.name for node in expressions}, 548 ), 549 expressions=expressions, 550 ), 551 TokenType.FARROW: lambda self, expressions: self.expression( 552 exp.Kwarg, 553 this=exp.var(expressions[0].name), 554 expression=self._parse_conjunction(), 555 ), 556 } 557 558 COLUMN_OPERATORS = { 559 TokenType.DOT: None, 560 TokenType.DCOLON: lambda self, this, to: self.expression( 561 exp.Cast if self.STRICT_CAST else exp.TryCast, 562 this=this, 563 to=to, 564 ), 565 TokenType.ARROW: lambda self, this, path: self.expression( 566 exp.JSONExtract, 567 this=this, 568 expression=self.dialect.to_json_path(path), 569 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 570 ), 571 TokenType.DARROW: lambda self, this, path: self.expression( 572 exp.JSONExtractScalar, 573 this=this, 574 expression=self.dialect.to_json_path(path), 575 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 576 ), 577 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 578 exp.JSONBExtract, 579 this=this, 580 expression=path, 581 ), 582 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 583 exp.JSONBExtractScalar, 584 this=this, 585 expression=path, 586 ), 587 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 588 exp.JSONBContains, 589 this=this, 590 expression=key, 591 ), 592 } 593 594 EXPRESSION_PARSERS = { 595 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 596 exp.Column: lambda self: self._parse_column(), 597 exp.Condition: lambda self: self._parse_conjunction(), 598 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 599 exp.Expression: lambda self: self._parse_expression(), 600 exp.From: lambda self: self._parse_from(), 601 exp.Group: lambda self: self._parse_group(), 602 exp.Having: lambda self: self._parse_having(), 603 exp.Identifier: lambda self: self._parse_id_var(), 604 exp.Join: lambda self: self._parse_join(), 605 exp.Lambda: lambda self: self._parse_lambda(), 606 exp.Lateral: lambda self: self._parse_lateral(), 607 exp.Limit: lambda self: self._parse_limit(), 608 exp.Offset: lambda self: self._parse_offset(), 609 exp.Order: lambda self: self._parse_order(), 610 exp.Ordered: lambda self: self._parse_ordered(), 611 exp.Properties: lambda self: self._parse_properties(), 612 exp.Qualify: lambda self: self._parse_qualify(), 613 exp.Returning: lambda self: self._parse_returning(), 614 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 615 exp.Table: lambda self: self._parse_table_parts(), 616 exp.TableAlias: lambda self: self._parse_table_alias(), 617 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 618 exp.Where: lambda self: self._parse_where(), 619 exp.Window: lambda self: self._parse_named_window(), 620 exp.With: lambda self: self._parse_with(), 621 "JOIN_TYPE": lambda self: self._parse_join_parts(), 622 } 623 624 STATEMENT_PARSERS = { 625 TokenType.ALTER: lambda self: self._parse_alter(), 626 TokenType.BEGIN: lambda self: self._parse_transaction(), 627 TokenType.CACHE: lambda self: self._parse_cache(), 628 TokenType.COMMENT: lambda self: self._parse_comment(), 629 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 630 TokenType.CREATE: lambda self: self._parse_create(), 631 TokenType.DELETE: lambda self: self._parse_delete(), 632 TokenType.DESC: lambda self: self._parse_describe(), 633 TokenType.DESCRIBE: lambda self: self._parse_describe(), 634 TokenType.DROP: lambda self: self._parse_drop(), 635 TokenType.INSERT: lambda self: self._parse_insert(), 636 TokenType.KILL: lambda self: self._parse_kill(), 637 TokenType.LOAD: lambda self: self._parse_load(), 638 TokenType.MERGE: lambda self: self._parse_merge(), 639 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 640 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 641 TokenType.REFRESH: lambda self: self._parse_refresh(), 642 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 643 TokenType.SET: lambda self: self._parse_set(), 644 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 645 TokenType.UNCACHE: lambda self: self._parse_uncache(), 646 TokenType.UPDATE: lambda self: self._parse_update(), 647 TokenType.USE: lambda self: self.expression( 648 exp.Use, 649 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 650 this=self._parse_table(schema=False), 651 ), 652 } 653 654 UNARY_PARSERS = { 655 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 656 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 657 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 658 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 659 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 660 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 661 } 662 663 STRING_PARSERS = { 664 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 665 exp.RawString, this=token.text 666 ), 667 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 668 exp.National, this=token.text 669 ), 670 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 671 TokenType.STRING: lambda self, token: self.expression( 672 exp.Literal, this=token.text, is_string=True 673 ), 674 TokenType.UNICODE_STRING: lambda self, token: self.expression( 675 exp.UnicodeString, 676 this=token.text, 677 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 678 ), 679 } 680 681 NUMERIC_PARSERS = { 682 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 683 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 684 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 685 TokenType.NUMBER: lambda self, token: self.expression( 686 exp.Literal, this=token.text, is_string=False 687 ), 688 } 689 690 PRIMARY_PARSERS = { 691 **STRING_PARSERS, 692 **NUMERIC_PARSERS, 693 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 694 TokenType.NULL: lambda self, _: self.expression(exp.Null), 695 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 696 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 697 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 698 TokenType.STAR: lambda self, _: self.expression( 699 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 700 ), 701 } 702 703 PLACEHOLDER_PARSERS = { 704 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 705 TokenType.PARAMETER: lambda self: self._parse_parameter(), 706 TokenType.COLON: lambda self: ( 707 self.expression(exp.Placeholder, this=self._prev.text) 708 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 709 else None 710 ), 711 } 712 713 RANGE_PARSERS = { 714 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 715 TokenType.GLOB: binary_range_parser(exp.Glob), 716 TokenType.ILIKE: binary_range_parser(exp.ILike), 717 TokenType.IN: lambda self, this: self._parse_in(this), 718 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 719 TokenType.IS: lambda self, this: self._parse_is(this), 720 TokenType.LIKE: binary_range_parser(exp.Like), 721 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 722 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 723 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 724 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 725 } 726 727 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 728 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 729 "AUTO": lambda self: self._parse_auto_property(), 730 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 731 "BACKUP": lambda self: self.expression( 732 exp.BackupProperty, this=self._parse_var(any_token=True) 733 ), 734 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 735 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 736 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHECKSUM": lambda self: self._parse_checksum(), 738 "CLUSTER BY": lambda self: self._parse_cluster(), 739 "CLUSTERED": lambda self: self._parse_clustered_by(), 740 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 741 exp.CollateProperty, **kwargs 742 ), 743 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 744 "CONTAINS": lambda self: self._parse_contains_property(), 745 "COPY": lambda self: self._parse_copy_property(), 746 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 747 "DEFINER": lambda self: self._parse_definer(), 748 "DETERMINISTIC": lambda self: self.expression( 749 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 750 ), 751 "DISTKEY": lambda self: self._parse_distkey(), 752 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 753 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 754 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 755 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 756 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 757 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 758 "FREESPACE": lambda self: self._parse_freespace(), 759 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 760 "HEAP": lambda self: self.expression(exp.HeapProperty), 761 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 762 "IMMUTABLE": lambda self: self.expression( 763 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 764 ), 765 "INHERITS": lambda self: self.expression( 766 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 767 ), 768 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 769 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 770 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 771 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 772 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 773 "LIKE": lambda self: self._parse_create_like(), 774 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 775 "LOCK": lambda self: self._parse_locking(), 776 "LOCKING": lambda self: self._parse_locking(), 777 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 778 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 779 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 780 "MODIFIES": lambda self: self._parse_modifies_property(), 781 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 782 "NO": lambda self: self._parse_no_property(), 783 "ON": lambda self: self._parse_on_property(), 784 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 785 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 786 "PARTITION": lambda self: self._parse_partitioned_of(), 787 "PARTITION BY": lambda self: self._parse_partitioned_by(), 788 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 789 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 790 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 791 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 792 "READS": lambda self: self._parse_reads_property(), 793 "REMOTE": lambda self: self._parse_remote_with_connection(), 794 "RETURNS": lambda self: self._parse_returns(), 795 "ROW": lambda self: self._parse_row(), 796 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 797 "SAMPLE": lambda self: self.expression( 798 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 799 ), 800 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 801 "SETTINGS": lambda self: self.expression( 802 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 803 ), 804 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 805 "SORTKEY": lambda self: self._parse_sortkey(), 806 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 807 "STABLE": lambda self: self.expression( 808 exp.StabilityProperty, this=exp.Literal.string("STABLE") 809 ), 810 "STORED": lambda self: self._parse_stored(), 811 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 812 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 813 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 814 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 815 "TO": lambda self: self._parse_to_table(), 816 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 817 "TRANSFORM": lambda self: self.expression( 818 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 819 ), 820 "TTL": lambda self: self._parse_ttl(), 821 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 822 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 823 "VOLATILE": lambda self: self._parse_volatile_property(), 824 "WITH": lambda self: self._parse_with_property(), 825 } 826 827 CONSTRAINT_PARSERS = { 828 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 829 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 830 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 831 "CHARACTER SET": lambda self: self.expression( 832 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 833 ), 834 "CHECK": lambda self: self.expression( 835 exp.CheckColumnConstraint, 836 this=self._parse_wrapped(self._parse_conjunction), 837 enforced=self._match_text_seq("ENFORCED"), 838 ), 839 "COLLATE": lambda self: self.expression( 840 exp.CollateColumnConstraint, this=self._parse_var() 841 ), 842 "COMMENT": lambda self: self.expression( 843 exp.CommentColumnConstraint, this=self._parse_string() 844 ), 845 "COMPRESS": lambda self: self._parse_compress(), 846 "CLUSTERED": lambda self: self.expression( 847 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 848 ), 849 "NONCLUSTERED": lambda self: self.expression( 850 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 851 ), 852 "DEFAULT": lambda self: self.expression( 853 exp.DefaultColumnConstraint, this=self._parse_bitwise() 854 ), 855 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 856 "EXCLUDE": lambda self: self.expression( 857 exp.ExcludeColumnConstraint, this=self._parse_index_params() 858 ), 859 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 860 "FORMAT": lambda self: self.expression( 861 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 862 ), 863 "GENERATED": lambda self: self._parse_generated_as_identity(), 864 "IDENTITY": lambda self: self._parse_auto_increment(), 865 "INLINE": lambda self: self._parse_inline(), 866 "LIKE": lambda self: self._parse_create_like(), 867 "NOT": lambda self: self._parse_not_constraint(), 868 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 869 "ON": lambda self: ( 870 self._match(TokenType.UPDATE) 871 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 872 ) 873 or self.expression(exp.OnProperty, this=self._parse_id_var()), 874 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 875 "PERIOD": lambda self: self._parse_period_for_system_time(), 876 "PRIMARY KEY": lambda self: self._parse_primary_key(), 877 "REFERENCES": lambda self: self._parse_references(match=False), 878 "TITLE": lambda self: self.expression( 879 exp.TitleColumnConstraint, this=self._parse_var_or_string() 880 ), 881 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 882 "UNIQUE": lambda self: self._parse_unique(), 883 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 884 "WITH": lambda self: self.expression( 885 exp.Properties, expressions=self._parse_wrapped_properties() 886 ), 887 } 888 889 ALTER_PARSERS = { 890 "ADD": lambda self: self._parse_alter_table_add(), 891 "ALTER": lambda self: self._parse_alter_table_alter(), 892 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 893 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 894 "DROP": lambda self: self._parse_alter_table_drop(), 895 "RENAME": lambda self: self._parse_alter_table_rename(), 896 } 897 898 SCHEMA_UNNAMED_CONSTRAINTS = { 899 "CHECK", 900 "EXCLUDE", 901 "FOREIGN KEY", 902 "LIKE", 903 "PERIOD", 904 "PRIMARY KEY", 905 "UNIQUE", 906 } 907 908 NO_PAREN_FUNCTION_PARSERS = { 909 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 910 "CASE": lambda self: self._parse_case(), 911 "IF": lambda self: self._parse_if(), 912 "NEXT": lambda self: self._parse_next_value_for(), 913 } 914 915 INVALID_FUNC_NAME_TOKENS = { 916 TokenType.IDENTIFIER, 917 TokenType.STRING, 918 } 919 920 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 921 922 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 923 924 FUNCTION_PARSERS = { 925 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 926 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 927 "DECODE": lambda self: self._parse_decode(), 928 "EXTRACT": lambda self: self._parse_extract(), 929 "JSON_OBJECT": lambda self: self._parse_json_object(), 930 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 931 "JSON_TABLE": lambda self: self._parse_json_table(), 932 "MATCH": lambda self: self._parse_match_against(), 933 "OPENJSON": lambda self: self._parse_open_json(), 934 "POSITION": lambda self: self._parse_position(), 935 "PREDICT": lambda self: self._parse_predict(), 936 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 937 "STRING_AGG": lambda self: self._parse_string_agg(), 938 "SUBSTRING": lambda self: self._parse_substring(), 939 "TRIM": lambda self: self._parse_trim(), 940 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 941 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 942 } 943 944 QUERY_MODIFIER_PARSERS = { 945 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 946 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 947 TokenType.WHERE: lambda self: ("where", self._parse_where()), 948 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 949 TokenType.HAVING: lambda self: ("having", self._parse_having()), 950 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 951 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 952 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 953 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 954 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 955 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 956 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 957 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 958 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 959 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 960 TokenType.CLUSTER_BY: lambda self: ( 961 "cluster", 962 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 963 ), 964 TokenType.DISTRIBUTE_BY: lambda self: ( 965 "distribute", 966 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 967 ), 968 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 969 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 970 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 971 } 972 973 SET_PARSERS = { 974 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 975 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 976 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 977 "TRANSACTION": lambda self: self._parse_set_transaction(), 978 } 979 980 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 981 982 TYPE_LITERAL_PARSERS = { 983 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 984 } 985 986 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 987 988 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 989 990 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 991 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 992 "ISOLATION": ( 993 ("LEVEL", "REPEATABLE", "READ"), 994 ("LEVEL", "READ", "COMMITTED"), 995 ("LEVEL", "READ", "UNCOMITTED"), 996 ("LEVEL", "SERIALIZABLE"), 997 ), 998 "READ": ("WRITE", "ONLY"), 999 } 1000 1001 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1002 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1003 ) 1004 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1005 1006 CREATE_SEQUENCE: OPTIONS_TYPE = { 1007 "SCALE": ("EXTEND", "NOEXTEND"), 1008 "SHARD": ("EXTEND", "NOEXTEND"), 1009 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1010 **dict.fromkeys( 1011 ( 1012 "SESSION", 1013 "GLOBAL", 1014 "KEEP", 1015 "NOKEEP", 1016 "ORDER", 1017 "NOORDER", 1018 "NOCACHE", 1019 "CYCLE", 1020 "NOCYCLE", 1021 "NOMINVALUE", 1022 "NOMAXVALUE", 1023 "NOSCALE", 1024 "NOSHARD", 1025 ), 1026 tuple(), 1027 ), 1028 } 1029 1030 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1031 1032 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1033 1034 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1035 1036 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1037 1038 CLONE_KEYWORDS = {"CLONE", "COPY"} 1039 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1040 1041 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1042 1043 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1044 1045 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1046 1047 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1048 1049 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1050 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1051 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1052 1053 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1054 1055 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1056 1057 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1058 1059 DISTINCT_TOKENS = {TokenType.DISTINCT} 1060 1061 NULL_TOKENS = {TokenType.NULL} 1062 1063 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1064 1065 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1066 1067 STRICT_CAST = True 1068 1069 PREFIXED_PIVOT_COLUMNS = False 1070 IDENTIFY_PIVOT_STRINGS = False 1071 1072 LOG_DEFAULTS_TO_LN = False 1073 1074 # Whether ADD is present for each column added by ALTER TABLE 1075 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1076 1077 # Whether the table sample clause expects CSV syntax 1078 TABLESAMPLE_CSV = False 1079 1080 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1081 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1082 1083 # Whether the TRIM function expects the characters to trim as its first argument 1084 TRIM_PATTERN_FIRST = False 1085 1086 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1087 STRING_ALIASES = False 1088 1089 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1090 MODIFIERS_ATTACHED_TO_UNION = True 1091 UNION_MODIFIERS = {"order", "limit", "offset"} 1092 1093 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1094 NO_PAREN_IF_COMMANDS = True 1095 1096 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1097 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1098 1099 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1100 # If this is True and '(' is not found, the keyword will be treated as an identifier 1101 VALUES_FOLLOWED_BY_PAREN = True 1102 1103 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1104 SUPPORTS_IMPLICIT_UNNEST = False 1105 1106 __slots__ = ( 1107 "error_level", 1108 "error_message_context", 1109 "max_errors", 1110 "dialect", 1111 "sql", 1112 "errors", 1113 "_tokens", 1114 "_index", 1115 "_curr", 1116 "_next", 1117 "_prev", 1118 "_prev_comments", 1119 ) 1120 1121 # Autofilled 1122 SHOW_TRIE: t.Dict = {} 1123 SET_TRIE: t.Dict = {} 1124 1125 def __init__( 1126 self, 1127 error_level: t.Optional[ErrorLevel] = None, 1128 error_message_context: int = 100, 1129 max_errors: int = 3, 1130 dialect: DialectType = None, 1131 ): 1132 from sqlglot.dialects import Dialect 1133 1134 self.error_level = error_level or ErrorLevel.IMMEDIATE 1135 self.error_message_context = error_message_context 1136 self.max_errors = max_errors 1137 self.dialect = Dialect.get_or_raise(dialect) 1138 self.reset() 1139 1140 def reset(self): 1141 self.sql = "" 1142 self.errors = [] 1143 self._tokens = [] 1144 self._index = 0 1145 self._curr = None 1146 self._next = None 1147 self._prev = None 1148 self._prev_comments = None 1149 1150 def parse( 1151 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1152 ) -> t.List[t.Optional[exp.Expression]]: 1153 """ 1154 Parses a list of tokens and returns a list of syntax trees, one tree 1155 per parsed SQL statement. 1156 1157 Args: 1158 raw_tokens: The list of tokens. 1159 sql: The original SQL string, used to produce helpful debug messages. 1160 1161 Returns: 1162 The list of the produced syntax trees. 1163 """ 1164 return self._parse( 1165 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1166 ) 1167 1168 def parse_into( 1169 self, 1170 expression_types: exp.IntoType, 1171 raw_tokens: t.List[Token], 1172 sql: t.Optional[str] = None, 1173 ) -> t.List[t.Optional[exp.Expression]]: 1174 """ 1175 Parses a list of tokens into a given Expression type. If a collection of Expression 1176 types is given instead, this method will try to parse the token list into each one 1177 of them, stopping at the first for which the parsing succeeds. 1178 1179 Args: 1180 expression_types: The expression type(s) to try and parse the token list into. 1181 raw_tokens: The list of tokens. 1182 sql: The original SQL string, used to produce helpful debug messages. 1183 1184 Returns: 1185 The target Expression. 1186 """ 1187 errors = [] 1188 for expression_type in ensure_list(expression_types): 1189 parser = self.EXPRESSION_PARSERS.get(expression_type) 1190 if not parser: 1191 raise TypeError(f"No parser registered for {expression_type}") 1192 1193 try: 1194 return self._parse(parser, raw_tokens, sql) 1195 except ParseError as e: 1196 e.errors[0]["into_expression"] = expression_type 1197 errors.append(e) 1198 1199 raise ParseError( 1200 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1201 errors=merge_errors(errors), 1202 ) from errors[-1] 1203 1204 def _parse( 1205 self, 1206 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1207 raw_tokens: t.List[Token], 1208 sql: t.Optional[str] = None, 1209 ) -> t.List[t.Optional[exp.Expression]]: 1210 self.reset() 1211 self.sql = sql or "" 1212 1213 total = len(raw_tokens) 1214 chunks: t.List[t.List[Token]] = [[]] 1215 1216 for i, token in enumerate(raw_tokens): 1217 if token.token_type == TokenType.SEMICOLON: 1218 if i < total - 1: 1219 chunks.append([]) 1220 else: 1221 chunks[-1].append(token) 1222 1223 expressions = [] 1224 1225 for tokens in chunks: 1226 self._index = -1 1227 self._tokens = tokens 1228 self._advance() 1229 1230 expressions.append(parse_method(self)) 1231 1232 if self._index < len(self._tokens): 1233 self.raise_error("Invalid expression / Unexpected token") 1234 1235 self.check_errors() 1236 1237 return expressions 1238 1239 def check_errors(self) -> None: 1240 """Logs or raises any found errors, depending on the chosen error level setting.""" 1241 if self.error_level == ErrorLevel.WARN: 1242 for error in self.errors: 1243 logger.error(str(error)) 1244 elif self.error_level == ErrorLevel.RAISE and self.errors: 1245 raise ParseError( 1246 concat_messages(self.errors, self.max_errors), 1247 errors=merge_errors(self.errors), 1248 ) 1249 1250 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1251 """ 1252 Appends an error in the list of recorded errors or raises it, depending on the chosen 1253 error level setting. 1254 """ 1255 token = token or self._curr or self._prev or Token.string("") 1256 start = token.start 1257 end = token.end + 1 1258 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1259 highlight = self.sql[start:end] 1260 end_context = self.sql[end : end + self.error_message_context] 1261 1262 error = ParseError.new( 1263 f"{message}. Line {token.line}, Col: {token.col}.\n" 1264 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1265 description=message, 1266 line=token.line, 1267 col=token.col, 1268 start_context=start_context, 1269 highlight=highlight, 1270 end_context=end_context, 1271 ) 1272 1273 if self.error_level == ErrorLevel.IMMEDIATE: 1274 raise error 1275 1276 self.errors.append(error) 1277 1278 def expression( 1279 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1280 ) -> E: 1281 """ 1282 Creates a new, validated Expression. 1283 1284 Args: 1285 exp_class: The expression class to instantiate. 1286 comments: An optional list of comments to attach to the expression. 1287 kwargs: The arguments to set for the expression along with their respective values. 1288 1289 Returns: 1290 The target expression. 1291 """ 1292 instance = exp_class(**kwargs) 1293 instance.add_comments(comments) if comments else self._add_comments(instance) 1294 return self.validate_expression(instance) 1295 1296 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1297 if expression and self._prev_comments: 1298 expression.add_comments(self._prev_comments) 1299 self._prev_comments = None 1300 1301 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1302 """ 1303 Validates an Expression, making sure that all its mandatory arguments are set. 1304 1305 Args: 1306 expression: The expression to validate. 1307 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1308 1309 Returns: 1310 The validated expression. 1311 """ 1312 if self.error_level != ErrorLevel.IGNORE: 1313 for error_message in expression.error_messages(args): 1314 self.raise_error(error_message) 1315 1316 return expression 1317 1318 def _find_sql(self, start: Token, end: Token) -> str: 1319 return self.sql[start.start : end.end + 1] 1320 1321 def _is_connected(self) -> bool: 1322 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1323 1324 def _advance(self, times: int = 1) -> None: 1325 self._index += times 1326 self._curr = seq_get(self._tokens, self._index) 1327 self._next = seq_get(self._tokens, self._index + 1) 1328 1329 if self._index > 0: 1330 self._prev = self._tokens[self._index - 1] 1331 self._prev_comments = self._prev.comments 1332 else: 1333 self._prev = None 1334 self._prev_comments = None 1335 1336 def _retreat(self, index: int) -> None: 1337 if index != self._index: 1338 self._advance(index - self._index) 1339 1340 def _warn_unsupported(self) -> None: 1341 if len(self._tokens) <= 1: 1342 return 1343 1344 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1345 # interested in emitting a warning for the one being currently processed. 1346 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1347 1348 logger.warning( 1349 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1350 ) 1351 1352 def _parse_command(self) -> exp.Command: 1353 self._warn_unsupported() 1354 return self.expression( 1355 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1356 ) 1357 1358 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1359 """ 1360 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. This behavior can 1361 be different depending on the uset-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1362 the parser state accordingly 1363 """ 1364 index = self._index 1365 error_level = self.error_level 1366 1367 self.error_level = ErrorLevel.IMMEDIATE 1368 try: 1369 this = parse_method() 1370 except ParseError: 1371 this = None 1372 finally: 1373 if not this or retreat: 1374 self._retreat(index) 1375 self.error_level = error_level 1376 1377 return this 1378 1379 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1380 start = self._prev 1381 exists = self._parse_exists() if allow_exists else None 1382 1383 self._match(TokenType.ON) 1384 1385 kind = self._match_set(self.CREATABLES) and self._prev 1386 if not kind: 1387 return self._parse_as_command(start) 1388 1389 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1390 this = self._parse_user_defined_function(kind=kind.token_type) 1391 elif kind.token_type == TokenType.TABLE: 1392 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1393 elif kind.token_type == TokenType.COLUMN: 1394 this = self._parse_column() 1395 else: 1396 this = self._parse_id_var() 1397 1398 self._match(TokenType.IS) 1399 1400 return self.expression( 1401 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1402 ) 1403 1404 def _parse_to_table( 1405 self, 1406 ) -> exp.ToTableProperty: 1407 table = self._parse_table_parts(schema=True) 1408 return self.expression(exp.ToTableProperty, this=table) 1409 1410 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1411 def _parse_ttl(self) -> exp.Expression: 1412 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1413 this = self._parse_bitwise() 1414 1415 if self._match_text_seq("DELETE"): 1416 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1417 if self._match_text_seq("RECOMPRESS"): 1418 return self.expression( 1419 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1420 ) 1421 if self._match_text_seq("TO", "DISK"): 1422 return self.expression( 1423 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1424 ) 1425 if self._match_text_seq("TO", "VOLUME"): 1426 return self.expression( 1427 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1428 ) 1429 1430 return this 1431 1432 expressions = self._parse_csv(_parse_ttl_action) 1433 where = self._parse_where() 1434 group = self._parse_group() 1435 1436 aggregates = None 1437 if group and self._match(TokenType.SET): 1438 aggregates = self._parse_csv(self._parse_set_item) 1439 1440 return self.expression( 1441 exp.MergeTreeTTL, 1442 expressions=expressions, 1443 where=where, 1444 group=group, 1445 aggregates=aggregates, 1446 ) 1447 1448 def _parse_statement(self) -> t.Optional[exp.Expression]: 1449 if self._curr is None: 1450 return None 1451 1452 if self._match_set(self.STATEMENT_PARSERS): 1453 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1454 1455 if self._match_set(Tokenizer.COMMANDS): 1456 return self._parse_command() 1457 1458 expression = self._parse_expression() 1459 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1460 return self._parse_query_modifiers(expression) 1461 1462 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1463 start = self._prev 1464 temporary = self._match(TokenType.TEMPORARY) 1465 materialized = self._match_text_seq("MATERIALIZED") 1466 1467 kind = self._match_set(self.CREATABLES) and self._prev.text 1468 if not kind: 1469 return self._parse_as_command(start) 1470 1471 if_exists = exists or self._parse_exists() 1472 table = self._parse_table_parts( 1473 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1474 ) 1475 1476 if self._match(TokenType.L_PAREN, advance=False): 1477 expressions = self._parse_wrapped_csv(self._parse_types) 1478 else: 1479 expressions = None 1480 1481 return self.expression( 1482 exp.Drop, 1483 comments=start.comments, 1484 exists=if_exists, 1485 this=table, 1486 expressions=expressions, 1487 kind=kind, 1488 temporary=temporary, 1489 materialized=materialized, 1490 cascade=self._match_text_seq("CASCADE"), 1491 constraints=self._match_text_seq("CONSTRAINTS"), 1492 purge=self._match_text_seq("PURGE"), 1493 ) 1494 1495 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1496 return ( 1497 self._match_text_seq("IF") 1498 and (not not_ or self._match(TokenType.NOT)) 1499 and self._match(TokenType.EXISTS) 1500 ) 1501 1502 def _parse_create(self) -> exp.Create | exp.Command: 1503 # Note: this can't be None because we've matched a statement parser 1504 start = self._prev 1505 comments = self._prev_comments 1506 1507 replace = ( 1508 start.token_type == TokenType.REPLACE 1509 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1510 or self._match_pair(TokenType.OR, TokenType.ALTER) 1511 ) 1512 1513 unique = self._match(TokenType.UNIQUE) 1514 1515 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1516 self._advance() 1517 1518 properties = None 1519 create_token = self._match_set(self.CREATABLES) and self._prev 1520 1521 if not create_token: 1522 # exp.Properties.Location.POST_CREATE 1523 properties = self._parse_properties() 1524 create_token = self._match_set(self.CREATABLES) and self._prev 1525 1526 if not properties or not create_token: 1527 return self._parse_as_command(start) 1528 1529 exists = self._parse_exists(not_=True) 1530 this = None 1531 expression: t.Optional[exp.Expression] = None 1532 indexes = None 1533 no_schema_binding = None 1534 begin = None 1535 end = None 1536 clone = None 1537 1538 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1539 nonlocal properties 1540 if properties and temp_props: 1541 properties.expressions.extend(temp_props.expressions) 1542 elif temp_props: 1543 properties = temp_props 1544 1545 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1546 this = self._parse_user_defined_function(kind=create_token.token_type) 1547 1548 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1549 extend_props(self._parse_properties()) 1550 1551 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1552 1553 if not expression: 1554 if self._match(TokenType.COMMAND): 1555 expression = self._parse_as_command(self._prev) 1556 else: 1557 begin = self._match(TokenType.BEGIN) 1558 return_ = self._match_text_seq("RETURN") 1559 1560 if self._match(TokenType.STRING, advance=False): 1561 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1562 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1563 expression = self._parse_string() 1564 extend_props(self._parse_properties()) 1565 else: 1566 expression = self._parse_statement() 1567 1568 end = self._match_text_seq("END") 1569 1570 if return_: 1571 expression = self.expression(exp.Return, this=expression) 1572 elif create_token.token_type == TokenType.INDEX: 1573 this = self._parse_index(index=self._parse_id_var()) 1574 elif create_token.token_type in self.DB_CREATABLES: 1575 table_parts = self._parse_table_parts( 1576 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1577 ) 1578 1579 # exp.Properties.Location.POST_NAME 1580 self._match(TokenType.COMMA) 1581 extend_props(self._parse_properties(before=True)) 1582 1583 this = self._parse_schema(this=table_parts) 1584 1585 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1586 extend_props(self._parse_properties()) 1587 1588 self._match(TokenType.ALIAS) 1589 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1590 # exp.Properties.Location.POST_ALIAS 1591 extend_props(self._parse_properties()) 1592 1593 if create_token.token_type == TokenType.SEQUENCE: 1594 expression = self._parse_types() 1595 extend_props(self._parse_properties()) 1596 else: 1597 expression = self._parse_ddl_select() 1598 1599 if create_token.token_type == TokenType.TABLE: 1600 # exp.Properties.Location.POST_EXPRESSION 1601 extend_props(self._parse_properties()) 1602 1603 indexes = [] 1604 while True: 1605 index = self._parse_index() 1606 1607 # exp.Properties.Location.POST_INDEX 1608 extend_props(self._parse_properties()) 1609 1610 if not index: 1611 break 1612 else: 1613 self._match(TokenType.COMMA) 1614 indexes.append(index) 1615 elif create_token.token_type == TokenType.VIEW: 1616 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1617 no_schema_binding = True 1618 1619 shallow = self._match_text_seq("SHALLOW") 1620 1621 if self._match_texts(self.CLONE_KEYWORDS): 1622 copy = self._prev.text.lower() == "copy" 1623 clone = self.expression( 1624 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1625 ) 1626 1627 if self._curr: 1628 return self._parse_as_command(start) 1629 1630 return self.expression( 1631 exp.Create, 1632 comments=comments, 1633 this=this, 1634 kind=create_token.text.upper(), 1635 replace=replace, 1636 unique=unique, 1637 expression=expression, 1638 exists=exists, 1639 properties=properties, 1640 indexes=indexes, 1641 no_schema_binding=no_schema_binding, 1642 begin=begin, 1643 end=end, 1644 clone=clone, 1645 ) 1646 1647 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1648 seq = exp.SequenceProperties() 1649 1650 options = [] 1651 index = self._index 1652 1653 while self._curr: 1654 if self._match_text_seq("INCREMENT"): 1655 self._match_text_seq("BY") 1656 self._match_text_seq("=") 1657 seq.set("increment", self._parse_term()) 1658 elif self._match_text_seq("MINVALUE"): 1659 seq.set("minvalue", self._parse_term()) 1660 elif self._match_text_seq("MAXVALUE"): 1661 seq.set("maxvalue", self._parse_term()) 1662 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1663 self._match_text_seq("=") 1664 seq.set("start", self._parse_term()) 1665 elif self._match_text_seq("CACHE"): 1666 # T-SQL allows empty CACHE which is initialized dynamically 1667 seq.set("cache", self._parse_number() or True) 1668 elif self._match_text_seq("OWNED", "BY"): 1669 # "OWNED BY NONE" is the default 1670 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1671 else: 1672 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1673 if opt: 1674 options.append(opt) 1675 else: 1676 break 1677 1678 seq.set("options", options if options else None) 1679 return None if self._index == index else seq 1680 1681 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1682 # only used for teradata currently 1683 self._match(TokenType.COMMA) 1684 1685 kwargs = { 1686 "no": self._match_text_seq("NO"), 1687 "dual": self._match_text_seq("DUAL"), 1688 "before": self._match_text_seq("BEFORE"), 1689 "default": self._match_text_seq("DEFAULT"), 1690 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1691 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1692 "after": self._match_text_seq("AFTER"), 1693 "minimum": self._match_texts(("MIN", "MINIMUM")), 1694 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1695 } 1696 1697 if self._match_texts(self.PROPERTY_PARSERS): 1698 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1699 try: 1700 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1701 except TypeError: 1702 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1703 1704 return None 1705 1706 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1707 return self._parse_wrapped_csv(self._parse_property) 1708 1709 def _parse_property(self) -> t.Optional[exp.Expression]: 1710 if self._match_texts(self.PROPERTY_PARSERS): 1711 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1712 1713 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1714 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1715 1716 if self._match_text_seq("COMPOUND", "SORTKEY"): 1717 return self._parse_sortkey(compound=True) 1718 1719 if self._match_text_seq("SQL", "SECURITY"): 1720 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1721 1722 index = self._index 1723 key = self._parse_column() 1724 1725 if not self._match(TokenType.EQ): 1726 self._retreat(index) 1727 return self._parse_sequence_properties() 1728 1729 return self.expression( 1730 exp.Property, 1731 this=key.to_dot() if isinstance(key, exp.Column) else key, 1732 value=self._parse_bitwise() or self._parse_var(any_token=True), 1733 ) 1734 1735 def _parse_stored(self) -> exp.FileFormatProperty: 1736 self._match(TokenType.ALIAS) 1737 1738 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1739 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1740 1741 return self.expression( 1742 exp.FileFormatProperty, 1743 this=( 1744 self.expression( 1745 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1746 ) 1747 if input_format or output_format 1748 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1749 ), 1750 ) 1751 1752 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1753 self._match(TokenType.EQ) 1754 self._match(TokenType.ALIAS) 1755 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1756 1757 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1758 properties = [] 1759 while True: 1760 if before: 1761 prop = self._parse_property_before() 1762 else: 1763 prop = self._parse_property() 1764 if not prop: 1765 break 1766 for p in ensure_list(prop): 1767 properties.append(p) 1768 1769 if properties: 1770 return self.expression(exp.Properties, expressions=properties) 1771 1772 return None 1773 1774 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1775 return self.expression( 1776 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1777 ) 1778 1779 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1780 if self._index >= 2: 1781 pre_volatile_token = self._tokens[self._index - 2] 1782 else: 1783 pre_volatile_token = None 1784 1785 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1786 return exp.VolatileProperty() 1787 1788 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1789 1790 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1791 self._match_pair(TokenType.EQ, TokenType.ON) 1792 1793 prop = self.expression(exp.WithSystemVersioningProperty) 1794 if self._match(TokenType.L_PAREN): 1795 self._match_text_seq("HISTORY_TABLE", "=") 1796 prop.set("this", self._parse_table_parts()) 1797 1798 if self._match(TokenType.COMMA): 1799 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1800 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1801 1802 self._match_r_paren() 1803 1804 return prop 1805 1806 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1807 if self._match(TokenType.L_PAREN, advance=False): 1808 return self._parse_wrapped_properties() 1809 1810 if self._match_text_seq("JOURNAL"): 1811 return self._parse_withjournaltable() 1812 1813 if self._match_texts(self.VIEW_ATTRIBUTES): 1814 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1815 1816 if self._match_text_seq("DATA"): 1817 return self._parse_withdata(no=False) 1818 elif self._match_text_seq("NO", "DATA"): 1819 return self._parse_withdata(no=True) 1820 1821 if not self._next: 1822 return None 1823 1824 return self._parse_withisolatedloading() 1825 1826 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1827 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1828 self._match(TokenType.EQ) 1829 1830 user = self._parse_id_var() 1831 self._match(TokenType.PARAMETER) 1832 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1833 1834 if not user or not host: 1835 return None 1836 1837 return exp.DefinerProperty(this=f"{user}@{host}") 1838 1839 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1840 self._match(TokenType.TABLE) 1841 self._match(TokenType.EQ) 1842 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1843 1844 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1845 return self.expression(exp.LogProperty, no=no) 1846 1847 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1848 return self.expression(exp.JournalProperty, **kwargs) 1849 1850 def _parse_checksum(self) -> exp.ChecksumProperty: 1851 self._match(TokenType.EQ) 1852 1853 on = None 1854 if self._match(TokenType.ON): 1855 on = True 1856 elif self._match_text_seq("OFF"): 1857 on = False 1858 1859 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1860 1861 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1862 return self.expression( 1863 exp.Cluster, 1864 expressions=( 1865 self._parse_wrapped_csv(self._parse_ordered) 1866 if wrapped 1867 else self._parse_csv(self._parse_ordered) 1868 ), 1869 ) 1870 1871 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1872 self._match_text_seq("BY") 1873 1874 self._match_l_paren() 1875 expressions = self._parse_csv(self._parse_column) 1876 self._match_r_paren() 1877 1878 if self._match_text_seq("SORTED", "BY"): 1879 self._match_l_paren() 1880 sorted_by = self._parse_csv(self._parse_ordered) 1881 self._match_r_paren() 1882 else: 1883 sorted_by = None 1884 1885 self._match(TokenType.INTO) 1886 buckets = self._parse_number() 1887 self._match_text_seq("BUCKETS") 1888 1889 return self.expression( 1890 exp.ClusteredByProperty, 1891 expressions=expressions, 1892 sorted_by=sorted_by, 1893 buckets=buckets, 1894 ) 1895 1896 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1897 if not self._match_text_seq("GRANTS"): 1898 self._retreat(self._index - 1) 1899 return None 1900 1901 return self.expression(exp.CopyGrantsProperty) 1902 1903 def _parse_freespace(self) -> exp.FreespaceProperty: 1904 self._match(TokenType.EQ) 1905 return self.expression( 1906 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1907 ) 1908 1909 def _parse_mergeblockratio( 1910 self, no: bool = False, default: bool = False 1911 ) -> exp.MergeBlockRatioProperty: 1912 if self._match(TokenType.EQ): 1913 return self.expression( 1914 exp.MergeBlockRatioProperty, 1915 this=self._parse_number(), 1916 percent=self._match(TokenType.PERCENT), 1917 ) 1918 1919 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1920 1921 def _parse_datablocksize( 1922 self, 1923 default: t.Optional[bool] = None, 1924 minimum: t.Optional[bool] = None, 1925 maximum: t.Optional[bool] = None, 1926 ) -> exp.DataBlocksizeProperty: 1927 self._match(TokenType.EQ) 1928 size = self._parse_number() 1929 1930 units = None 1931 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1932 units = self._prev.text 1933 1934 return self.expression( 1935 exp.DataBlocksizeProperty, 1936 size=size, 1937 units=units, 1938 default=default, 1939 minimum=minimum, 1940 maximum=maximum, 1941 ) 1942 1943 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1944 self._match(TokenType.EQ) 1945 always = self._match_text_seq("ALWAYS") 1946 manual = self._match_text_seq("MANUAL") 1947 never = self._match_text_seq("NEVER") 1948 default = self._match_text_seq("DEFAULT") 1949 1950 autotemp = None 1951 if self._match_text_seq("AUTOTEMP"): 1952 autotemp = self._parse_schema() 1953 1954 return self.expression( 1955 exp.BlockCompressionProperty, 1956 always=always, 1957 manual=manual, 1958 never=never, 1959 default=default, 1960 autotemp=autotemp, 1961 ) 1962 1963 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 1964 index = self._index 1965 no = self._match_text_seq("NO") 1966 concurrent = self._match_text_seq("CONCURRENT") 1967 1968 if not self._match_text_seq("ISOLATED", "LOADING"): 1969 self._retreat(index) 1970 return None 1971 1972 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 1973 return self.expression( 1974 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 1975 ) 1976 1977 def _parse_locking(self) -> exp.LockingProperty: 1978 if self._match(TokenType.TABLE): 1979 kind = "TABLE" 1980 elif self._match(TokenType.VIEW): 1981 kind = "VIEW" 1982 elif self._match(TokenType.ROW): 1983 kind = "ROW" 1984 elif self._match_text_seq("DATABASE"): 1985 kind = "DATABASE" 1986 else: 1987 kind = None 1988 1989 if kind in ("DATABASE", "TABLE", "VIEW"): 1990 this = self._parse_table_parts() 1991 else: 1992 this = None 1993 1994 if self._match(TokenType.FOR): 1995 for_or_in = "FOR" 1996 elif self._match(TokenType.IN): 1997 for_or_in = "IN" 1998 else: 1999 for_or_in = None 2000 2001 if self._match_text_seq("ACCESS"): 2002 lock_type = "ACCESS" 2003 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2004 lock_type = "EXCLUSIVE" 2005 elif self._match_text_seq("SHARE"): 2006 lock_type = "SHARE" 2007 elif self._match_text_seq("READ"): 2008 lock_type = "READ" 2009 elif self._match_text_seq("WRITE"): 2010 lock_type = "WRITE" 2011 elif self._match_text_seq("CHECKSUM"): 2012 lock_type = "CHECKSUM" 2013 else: 2014 lock_type = None 2015 2016 override = self._match_text_seq("OVERRIDE") 2017 2018 return self.expression( 2019 exp.LockingProperty, 2020 this=this, 2021 kind=kind, 2022 for_or_in=for_or_in, 2023 lock_type=lock_type, 2024 override=override, 2025 ) 2026 2027 def _parse_partition_by(self) -> t.List[exp.Expression]: 2028 if self._match(TokenType.PARTITION_BY): 2029 return self._parse_csv(self._parse_conjunction) 2030 return [] 2031 2032 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2033 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2034 if self._match_text_seq("MINVALUE"): 2035 return exp.var("MINVALUE") 2036 if self._match_text_seq("MAXVALUE"): 2037 return exp.var("MAXVALUE") 2038 return self._parse_bitwise() 2039 2040 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2041 expression = None 2042 from_expressions = None 2043 to_expressions = None 2044 2045 if self._match(TokenType.IN): 2046 this = self._parse_wrapped_csv(self._parse_bitwise) 2047 elif self._match(TokenType.FROM): 2048 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2049 self._match_text_seq("TO") 2050 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2051 elif self._match_text_seq("WITH", "(", "MODULUS"): 2052 this = self._parse_number() 2053 self._match_text_seq(",", "REMAINDER") 2054 expression = self._parse_number() 2055 self._match_r_paren() 2056 else: 2057 self.raise_error("Failed to parse partition bound spec.") 2058 2059 return self.expression( 2060 exp.PartitionBoundSpec, 2061 this=this, 2062 expression=expression, 2063 from_expressions=from_expressions, 2064 to_expressions=to_expressions, 2065 ) 2066 2067 # https://www.postgresql.org/docs/current/sql-createtable.html 2068 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2069 if not self._match_text_seq("OF"): 2070 self._retreat(self._index - 1) 2071 return None 2072 2073 this = self._parse_table(schema=True) 2074 2075 if self._match(TokenType.DEFAULT): 2076 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2077 elif self._match_text_seq("FOR", "VALUES"): 2078 expression = self._parse_partition_bound_spec() 2079 else: 2080 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2081 2082 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2083 2084 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2085 self._match(TokenType.EQ) 2086 return self.expression( 2087 exp.PartitionedByProperty, 2088 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2089 ) 2090 2091 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2092 if self._match_text_seq("AND", "STATISTICS"): 2093 statistics = True 2094 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2095 statistics = False 2096 else: 2097 statistics = None 2098 2099 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2100 2101 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2102 if self._match_text_seq("SQL"): 2103 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2104 return None 2105 2106 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2107 if self._match_text_seq("SQL", "DATA"): 2108 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2109 return None 2110 2111 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2112 if self._match_text_seq("PRIMARY", "INDEX"): 2113 return exp.NoPrimaryIndexProperty() 2114 if self._match_text_seq("SQL"): 2115 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2116 return None 2117 2118 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2119 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2120 return exp.OnCommitProperty() 2121 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2122 return exp.OnCommitProperty(delete=True) 2123 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2124 2125 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2126 if self._match_text_seq("SQL", "DATA"): 2127 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2128 return None 2129 2130 def _parse_distkey(self) -> exp.DistKeyProperty: 2131 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2132 2133 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2134 table = self._parse_table(schema=True) 2135 2136 options = [] 2137 while self._match_texts(("INCLUDING", "EXCLUDING")): 2138 this = self._prev.text.upper() 2139 2140 id_var = self._parse_id_var() 2141 if not id_var: 2142 return None 2143 2144 options.append( 2145 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2146 ) 2147 2148 return self.expression(exp.LikeProperty, this=table, expressions=options) 2149 2150 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2151 return self.expression( 2152 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2153 ) 2154 2155 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2156 self._match(TokenType.EQ) 2157 return self.expression( 2158 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2159 ) 2160 2161 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2162 self._match_text_seq("WITH", "CONNECTION") 2163 return self.expression( 2164 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2165 ) 2166 2167 def _parse_returns(self) -> exp.ReturnsProperty: 2168 value: t.Optional[exp.Expression] 2169 is_table = self._match(TokenType.TABLE) 2170 2171 if is_table: 2172 if self._match(TokenType.LT): 2173 value = self.expression( 2174 exp.Schema, 2175 this="TABLE", 2176 expressions=self._parse_csv(self._parse_struct_types), 2177 ) 2178 if not self._match(TokenType.GT): 2179 self.raise_error("Expecting >") 2180 else: 2181 value = self._parse_schema(exp.var("TABLE")) 2182 else: 2183 value = self._parse_types() 2184 2185 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2186 2187 def _parse_describe(self) -> exp.Describe: 2188 kind = self._match_set(self.CREATABLES) and self._prev.text 2189 extended = self._match_text_seq("EXTENDED") 2190 this = self._parse_table(schema=True) 2191 properties = self._parse_properties() 2192 expressions = properties.expressions if properties else None 2193 return self.expression( 2194 exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions 2195 ) 2196 2197 def _parse_insert(self) -> exp.Insert: 2198 comments = ensure_list(self._prev_comments) 2199 hint = self._parse_hint() 2200 overwrite = self._match(TokenType.OVERWRITE) 2201 ignore = self._match(TokenType.IGNORE) 2202 local = self._match_text_seq("LOCAL") 2203 alternative = None 2204 is_function = None 2205 2206 if self._match_text_seq("DIRECTORY"): 2207 this: t.Optional[exp.Expression] = self.expression( 2208 exp.Directory, 2209 this=self._parse_var_or_string(), 2210 local=local, 2211 row_format=self._parse_row_format(match_row=True), 2212 ) 2213 else: 2214 if self._match(TokenType.OR): 2215 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2216 2217 self._match(TokenType.INTO) 2218 comments += ensure_list(self._prev_comments) 2219 self._match(TokenType.TABLE) 2220 is_function = self._match(TokenType.FUNCTION) 2221 2222 this = self._parse_table(schema=True) if not is_function else self._parse_function() 2223 2224 returning = self._parse_returning() 2225 2226 return self.expression( 2227 exp.Insert, 2228 comments=comments, 2229 hint=hint, 2230 is_function=is_function, 2231 this=this, 2232 by_name=self._match_text_seq("BY", "NAME"), 2233 exists=self._parse_exists(), 2234 partition=self._parse_partition(), 2235 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2236 and self._parse_conjunction(), 2237 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2238 conflict=self._parse_on_conflict(), 2239 returning=returning or self._parse_returning(), 2240 overwrite=overwrite, 2241 alternative=alternative, 2242 ignore=ignore, 2243 ) 2244 2245 def _parse_kill(self) -> exp.Kill: 2246 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2247 2248 return self.expression( 2249 exp.Kill, 2250 this=self._parse_primary(), 2251 kind=kind, 2252 ) 2253 2254 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2255 conflict = self._match_text_seq("ON", "CONFLICT") 2256 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2257 2258 if not conflict and not duplicate: 2259 return None 2260 2261 conflict_keys = None 2262 constraint = None 2263 2264 if conflict: 2265 if self._match_text_seq("ON", "CONSTRAINT"): 2266 constraint = self._parse_id_var() 2267 elif self._match(TokenType.L_PAREN): 2268 conflict_keys = self._parse_csv(self._parse_id_var) 2269 self._match_r_paren() 2270 2271 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2272 if self._prev.token_type == TokenType.UPDATE: 2273 self._match(TokenType.SET) 2274 expressions = self._parse_csv(self._parse_equality) 2275 else: 2276 expressions = None 2277 2278 return self.expression( 2279 exp.OnConflict, 2280 duplicate=duplicate, 2281 expressions=expressions, 2282 action=action, 2283 conflict_keys=conflict_keys, 2284 constraint=constraint, 2285 ) 2286 2287 def _parse_returning(self) -> t.Optional[exp.Returning]: 2288 if not self._match(TokenType.RETURNING): 2289 return None 2290 return self.expression( 2291 exp.Returning, 2292 expressions=self._parse_csv(self._parse_expression), 2293 into=self._match(TokenType.INTO) and self._parse_table_part(), 2294 ) 2295 2296 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2297 if not self._match(TokenType.FORMAT): 2298 return None 2299 return self._parse_row_format() 2300 2301 def _parse_row_format( 2302 self, match_row: bool = False 2303 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2304 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2305 return None 2306 2307 if self._match_text_seq("SERDE"): 2308 this = self._parse_string() 2309 2310 serde_properties = None 2311 if self._match(TokenType.SERDE_PROPERTIES): 2312 serde_properties = self.expression( 2313 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2314 ) 2315 2316 return self.expression( 2317 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2318 ) 2319 2320 self._match_text_seq("DELIMITED") 2321 2322 kwargs = {} 2323 2324 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2325 kwargs["fields"] = self._parse_string() 2326 if self._match_text_seq("ESCAPED", "BY"): 2327 kwargs["escaped"] = self._parse_string() 2328 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2329 kwargs["collection_items"] = self._parse_string() 2330 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2331 kwargs["map_keys"] = self._parse_string() 2332 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2333 kwargs["lines"] = self._parse_string() 2334 if self._match_text_seq("NULL", "DEFINED", "AS"): 2335 kwargs["null"] = self._parse_string() 2336 2337 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2338 2339 def _parse_load(self) -> exp.LoadData | exp.Command: 2340 if self._match_text_seq("DATA"): 2341 local = self._match_text_seq("LOCAL") 2342 self._match_text_seq("INPATH") 2343 inpath = self._parse_string() 2344 overwrite = self._match(TokenType.OVERWRITE) 2345 self._match_pair(TokenType.INTO, TokenType.TABLE) 2346 2347 return self.expression( 2348 exp.LoadData, 2349 this=self._parse_table(schema=True), 2350 local=local, 2351 overwrite=overwrite, 2352 inpath=inpath, 2353 partition=self._parse_partition(), 2354 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2355 serde=self._match_text_seq("SERDE") and self._parse_string(), 2356 ) 2357 return self._parse_as_command(self._prev) 2358 2359 def _parse_delete(self) -> exp.Delete: 2360 # This handles MySQL's "Multiple-Table Syntax" 2361 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2362 tables = None 2363 comments = self._prev_comments 2364 if not self._match(TokenType.FROM, advance=False): 2365 tables = self._parse_csv(self._parse_table) or None 2366 2367 returning = self._parse_returning() 2368 2369 return self.expression( 2370 exp.Delete, 2371 comments=comments, 2372 tables=tables, 2373 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2374 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2375 where=self._parse_where(), 2376 returning=returning or self._parse_returning(), 2377 limit=self._parse_limit(), 2378 ) 2379 2380 def _parse_update(self) -> exp.Update: 2381 comments = self._prev_comments 2382 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2383 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2384 returning = self._parse_returning() 2385 return self.expression( 2386 exp.Update, 2387 comments=comments, 2388 **{ # type: ignore 2389 "this": this, 2390 "expressions": expressions, 2391 "from": self._parse_from(joins=True), 2392 "where": self._parse_where(), 2393 "returning": returning or self._parse_returning(), 2394 "order": self._parse_order(), 2395 "limit": self._parse_limit(), 2396 }, 2397 ) 2398 2399 def _parse_uncache(self) -> exp.Uncache: 2400 if not self._match(TokenType.TABLE): 2401 self.raise_error("Expecting TABLE after UNCACHE") 2402 2403 return self.expression( 2404 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2405 ) 2406 2407 def _parse_cache(self) -> exp.Cache: 2408 lazy = self._match_text_seq("LAZY") 2409 self._match(TokenType.TABLE) 2410 table = self._parse_table(schema=True) 2411 2412 options = [] 2413 if self._match_text_seq("OPTIONS"): 2414 self._match_l_paren() 2415 k = self._parse_string() 2416 self._match(TokenType.EQ) 2417 v = self._parse_string() 2418 options = [k, v] 2419 self._match_r_paren() 2420 2421 self._match(TokenType.ALIAS) 2422 return self.expression( 2423 exp.Cache, 2424 this=table, 2425 lazy=lazy, 2426 options=options, 2427 expression=self._parse_select(nested=True), 2428 ) 2429 2430 def _parse_partition(self) -> t.Optional[exp.Partition]: 2431 if not self._match(TokenType.PARTITION): 2432 return None 2433 2434 return self.expression( 2435 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2436 ) 2437 2438 def _parse_value(self) -> exp.Tuple: 2439 if self._match(TokenType.L_PAREN): 2440 expressions = self._parse_csv(self._parse_expression) 2441 self._match_r_paren() 2442 return self.expression(exp.Tuple, expressions=expressions) 2443 2444 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2445 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2446 2447 def _parse_projections(self) -> t.List[exp.Expression]: 2448 return self._parse_expressions() 2449 2450 def _parse_select( 2451 self, 2452 nested: bool = False, 2453 table: bool = False, 2454 parse_subquery_alias: bool = True, 2455 parse_set_operation: bool = True, 2456 ) -> t.Optional[exp.Expression]: 2457 cte = self._parse_with() 2458 2459 if cte: 2460 this = self._parse_statement() 2461 2462 if not this: 2463 self.raise_error("Failed to parse any statement following CTE") 2464 return cte 2465 2466 if "with" in this.arg_types: 2467 this.set("with", cte) 2468 else: 2469 self.raise_error(f"{this.key} does not support CTE") 2470 this = cte 2471 2472 return this 2473 2474 # duckdb supports leading with FROM x 2475 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2476 2477 if self._match(TokenType.SELECT): 2478 comments = self._prev_comments 2479 2480 hint = self._parse_hint() 2481 all_ = self._match(TokenType.ALL) 2482 distinct = self._match_set(self.DISTINCT_TOKENS) 2483 2484 kind = ( 2485 self._match(TokenType.ALIAS) 2486 and self._match_texts(("STRUCT", "VALUE")) 2487 and self._prev.text.upper() 2488 ) 2489 2490 if distinct: 2491 distinct = self.expression( 2492 exp.Distinct, 2493 on=self._parse_value() if self._match(TokenType.ON) else None, 2494 ) 2495 2496 if all_ and distinct: 2497 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2498 2499 limit = self._parse_limit(top=True) 2500 projections = self._parse_projections() 2501 2502 this = self.expression( 2503 exp.Select, 2504 kind=kind, 2505 hint=hint, 2506 distinct=distinct, 2507 expressions=projections, 2508 limit=limit, 2509 ) 2510 this.comments = comments 2511 2512 into = self._parse_into() 2513 if into: 2514 this.set("into", into) 2515 2516 if not from_: 2517 from_ = self._parse_from() 2518 2519 if from_: 2520 this.set("from", from_) 2521 2522 this = self._parse_query_modifiers(this) 2523 elif (table or nested) and self._match(TokenType.L_PAREN): 2524 if self._match(TokenType.PIVOT): 2525 this = self._parse_simplified_pivot() 2526 elif self._match(TokenType.FROM): 2527 this = exp.select("*").from_( 2528 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2529 ) 2530 else: 2531 this = ( 2532 self._parse_table() 2533 if table 2534 else self._parse_select(nested=True, parse_set_operation=False) 2535 ) 2536 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2537 2538 self._match_r_paren() 2539 2540 # We return early here so that the UNION isn't attached to the subquery by the 2541 # following call to _parse_set_operations, but instead becomes the parent node 2542 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2543 elif self._match(TokenType.VALUES, advance=False): 2544 this = self._parse_derived_table_values() 2545 elif from_: 2546 this = exp.select("*").from_(from_.this, copy=False) 2547 else: 2548 this = None 2549 2550 if parse_set_operation: 2551 return self._parse_set_operations(this) 2552 return this 2553 2554 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2555 if not skip_with_token and not self._match(TokenType.WITH): 2556 return None 2557 2558 comments = self._prev_comments 2559 recursive = self._match(TokenType.RECURSIVE) 2560 2561 expressions = [] 2562 while True: 2563 expressions.append(self._parse_cte()) 2564 2565 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2566 break 2567 else: 2568 self._match(TokenType.WITH) 2569 2570 return self.expression( 2571 exp.With, comments=comments, expressions=expressions, recursive=recursive 2572 ) 2573 2574 def _parse_cte(self) -> exp.CTE: 2575 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2576 if not alias or not alias.this: 2577 self.raise_error("Expected CTE to have alias") 2578 2579 self._match(TokenType.ALIAS) 2580 2581 if self._match_text_seq("NOT", "MATERIALIZED"): 2582 materialized = False 2583 elif self._match_text_seq("MATERIALIZED"): 2584 materialized = True 2585 else: 2586 materialized = None 2587 2588 return self.expression( 2589 exp.CTE, 2590 this=self._parse_wrapped(self._parse_statement), 2591 alias=alias, 2592 materialized=materialized, 2593 ) 2594 2595 def _parse_table_alias( 2596 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2597 ) -> t.Optional[exp.TableAlias]: 2598 any_token = self._match(TokenType.ALIAS) 2599 alias = ( 2600 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2601 or self._parse_string_as_identifier() 2602 ) 2603 2604 index = self._index 2605 if self._match(TokenType.L_PAREN): 2606 columns = self._parse_csv(self._parse_function_parameter) 2607 self._match_r_paren() if columns else self._retreat(index) 2608 else: 2609 columns = None 2610 2611 if not alias and not columns: 2612 return None 2613 2614 return self.expression(exp.TableAlias, this=alias, columns=columns) 2615 2616 def _parse_subquery( 2617 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2618 ) -> t.Optional[exp.Subquery]: 2619 if not this: 2620 return None 2621 2622 return self.expression( 2623 exp.Subquery, 2624 this=this, 2625 pivots=self._parse_pivots(), 2626 alias=self._parse_table_alias() if parse_alias else None, 2627 ) 2628 2629 def _implicit_unnests_to_explicit(self, this: E) -> E: 2630 from sqlglot.optimizer.normalize_identifiers import ( 2631 normalize_identifiers as _norm, 2632 ) 2633 2634 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2635 for i, join in enumerate(this.args.get("joins") or []): 2636 table = join.this 2637 normalized_table = table.copy() 2638 normalized_table.meta["maybe_column"] = True 2639 normalized_table = _norm(normalized_table, dialect=self.dialect) 2640 2641 if isinstance(table, exp.Table) and not join.args.get("on"): 2642 if normalized_table.parts[0].name in refs: 2643 table_as_column = table.to_column() 2644 unnest = exp.Unnest(expressions=[table_as_column]) 2645 2646 # Table.to_column creates a parent Alias node that we want to convert to 2647 # a TableAlias and attach to the Unnest, so it matches the parser's output 2648 if isinstance(table.args.get("alias"), exp.TableAlias): 2649 table_as_column.replace(table_as_column.this) 2650 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2651 2652 table.replace(unnest) 2653 2654 refs.add(normalized_table.alias_or_name) 2655 2656 return this 2657 2658 def _parse_query_modifiers( 2659 self, this: t.Optional[exp.Expression] 2660 ) -> t.Optional[exp.Expression]: 2661 if isinstance(this, (exp.Query, exp.Table)): 2662 for join in self._parse_joins(): 2663 this.append("joins", join) 2664 for lateral in iter(self._parse_lateral, None): 2665 this.append("laterals", lateral) 2666 2667 while True: 2668 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2669 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2670 key, expression = parser(self) 2671 2672 if expression: 2673 this.set(key, expression) 2674 if key == "limit": 2675 offset = expression.args.pop("offset", None) 2676 2677 if offset: 2678 offset = exp.Offset(expression=offset) 2679 this.set("offset", offset) 2680 2681 limit_by_expressions = expression.expressions 2682 expression.set("expressions", None) 2683 offset.set("expressions", limit_by_expressions) 2684 continue 2685 break 2686 2687 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2688 this = self._implicit_unnests_to_explicit(this) 2689 2690 return this 2691 2692 def _parse_hint(self) -> t.Optional[exp.Hint]: 2693 if self._match(TokenType.HINT): 2694 hints = [] 2695 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2696 hints.extend(hint) 2697 2698 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2699 self.raise_error("Expected */ after HINT") 2700 2701 return self.expression(exp.Hint, expressions=hints) 2702 2703 return None 2704 2705 def _parse_into(self) -> t.Optional[exp.Into]: 2706 if not self._match(TokenType.INTO): 2707 return None 2708 2709 temp = self._match(TokenType.TEMPORARY) 2710 unlogged = self._match_text_seq("UNLOGGED") 2711 self._match(TokenType.TABLE) 2712 2713 return self.expression( 2714 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2715 ) 2716 2717 def _parse_from( 2718 self, joins: bool = False, skip_from_token: bool = False 2719 ) -> t.Optional[exp.From]: 2720 if not skip_from_token and not self._match(TokenType.FROM): 2721 return None 2722 2723 return self.expression( 2724 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2725 ) 2726 2727 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2728 if not self._match(TokenType.MATCH_RECOGNIZE): 2729 return None 2730 2731 self._match_l_paren() 2732 2733 partition = self._parse_partition_by() 2734 order = self._parse_order() 2735 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2736 2737 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2738 rows = exp.var("ONE ROW PER MATCH") 2739 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2740 text = "ALL ROWS PER MATCH" 2741 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2742 text += " SHOW EMPTY MATCHES" 2743 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2744 text += " OMIT EMPTY MATCHES" 2745 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2746 text += " WITH UNMATCHED ROWS" 2747 rows = exp.var(text) 2748 else: 2749 rows = None 2750 2751 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2752 text = "AFTER MATCH SKIP" 2753 if self._match_text_seq("PAST", "LAST", "ROW"): 2754 text += " PAST LAST ROW" 2755 elif self._match_text_seq("TO", "NEXT", "ROW"): 2756 text += " TO NEXT ROW" 2757 elif self._match_text_seq("TO", "FIRST"): 2758 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2759 elif self._match_text_seq("TO", "LAST"): 2760 text += f" TO LAST {self._advance_any().text}" # type: ignore 2761 after = exp.var(text) 2762 else: 2763 after = None 2764 2765 if self._match_text_seq("PATTERN"): 2766 self._match_l_paren() 2767 2768 if not self._curr: 2769 self.raise_error("Expecting )", self._curr) 2770 2771 paren = 1 2772 start = self._curr 2773 2774 while self._curr and paren > 0: 2775 if self._curr.token_type == TokenType.L_PAREN: 2776 paren += 1 2777 if self._curr.token_type == TokenType.R_PAREN: 2778 paren -= 1 2779 2780 end = self._prev 2781 self._advance() 2782 2783 if paren > 0: 2784 self.raise_error("Expecting )", self._curr) 2785 2786 pattern = exp.var(self._find_sql(start, end)) 2787 else: 2788 pattern = None 2789 2790 define = ( 2791 self._parse_csv(self._parse_name_as_expression) 2792 if self._match_text_seq("DEFINE") 2793 else None 2794 ) 2795 2796 self._match_r_paren() 2797 2798 return self.expression( 2799 exp.MatchRecognize, 2800 partition_by=partition, 2801 order=order, 2802 measures=measures, 2803 rows=rows, 2804 after=after, 2805 pattern=pattern, 2806 define=define, 2807 alias=self._parse_table_alias(), 2808 ) 2809 2810 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2811 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2812 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2813 cross_apply = False 2814 2815 if cross_apply is not None: 2816 this = self._parse_select(table=True) 2817 view = None 2818 outer = None 2819 elif self._match(TokenType.LATERAL): 2820 this = self._parse_select(table=True) 2821 view = self._match(TokenType.VIEW) 2822 outer = self._match(TokenType.OUTER) 2823 else: 2824 return None 2825 2826 if not this: 2827 this = ( 2828 self._parse_unnest() 2829 or self._parse_function() 2830 or self._parse_id_var(any_token=False) 2831 ) 2832 2833 while self._match(TokenType.DOT): 2834 this = exp.Dot( 2835 this=this, 2836 expression=self._parse_function() or self._parse_id_var(any_token=False), 2837 ) 2838 2839 if view: 2840 table = self._parse_id_var(any_token=False) 2841 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2842 table_alias: t.Optional[exp.TableAlias] = self.expression( 2843 exp.TableAlias, this=table, columns=columns 2844 ) 2845 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2846 # We move the alias from the lateral's child node to the lateral itself 2847 table_alias = this.args["alias"].pop() 2848 else: 2849 table_alias = self._parse_table_alias() 2850 2851 return self.expression( 2852 exp.Lateral, 2853 this=this, 2854 view=view, 2855 outer=outer, 2856 alias=table_alias, 2857 cross_apply=cross_apply, 2858 ) 2859 2860 def _parse_join_parts( 2861 self, 2862 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2863 return ( 2864 self._match_set(self.JOIN_METHODS) and self._prev, 2865 self._match_set(self.JOIN_SIDES) and self._prev, 2866 self._match_set(self.JOIN_KINDS) and self._prev, 2867 ) 2868 2869 def _parse_join( 2870 self, skip_join_token: bool = False, parse_bracket: bool = False 2871 ) -> t.Optional[exp.Join]: 2872 if self._match(TokenType.COMMA): 2873 return self.expression(exp.Join, this=self._parse_table()) 2874 2875 index = self._index 2876 method, side, kind = self._parse_join_parts() 2877 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2878 join = self._match(TokenType.JOIN) 2879 2880 if not skip_join_token and not join: 2881 self._retreat(index) 2882 kind = None 2883 method = None 2884 side = None 2885 2886 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2887 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2888 2889 if not skip_join_token and not join and not outer_apply and not cross_apply: 2890 return None 2891 2892 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2893 2894 if method: 2895 kwargs["method"] = method.text 2896 if side: 2897 kwargs["side"] = side.text 2898 if kind: 2899 kwargs["kind"] = kind.text 2900 if hint: 2901 kwargs["hint"] = hint 2902 2903 if self._match(TokenType.ON): 2904 kwargs["on"] = self._parse_conjunction() 2905 elif self._match(TokenType.USING): 2906 kwargs["using"] = self._parse_wrapped_id_vars() 2907 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 2908 kind and kind.token_type == TokenType.CROSS 2909 ): 2910 index = self._index 2911 joins: t.Optional[list] = list(self._parse_joins()) 2912 2913 if joins and self._match(TokenType.ON): 2914 kwargs["on"] = self._parse_conjunction() 2915 elif joins and self._match(TokenType.USING): 2916 kwargs["using"] = self._parse_wrapped_id_vars() 2917 else: 2918 joins = None 2919 self._retreat(index) 2920 2921 kwargs["this"].set("joins", joins if joins else None) 2922 2923 comments = [c for token in (method, side, kind) if token for c in token.comments] 2924 return self.expression(exp.Join, comments=comments, **kwargs) 2925 2926 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2927 this = self._parse_conjunction() 2928 2929 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2930 return this 2931 2932 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 2933 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 2934 2935 return this 2936 2937 def _parse_index_params(self) -> exp.IndexParameters: 2938 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2939 2940 if self._match(TokenType.L_PAREN, advance=False): 2941 columns = self._parse_wrapped_csv(self._parse_with_operator) 2942 else: 2943 columns = None 2944 2945 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 2946 partition_by = self._parse_partition_by() 2947 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 2948 tablespace = ( 2949 self._parse_var(any_token=True) 2950 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 2951 else None 2952 ) 2953 where = self._parse_where() 2954 2955 return self.expression( 2956 exp.IndexParameters, 2957 using=using, 2958 columns=columns, 2959 include=include, 2960 partition_by=partition_by, 2961 where=where, 2962 with_storage=with_storage, 2963 tablespace=tablespace, 2964 ) 2965 2966 def _parse_index( 2967 self, 2968 index: t.Optional[exp.Expression] = None, 2969 ) -> t.Optional[exp.Index]: 2970 if index: 2971 unique = None 2972 primary = None 2973 amp = None 2974 2975 self._match(TokenType.ON) 2976 self._match(TokenType.TABLE) # hive 2977 table = self._parse_table_parts(schema=True) 2978 else: 2979 unique = self._match(TokenType.UNIQUE) 2980 primary = self._match_text_seq("PRIMARY") 2981 amp = self._match_text_seq("AMP") 2982 2983 if not self._match(TokenType.INDEX): 2984 return None 2985 2986 index = self._parse_id_var() 2987 table = None 2988 2989 params = self._parse_index_params() 2990 2991 return self.expression( 2992 exp.Index, 2993 this=index, 2994 table=table, 2995 unique=unique, 2996 primary=primary, 2997 amp=amp, 2998 params=params, 2999 ) 3000 3001 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3002 hints: t.List[exp.Expression] = [] 3003 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3004 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3005 hints.append( 3006 self.expression( 3007 exp.WithTableHint, 3008 expressions=self._parse_csv( 3009 lambda: self._parse_function() or self._parse_var(any_token=True) 3010 ), 3011 ) 3012 ) 3013 self._match_r_paren() 3014 else: 3015 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3016 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3017 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3018 3019 self._match_texts(("INDEX", "KEY")) 3020 if self._match(TokenType.FOR): 3021 hint.set("target", self._advance_any() and self._prev.text.upper()) 3022 3023 hint.set("expressions", self._parse_wrapped_id_vars()) 3024 hints.append(hint) 3025 3026 return hints or None 3027 3028 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3029 return ( 3030 (not schema and self._parse_function(optional_parens=False)) 3031 or self._parse_id_var(any_token=False) 3032 or self._parse_string_as_identifier() 3033 or self._parse_placeholder() 3034 ) 3035 3036 def _parse_table_parts( 3037 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3038 ) -> exp.Table: 3039 catalog = None 3040 db = None 3041 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3042 3043 while self._match(TokenType.DOT): 3044 if catalog: 3045 # This allows nesting the table in arbitrarily many dot expressions if needed 3046 table = self.expression( 3047 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3048 ) 3049 else: 3050 catalog = db 3051 db = table 3052 # "" used for tsql FROM a..b case 3053 table = self._parse_table_part(schema=schema) or "" 3054 3055 if ( 3056 wildcard 3057 and self._is_connected() 3058 and (isinstance(table, exp.Identifier) or not table) 3059 and self._match(TokenType.STAR) 3060 ): 3061 if isinstance(table, exp.Identifier): 3062 table.args["this"] += "*" 3063 else: 3064 table = exp.Identifier(this="*") 3065 3066 if is_db_reference: 3067 catalog = db 3068 db = table 3069 table = None 3070 3071 if not table and not is_db_reference: 3072 self.raise_error(f"Expected table name but got {self._curr}") 3073 if not db and is_db_reference: 3074 self.raise_error(f"Expected database name but got {self._curr}") 3075 3076 return self.expression( 3077 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3078 ) 3079 3080 def _parse_table( 3081 self, 3082 schema: bool = False, 3083 joins: bool = False, 3084 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3085 parse_bracket: bool = False, 3086 is_db_reference: bool = False, 3087 ) -> t.Optional[exp.Expression]: 3088 lateral = self._parse_lateral() 3089 if lateral: 3090 return lateral 3091 3092 unnest = self._parse_unnest() 3093 if unnest: 3094 return unnest 3095 3096 values = self._parse_derived_table_values() 3097 if values: 3098 return values 3099 3100 subquery = self._parse_select(table=True) 3101 if subquery: 3102 if not subquery.args.get("pivots"): 3103 subquery.set("pivots", self._parse_pivots()) 3104 return subquery 3105 3106 bracket = parse_bracket and self._parse_bracket(None) 3107 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3108 3109 only = self._match(TokenType.ONLY) 3110 3111 this = t.cast( 3112 exp.Expression, 3113 bracket 3114 or self._parse_bracket( 3115 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3116 ), 3117 ) 3118 3119 if only: 3120 this.set("only", only) 3121 3122 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3123 self._match_text_seq("*") 3124 3125 if schema: 3126 return self._parse_schema(this=this) 3127 3128 version = self._parse_version() 3129 3130 if version: 3131 this.set("version", version) 3132 3133 if self.dialect.ALIAS_POST_TABLESAMPLE: 3134 table_sample = self._parse_table_sample() 3135 3136 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3137 if alias: 3138 this.set("alias", alias) 3139 3140 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3141 return self.expression( 3142 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3143 ) 3144 3145 this.set("hints", self._parse_table_hints()) 3146 3147 if not this.args.get("pivots"): 3148 this.set("pivots", self._parse_pivots()) 3149 3150 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3151 table_sample = self._parse_table_sample() 3152 3153 if table_sample: 3154 table_sample.set("this", this) 3155 this = table_sample 3156 3157 if joins: 3158 for join in self._parse_joins(): 3159 this.append("joins", join) 3160 3161 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3162 this.set("ordinality", True) 3163 this.set("alias", self._parse_table_alias()) 3164 3165 return this 3166 3167 def _parse_version(self) -> t.Optional[exp.Version]: 3168 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3169 this = "TIMESTAMP" 3170 elif self._match(TokenType.VERSION_SNAPSHOT): 3171 this = "VERSION" 3172 else: 3173 return None 3174 3175 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3176 kind = self._prev.text.upper() 3177 start = self._parse_bitwise() 3178 self._match_texts(("TO", "AND")) 3179 end = self._parse_bitwise() 3180 expression: t.Optional[exp.Expression] = self.expression( 3181 exp.Tuple, expressions=[start, end] 3182 ) 3183 elif self._match_text_seq("CONTAINED", "IN"): 3184 kind = "CONTAINED IN" 3185 expression = self.expression( 3186 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3187 ) 3188 elif self._match(TokenType.ALL): 3189 kind = "ALL" 3190 expression = None 3191 else: 3192 self._match_text_seq("AS", "OF") 3193 kind = "AS OF" 3194 expression = self._parse_type() 3195 3196 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3197 3198 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3199 if not self._match(TokenType.UNNEST): 3200 return None 3201 3202 expressions = self._parse_wrapped_csv(self._parse_equality) 3203 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3204 3205 alias = self._parse_table_alias() if with_alias else None 3206 3207 if alias: 3208 if self.dialect.UNNEST_COLUMN_ONLY: 3209 if alias.args.get("columns"): 3210 self.raise_error("Unexpected extra column alias in unnest.") 3211 3212 alias.set("columns", [alias.this]) 3213 alias.set("this", None) 3214 3215 columns = alias.args.get("columns") or [] 3216 if offset and len(expressions) < len(columns): 3217 offset = columns.pop() 3218 3219 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3220 self._match(TokenType.ALIAS) 3221 offset = self._parse_id_var( 3222 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3223 ) or exp.to_identifier("offset") 3224 3225 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3226 3227 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3228 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3229 if not is_derived and not self._match_text_seq("VALUES"): 3230 return None 3231 3232 expressions = self._parse_csv(self._parse_value) 3233 alias = self._parse_table_alias() 3234 3235 if is_derived: 3236 self._match_r_paren() 3237 3238 return self.expression( 3239 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3240 ) 3241 3242 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3243 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3244 as_modifier and self._match_text_seq("USING", "SAMPLE") 3245 ): 3246 return None 3247 3248 bucket_numerator = None 3249 bucket_denominator = None 3250 bucket_field = None 3251 percent = None 3252 size = None 3253 seed = None 3254 3255 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3256 matched_l_paren = self._match(TokenType.L_PAREN) 3257 3258 if self.TABLESAMPLE_CSV: 3259 num = None 3260 expressions = self._parse_csv(self._parse_primary) 3261 else: 3262 expressions = None 3263 num = ( 3264 self._parse_factor() 3265 if self._match(TokenType.NUMBER, advance=False) 3266 else self._parse_primary() or self._parse_placeholder() 3267 ) 3268 3269 if self._match_text_seq("BUCKET"): 3270 bucket_numerator = self._parse_number() 3271 self._match_text_seq("OUT", "OF") 3272 bucket_denominator = bucket_denominator = self._parse_number() 3273 self._match(TokenType.ON) 3274 bucket_field = self._parse_field() 3275 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3276 percent = num 3277 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3278 size = num 3279 else: 3280 percent = num 3281 3282 if matched_l_paren: 3283 self._match_r_paren() 3284 3285 if self._match(TokenType.L_PAREN): 3286 method = self._parse_var(upper=True) 3287 seed = self._match(TokenType.COMMA) and self._parse_number() 3288 self._match_r_paren() 3289 elif self._match_texts(("SEED", "REPEATABLE")): 3290 seed = self._parse_wrapped(self._parse_number) 3291 3292 return self.expression( 3293 exp.TableSample, 3294 expressions=expressions, 3295 method=method, 3296 bucket_numerator=bucket_numerator, 3297 bucket_denominator=bucket_denominator, 3298 bucket_field=bucket_field, 3299 percent=percent, 3300 size=size, 3301 seed=seed, 3302 ) 3303 3304 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3305 return list(iter(self._parse_pivot, None)) or None 3306 3307 def _parse_joins(self) -> t.Iterator[exp.Join]: 3308 return iter(self._parse_join, None) 3309 3310 # https://duckdb.org/docs/sql/statements/pivot 3311 def _parse_simplified_pivot(self) -> exp.Pivot: 3312 def _parse_on() -> t.Optional[exp.Expression]: 3313 this = self._parse_bitwise() 3314 return self._parse_in(this) if self._match(TokenType.IN) else this 3315 3316 this = self._parse_table() 3317 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3318 using = self._match(TokenType.USING) and self._parse_csv( 3319 lambda: self._parse_alias(self._parse_function()) 3320 ) 3321 group = self._parse_group() 3322 return self.expression( 3323 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3324 ) 3325 3326 def _parse_pivot_in(self) -> exp.In: 3327 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3328 this = self._parse_conjunction() 3329 3330 self._match(TokenType.ALIAS) 3331 alias = self._parse_field() 3332 if alias: 3333 return self.expression(exp.PivotAlias, this=this, alias=alias) 3334 3335 return this 3336 3337 value = self._parse_column() 3338 3339 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3340 self.raise_error("Expecting IN (") 3341 3342 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3343 3344 self._match_r_paren() 3345 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3346 3347 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3348 index = self._index 3349 include_nulls = None 3350 3351 if self._match(TokenType.PIVOT): 3352 unpivot = False 3353 elif self._match(TokenType.UNPIVOT): 3354 unpivot = True 3355 3356 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3357 if self._match_text_seq("INCLUDE", "NULLS"): 3358 include_nulls = True 3359 elif self._match_text_seq("EXCLUDE", "NULLS"): 3360 include_nulls = False 3361 else: 3362 return None 3363 3364 expressions = [] 3365 3366 if not self._match(TokenType.L_PAREN): 3367 self._retreat(index) 3368 return None 3369 3370 if unpivot: 3371 expressions = self._parse_csv(self._parse_column) 3372 else: 3373 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3374 3375 if not expressions: 3376 self.raise_error("Failed to parse PIVOT's aggregation list") 3377 3378 if not self._match(TokenType.FOR): 3379 self.raise_error("Expecting FOR") 3380 3381 field = self._parse_pivot_in() 3382 3383 self._match_r_paren() 3384 3385 pivot = self.expression( 3386 exp.Pivot, 3387 expressions=expressions, 3388 field=field, 3389 unpivot=unpivot, 3390 include_nulls=include_nulls, 3391 ) 3392 3393 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3394 pivot.set("alias", self._parse_table_alias()) 3395 3396 if not unpivot: 3397 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3398 3399 columns: t.List[exp.Expression] = [] 3400 for fld in pivot.args["field"].expressions: 3401 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3402 for name in names: 3403 if self.PREFIXED_PIVOT_COLUMNS: 3404 name = f"{name}_{field_name}" if name else field_name 3405 else: 3406 name = f"{field_name}_{name}" if name else field_name 3407 3408 columns.append(exp.to_identifier(name)) 3409 3410 pivot.set("columns", columns) 3411 3412 return pivot 3413 3414 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3415 return [agg.alias for agg in aggregations] 3416 3417 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3418 if not skip_where_token and not self._match(TokenType.PREWHERE): 3419 return None 3420 3421 return self.expression( 3422 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3423 ) 3424 3425 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3426 if not skip_where_token and not self._match(TokenType.WHERE): 3427 return None 3428 3429 return self.expression( 3430 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3431 ) 3432 3433 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3434 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3435 return None 3436 3437 elements = defaultdict(list) 3438 3439 if self._match(TokenType.ALL): 3440 return self.expression(exp.Group, all=True) 3441 3442 while True: 3443 expressions = self._parse_csv(self._parse_conjunction) 3444 if expressions: 3445 elements["expressions"].extend(expressions) 3446 3447 grouping_sets = self._parse_grouping_sets() 3448 if grouping_sets: 3449 elements["grouping_sets"].extend(grouping_sets) 3450 3451 rollup = None 3452 cube = None 3453 totals = None 3454 3455 index = self._index 3456 with_ = self._match(TokenType.WITH) 3457 if self._match(TokenType.ROLLUP): 3458 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3459 elements["rollup"].extend(ensure_list(rollup)) 3460 3461 if self._match(TokenType.CUBE): 3462 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3463 elements["cube"].extend(ensure_list(cube)) 3464 3465 if self._match_text_seq("TOTALS"): 3466 totals = True 3467 elements["totals"] = True # type: ignore 3468 3469 if not (grouping_sets or rollup or cube or totals): 3470 if with_: 3471 self._retreat(index) 3472 break 3473 3474 return self.expression(exp.Group, **elements) # type: ignore 3475 3476 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3477 if not self._match(TokenType.GROUPING_SETS): 3478 return None 3479 3480 return self._parse_wrapped_csv(self._parse_grouping_set) 3481 3482 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3483 if self._match(TokenType.L_PAREN): 3484 grouping_set = self._parse_csv(self._parse_column) 3485 self._match_r_paren() 3486 return self.expression(exp.Tuple, expressions=grouping_set) 3487 3488 return self._parse_column() 3489 3490 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3491 if not skip_having_token and not self._match(TokenType.HAVING): 3492 return None 3493 return self.expression(exp.Having, this=self._parse_conjunction()) 3494 3495 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3496 if not self._match(TokenType.QUALIFY): 3497 return None 3498 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3499 3500 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3501 if skip_start_token: 3502 start = None 3503 elif self._match(TokenType.START_WITH): 3504 start = self._parse_conjunction() 3505 else: 3506 return None 3507 3508 self._match(TokenType.CONNECT_BY) 3509 nocycle = self._match_text_seq("NOCYCLE") 3510 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3511 exp.Prior, this=self._parse_bitwise() 3512 ) 3513 connect = self._parse_conjunction() 3514 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3515 3516 if not start and self._match(TokenType.START_WITH): 3517 start = self._parse_conjunction() 3518 3519 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3520 3521 def _parse_name_as_expression(self) -> exp.Alias: 3522 return self.expression( 3523 exp.Alias, 3524 alias=self._parse_id_var(any_token=True), 3525 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3526 ) 3527 3528 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3529 if self._match_text_seq("INTERPOLATE"): 3530 return self._parse_wrapped_csv(self._parse_name_as_expression) 3531 return None 3532 3533 def _parse_order( 3534 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3535 ) -> t.Optional[exp.Expression]: 3536 siblings = None 3537 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3538 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3539 return this 3540 3541 siblings = True 3542 3543 return self.expression( 3544 exp.Order, 3545 this=this, 3546 expressions=self._parse_csv(self._parse_ordered), 3547 interpolate=self._parse_interpolate(), 3548 siblings=siblings, 3549 ) 3550 3551 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3552 if not self._match(token): 3553 return None 3554 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3555 3556 def _parse_ordered( 3557 self, parse_method: t.Optional[t.Callable] = None 3558 ) -> t.Optional[exp.Ordered]: 3559 this = parse_method() if parse_method else self._parse_conjunction() 3560 if not this: 3561 return None 3562 3563 asc = self._match(TokenType.ASC) 3564 desc = self._match(TokenType.DESC) or (asc and False) 3565 3566 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3567 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3568 3569 nulls_first = is_nulls_first or False 3570 explicitly_null_ordered = is_nulls_first or is_nulls_last 3571 3572 if ( 3573 not explicitly_null_ordered 3574 and ( 3575 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3576 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3577 ) 3578 and self.dialect.NULL_ORDERING != "nulls_are_last" 3579 ): 3580 nulls_first = True 3581 3582 if self._match_text_seq("WITH", "FILL"): 3583 with_fill = self.expression( 3584 exp.WithFill, 3585 **{ # type: ignore 3586 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3587 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3588 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3589 }, 3590 ) 3591 else: 3592 with_fill = None 3593 3594 return self.expression( 3595 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3596 ) 3597 3598 def _parse_limit( 3599 self, 3600 this: t.Optional[exp.Expression] = None, 3601 top: bool = False, 3602 skip_limit_token: bool = False, 3603 ) -> t.Optional[exp.Expression]: 3604 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3605 comments = self._prev_comments 3606 if top: 3607 limit_paren = self._match(TokenType.L_PAREN) 3608 expression = self._parse_term() if limit_paren else self._parse_number() 3609 3610 if limit_paren: 3611 self._match_r_paren() 3612 else: 3613 expression = self._parse_term() 3614 3615 if self._match(TokenType.COMMA): 3616 offset = expression 3617 expression = self._parse_term() 3618 else: 3619 offset = None 3620 3621 limit_exp = self.expression( 3622 exp.Limit, 3623 this=this, 3624 expression=expression, 3625 offset=offset, 3626 comments=comments, 3627 expressions=self._parse_limit_by(), 3628 ) 3629 3630 return limit_exp 3631 3632 if self._match(TokenType.FETCH): 3633 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3634 direction = self._prev.text.upper() if direction else "FIRST" 3635 3636 count = self._parse_field(tokens=self.FETCH_TOKENS) 3637 percent = self._match(TokenType.PERCENT) 3638 3639 self._match_set((TokenType.ROW, TokenType.ROWS)) 3640 3641 only = self._match_text_seq("ONLY") 3642 with_ties = self._match_text_seq("WITH", "TIES") 3643 3644 if only and with_ties: 3645 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3646 3647 return self.expression( 3648 exp.Fetch, 3649 direction=direction, 3650 count=count, 3651 percent=percent, 3652 with_ties=with_ties, 3653 ) 3654 3655 return this 3656 3657 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3658 if not self._match(TokenType.OFFSET): 3659 return this 3660 3661 count = self._parse_term() 3662 self._match_set((TokenType.ROW, TokenType.ROWS)) 3663 3664 return self.expression( 3665 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3666 ) 3667 3668 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3669 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3670 3671 def _parse_locks(self) -> t.List[exp.Lock]: 3672 locks = [] 3673 while True: 3674 if self._match_text_seq("FOR", "UPDATE"): 3675 update = True 3676 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3677 "LOCK", "IN", "SHARE", "MODE" 3678 ): 3679 update = False 3680 else: 3681 break 3682 3683 expressions = None 3684 if self._match_text_seq("OF"): 3685 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3686 3687 wait: t.Optional[bool | exp.Expression] = None 3688 if self._match_text_seq("NOWAIT"): 3689 wait = True 3690 elif self._match_text_seq("WAIT"): 3691 wait = self._parse_primary() 3692 elif self._match_text_seq("SKIP", "LOCKED"): 3693 wait = False 3694 3695 locks.append( 3696 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3697 ) 3698 3699 return locks 3700 3701 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3702 while this and self._match_set(self.SET_OPERATIONS): 3703 token_type = self._prev.token_type 3704 3705 if token_type == TokenType.UNION: 3706 operation = exp.Union 3707 elif token_type == TokenType.EXCEPT: 3708 operation = exp.Except 3709 else: 3710 operation = exp.Intersect 3711 3712 comments = self._prev.comments 3713 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3714 by_name = self._match_text_seq("BY", "NAME") 3715 expression = self._parse_select(nested=True, parse_set_operation=False) 3716 3717 this = self.expression( 3718 operation, 3719 comments=comments, 3720 this=this, 3721 distinct=distinct, 3722 by_name=by_name, 3723 expression=expression, 3724 ) 3725 3726 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3727 expression = this.expression 3728 3729 if expression: 3730 for arg in self.UNION_MODIFIERS: 3731 expr = expression.args.get(arg) 3732 if expr: 3733 this.set(arg, expr.pop()) 3734 3735 return this 3736 3737 def _parse_expression(self) -> t.Optional[exp.Expression]: 3738 return self._parse_alias(self._parse_conjunction()) 3739 3740 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3741 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3742 3743 def _parse_equality(self) -> t.Optional[exp.Expression]: 3744 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3745 3746 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3747 return self._parse_tokens(self._parse_range, self.COMPARISON) 3748 3749 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3750 this = this or self._parse_bitwise() 3751 negate = self._match(TokenType.NOT) 3752 3753 if self._match_set(self.RANGE_PARSERS): 3754 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3755 if not expression: 3756 return this 3757 3758 this = expression 3759 elif self._match(TokenType.ISNULL): 3760 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3761 3762 # Postgres supports ISNULL and NOTNULL for conditions. 3763 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3764 if self._match(TokenType.NOTNULL): 3765 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3766 this = self.expression(exp.Not, this=this) 3767 3768 if negate: 3769 this = self.expression(exp.Not, this=this) 3770 3771 if self._match(TokenType.IS): 3772 this = self._parse_is(this) 3773 3774 return this 3775 3776 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3777 index = self._index - 1 3778 negate = self._match(TokenType.NOT) 3779 3780 if self._match_text_seq("DISTINCT", "FROM"): 3781 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3782 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3783 3784 expression = self._parse_null() or self._parse_boolean() 3785 if not expression: 3786 self._retreat(index) 3787 return None 3788 3789 this = self.expression(exp.Is, this=this, expression=expression) 3790 return self.expression(exp.Not, this=this) if negate else this 3791 3792 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3793 unnest = self._parse_unnest(with_alias=False) 3794 if unnest: 3795 this = self.expression(exp.In, this=this, unnest=unnest) 3796 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3797 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3798 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3799 3800 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3801 this = self.expression(exp.In, this=this, query=expressions[0]) 3802 else: 3803 this = self.expression(exp.In, this=this, expressions=expressions) 3804 3805 if matched_l_paren: 3806 self._match_r_paren(this) 3807 elif not self._match(TokenType.R_BRACKET, expression=this): 3808 self.raise_error("Expecting ]") 3809 else: 3810 this = self.expression(exp.In, this=this, field=self._parse_field()) 3811 3812 return this 3813 3814 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3815 low = self._parse_bitwise() 3816 self._match(TokenType.AND) 3817 high = self._parse_bitwise() 3818 return self.expression(exp.Between, this=this, low=low, high=high) 3819 3820 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3821 if not self._match(TokenType.ESCAPE): 3822 return this 3823 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3824 3825 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3826 index = self._index 3827 3828 if not self._match(TokenType.INTERVAL) and match_interval: 3829 return None 3830 3831 if self._match(TokenType.STRING, advance=False): 3832 this = self._parse_primary() 3833 else: 3834 this = self._parse_term() 3835 3836 if not this or ( 3837 isinstance(this, exp.Column) 3838 and not this.table 3839 and not this.this.quoted 3840 and this.name.upper() == "IS" 3841 ): 3842 self._retreat(index) 3843 return None 3844 3845 unit = self._parse_function() or ( 3846 not self._match(TokenType.ALIAS, advance=False) 3847 and self._parse_var(any_token=True, upper=True) 3848 ) 3849 3850 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3851 # each INTERVAL expression into this canonical form so it's easy to transpile 3852 if this and this.is_number: 3853 this = exp.Literal.string(this.name) 3854 elif this and this.is_string: 3855 parts = this.name.split() 3856 3857 if len(parts) == 2: 3858 if unit: 3859 # This is not actually a unit, it's something else (e.g. a "window side") 3860 unit = None 3861 self._retreat(self._index - 1) 3862 3863 this = exp.Literal.string(parts[0]) 3864 unit = self.expression(exp.Var, this=parts[1].upper()) 3865 3866 return self.expression(exp.Interval, this=this, unit=unit) 3867 3868 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3869 this = self._parse_term() 3870 3871 while True: 3872 if self._match_set(self.BITWISE): 3873 this = self.expression( 3874 self.BITWISE[self._prev.token_type], 3875 this=this, 3876 expression=self._parse_term(), 3877 ) 3878 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3879 this = self.expression( 3880 exp.DPipe, 3881 this=this, 3882 expression=self._parse_term(), 3883 safe=not self.dialect.STRICT_STRING_CONCAT, 3884 ) 3885 elif self._match(TokenType.DQMARK): 3886 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3887 elif self._match_pair(TokenType.LT, TokenType.LT): 3888 this = self.expression( 3889 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3890 ) 3891 elif self._match_pair(TokenType.GT, TokenType.GT): 3892 this = self.expression( 3893 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3894 ) 3895 else: 3896 break 3897 3898 return this 3899 3900 def _parse_term(self) -> t.Optional[exp.Expression]: 3901 return self._parse_tokens(self._parse_factor, self.TERM) 3902 3903 def _parse_factor(self) -> t.Optional[exp.Expression]: 3904 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3905 this = parse_method() 3906 3907 while self._match_set(self.FACTOR): 3908 this = self.expression( 3909 self.FACTOR[self._prev.token_type], 3910 this=this, 3911 comments=self._prev_comments, 3912 expression=parse_method(), 3913 ) 3914 if isinstance(this, exp.Div): 3915 this.args["typed"] = self.dialect.TYPED_DIVISION 3916 this.args["safe"] = self.dialect.SAFE_DIVISION 3917 3918 return this 3919 3920 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3921 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3922 3923 def _parse_unary(self) -> t.Optional[exp.Expression]: 3924 if self._match_set(self.UNARY_PARSERS): 3925 return self.UNARY_PARSERS[self._prev.token_type](self) 3926 return self._parse_at_time_zone(self._parse_type()) 3927 3928 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3929 interval = parse_interval and self._parse_interval() 3930 if interval: 3931 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3932 while True: 3933 index = self._index 3934 self._match(TokenType.PLUS) 3935 3936 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3937 self._retreat(index) 3938 break 3939 3940 interval = self.expression( # type: ignore 3941 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3942 ) 3943 3944 return interval 3945 3946 index = self._index 3947 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3948 this = self._parse_column() 3949 3950 if data_type: 3951 if isinstance(this, exp.Literal): 3952 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3953 if parser: 3954 return parser(self, this, data_type) 3955 return self.expression(exp.Cast, this=this, to=data_type) 3956 if not data_type.expressions: 3957 self._retreat(index) 3958 return self._parse_column() 3959 return self._parse_column_ops(data_type) 3960 3961 return this and self._parse_column_ops(this) 3962 3963 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3964 this = self._parse_type() 3965 if not this: 3966 return None 3967 3968 if isinstance(this, exp.Column) and not this.table: 3969 this = exp.var(this.name.upper()) 3970 3971 return self.expression( 3972 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3973 ) 3974 3975 def _parse_types( 3976 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3977 ) -> t.Optional[exp.Expression]: 3978 index = self._index 3979 3980 prefix = self._match_text_seq("SYSUDTLIB", ".") 3981 3982 if not self._match_set(self.TYPE_TOKENS): 3983 identifier = allow_identifiers and self._parse_id_var( 3984 any_token=False, tokens=(TokenType.VAR,) 3985 ) 3986 if identifier: 3987 tokens = self.dialect.tokenize(identifier.name) 3988 3989 if len(tokens) != 1: 3990 self.raise_error("Unexpected identifier", self._prev) 3991 3992 if tokens[0].token_type in self.TYPE_TOKENS: 3993 self._prev = tokens[0] 3994 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3995 type_name = identifier.name 3996 3997 while self._match(TokenType.DOT): 3998 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3999 4000 return exp.DataType.build(type_name, udt=True) 4001 else: 4002 self._retreat(self._index - 1) 4003 return None 4004 else: 4005 return None 4006 4007 type_token = self._prev.token_type 4008 4009 if type_token == TokenType.PSEUDO_TYPE: 4010 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4011 4012 if type_token == TokenType.OBJECT_IDENTIFIER: 4013 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4014 4015 nested = type_token in self.NESTED_TYPE_TOKENS 4016 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4017 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4018 expressions = None 4019 maybe_func = False 4020 4021 if self._match(TokenType.L_PAREN): 4022 if is_struct: 4023 expressions = self._parse_csv(self._parse_struct_types) 4024 elif nested: 4025 expressions = self._parse_csv( 4026 lambda: self._parse_types( 4027 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4028 ) 4029 ) 4030 elif type_token in self.ENUM_TYPE_TOKENS: 4031 expressions = self._parse_csv(self._parse_equality) 4032 elif is_aggregate: 4033 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4034 any_token=False, tokens=(TokenType.VAR,) 4035 ) 4036 if not func_or_ident or not self._match(TokenType.COMMA): 4037 return None 4038 expressions = self._parse_csv( 4039 lambda: self._parse_types( 4040 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4041 ) 4042 ) 4043 expressions.insert(0, func_or_ident) 4044 else: 4045 expressions = self._parse_csv(self._parse_type_size) 4046 4047 if not expressions or not self._match(TokenType.R_PAREN): 4048 self._retreat(index) 4049 return None 4050 4051 maybe_func = True 4052 4053 this: t.Optional[exp.Expression] = None 4054 values: t.Optional[t.List[exp.Expression]] = None 4055 4056 if nested and self._match(TokenType.LT): 4057 if is_struct: 4058 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4059 else: 4060 expressions = self._parse_csv( 4061 lambda: self._parse_types( 4062 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4063 ) 4064 ) 4065 4066 if not self._match(TokenType.GT): 4067 self.raise_error("Expecting >") 4068 4069 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4070 values = self._parse_csv(self._parse_conjunction) 4071 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4072 4073 if type_token in self.TIMESTAMPS: 4074 if self._match_text_seq("WITH", "TIME", "ZONE"): 4075 maybe_func = False 4076 tz_type = ( 4077 exp.DataType.Type.TIMETZ 4078 if type_token in self.TIMES 4079 else exp.DataType.Type.TIMESTAMPTZ 4080 ) 4081 this = exp.DataType(this=tz_type, expressions=expressions) 4082 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4083 maybe_func = False 4084 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4085 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4086 maybe_func = False 4087 elif type_token == TokenType.INTERVAL: 4088 unit = self._parse_var() 4089 4090 if self._match_text_seq("TO"): 4091 span = [exp.IntervalSpan(this=unit, expression=self._parse_var())] 4092 else: 4093 span = None 4094 4095 if span or not unit: 4096 this = self.expression( 4097 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 4098 ) 4099 else: 4100 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4101 4102 if maybe_func and check_func: 4103 index2 = self._index 4104 peek = self._parse_string() 4105 4106 if not peek: 4107 self._retreat(index) 4108 return None 4109 4110 self._retreat(index2) 4111 4112 if not this: 4113 if self._match_text_seq("UNSIGNED"): 4114 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4115 if not unsigned_type_token: 4116 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4117 4118 type_token = unsigned_type_token or type_token 4119 4120 this = exp.DataType( 4121 this=exp.DataType.Type[type_token.value], 4122 expressions=expressions, 4123 nested=nested, 4124 values=values, 4125 prefix=prefix, 4126 ) 4127 4128 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4129 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4130 4131 return this 4132 4133 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4134 index = self._index 4135 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4136 self._match(TokenType.COLON) 4137 column_def = self._parse_column_def(this) 4138 4139 if type_required and ( 4140 (isinstance(this, exp.Column) and this.this is column_def) or this is column_def 4141 ): 4142 self._retreat(index) 4143 return self._parse_types() 4144 4145 return column_def 4146 4147 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4148 if not self._match_text_seq("AT", "TIME", "ZONE"): 4149 return this 4150 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4151 4152 def _parse_column(self) -> t.Optional[exp.Expression]: 4153 this = self._parse_column_reference() 4154 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4155 4156 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4157 this = self._parse_field() 4158 if ( 4159 not this 4160 and self._match(TokenType.VALUES, advance=False) 4161 and self.VALUES_FOLLOWED_BY_PAREN 4162 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4163 ): 4164 this = self._parse_id_var() 4165 4166 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4167 4168 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4169 this = self._parse_bracket(this) 4170 4171 while self._match_set(self.COLUMN_OPERATORS): 4172 op_token = self._prev.token_type 4173 op = self.COLUMN_OPERATORS.get(op_token) 4174 4175 if op_token == TokenType.DCOLON: 4176 field = self._parse_types() 4177 if not field: 4178 self.raise_error("Expected type") 4179 elif op and self._curr: 4180 field = self._parse_column_reference() 4181 else: 4182 field = self._parse_field(anonymous_func=True, any_token=True) 4183 4184 if isinstance(field, exp.Func) and this: 4185 # bigquery allows function calls like x.y.count(...) 4186 # SAFE.SUBSTR(...) 4187 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4188 this = exp.replace_tree( 4189 this, 4190 lambda n: ( 4191 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4192 if n.table 4193 else n.this 4194 ) 4195 if isinstance(n, exp.Column) 4196 else n, 4197 ) 4198 4199 if op: 4200 this = op(self, this, field) 4201 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4202 this = self.expression( 4203 exp.Column, 4204 this=field, 4205 table=this.this, 4206 db=this.args.get("table"), 4207 catalog=this.args.get("db"), 4208 ) 4209 else: 4210 this = self.expression(exp.Dot, this=this, expression=field) 4211 this = self._parse_bracket(this) 4212 return this 4213 4214 def _parse_primary(self) -> t.Optional[exp.Expression]: 4215 if self._match_set(self.PRIMARY_PARSERS): 4216 token_type = self._prev.token_type 4217 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4218 4219 if token_type == TokenType.STRING: 4220 expressions = [primary] 4221 while self._match(TokenType.STRING): 4222 expressions.append(exp.Literal.string(self._prev.text)) 4223 4224 if len(expressions) > 1: 4225 return self.expression(exp.Concat, expressions=expressions) 4226 4227 return primary 4228 4229 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4230 return exp.Literal.number(f"0.{self._prev.text}") 4231 4232 if self._match(TokenType.L_PAREN): 4233 comments = self._prev_comments 4234 query = self._parse_select() 4235 4236 if query: 4237 expressions = [query] 4238 else: 4239 expressions = self._parse_expressions() 4240 4241 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4242 4243 if isinstance(this, exp.UNWRAPPED_QUERIES): 4244 this = self._parse_set_operations( 4245 self._parse_subquery(this=this, parse_alias=False) 4246 ) 4247 elif isinstance(this, exp.Subquery): 4248 this = self._parse_subquery( 4249 this=self._parse_set_operations(this), parse_alias=False 4250 ) 4251 elif len(expressions) > 1: 4252 this = self.expression(exp.Tuple, expressions=expressions) 4253 else: 4254 this = self.expression(exp.Paren, this=this) 4255 4256 if this: 4257 this.add_comments(comments) 4258 4259 self._match_r_paren(expression=this) 4260 return this 4261 4262 return None 4263 4264 def _parse_field( 4265 self, 4266 any_token: bool = False, 4267 tokens: t.Optional[t.Collection[TokenType]] = None, 4268 anonymous_func: bool = False, 4269 ) -> t.Optional[exp.Expression]: 4270 return ( 4271 self._parse_primary() 4272 or self._parse_function(anonymous=anonymous_func) 4273 or self._parse_id_var(any_token=any_token, tokens=tokens) 4274 ) 4275 4276 def _parse_function( 4277 self, 4278 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4279 anonymous: bool = False, 4280 optional_parens: bool = True, 4281 ) -> t.Optional[exp.Expression]: 4282 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4283 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4284 fn_syntax = False 4285 if ( 4286 self._match(TokenType.L_BRACE, advance=False) 4287 and self._next 4288 and self._next.text.upper() == "FN" 4289 ): 4290 self._advance(2) 4291 fn_syntax = True 4292 4293 func = self._parse_function_call( 4294 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4295 ) 4296 4297 if fn_syntax: 4298 self._match(TokenType.R_BRACE) 4299 4300 return func 4301 4302 def _parse_function_call( 4303 self, 4304 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4305 anonymous: bool = False, 4306 optional_parens: bool = True, 4307 ) -> t.Optional[exp.Expression]: 4308 if not self._curr: 4309 return None 4310 4311 comments = self._curr.comments 4312 token_type = self._curr.token_type 4313 this = self._curr.text 4314 upper = this.upper() 4315 4316 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4317 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4318 self._advance() 4319 return self._parse_window(parser(self)) 4320 4321 if not self._next or self._next.token_type != TokenType.L_PAREN: 4322 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4323 self._advance() 4324 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4325 4326 return None 4327 4328 if token_type not in self.FUNC_TOKENS: 4329 return None 4330 4331 self._advance(2) 4332 4333 parser = self.FUNCTION_PARSERS.get(upper) 4334 if parser and not anonymous: 4335 this = parser(self) 4336 else: 4337 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4338 4339 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4340 this = self.expression(subquery_predicate, this=self._parse_select()) 4341 self._match_r_paren() 4342 return this 4343 4344 if functions is None: 4345 functions = self.FUNCTIONS 4346 4347 function = functions.get(upper) 4348 4349 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4350 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4351 4352 if alias: 4353 args = self._kv_to_prop_eq(args) 4354 4355 if function and not anonymous: 4356 if "dialect" in function.__code__.co_varnames: 4357 func = function(args, dialect=self.dialect) 4358 else: 4359 func = function(args) 4360 4361 func = self.validate_expression(func, args) 4362 if not self.dialect.NORMALIZE_FUNCTIONS: 4363 func.meta["name"] = this 4364 4365 this = func 4366 else: 4367 if token_type == TokenType.IDENTIFIER: 4368 this = exp.Identifier(this=this, quoted=True) 4369 this = self.expression(exp.Anonymous, this=this, expressions=args) 4370 4371 if isinstance(this, exp.Expression): 4372 this.add_comments(comments) 4373 4374 self._match_r_paren(this) 4375 return self._parse_window(this) 4376 4377 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4378 transformed = [] 4379 4380 for e in expressions: 4381 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4382 if isinstance(e, exp.Alias): 4383 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4384 4385 if not isinstance(e, exp.PropertyEQ): 4386 e = self.expression( 4387 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4388 ) 4389 4390 if isinstance(e.this, exp.Column): 4391 e.this.replace(e.this.this) 4392 4393 transformed.append(e) 4394 4395 return transformed 4396 4397 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4398 return self._parse_column_def(self._parse_id_var()) 4399 4400 def _parse_user_defined_function( 4401 self, kind: t.Optional[TokenType] = None 4402 ) -> t.Optional[exp.Expression]: 4403 this = self._parse_id_var() 4404 4405 while self._match(TokenType.DOT): 4406 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4407 4408 if not self._match(TokenType.L_PAREN): 4409 return this 4410 4411 expressions = self._parse_csv(self._parse_function_parameter) 4412 self._match_r_paren() 4413 return self.expression( 4414 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4415 ) 4416 4417 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4418 literal = self._parse_primary() 4419 if literal: 4420 return self.expression(exp.Introducer, this=token.text, expression=literal) 4421 4422 return self.expression(exp.Identifier, this=token.text) 4423 4424 def _parse_session_parameter(self) -> exp.SessionParameter: 4425 kind = None 4426 this = self._parse_id_var() or self._parse_primary() 4427 4428 if this and self._match(TokenType.DOT): 4429 kind = this.name 4430 this = self._parse_var() or self._parse_primary() 4431 4432 return self.expression(exp.SessionParameter, this=this, kind=kind) 4433 4434 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4435 index = self._index 4436 4437 if self._match(TokenType.L_PAREN): 4438 expressions = t.cast( 4439 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4440 ) 4441 4442 if not self._match(TokenType.R_PAREN): 4443 self._retreat(index) 4444 else: 4445 expressions = [self._parse_id_var()] 4446 4447 if self._match_set(self.LAMBDAS): 4448 return self.LAMBDAS[self._prev.token_type](self, expressions) 4449 4450 self._retreat(index) 4451 4452 this: t.Optional[exp.Expression] 4453 4454 if self._match(TokenType.DISTINCT): 4455 this = self.expression( 4456 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4457 ) 4458 else: 4459 this = self._parse_select_or_expression(alias=alias) 4460 4461 return self._parse_limit( 4462 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4463 ) 4464 4465 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4466 index = self._index 4467 4468 if not self._match(TokenType.L_PAREN): 4469 return this 4470 4471 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 4472 # expr can be of both types 4473 if self._match_set(self.SELECT_START_TOKENS): 4474 self._retreat(index) 4475 return this 4476 4477 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4478 4479 self._match_r_paren() 4480 return self.expression(exp.Schema, this=this, expressions=args) 4481 4482 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4483 return self._parse_column_def(self._parse_field(any_token=True)) 4484 4485 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4486 # column defs are not really columns, they're identifiers 4487 if isinstance(this, exp.Column): 4488 this = this.this 4489 4490 kind = self._parse_types(schema=True) 4491 4492 if self._match_text_seq("FOR", "ORDINALITY"): 4493 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4494 4495 constraints: t.List[exp.Expression] = [] 4496 4497 if not kind and self._match(TokenType.ALIAS): 4498 constraints.append( 4499 self.expression( 4500 exp.ComputedColumnConstraint, 4501 this=self._parse_conjunction(), 4502 persisted=self._match_text_seq("PERSISTED"), 4503 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4504 ) 4505 ) 4506 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4507 self._match(TokenType.ALIAS) 4508 constraints.append( 4509 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4510 ) 4511 4512 while True: 4513 constraint = self._parse_column_constraint() 4514 if not constraint: 4515 break 4516 constraints.append(constraint) 4517 4518 if not kind and not constraints: 4519 return this 4520 4521 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4522 4523 def _parse_auto_increment( 4524 self, 4525 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4526 start = None 4527 increment = None 4528 4529 if self._match(TokenType.L_PAREN, advance=False): 4530 args = self._parse_wrapped_csv(self._parse_bitwise) 4531 start = seq_get(args, 0) 4532 increment = seq_get(args, 1) 4533 elif self._match_text_seq("START"): 4534 start = self._parse_bitwise() 4535 self._match_text_seq("INCREMENT") 4536 increment = self._parse_bitwise() 4537 4538 if start and increment: 4539 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4540 4541 return exp.AutoIncrementColumnConstraint() 4542 4543 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4544 if not self._match_text_seq("REFRESH"): 4545 self._retreat(self._index - 1) 4546 return None 4547 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4548 4549 def _parse_compress(self) -> exp.CompressColumnConstraint: 4550 if self._match(TokenType.L_PAREN, advance=False): 4551 return self.expression( 4552 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4553 ) 4554 4555 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4556 4557 def _parse_generated_as_identity( 4558 self, 4559 ) -> ( 4560 exp.GeneratedAsIdentityColumnConstraint 4561 | exp.ComputedColumnConstraint 4562 | exp.GeneratedAsRowColumnConstraint 4563 ): 4564 if self._match_text_seq("BY", "DEFAULT"): 4565 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4566 this = self.expression( 4567 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4568 ) 4569 else: 4570 self._match_text_seq("ALWAYS") 4571 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4572 4573 self._match(TokenType.ALIAS) 4574 4575 if self._match_text_seq("ROW"): 4576 start = self._match_text_seq("START") 4577 if not start: 4578 self._match(TokenType.END) 4579 hidden = self._match_text_seq("HIDDEN") 4580 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4581 4582 identity = self._match_text_seq("IDENTITY") 4583 4584 if self._match(TokenType.L_PAREN): 4585 if self._match(TokenType.START_WITH): 4586 this.set("start", self._parse_bitwise()) 4587 if self._match_text_seq("INCREMENT", "BY"): 4588 this.set("increment", self._parse_bitwise()) 4589 if self._match_text_seq("MINVALUE"): 4590 this.set("minvalue", self._parse_bitwise()) 4591 if self._match_text_seq("MAXVALUE"): 4592 this.set("maxvalue", self._parse_bitwise()) 4593 4594 if self._match_text_seq("CYCLE"): 4595 this.set("cycle", True) 4596 elif self._match_text_seq("NO", "CYCLE"): 4597 this.set("cycle", False) 4598 4599 if not identity: 4600 this.set("expression", self._parse_bitwise()) 4601 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4602 args = self._parse_csv(self._parse_bitwise) 4603 this.set("start", seq_get(args, 0)) 4604 this.set("increment", seq_get(args, 1)) 4605 4606 self._match_r_paren() 4607 4608 return this 4609 4610 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4611 self._match_text_seq("LENGTH") 4612 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4613 4614 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4615 if self._match_text_seq("NULL"): 4616 return self.expression(exp.NotNullColumnConstraint) 4617 if self._match_text_seq("CASESPECIFIC"): 4618 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4619 if self._match_text_seq("FOR", "REPLICATION"): 4620 return self.expression(exp.NotForReplicationColumnConstraint) 4621 return None 4622 4623 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4624 if self._match(TokenType.CONSTRAINT): 4625 this = self._parse_id_var() 4626 else: 4627 this = None 4628 4629 if self._match_texts(self.CONSTRAINT_PARSERS): 4630 return self.expression( 4631 exp.ColumnConstraint, 4632 this=this, 4633 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4634 ) 4635 4636 return this 4637 4638 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4639 if not self._match(TokenType.CONSTRAINT): 4640 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4641 4642 return self.expression( 4643 exp.Constraint, 4644 this=self._parse_id_var(), 4645 expressions=self._parse_unnamed_constraints(), 4646 ) 4647 4648 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4649 constraints = [] 4650 while True: 4651 constraint = self._parse_unnamed_constraint() or self._parse_function() 4652 if not constraint: 4653 break 4654 constraints.append(constraint) 4655 4656 return constraints 4657 4658 def _parse_unnamed_constraint( 4659 self, constraints: t.Optional[t.Collection[str]] = None 4660 ) -> t.Optional[exp.Expression]: 4661 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4662 constraints or self.CONSTRAINT_PARSERS 4663 ): 4664 return None 4665 4666 constraint = self._prev.text.upper() 4667 if constraint not in self.CONSTRAINT_PARSERS: 4668 self.raise_error(f"No parser found for schema constraint {constraint}.") 4669 4670 return self.CONSTRAINT_PARSERS[constraint](self) 4671 4672 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4673 self._match_text_seq("KEY") 4674 return self.expression( 4675 exp.UniqueColumnConstraint, 4676 this=self._parse_schema(self._parse_id_var(any_token=False)), 4677 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4678 on_conflict=self._parse_on_conflict(), 4679 ) 4680 4681 def _parse_key_constraint_options(self) -> t.List[str]: 4682 options = [] 4683 while True: 4684 if not self._curr: 4685 break 4686 4687 if self._match(TokenType.ON): 4688 action = None 4689 on = self._advance_any() and self._prev.text 4690 4691 if self._match_text_seq("NO", "ACTION"): 4692 action = "NO ACTION" 4693 elif self._match_text_seq("CASCADE"): 4694 action = "CASCADE" 4695 elif self._match_text_seq("RESTRICT"): 4696 action = "RESTRICT" 4697 elif self._match_pair(TokenType.SET, TokenType.NULL): 4698 action = "SET NULL" 4699 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4700 action = "SET DEFAULT" 4701 else: 4702 self.raise_error("Invalid key constraint") 4703 4704 options.append(f"ON {on} {action}") 4705 elif self._match_text_seq("NOT", "ENFORCED"): 4706 options.append("NOT ENFORCED") 4707 elif self._match_text_seq("DEFERRABLE"): 4708 options.append("DEFERRABLE") 4709 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4710 options.append("INITIALLY DEFERRED") 4711 elif self._match_text_seq("NORELY"): 4712 options.append("NORELY") 4713 elif self._match_text_seq("MATCH", "FULL"): 4714 options.append("MATCH FULL") 4715 else: 4716 break 4717 4718 return options 4719 4720 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4721 if match and not self._match(TokenType.REFERENCES): 4722 return None 4723 4724 expressions = None 4725 this = self._parse_table(schema=True) 4726 options = self._parse_key_constraint_options() 4727 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4728 4729 def _parse_foreign_key(self) -> exp.ForeignKey: 4730 expressions = self._parse_wrapped_id_vars() 4731 reference = self._parse_references() 4732 options = {} 4733 4734 while self._match(TokenType.ON): 4735 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4736 self.raise_error("Expected DELETE or UPDATE") 4737 4738 kind = self._prev.text.lower() 4739 4740 if self._match_text_seq("NO", "ACTION"): 4741 action = "NO ACTION" 4742 elif self._match(TokenType.SET): 4743 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4744 action = "SET " + self._prev.text.upper() 4745 else: 4746 self._advance() 4747 action = self._prev.text.upper() 4748 4749 options[kind] = action 4750 4751 return self.expression( 4752 exp.ForeignKey, 4753 expressions=expressions, 4754 reference=reference, 4755 **options, # type: ignore 4756 ) 4757 4758 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4759 return self._parse_field() 4760 4761 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4762 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4763 self._retreat(self._index - 1) 4764 return None 4765 4766 id_vars = self._parse_wrapped_id_vars() 4767 return self.expression( 4768 exp.PeriodForSystemTimeConstraint, 4769 this=seq_get(id_vars, 0), 4770 expression=seq_get(id_vars, 1), 4771 ) 4772 4773 def _parse_primary_key( 4774 self, wrapped_optional: bool = False, in_props: bool = False 4775 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4776 desc = ( 4777 self._match_set((TokenType.ASC, TokenType.DESC)) 4778 and self._prev.token_type == TokenType.DESC 4779 ) 4780 4781 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4782 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4783 4784 expressions = self._parse_wrapped_csv( 4785 self._parse_primary_key_part, optional=wrapped_optional 4786 ) 4787 options = self._parse_key_constraint_options() 4788 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4789 4790 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4791 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4792 4793 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4794 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4795 return this 4796 4797 bracket_kind = self._prev.token_type 4798 expressions = self._parse_csv( 4799 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4800 ) 4801 4802 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4803 self.raise_error("Expected ]") 4804 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4805 self.raise_error("Expected }") 4806 4807 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4808 if bracket_kind == TokenType.L_BRACE: 4809 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4810 elif not this or this.name.upper() == "ARRAY": 4811 this = self.expression(exp.Array, expressions=expressions) 4812 else: 4813 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4814 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4815 4816 self._add_comments(this) 4817 return self._parse_bracket(this) 4818 4819 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4820 if self._match(TokenType.COLON): 4821 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4822 return this 4823 4824 def _parse_case(self) -> t.Optional[exp.Expression]: 4825 ifs = [] 4826 default = None 4827 4828 comments = self._prev_comments 4829 expression = self._parse_conjunction() 4830 4831 while self._match(TokenType.WHEN): 4832 this = self._parse_conjunction() 4833 self._match(TokenType.THEN) 4834 then = self._parse_conjunction() 4835 ifs.append(self.expression(exp.If, this=this, true=then)) 4836 4837 if self._match(TokenType.ELSE): 4838 default = self._parse_conjunction() 4839 4840 if not self._match(TokenType.END): 4841 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4842 default = exp.column("interval") 4843 else: 4844 self.raise_error("Expected END after CASE", self._prev) 4845 4846 return self.expression( 4847 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4848 ) 4849 4850 def _parse_if(self) -> t.Optional[exp.Expression]: 4851 if self._match(TokenType.L_PAREN): 4852 args = self._parse_csv(self._parse_conjunction) 4853 this = self.validate_expression(exp.If.from_arg_list(args), args) 4854 self._match_r_paren() 4855 else: 4856 index = self._index - 1 4857 4858 if self.NO_PAREN_IF_COMMANDS and index == 0: 4859 return self._parse_as_command(self._prev) 4860 4861 condition = self._parse_conjunction() 4862 4863 if not condition: 4864 self._retreat(index) 4865 return None 4866 4867 self._match(TokenType.THEN) 4868 true = self._parse_conjunction() 4869 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4870 self._match(TokenType.END) 4871 this = self.expression(exp.If, this=condition, true=true, false=false) 4872 4873 return this 4874 4875 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4876 if not self._match_text_seq("VALUE", "FOR"): 4877 self._retreat(self._index - 1) 4878 return None 4879 4880 return self.expression( 4881 exp.NextValueFor, 4882 this=self._parse_column(), 4883 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4884 ) 4885 4886 def _parse_extract(self) -> exp.Extract: 4887 this = self._parse_function() or self._parse_var() or self._parse_type() 4888 4889 if self._match(TokenType.FROM): 4890 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4891 4892 if not self._match(TokenType.COMMA): 4893 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4894 4895 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4896 4897 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4898 this = self._parse_conjunction() 4899 4900 if not self._match(TokenType.ALIAS): 4901 if self._match(TokenType.COMMA): 4902 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4903 4904 self.raise_error("Expected AS after CAST") 4905 4906 fmt = None 4907 to = self._parse_types() 4908 4909 if self._match(TokenType.FORMAT): 4910 fmt_string = self._parse_string() 4911 fmt = self._parse_at_time_zone(fmt_string) 4912 4913 if not to: 4914 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4915 if to.this in exp.DataType.TEMPORAL_TYPES: 4916 this = self.expression( 4917 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4918 this=this, 4919 format=exp.Literal.string( 4920 format_time( 4921 fmt_string.this if fmt_string else "", 4922 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4923 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4924 ) 4925 ), 4926 ) 4927 4928 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4929 this.set("zone", fmt.args["zone"]) 4930 return this 4931 elif not to: 4932 self.raise_error("Expected TYPE after CAST") 4933 elif isinstance(to, exp.Identifier): 4934 to = exp.DataType.build(to.name, udt=True) 4935 elif to.this == exp.DataType.Type.CHAR: 4936 if self._match(TokenType.CHARACTER_SET): 4937 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4938 4939 return self.expression( 4940 exp.Cast if strict else exp.TryCast, 4941 this=this, 4942 to=to, 4943 format=fmt, 4944 safe=safe, 4945 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4946 ) 4947 4948 def _parse_string_agg(self) -> exp.Expression: 4949 if self._match(TokenType.DISTINCT): 4950 args: t.List[t.Optional[exp.Expression]] = [ 4951 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4952 ] 4953 if self._match(TokenType.COMMA): 4954 args.extend(self._parse_csv(self._parse_conjunction)) 4955 else: 4956 args = self._parse_csv(self._parse_conjunction) # type: ignore 4957 4958 index = self._index 4959 if not self._match(TokenType.R_PAREN) and args: 4960 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4961 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4962 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4963 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4964 4965 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4966 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4967 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4968 if not self._match_text_seq("WITHIN", "GROUP"): 4969 self._retreat(index) 4970 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4971 4972 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4973 order = self._parse_order(this=seq_get(args, 0)) 4974 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4975 4976 def _parse_convert( 4977 self, strict: bool, safe: t.Optional[bool] = None 4978 ) -> t.Optional[exp.Expression]: 4979 this = self._parse_bitwise() 4980 4981 if self._match(TokenType.USING): 4982 to: t.Optional[exp.Expression] = self.expression( 4983 exp.CharacterSet, this=self._parse_var() 4984 ) 4985 elif self._match(TokenType.COMMA): 4986 to = self._parse_types() 4987 else: 4988 to = None 4989 4990 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4991 4992 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4993 """ 4994 There are generally two variants of the DECODE function: 4995 4996 - DECODE(bin, charset) 4997 - DECODE(expression, search, result [, search, result] ... [, default]) 4998 4999 The second variant will always be parsed into a CASE expression. Note that NULL 5000 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5001 instead of relying on pattern matching. 5002 """ 5003 args = self._parse_csv(self._parse_conjunction) 5004 5005 if len(args) < 3: 5006 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5007 5008 expression, *expressions = args 5009 if not expression: 5010 return None 5011 5012 ifs = [] 5013 for search, result in zip(expressions[::2], expressions[1::2]): 5014 if not search or not result: 5015 return None 5016 5017 if isinstance(search, exp.Literal): 5018 ifs.append( 5019 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5020 ) 5021 elif isinstance(search, exp.Null): 5022 ifs.append( 5023 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5024 ) 5025 else: 5026 cond = exp.or_( 5027 exp.EQ(this=expression.copy(), expression=search), 5028 exp.and_( 5029 exp.Is(this=expression.copy(), expression=exp.Null()), 5030 exp.Is(this=search.copy(), expression=exp.Null()), 5031 copy=False, 5032 ), 5033 copy=False, 5034 ) 5035 ifs.append(exp.If(this=cond, true=result)) 5036 5037 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5038 5039 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5040 self._match_text_seq("KEY") 5041 key = self._parse_column() 5042 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5043 self._match_text_seq("VALUE") 5044 value = self._parse_bitwise() 5045 5046 if not key and not value: 5047 return None 5048 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5049 5050 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5051 if not this or not self._match_text_seq("FORMAT", "JSON"): 5052 return this 5053 5054 return self.expression(exp.FormatJson, this=this) 5055 5056 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5057 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5058 for value in values: 5059 if self._match_text_seq(value, "ON", on): 5060 return f"{value} ON {on}" 5061 5062 return None 5063 5064 @t.overload 5065 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5066 5067 @t.overload 5068 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5069 5070 def _parse_json_object(self, agg=False): 5071 star = self._parse_star() 5072 expressions = ( 5073 [star] 5074 if star 5075 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5076 ) 5077 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5078 5079 unique_keys = None 5080 if self._match_text_seq("WITH", "UNIQUE"): 5081 unique_keys = True 5082 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5083 unique_keys = False 5084 5085 self._match_text_seq("KEYS") 5086 5087 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5088 self._parse_type() 5089 ) 5090 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5091 5092 return self.expression( 5093 exp.JSONObjectAgg if agg else exp.JSONObject, 5094 expressions=expressions, 5095 null_handling=null_handling, 5096 unique_keys=unique_keys, 5097 return_type=return_type, 5098 encoding=encoding, 5099 ) 5100 5101 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5102 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5103 if not self._match_text_seq("NESTED"): 5104 this = self._parse_id_var() 5105 kind = self._parse_types(allow_identifiers=False) 5106 nested = None 5107 else: 5108 this = None 5109 kind = None 5110 nested = True 5111 5112 path = self._match_text_seq("PATH") and self._parse_string() 5113 nested_schema = nested and self._parse_json_schema() 5114 5115 return self.expression( 5116 exp.JSONColumnDef, 5117 this=this, 5118 kind=kind, 5119 path=path, 5120 nested_schema=nested_schema, 5121 ) 5122 5123 def _parse_json_schema(self) -> exp.JSONSchema: 5124 self._match_text_seq("COLUMNS") 5125 return self.expression( 5126 exp.JSONSchema, 5127 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5128 ) 5129 5130 def _parse_json_table(self) -> exp.JSONTable: 5131 this = self._parse_format_json(self._parse_bitwise()) 5132 path = self._match(TokenType.COMMA) and self._parse_string() 5133 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5134 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5135 schema = self._parse_json_schema() 5136 5137 return exp.JSONTable( 5138 this=this, 5139 schema=schema, 5140 path=path, 5141 error_handling=error_handling, 5142 empty_handling=empty_handling, 5143 ) 5144 5145 def _parse_match_against(self) -> exp.MatchAgainst: 5146 expressions = self._parse_csv(self._parse_column) 5147 5148 self._match_text_seq(")", "AGAINST", "(") 5149 5150 this = self._parse_string() 5151 5152 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5153 modifier = "IN NATURAL LANGUAGE MODE" 5154 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5155 modifier = f"{modifier} WITH QUERY EXPANSION" 5156 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5157 modifier = "IN BOOLEAN MODE" 5158 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5159 modifier = "WITH QUERY EXPANSION" 5160 else: 5161 modifier = None 5162 5163 return self.expression( 5164 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5165 ) 5166 5167 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5168 def _parse_open_json(self) -> exp.OpenJSON: 5169 this = self._parse_bitwise() 5170 path = self._match(TokenType.COMMA) and self._parse_string() 5171 5172 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5173 this = self._parse_field(any_token=True) 5174 kind = self._parse_types() 5175 path = self._parse_string() 5176 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5177 5178 return self.expression( 5179 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5180 ) 5181 5182 expressions = None 5183 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5184 self._match_l_paren() 5185 expressions = self._parse_csv(_parse_open_json_column_def) 5186 5187 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5188 5189 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5190 args = self._parse_csv(self._parse_bitwise) 5191 5192 if self._match(TokenType.IN): 5193 return self.expression( 5194 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5195 ) 5196 5197 if haystack_first: 5198 haystack = seq_get(args, 0) 5199 needle = seq_get(args, 1) 5200 else: 5201 needle = seq_get(args, 0) 5202 haystack = seq_get(args, 1) 5203 5204 return self.expression( 5205 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5206 ) 5207 5208 def _parse_predict(self) -> exp.Predict: 5209 self._match_text_seq("MODEL") 5210 this = self._parse_table() 5211 5212 self._match(TokenType.COMMA) 5213 self._match_text_seq("TABLE") 5214 5215 return self.expression( 5216 exp.Predict, 5217 this=this, 5218 expression=self._parse_table(), 5219 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5220 ) 5221 5222 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5223 args = self._parse_csv(self._parse_table) 5224 return exp.JoinHint(this=func_name.upper(), expressions=args) 5225 5226 def _parse_substring(self) -> exp.Substring: 5227 # Postgres supports the form: substring(string [from int] [for int]) 5228 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5229 5230 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5231 5232 if self._match(TokenType.FROM): 5233 args.append(self._parse_bitwise()) 5234 if self._match(TokenType.FOR): 5235 args.append(self._parse_bitwise()) 5236 5237 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5238 5239 def _parse_trim(self) -> exp.Trim: 5240 # https://www.w3resource.com/sql/character-functions/trim.php 5241 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5242 5243 position = None 5244 collation = None 5245 expression = None 5246 5247 if self._match_texts(self.TRIM_TYPES): 5248 position = self._prev.text.upper() 5249 5250 this = self._parse_bitwise() 5251 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5252 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5253 expression = self._parse_bitwise() 5254 5255 if invert_order: 5256 this, expression = expression, this 5257 5258 if self._match(TokenType.COLLATE): 5259 collation = self._parse_bitwise() 5260 5261 return self.expression( 5262 exp.Trim, this=this, position=position, expression=expression, collation=collation 5263 ) 5264 5265 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5266 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5267 5268 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5269 return self._parse_window(self._parse_id_var(), alias=True) 5270 5271 def _parse_respect_or_ignore_nulls( 5272 self, this: t.Optional[exp.Expression] 5273 ) -> t.Optional[exp.Expression]: 5274 if self._match_text_seq("IGNORE", "NULLS"): 5275 return self.expression(exp.IgnoreNulls, this=this) 5276 if self._match_text_seq("RESPECT", "NULLS"): 5277 return self.expression(exp.RespectNulls, this=this) 5278 return this 5279 5280 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5281 if self._match(TokenType.HAVING): 5282 self._match_texts(("MAX", "MIN")) 5283 max = self._prev.text.upper() != "MIN" 5284 return self.expression( 5285 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5286 ) 5287 5288 return this 5289 5290 def _parse_window( 5291 self, this: t.Optional[exp.Expression], alias: bool = False 5292 ) -> t.Optional[exp.Expression]: 5293 func = this 5294 comments = func.comments if isinstance(func, exp.Expression) else None 5295 5296 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5297 self._match(TokenType.WHERE) 5298 this = self.expression( 5299 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5300 ) 5301 self._match_r_paren() 5302 5303 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5304 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5305 if self._match_text_seq("WITHIN", "GROUP"): 5306 order = self._parse_wrapped(self._parse_order) 5307 this = self.expression(exp.WithinGroup, this=this, expression=order) 5308 5309 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5310 # Some dialects choose to implement and some do not. 5311 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5312 5313 # There is some code above in _parse_lambda that handles 5314 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5315 5316 # The below changes handle 5317 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5318 5319 # Oracle allows both formats 5320 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5321 # and Snowflake chose to do the same for familiarity 5322 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5323 if isinstance(this, exp.AggFunc): 5324 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5325 5326 if ignore_respect and ignore_respect is not this: 5327 ignore_respect.replace(ignore_respect.this) 5328 this = self.expression(ignore_respect.__class__, this=this) 5329 5330 this = self._parse_respect_or_ignore_nulls(this) 5331 5332 # bigquery select from window x AS (partition by ...) 5333 if alias: 5334 over = None 5335 self._match(TokenType.ALIAS) 5336 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5337 return this 5338 else: 5339 over = self._prev.text.upper() 5340 5341 if comments: 5342 func.comments = None # type: ignore 5343 5344 if not self._match(TokenType.L_PAREN): 5345 return self.expression( 5346 exp.Window, 5347 comments=comments, 5348 this=this, 5349 alias=self._parse_id_var(False), 5350 over=over, 5351 ) 5352 5353 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5354 5355 first = self._match(TokenType.FIRST) 5356 if self._match_text_seq("LAST"): 5357 first = False 5358 5359 partition, order = self._parse_partition_and_order() 5360 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5361 5362 if kind: 5363 self._match(TokenType.BETWEEN) 5364 start = self._parse_window_spec() 5365 self._match(TokenType.AND) 5366 end = self._parse_window_spec() 5367 5368 spec = self.expression( 5369 exp.WindowSpec, 5370 kind=kind, 5371 start=start["value"], 5372 start_side=start["side"], 5373 end=end["value"], 5374 end_side=end["side"], 5375 ) 5376 else: 5377 spec = None 5378 5379 self._match_r_paren() 5380 5381 window = self.expression( 5382 exp.Window, 5383 comments=comments, 5384 this=this, 5385 partition_by=partition, 5386 order=order, 5387 spec=spec, 5388 alias=window_alias, 5389 over=over, 5390 first=first, 5391 ) 5392 5393 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5394 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5395 return self._parse_window(window, alias=alias) 5396 5397 return window 5398 5399 def _parse_partition_and_order( 5400 self, 5401 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5402 return self._parse_partition_by(), self._parse_order() 5403 5404 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5405 self._match(TokenType.BETWEEN) 5406 5407 return { 5408 "value": ( 5409 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5410 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5411 or self._parse_bitwise() 5412 ), 5413 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5414 } 5415 5416 def _parse_alias( 5417 self, this: t.Optional[exp.Expression], explicit: bool = False 5418 ) -> t.Optional[exp.Expression]: 5419 any_token = self._match(TokenType.ALIAS) 5420 comments = self._prev_comments 5421 5422 if explicit and not any_token: 5423 return this 5424 5425 if self._match(TokenType.L_PAREN): 5426 aliases = self.expression( 5427 exp.Aliases, 5428 comments=comments, 5429 this=this, 5430 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5431 ) 5432 self._match_r_paren(aliases) 5433 return aliases 5434 5435 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5436 self.STRING_ALIASES and self._parse_string_as_identifier() 5437 ) 5438 5439 if alias: 5440 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5441 column = this.this 5442 5443 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5444 if not this.comments and column and column.comments: 5445 this.comments = column.comments 5446 column.comments = None 5447 5448 return this 5449 5450 def _parse_id_var( 5451 self, 5452 any_token: bool = True, 5453 tokens: t.Optional[t.Collection[TokenType]] = None, 5454 ) -> t.Optional[exp.Expression]: 5455 identifier = self._parse_identifier() 5456 5457 if identifier: 5458 return identifier 5459 5460 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5461 quoted = self._prev.token_type == TokenType.STRING 5462 return exp.Identifier(this=self._prev.text, quoted=quoted) 5463 5464 return None 5465 5466 def _parse_string(self) -> t.Optional[exp.Expression]: 5467 if self._match_set(self.STRING_PARSERS): 5468 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5469 return self._parse_placeholder() 5470 5471 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5472 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5473 5474 def _parse_number(self) -> t.Optional[exp.Expression]: 5475 if self._match_set(self.NUMERIC_PARSERS): 5476 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5477 return self._parse_placeholder() 5478 5479 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5480 if self._match(TokenType.IDENTIFIER): 5481 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5482 return self._parse_placeholder() 5483 5484 def _parse_var( 5485 self, 5486 any_token: bool = False, 5487 tokens: t.Optional[t.Collection[TokenType]] = None, 5488 upper: bool = False, 5489 ) -> t.Optional[exp.Expression]: 5490 if ( 5491 (any_token and self._advance_any()) 5492 or self._match(TokenType.VAR) 5493 or (self._match_set(tokens) if tokens else False) 5494 ): 5495 return self.expression( 5496 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5497 ) 5498 return self._parse_placeholder() 5499 5500 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5501 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5502 self._advance() 5503 return self._prev 5504 return None 5505 5506 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5507 return self._parse_var() or self._parse_string() 5508 5509 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5510 return self._parse_primary() or self._parse_var(any_token=True) 5511 5512 def _parse_null(self) -> t.Optional[exp.Expression]: 5513 if self._match_set(self.NULL_TOKENS): 5514 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5515 return self._parse_placeholder() 5516 5517 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5518 if self._match(TokenType.TRUE): 5519 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5520 if self._match(TokenType.FALSE): 5521 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5522 return self._parse_placeholder() 5523 5524 def _parse_star(self) -> t.Optional[exp.Expression]: 5525 if self._match(TokenType.STAR): 5526 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5527 return self._parse_placeholder() 5528 5529 def _parse_parameter(self) -> exp.Parameter: 5530 self._match(TokenType.L_BRACE) 5531 this = self._parse_identifier() or self._parse_primary_or_var() 5532 expression = self._match(TokenType.COLON) and ( 5533 self._parse_identifier() or self._parse_primary_or_var() 5534 ) 5535 self._match(TokenType.R_BRACE) 5536 return self.expression(exp.Parameter, this=this, expression=expression) 5537 5538 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5539 if self._match_set(self.PLACEHOLDER_PARSERS): 5540 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5541 if placeholder: 5542 return placeholder 5543 self._advance(-1) 5544 return None 5545 5546 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5547 if not self._match(TokenType.EXCEPT): 5548 return None 5549 if self._match(TokenType.L_PAREN, advance=False): 5550 return self._parse_wrapped_csv(self._parse_column) 5551 5552 except_column = self._parse_column() 5553 return [except_column] if except_column else None 5554 5555 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5556 if not self._match(TokenType.REPLACE): 5557 return None 5558 if self._match(TokenType.L_PAREN, advance=False): 5559 return self._parse_wrapped_csv(self._parse_expression) 5560 5561 replace_expression = self._parse_expression() 5562 return [replace_expression] if replace_expression else None 5563 5564 def _parse_csv( 5565 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5566 ) -> t.List[exp.Expression]: 5567 parse_result = parse_method() 5568 items = [parse_result] if parse_result is not None else [] 5569 5570 while self._match(sep): 5571 self._add_comments(parse_result) 5572 parse_result = parse_method() 5573 if parse_result is not None: 5574 items.append(parse_result) 5575 5576 return items 5577 5578 def _parse_tokens( 5579 self, parse_method: t.Callable, expressions: t.Dict 5580 ) -> t.Optional[exp.Expression]: 5581 this = parse_method() 5582 5583 while self._match_set(expressions): 5584 this = self.expression( 5585 expressions[self._prev.token_type], 5586 this=this, 5587 comments=self._prev_comments, 5588 expression=parse_method(), 5589 ) 5590 5591 return this 5592 5593 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5594 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5595 5596 def _parse_wrapped_csv( 5597 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5598 ) -> t.List[exp.Expression]: 5599 return self._parse_wrapped( 5600 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5601 ) 5602 5603 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5604 wrapped = self._match(TokenType.L_PAREN) 5605 if not wrapped and not optional: 5606 self.raise_error("Expecting (") 5607 parse_result = parse_method() 5608 if wrapped: 5609 self._match_r_paren() 5610 return parse_result 5611 5612 def _parse_expressions(self) -> t.List[exp.Expression]: 5613 return self._parse_csv(self._parse_expression) 5614 5615 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5616 return self._parse_select() or self._parse_set_operations( 5617 self._parse_expression() if alias else self._parse_conjunction() 5618 ) 5619 5620 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5621 return self._parse_query_modifiers( 5622 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5623 ) 5624 5625 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5626 this = None 5627 if self._match_texts(self.TRANSACTION_KIND): 5628 this = self._prev.text 5629 5630 self._match_texts(("TRANSACTION", "WORK")) 5631 5632 modes = [] 5633 while True: 5634 mode = [] 5635 while self._match(TokenType.VAR): 5636 mode.append(self._prev.text) 5637 5638 if mode: 5639 modes.append(" ".join(mode)) 5640 if not self._match(TokenType.COMMA): 5641 break 5642 5643 return self.expression(exp.Transaction, this=this, modes=modes) 5644 5645 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5646 chain = None 5647 savepoint = None 5648 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5649 5650 self._match_texts(("TRANSACTION", "WORK")) 5651 5652 if self._match_text_seq("TO"): 5653 self._match_text_seq("SAVEPOINT") 5654 savepoint = self._parse_id_var() 5655 5656 if self._match(TokenType.AND): 5657 chain = not self._match_text_seq("NO") 5658 self._match_text_seq("CHAIN") 5659 5660 if is_rollback: 5661 return self.expression(exp.Rollback, savepoint=savepoint) 5662 5663 return self.expression(exp.Commit, chain=chain) 5664 5665 def _parse_refresh(self) -> exp.Refresh: 5666 self._match(TokenType.TABLE) 5667 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5668 5669 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5670 if not self._match_text_seq("ADD"): 5671 return None 5672 5673 self._match(TokenType.COLUMN) 5674 exists_column = self._parse_exists(not_=True) 5675 expression = self._parse_field_def() 5676 5677 if expression: 5678 expression.set("exists", exists_column) 5679 5680 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5681 if self._match_texts(("FIRST", "AFTER")): 5682 position = self._prev.text 5683 column_position = self.expression( 5684 exp.ColumnPosition, this=self._parse_column(), position=position 5685 ) 5686 expression.set("position", column_position) 5687 5688 return expression 5689 5690 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5691 drop = self._match(TokenType.DROP) and self._parse_drop() 5692 if drop and not isinstance(drop, exp.Command): 5693 drop.set("kind", drop.args.get("kind", "COLUMN")) 5694 return drop 5695 5696 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5697 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5698 return self.expression( 5699 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5700 ) 5701 5702 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5703 index = self._index - 1 5704 5705 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5706 return self._parse_csv( 5707 lambda: self.expression( 5708 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5709 ) 5710 ) 5711 5712 self._retreat(index) 5713 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5714 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5715 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5716 5717 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5718 self._match(TokenType.COLUMN) 5719 column = self._parse_field(any_token=True) 5720 5721 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5722 return self.expression(exp.AlterColumn, this=column, drop=True) 5723 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5724 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5725 if self._match(TokenType.COMMENT): 5726 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5727 5728 self._match_text_seq("SET", "DATA") 5729 self._match_text_seq("TYPE") 5730 return self.expression( 5731 exp.AlterColumn, 5732 this=column, 5733 dtype=self._parse_types(), 5734 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5735 using=self._match(TokenType.USING) and self._parse_conjunction(), 5736 ) 5737 5738 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5739 index = self._index - 1 5740 5741 partition_exists = self._parse_exists() 5742 if self._match(TokenType.PARTITION, advance=False): 5743 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5744 5745 self._retreat(index) 5746 return self._parse_csv(self._parse_drop_column) 5747 5748 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5749 if self._match(TokenType.COLUMN): 5750 exists = self._parse_exists() 5751 old_column = self._parse_column() 5752 to = self._match_text_seq("TO") 5753 new_column = self._parse_column() 5754 5755 if old_column is None or to is None or new_column is None: 5756 return None 5757 5758 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5759 5760 self._match_text_seq("TO") 5761 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5762 5763 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5764 start = self._prev 5765 5766 if not self._match(TokenType.TABLE): 5767 return self._parse_as_command(start) 5768 5769 exists = self._parse_exists() 5770 only = self._match_text_seq("ONLY") 5771 this = self._parse_table(schema=True) 5772 5773 if self._next: 5774 self._advance() 5775 5776 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5777 if parser: 5778 actions = ensure_list(parser(self)) 5779 options = self._parse_csv(self._parse_property) 5780 5781 if not self._curr and actions: 5782 return self.expression( 5783 exp.AlterTable, 5784 this=this, 5785 exists=exists, 5786 actions=actions, 5787 only=only, 5788 options=options, 5789 ) 5790 5791 return self._parse_as_command(start) 5792 5793 def _parse_merge(self) -> exp.Merge: 5794 self._match(TokenType.INTO) 5795 target = self._parse_table() 5796 5797 if target and self._match(TokenType.ALIAS, advance=False): 5798 target.set("alias", self._parse_table_alias()) 5799 5800 self._match(TokenType.USING) 5801 using = self._parse_table() 5802 5803 self._match(TokenType.ON) 5804 on = self._parse_conjunction() 5805 5806 return self.expression( 5807 exp.Merge, 5808 this=target, 5809 using=using, 5810 on=on, 5811 expressions=self._parse_when_matched(), 5812 ) 5813 5814 def _parse_when_matched(self) -> t.List[exp.When]: 5815 whens = [] 5816 5817 while self._match(TokenType.WHEN): 5818 matched = not self._match(TokenType.NOT) 5819 self._match_text_seq("MATCHED") 5820 source = ( 5821 False 5822 if self._match_text_seq("BY", "TARGET") 5823 else self._match_text_seq("BY", "SOURCE") 5824 ) 5825 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5826 5827 self._match(TokenType.THEN) 5828 5829 if self._match(TokenType.INSERT): 5830 _this = self._parse_star() 5831 if _this: 5832 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5833 else: 5834 then = self.expression( 5835 exp.Insert, 5836 this=self._parse_value(), 5837 expression=self._match_text_seq("VALUES") and self._parse_value(), 5838 ) 5839 elif self._match(TokenType.UPDATE): 5840 expressions = self._parse_star() 5841 if expressions: 5842 then = self.expression(exp.Update, expressions=expressions) 5843 else: 5844 then = self.expression( 5845 exp.Update, 5846 expressions=self._match(TokenType.SET) 5847 and self._parse_csv(self._parse_equality), 5848 ) 5849 elif self._match(TokenType.DELETE): 5850 then = self.expression(exp.Var, this=self._prev.text) 5851 else: 5852 then = None 5853 5854 whens.append( 5855 self.expression( 5856 exp.When, 5857 matched=matched, 5858 source=source, 5859 condition=condition, 5860 then=then, 5861 ) 5862 ) 5863 return whens 5864 5865 def _parse_show(self) -> t.Optional[exp.Expression]: 5866 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5867 if parser: 5868 return parser(self) 5869 return self._parse_as_command(self._prev) 5870 5871 def _parse_set_item_assignment( 5872 self, kind: t.Optional[str] = None 5873 ) -> t.Optional[exp.Expression]: 5874 index = self._index 5875 5876 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5877 return self._parse_set_transaction(global_=kind == "GLOBAL") 5878 5879 left = self._parse_primary() or self._parse_id_var() 5880 assignment_delimiter = self._match_texts(("=", "TO")) 5881 5882 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5883 self._retreat(index) 5884 return None 5885 5886 right = self._parse_statement() or self._parse_id_var() 5887 this = self.expression(exp.EQ, this=left, expression=right) 5888 5889 return self.expression(exp.SetItem, this=this, kind=kind) 5890 5891 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5892 self._match_text_seq("TRANSACTION") 5893 characteristics = self._parse_csv( 5894 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5895 ) 5896 return self.expression( 5897 exp.SetItem, 5898 expressions=characteristics, 5899 kind="TRANSACTION", 5900 **{"global": global_}, # type: ignore 5901 ) 5902 5903 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5904 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5905 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5906 5907 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5908 index = self._index 5909 set_ = self.expression( 5910 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5911 ) 5912 5913 if self._curr: 5914 self._retreat(index) 5915 return self._parse_as_command(self._prev) 5916 5917 return set_ 5918 5919 def _parse_var_from_options( 5920 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 5921 ) -> t.Optional[exp.Var]: 5922 start = self._curr 5923 if not start: 5924 return None 5925 5926 option = start.text.upper() 5927 continuations = options.get(option) 5928 5929 index = self._index 5930 self._advance() 5931 for keywords in continuations or []: 5932 if isinstance(keywords, str): 5933 keywords = (keywords,) 5934 5935 if self._match_text_seq(*keywords): 5936 option = f"{option} {' '.join(keywords)}" 5937 break 5938 else: 5939 if continuations or continuations is None: 5940 if raise_unmatched: 5941 self.raise_error(f"Unknown option {option}") 5942 5943 self._retreat(index) 5944 return None 5945 5946 return exp.var(option) 5947 5948 def _parse_as_command(self, start: Token) -> exp.Command: 5949 while self._curr: 5950 self._advance() 5951 text = self._find_sql(start, self._prev) 5952 size = len(start.text) 5953 self._warn_unsupported() 5954 return exp.Command(this=text[:size], expression=text[size:]) 5955 5956 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5957 settings = [] 5958 5959 self._match_l_paren() 5960 kind = self._parse_id_var() 5961 5962 if self._match(TokenType.L_PAREN): 5963 while True: 5964 key = self._parse_id_var() 5965 value = self._parse_primary() 5966 5967 if not key and value is None: 5968 break 5969 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5970 self._match(TokenType.R_PAREN) 5971 5972 self._match_r_paren() 5973 5974 return self.expression( 5975 exp.DictProperty, 5976 this=this, 5977 kind=kind.this if kind else None, 5978 settings=settings, 5979 ) 5980 5981 def _parse_dict_range(self, this: str) -> exp.DictRange: 5982 self._match_l_paren() 5983 has_min = self._match_text_seq("MIN") 5984 if has_min: 5985 min = self._parse_var() or self._parse_primary() 5986 self._match_text_seq("MAX") 5987 max = self._parse_var() or self._parse_primary() 5988 else: 5989 max = self._parse_var() or self._parse_primary() 5990 min = exp.Literal.number(0) 5991 self._match_r_paren() 5992 return self.expression(exp.DictRange, this=this, min=min, max=max) 5993 5994 def _parse_comprehension( 5995 self, this: t.Optional[exp.Expression] 5996 ) -> t.Optional[exp.Comprehension]: 5997 index = self._index 5998 expression = self._parse_column() 5999 if not self._match(TokenType.IN): 6000 self._retreat(index - 1) 6001 return None 6002 iterator = self._parse_column() 6003 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6004 return self.expression( 6005 exp.Comprehension, 6006 this=this, 6007 expression=expression, 6008 iterator=iterator, 6009 condition=condition, 6010 ) 6011 6012 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6013 if self._match(TokenType.HEREDOC_STRING): 6014 return self.expression(exp.Heredoc, this=self._prev.text) 6015 6016 if not self._match_text_seq("$"): 6017 return None 6018 6019 tags = ["$"] 6020 tag_text = None 6021 6022 if self._is_connected(): 6023 self._advance() 6024 tags.append(self._prev.text.upper()) 6025 else: 6026 self.raise_error("No closing $ found") 6027 6028 if tags[-1] != "$": 6029 if self._is_connected() and self._match_text_seq("$"): 6030 tag_text = tags[-1] 6031 tags.append("$") 6032 else: 6033 self.raise_error("No closing $ found") 6034 6035 heredoc_start = self._curr 6036 6037 while self._curr: 6038 if self._match_text_seq(*tags, advance=False): 6039 this = self._find_sql(heredoc_start, self._prev) 6040 self._advance(len(tags)) 6041 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6042 6043 self._advance() 6044 6045 self.raise_error(f"No closing {''.join(tags)} found") 6046 return None 6047 6048 def _find_parser( 6049 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6050 ) -> t.Optional[t.Callable]: 6051 if not self._curr: 6052 return None 6053 6054 index = self._index 6055 this = [] 6056 while True: 6057 # The current token might be multiple words 6058 curr = self._curr.text.upper() 6059 key = curr.split(" ") 6060 this.append(curr) 6061 6062 self._advance() 6063 result, trie = in_trie(trie, key) 6064 if result == TrieResult.FAILED: 6065 break 6066 6067 if result == TrieResult.EXISTS: 6068 subparser = parsers[" ".join(this)] 6069 return subparser 6070 6071 self._retreat(index) 6072 return None 6073 6074 def _match(self, token_type, advance=True, expression=None): 6075 if not self._curr: 6076 return None 6077 6078 if self._curr.token_type == token_type: 6079 if advance: 6080 self._advance() 6081 self._add_comments(expression) 6082 return True 6083 6084 return None 6085 6086 def _match_set(self, types, advance=True): 6087 if not self._curr: 6088 return None 6089 6090 if self._curr.token_type in types: 6091 if advance: 6092 self._advance() 6093 return True 6094 6095 return None 6096 6097 def _match_pair(self, token_type_a, token_type_b, advance=True): 6098 if not self._curr or not self._next: 6099 return None 6100 6101 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6102 if advance: 6103 self._advance(2) 6104 return True 6105 6106 return None 6107 6108 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6109 if not self._match(TokenType.L_PAREN, expression=expression): 6110 self.raise_error("Expecting (") 6111 6112 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6113 if not self._match(TokenType.R_PAREN, expression=expression): 6114 self.raise_error("Expecting )") 6115 6116 def _match_texts(self, texts, advance=True): 6117 if self._curr and self._curr.text.upper() in texts: 6118 if advance: 6119 self._advance() 6120 return True 6121 return None 6122 6123 def _match_text_seq(self, *texts, advance=True): 6124 index = self._index 6125 for text in texts: 6126 if self._curr and self._curr.text.upper() == text: 6127 self._advance() 6128 else: 6129 self._retreat(index) 6130 return None 6131 6132 if not advance: 6133 self._retreat(index) 6134 6135 return True 6136 6137 def _replace_lambda( 6138 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6139 ) -> t.Optional[exp.Expression]: 6140 if not node: 6141 return node 6142 6143 for column in node.find_all(exp.Column): 6144 if column.parts[0].name in lambda_variables: 6145 dot_or_id = column.to_dot() if column.table else column.this 6146 parent = column.parent 6147 6148 while isinstance(parent, exp.Dot): 6149 if not isinstance(parent.parent, exp.Dot): 6150 parent.replace(dot_or_id) 6151 break 6152 parent = parent.parent 6153 else: 6154 if column is node: 6155 node = dot_or_id 6156 else: 6157 column.replace(dot_or_id) 6158 return node 6159 6160 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6161 start = self._prev 6162 6163 # Not to be confused with TRUNCATE(number, decimals) function call 6164 if self._match(TokenType.L_PAREN): 6165 self._retreat(self._index - 2) 6166 return self._parse_function() 6167 6168 # Clickhouse supports TRUNCATE DATABASE as well 6169 is_database = self._match(TokenType.DATABASE) 6170 6171 self._match(TokenType.TABLE) 6172 6173 exists = self._parse_exists(not_=False) 6174 6175 expressions = self._parse_csv( 6176 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6177 ) 6178 6179 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6180 6181 if self._match_text_seq("RESTART", "IDENTITY"): 6182 identity = "RESTART" 6183 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6184 identity = "CONTINUE" 6185 else: 6186 identity = None 6187 6188 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6189 option = self._prev.text 6190 else: 6191 option = None 6192 6193 partition = self._parse_partition() 6194 6195 # Fallback case 6196 if self._curr: 6197 return self._parse_as_command(start) 6198 6199 return self.expression( 6200 exp.TruncateTable, 6201 expressions=expressions, 6202 is_database=is_database, 6203 exists=exists, 6204 cluster=cluster, 6205 identity=identity, 6206 option=option, 6207 partition=partition, 6208 ) 6209 6210 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6211 this = self._parse_ordered(self._parse_opclass) 6212 6213 if not self._match(TokenType.WITH): 6214 return this 6215 6216 op = self._parse_var(any_token=True) 6217 6218 return self.expression(exp.WithOperator, this=this, op=op)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1125 def __init__( 1126 self, 1127 error_level: t.Optional[ErrorLevel] = None, 1128 error_message_context: int = 100, 1129 max_errors: int = 3, 1130 dialect: DialectType = None, 1131 ): 1132 from sqlglot.dialects import Dialect 1133 1134 self.error_level = error_level or ErrorLevel.IMMEDIATE 1135 self.error_message_context = error_message_context 1136 self.max_errors = max_errors 1137 self.dialect = Dialect.get_or_raise(dialect) 1138 self.reset()
1150 def parse( 1151 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1152 ) -> t.List[t.Optional[exp.Expression]]: 1153 """ 1154 Parses a list of tokens and returns a list of syntax trees, one tree 1155 per parsed SQL statement. 1156 1157 Args: 1158 raw_tokens: The list of tokens. 1159 sql: The original SQL string, used to produce helpful debug messages. 1160 1161 Returns: 1162 The list of the produced syntax trees. 1163 """ 1164 return self._parse( 1165 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1166 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1168 def parse_into( 1169 self, 1170 expression_types: exp.IntoType, 1171 raw_tokens: t.List[Token], 1172 sql: t.Optional[str] = None, 1173 ) -> t.List[t.Optional[exp.Expression]]: 1174 """ 1175 Parses a list of tokens into a given Expression type. If a collection of Expression 1176 types is given instead, this method will try to parse the token list into each one 1177 of them, stopping at the first for which the parsing succeeds. 1178 1179 Args: 1180 expression_types: The expression type(s) to try and parse the token list into. 1181 raw_tokens: The list of tokens. 1182 sql: The original SQL string, used to produce helpful debug messages. 1183 1184 Returns: 1185 The target Expression. 1186 """ 1187 errors = [] 1188 for expression_type in ensure_list(expression_types): 1189 parser = self.EXPRESSION_PARSERS.get(expression_type) 1190 if not parser: 1191 raise TypeError(f"No parser registered for {expression_type}") 1192 1193 try: 1194 return self._parse(parser, raw_tokens, sql) 1195 except ParseError as e: 1196 e.errors[0]["into_expression"] = expression_type 1197 errors.append(e) 1198 1199 raise ParseError( 1200 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1201 errors=merge_errors(errors), 1202 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1239 def check_errors(self) -> None: 1240 """Logs or raises any found errors, depending on the chosen error level setting.""" 1241 if self.error_level == ErrorLevel.WARN: 1242 for error in self.errors: 1243 logger.error(str(error)) 1244 elif self.error_level == ErrorLevel.RAISE and self.errors: 1245 raise ParseError( 1246 concat_messages(self.errors, self.max_errors), 1247 errors=merge_errors(self.errors), 1248 )
Logs or raises any found errors, depending on the chosen error level setting.
1250 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1251 """ 1252 Appends an error in the list of recorded errors or raises it, depending on the chosen 1253 error level setting. 1254 """ 1255 token = token or self._curr or self._prev or Token.string("") 1256 start = token.start 1257 end = token.end + 1 1258 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1259 highlight = self.sql[start:end] 1260 end_context = self.sql[end : end + self.error_message_context] 1261 1262 error = ParseError.new( 1263 f"{message}. Line {token.line}, Col: {token.col}.\n" 1264 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1265 description=message, 1266 line=token.line, 1267 col=token.col, 1268 start_context=start_context, 1269 highlight=highlight, 1270 end_context=end_context, 1271 ) 1272 1273 if self.error_level == ErrorLevel.IMMEDIATE: 1274 raise error 1275 1276 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1278 def expression( 1279 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1280 ) -> E: 1281 """ 1282 Creates a new, validated Expression. 1283 1284 Args: 1285 exp_class: The expression class to instantiate. 1286 comments: An optional list of comments to attach to the expression. 1287 kwargs: The arguments to set for the expression along with their respective values. 1288 1289 Returns: 1290 The target expression. 1291 """ 1292 instance = exp_class(**kwargs) 1293 instance.add_comments(comments) if comments else self._add_comments(instance) 1294 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1301 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1302 """ 1303 Validates an Expression, making sure that all its mandatory arguments are set. 1304 1305 Args: 1306 expression: The expression to validate. 1307 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1308 1309 Returns: 1310 The validated expression. 1311 """ 1312 if self.error_level != ErrorLevel.IGNORE: 1313 for error_message in expression.error_messages(args): 1314 self.raise_error(error_message) 1315 1316 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.