# sqlglot.parser

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import in_trie, new_trie

logger = logging.getLogger("sqlglot")

E = t.TypeVar("E", bound=exp.Expression)


def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VAR_MAP expression from a flat [key, value, key, value, ...] list.

    A single star argument produces a StarMap instead.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key/value, so walk them pairwise.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])
    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args):
    # NOTE(review): the first argument becomes the pattern (`expression`) and the
    # second the subject (`this`) — the swap appears intentional; confirm against
    # the dialects that register this function.
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback producing `expr_type(this, <bitwise>)`, with ESCAPE support."""
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass that precomputes tries for multi-word SHOW/SET parser lookups."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)
        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
    a parsed syntax tree.

    Args:
        error_level: the desired error level.
            Default: ErrorLevel.RAISE
        error_message_context: determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 50.
        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
            Default: 0
        alias_post_tablesample: If the table alias comes after tablesample.
            Default: False
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
        null_ordering: Indicates the default null ordering method to use if not explicitly set.
            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
            Default: "nulls_are_small"
    """

    # Function-name -> builder overrides layered on top of every known exp function.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        # NOTE(review): GLOB's operands are swapped relative to the written order;
        # presumably mirrors parse_like — verify against the registering dialects.
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "IFNULL": exp.Coalesce.from_arg_list,
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        # NOTE(review): CURRENT_DATETIME maps to exp.CurrentDate (not a
        # CurrentDatetime node) in the original — confirm this is intended.
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    JOIN_HINTS: t.Set[str] = set()

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that may also be parsed as identifiers / variable names.
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BOTH,
        TokenType.BUCKET,
        TokenType.CACHE,
        TokenType.CASCADE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.COMPOUND,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESCRIBE,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FOLLOWING,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LAZY,
        TokenType.LEADING,
        TokenType.LEFT,
        TokenType.LOCAL,
        TokenType.MATERIALIZED,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ONLY,
        TokenType.OPTIONS,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PRECEDING,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEED,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.SORTKEY,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRAILING,
        TokenType.TRUE,
        TokenType.UNBOUNDED,
        TokenType.UNIQUE,
        TokenType.UNLOGGED,
        TokenType.UNPIVOT,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}

    # Tokens that may introduce a function call.
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator token -> expression-node tables, by precedence tier.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.Var(this=expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that chain onto a column expression (casts, JSON extraction, ...).
    # DOT is special-cased (None) by the column parser.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Target expression type -> entry point used by parse_into().
    EXPRESSION_PARSERS = {
        exp.Column: lambda self: self._parse_column(),
        exp.DataType: lambda self: self._parse_types(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Join: lambda self: self._parse_join(),
        exp.Order: lambda self: self._parse_order(),
        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Table: lambda self: self._parse_table(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Where: lambda self: self._parse_where(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Having: lambda self: self._parse_having(),
        exp.With: lambda self: self._parse_with(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
    }

    # Leading statement keyword -> statement parser.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.END: lambda self: self._parse_commit_or_rollback(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # Keyword (as seen in CREATE ... statements) -> property parser.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Keyword -> column-constraint parser (used inside column definitions).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }
ALTER_PARSERS = { 693 "ADD": lambda self: self._parse_alter_table_add(), 694 "ALTER": lambda self: self._parse_alter_table_alter(), 695 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 696 "DROP": lambda self: self._parse_alter_table_drop(), 697 "RENAME": lambda self: self._parse_alter_table_rename(), 698 } 699 700 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 701 702 NO_PAREN_FUNCTION_PARSERS = { 703 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 704 TokenType.CASE: lambda self: self._parse_case(), 705 TokenType.IF: lambda self: self._parse_if(), 706 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 707 exp.NextValueFor, 708 this=self._parse_column(), 709 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 710 ), 711 } 712 713 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 714 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 715 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 716 "DECODE": lambda self: self._parse_decode(), 717 "EXTRACT": lambda self: self._parse_extract(), 718 "JSON_OBJECT": lambda self: self._parse_json_object(), 719 "LOG": lambda self: self._parse_logarithm(), 720 "MATCH": lambda self: self._parse_match_against(), 721 "OPENJSON": lambda self: self._parse_open_json(), 722 "POSITION": lambda self: self._parse_position(), 723 "STRING_AGG": lambda self: self._parse_string_agg(), 724 "SUBSTRING": lambda self: self._parse_substring(), 725 "TRIM": lambda self: self._parse_trim(), 726 "TRY_CAST": lambda self: self._parse_cast(False), 727 "TRY_CONVERT": lambda self: self._parse_convert(False), 728 } 729 730 QUERY_MODIFIER_PARSERS = { 731 "joins": lambda self: list(iter(self._parse_join, None)), 732 "laterals": lambda self: list(iter(self._parse_lateral, None)), 733 "match": lambda self: self._parse_match_recognize(), 734 "where": lambda self: self._parse_where(), 735 "group": lambda self: 
self._parse_group(), 736 "having": lambda self: self._parse_having(), 737 "qualify": lambda self: self._parse_qualify(), 738 "windows": lambda self: self._parse_window_clause(), 739 "order": lambda self: self._parse_order(), 740 "limit": lambda self: self._parse_limit(), 741 "offset": lambda self: self._parse_offset(), 742 "locks": lambda self: self._parse_locks(), 743 "sample": lambda self: self._parse_table_sample(as_modifier=True), 744 } 745 746 SET_PARSERS = { 747 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 748 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 749 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 750 "TRANSACTION": lambda self: self._parse_set_transaction(), 751 } 752 753 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 754 755 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 756 757 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 758 759 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 760 761 TRANSACTION_CHARACTERISTICS = { 762 "ISOLATION LEVEL REPEATABLE READ", 763 "ISOLATION LEVEL READ COMMITTED", 764 "ISOLATION LEVEL READ UNCOMMITTED", 765 "ISOLATION LEVEL SERIALIZABLE", 766 "READ WRITE", 767 "READ ONLY", 768 } 769 770 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 771 772 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 773 774 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 775 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 776 777 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 778 779 STRICT_CAST = True 780 781 CONVERT_TYPE_FIRST = False 782 783 PREFIXED_PIVOT_COLUMNS = False 784 IDENTIFY_PIVOT_STRINGS = False 785 786 LOG_BASE_FIRST = True 787 LOG_DEFAULTS_TO_LN = False 788 789 __slots__ = ( 790 "error_level", 791 "error_message_context", 792 "sql", 793 "errors", 794 "index_offset", 795 "unnest_column_only", 796 "alias_post_tablesample", 797 "max_errors", 798 "null_ordering", 
799 "_tokens", 800 "_index", 801 "_curr", 802 "_next", 803 "_prev", 804 "_prev_comments", 805 "_show_trie", 806 "_set_trie", 807 ) 808 809 def __init__( 810 self, 811 error_level: t.Optional[ErrorLevel] = None, 812 error_message_context: int = 100, 813 index_offset: int = 0, 814 unnest_column_only: bool = False, 815 alias_post_tablesample: bool = False, 816 max_errors: int = 3, 817 null_ordering: t.Optional[str] = None, 818 ): 819 self.error_level = error_level or ErrorLevel.IMMEDIATE 820 self.error_message_context = error_message_context 821 self.index_offset = index_offset 822 self.unnest_column_only = unnest_column_only 823 self.alias_post_tablesample = alias_post_tablesample 824 self.max_errors = max_errors 825 self.null_ordering = null_ordering 826 self.reset() 827 828 def reset(self): 829 self.sql = "" 830 self.errors = [] 831 self._tokens = [] 832 self._index = 0 833 self._curr = None 834 self._next = None 835 self._prev = None 836 self._prev_comments = None 837 838 def parse( 839 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 840 ) -> t.List[t.Optional[exp.Expression]]: 841 """ 842 Parses a list of tokens and returns a list of syntax trees, one tree 843 per parsed SQL statement. 844 845 Args: 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The list of syntax trees. 851 """ 852 return self._parse( 853 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 854 ) 855 856 def parse_into( 857 self, 858 expression_types: exp.IntoType, 859 raw_tokens: t.List[Token], 860 sql: t.Optional[str] = None, 861 ) -> t.List[t.Optional[exp.Expression]]: 862 """ 863 Parses a list of tokens into a given Expression type. If a collection of Expression 864 types is given instead, this method will try to parse the token list into each one 865 of them, stopping at the first for which the parsing succeeds. 
866 867 Args: 868 expression_types: the expression type(s) to try and parse the token list into. 869 raw_tokens: the list of tokens. 870 sql: the original SQL string, used to produce helpful debug messages. 871 872 Returns: 873 The target Expression. 874 """ 875 errors = [] 876 for expression_type in ensure_collection(expression_types): 877 parser = self.EXPRESSION_PARSERS.get(expression_type) 878 if not parser: 879 raise TypeError(f"No parser registered for {expression_type}") 880 try: 881 return self._parse(parser, raw_tokens, sql) 882 except ParseError as e: 883 e.errors[0]["into_expression"] = expression_type 884 errors.append(e) 885 raise ParseError( 886 f"Failed to parse into {expression_types}", 887 errors=merge_errors(errors), 888 ) from errors[-1] 889 890 def _parse( 891 self, 892 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 893 raw_tokens: t.List[Token], 894 sql: t.Optional[str] = None, 895 ) -> t.List[t.Optional[exp.Expression]]: 896 self.reset() 897 self.sql = sql or "" 898 total = len(raw_tokens) 899 chunks: t.List[t.List[Token]] = [[]] 900 901 for i, token in enumerate(raw_tokens): 902 if token.token_type == TokenType.SEMICOLON: 903 if i < total - 1: 904 chunks.append([]) 905 else: 906 chunks[-1].append(token) 907 908 expressions = [] 909 910 for tokens in chunks: 911 self._index = -1 912 self._tokens = tokens 913 self._advance() 914 915 expressions.append(parse_method(self)) 916 917 if self._index < len(self._tokens): 918 self.raise_error("Invalid expression / Unexpected token") 919 920 self.check_errors() 921 922 return expressions 923 924 def check_errors(self) -> None: 925 """ 926 Logs or raises any found errors, depending on the chosen error level setting. 
927 """ 928 if self.error_level == ErrorLevel.WARN: 929 for error in self.errors: 930 logger.error(str(error)) 931 elif self.error_level == ErrorLevel.RAISE and self.errors: 932 raise ParseError( 933 concat_messages(self.errors, self.max_errors), 934 errors=merge_errors(self.errors), 935 ) 936 937 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 938 """ 939 Appends an error in the list of recorded errors or raises it, depending on the chosen 940 error level setting. 941 """ 942 token = token or self._curr or self._prev or Token.string("") 943 start = token.start 944 end = token.end 945 start_context = self.sql[max(start - self.error_message_context, 0) : start] 946 highlight = self.sql[start:end] 947 end_context = self.sql[end : end + self.error_message_context] 948 949 error = ParseError.new( 950 f"{message}. Line {token.line}, Col: {token.col}.\n" 951 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 952 description=message, 953 line=token.line, 954 col=token.col, 955 start_context=start_context, 956 highlight=highlight, 957 end_context=end_context, 958 ) 959 960 if self.error_level == ErrorLevel.IMMEDIATE: 961 raise error 962 963 self.errors.append(error) 964 965 def expression( 966 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 967 ) -> E: 968 """ 969 Creates a new, validated Expression. 970 971 Args: 972 exp_class: the expression class to instantiate. 973 comments: an optional list of comments to attach to the expression. 974 kwargs: the arguments to set for the expression along with their respective values. 975 976 Returns: 977 The target expression. 
978 """ 979 instance = exp_class(**kwargs) 980 instance.add_comments(comments) if comments else self._add_comments(instance) 981 self.validate_expression(instance) 982 return instance 983 984 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 985 if expression and self._prev_comments: 986 expression.add_comments(self._prev_comments) 987 self._prev_comments = None 988 989 def validate_expression( 990 self, expression: exp.Expression, args: t.Optional[t.List] = None 991 ) -> None: 992 """ 993 Validates an already instantiated expression, making sure that all its mandatory arguments 994 are set. 995 996 Args: 997 expression: the expression to validate. 998 args: an optional list of items that was used to instantiate the expression, if it's a Func. 999 """ 1000 if self.error_level == ErrorLevel.IGNORE: 1001 return 1002 1003 for error_message in expression.error_messages(args): 1004 self.raise_error(error_message) 1005 1006 def _find_sql(self, start: Token, end: Token) -> str: 1007 return self.sql[start.start : end.end] 1008 1009 def _advance(self, times: int = 1) -> None: 1010 self._index += times 1011 self._curr = seq_get(self._tokens, self._index) 1012 self._next = seq_get(self._tokens, self._index + 1) 1013 if self._index > 0: 1014 self._prev = self._tokens[self._index - 1] 1015 self._prev_comments = self._prev.comments 1016 else: 1017 self._prev = None 1018 self._prev_comments = None 1019 1020 def _retreat(self, index: int) -> None: 1021 if index != self._index: 1022 self._advance(index - self._index) 1023 1024 def _parse_command(self) -> exp.Command: 1025 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1026 1027 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1028 start = self._prev 1029 exists = self._parse_exists() if allow_exists else None 1030 1031 self._match(TokenType.ON) 1032 1033 kind = self._match_set(self.CREATABLES) and self._prev 1034 1035 if not kind: 1036 return 
self._parse_as_command(start) 1037 1038 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1039 this = self._parse_user_defined_function(kind=kind.token_type) 1040 elif kind.token_type == TokenType.TABLE: 1041 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1042 elif kind.token_type == TokenType.COLUMN: 1043 this = self._parse_column() 1044 else: 1045 this = self._parse_id_var() 1046 1047 self._match(TokenType.IS) 1048 1049 return self.expression( 1050 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1051 ) 1052 1053 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1054 def _parse_ttl(self) -> exp.Expression: 1055 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1056 this = self._parse_bitwise() 1057 1058 if self._match_text_seq("DELETE"): 1059 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1060 if self._match_text_seq("RECOMPRESS"): 1061 return self.expression( 1062 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1063 ) 1064 if self._match_text_seq("TO", "DISK"): 1065 return self.expression( 1066 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1067 ) 1068 if self._match_text_seq("TO", "VOLUME"): 1069 return self.expression( 1070 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1071 ) 1072 1073 return this 1074 1075 expressions = self._parse_csv(_parse_ttl_action) 1076 where = self._parse_where() 1077 group = self._parse_group() 1078 1079 aggregates = None 1080 if group and self._match(TokenType.SET): 1081 aggregates = self._parse_csv(self._parse_set_item) 1082 1083 return self.expression( 1084 exp.MergeTreeTTL, 1085 expressions=expressions, 1086 where=where, 1087 group=group, 1088 aggregates=aggregates, 1089 ) 1090 1091 def _parse_statement(self) -> t.Optional[exp.Expression]: 1092 if self._curr is None: 1093 return None 1094 1095 if 
self._match_set(self.STATEMENT_PARSERS): 1096 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1097 1098 if self._match_set(Tokenizer.COMMANDS): 1099 return self._parse_command() 1100 1101 expression = self._parse_expression() 1102 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1103 return self._parse_query_modifiers(expression) 1104 1105 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1106 start = self._prev 1107 temporary = self._match(TokenType.TEMPORARY) 1108 materialized = self._match(TokenType.MATERIALIZED) 1109 kind = self._match_set(self.CREATABLES) and self._prev.text 1110 if not kind: 1111 return self._parse_as_command(start) 1112 1113 return self.expression( 1114 exp.Drop, 1115 exists=self._parse_exists(), 1116 this=self._parse_table(schema=True), 1117 kind=kind, 1118 temporary=temporary, 1119 materialized=materialized, 1120 cascade=self._match(TokenType.CASCADE), 1121 constraints=self._match_text_seq("CONSTRAINTS"), 1122 purge=self._match_text_seq("PURGE"), 1123 ) 1124 1125 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1126 return ( 1127 self._match(TokenType.IF) 1128 and (not not_ or self._match(TokenType.NOT)) 1129 and self._match(TokenType.EXISTS) 1130 ) 1131 1132 def _parse_create(self) -> t.Optional[exp.Expression]: 1133 start = self._prev 1134 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1135 TokenType.OR, TokenType.REPLACE 1136 ) 1137 unique = self._match(TokenType.UNIQUE) 1138 1139 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1140 self._match(TokenType.TABLE) 1141 1142 properties = None 1143 create_token = self._match_set(self.CREATABLES) and self._prev 1144 1145 if not create_token: 1146 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1147 create_token = self._match_set(self.CREATABLES) and self._prev 1148 1149 if not properties or not create_token: 1150 return 
self._parse_as_command(start) 1151 1152 exists = self._parse_exists(not_=True) 1153 this = None 1154 expression = None 1155 indexes = None 1156 no_schema_binding = None 1157 begin = None 1158 clone = None 1159 1160 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1161 this = self._parse_user_defined_function(kind=create_token.token_type) 1162 temp_properties = self._parse_properties() 1163 if properties and temp_properties: 1164 properties.expressions.extend(temp_properties.expressions) 1165 elif temp_properties: 1166 properties = temp_properties 1167 1168 self._match(TokenType.ALIAS) 1169 begin = self._match(TokenType.BEGIN) 1170 return_ = self._match_text_seq("RETURN") 1171 expression = self._parse_statement() 1172 1173 if return_: 1174 expression = self.expression(exp.Return, this=expression) 1175 elif create_token.token_type == TokenType.INDEX: 1176 this = self._parse_index() 1177 elif create_token.token_type in self.DB_CREATABLES: 1178 table_parts = self._parse_table_parts(schema=True) 1179 1180 # exp.Properties.Location.POST_NAME 1181 if self._match(TokenType.COMMA): 1182 temp_properties = self._parse_properties(before=True) 1183 if properties and temp_properties: 1184 properties.expressions.extend(temp_properties.expressions) 1185 elif temp_properties: 1186 properties = temp_properties 1187 1188 this = self._parse_schema(this=table_parts) 1189 1190 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1191 temp_properties = self._parse_properties() 1192 if properties and temp_properties: 1193 properties.expressions.extend(temp_properties.expressions) 1194 elif temp_properties: 1195 properties = temp_properties 1196 1197 self._match(TokenType.ALIAS) 1198 1199 # exp.Properties.Location.POST_ALIAS 1200 if not ( 1201 self._match(TokenType.SELECT, advance=False) 1202 or self._match(TokenType.WITH, advance=False) 1203 or self._match(TokenType.L_PAREN, advance=False) 1204 ): 1205 temp_properties = self._parse_properties() 1206 if properties 
and temp_properties: 1207 properties.expressions.extend(temp_properties.expressions) 1208 elif temp_properties: 1209 properties = temp_properties 1210 1211 expression = self._parse_ddl_select() 1212 1213 if create_token.token_type == TokenType.TABLE: 1214 # exp.Properties.Location.POST_EXPRESSION 1215 temp_properties = self._parse_properties() 1216 if properties and temp_properties: 1217 properties.expressions.extend(temp_properties.expressions) 1218 elif temp_properties: 1219 properties = temp_properties 1220 1221 indexes = [] 1222 while True: 1223 index = self._parse_create_table_index() 1224 1225 # exp.Properties.Location.POST_INDEX 1226 if self._match(TokenType.PARTITION_BY, advance=False): 1227 temp_properties = self._parse_properties() 1228 if properties and temp_properties: 1229 properties.expressions.extend(temp_properties.expressions) 1230 elif temp_properties: 1231 properties = temp_properties 1232 1233 if not index: 1234 break 1235 else: 1236 indexes.append(index) 1237 elif create_token.token_type == TokenType.VIEW: 1238 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1239 no_schema_binding = True 1240 1241 if self._match_text_seq("CLONE"): 1242 clone = self._parse_table(schema=True) 1243 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1244 clone_kind = ( 1245 self._match(TokenType.L_PAREN) 1246 and self._match_texts(self.CLONE_KINDS) 1247 and self._prev.text.upper() 1248 ) 1249 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1250 self._match(TokenType.R_PAREN) 1251 clone = self.expression( 1252 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1253 ) 1254 1255 return self.expression( 1256 exp.Create, 1257 this=this, 1258 kind=create_token.text, 1259 replace=replace, 1260 unique=unique, 1261 expression=expression, 1262 exists=exists, 1263 properties=properties, 1264 indexes=indexes, 1265 no_schema_binding=no_schema_binding, 1266 begin=begin, 1267 clone=clone, 1268 
) 1269 1270 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1271 self._match(TokenType.COMMA) 1272 1273 # parsers look to _prev for no/dual/default, so need to consume first 1274 self._match_text_seq("NO") 1275 self._match_text_seq("DUAL") 1276 self._match_text_seq("DEFAULT") 1277 1278 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1279 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1280 1281 return None 1282 1283 def _parse_property(self) -> t.Optional[exp.Expression]: 1284 if self._match_texts(self.PROPERTY_PARSERS): 1285 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1286 1287 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1288 return self._parse_character_set(default=True) 1289 1290 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1291 return self._parse_sortkey(compound=True) 1292 1293 if self._match_text_seq("SQL", "SECURITY"): 1294 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1295 1296 assignment = self._match_pair( 1297 TokenType.VAR, TokenType.EQ, advance=False 1298 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1299 1300 if assignment: 1301 key = self._parse_var_or_string() 1302 self._match(TokenType.EQ) 1303 return self.expression(exp.Property, this=key, value=self._parse_column()) 1304 1305 return None 1306 1307 def _parse_stored(self) -> exp.Expression: 1308 self._match(TokenType.ALIAS) 1309 1310 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1311 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1312 1313 return self.expression( 1314 exp.FileFormatProperty, 1315 this=self.expression( 1316 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1317 ) 1318 if input_format or output_format 1319 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1320 ) 1321 1322 def 
_parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1323 self._match(TokenType.EQ) 1324 self._match(TokenType.ALIAS) 1325 return self.expression(exp_class, this=self._parse_field()) 1326 1327 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1328 properties = [] 1329 1330 while True: 1331 if before: 1332 identified_property = self._parse_property_before() 1333 else: 1334 identified_property = self._parse_property() 1335 1336 if not identified_property: 1337 break 1338 for p in ensure_list(identified_property): 1339 properties.append(p) 1340 1341 if properties: 1342 return self.expression(exp.Properties, expressions=properties) 1343 1344 return None 1345 1346 def _parse_fallback(self, no=False) -> exp.Expression: 1347 self._match_text_seq("FALLBACK") 1348 return self.expression( 1349 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1350 ) 1351 1352 def _parse_volatile_property(self) -> exp.Expression: 1353 if self._index >= 2: 1354 pre_volatile_token = self._tokens[self._index - 2] 1355 else: 1356 pre_volatile_token = None 1357 1358 if pre_volatile_token and pre_volatile_token.token_type in ( 1359 TokenType.CREATE, 1360 TokenType.REPLACE, 1361 TokenType.UNIQUE, 1362 ): 1363 return exp.VolatileProperty() 1364 1365 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1366 1367 def _parse_with_property( 1368 self, 1369 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1370 self._match(TokenType.WITH) 1371 if self._match(TokenType.L_PAREN, advance=False): 1372 return self._parse_wrapped_csv(self._parse_property) 1373 1374 if self._match_text_seq("JOURNAL"): 1375 return self._parse_withjournaltable() 1376 1377 if self._match_text_seq("DATA"): 1378 return self._parse_withdata(no=False) 1379 elif self._match_text_seq("NO", "DATA"): 1380 return self._parse_withdata(no=True) 1381 1382 if not self._next: 1383 return None 1384 1385 return 
self._parse_withisolatedloading() 1386 1387 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1388 def _parse_definer(self) -> t.Optional[exp.Expression]: 1389 self._match(TokenType.EQ) 1390 1391 user = self._parse_id_var() 1392 self._match(TokenType.PARAMETER) 1393 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1394 1395 if not user or not host: 1396 return None 1397 1398 return exp.DefinerProperty(this=f"{user}@{host}") 1399 1400 def _parse_withjournaltable(self) -> exp.Expression: 1401 self._match(TokenType.TABLE) 1402 self._match(TokenType.EQ) 1403 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1404 1405 def _parse_log(self, no=False) -> exp.Expression: 1406 self._match_text_seq("LOG") 1407 return self.expression(exp.LogProperty, no=no) 1408 1409 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1410 before = self._match_text_seq("BEFORE") 1411 self._match_text_seq("JOURNAL") 1412 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1413 1414 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1415 self._match_text_seq("NOT") 1416 self._match_text_seq("LOCAL") 1417 self._match_text_seq("AFTER", "JOURNAL") 1418 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1419 1420 def _parse_checksum(self) -> exp.Expression: 1421 self._match_text_seq("CHECKSUM") 1422 self._match(TokenType.EQ) 1423 1424 on = None 1425 if self._match(TokenType.ON): 1426 on = True 1427 elif self._match_text_seq("OFF"): 1428 on = False 1429 default = self._match(TokenType.DEFAULT) 1430 1431 return self.expression( 1432 exp.ChecksumProperty, 1433 on=on, 1434 default=default, 1435 ) 1436 1437 def _parse_freespace(self) -> exp.Expression: 1438 self._match_text_seq("FREESPACE") 1439 self._match(TokenType.EQ) 1440 return self.expression( 1441 exp.FreespaceProperty, this=self._parse_number(), 
percent=self._match(TokenType.PERCENT) 1442 ) 1443 1444 def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression: 1445 self._match_text_seq("MERGEBLOCKRATIO") 1446 if self._match(TokenType.EQ): 1447 return self.expression( 1448 exp.MergeBlockRatioProperty, 1449 this=self._parse_number(), 1450 percent=self._match(TokenType.PERCENT), 1451 ) 1452 else: 1453 return self.expression( 1454 exp.MergeBlockRatioProperty, 1455 no=no, 1456 default=default, 1457 ) 1458 1459 def _parse_datablocksize(self, default=None) -> exp.Expression: 1460 if default: 1461 self._match_text_seq("DATABLOCKSIZE") 1462 return self.expression(exp.DataBlocksizeProperty, default=True) 1463 elif self._match_texts(("MIN", "MINIMUM")): 1464 self._match_text_seq("DATABLOCKSIZE") 1465 return self.expression(exp.DataBlocksizeProperty, min=True) 1466 elif self._match_texts(("MAX", "MAXIMUM")): 1467 self._match_text_seq("DATABLOCKSIZE") 1468 return self.expression(exp.DataBlocksizeProperty, min=False) 1469 1470 self._match_text_seq("DATABLOCKSIZE") 1471 self._match(TokenType.EQ) 1472 size = self._parse_number() 1473 units = None 1474 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1475 units = self._prev.text 1476 return self.expression(exp.DataBlocksizeProperty, size=size, units=units) 1477 1478 def _parse_blockcompression(self) -> exp.Expression: 1479 self._match_text_seq("BLOCKCOMPRESSION") 1480 self._match(TokenType.EQ) 1481 always = self._match_text_seq("ALWAYS") 1482 manual = self._match_text_seq("MANUAL") 1483 never = self._match_text_seq("NEVER") 1484 default = self._match_text_seq("DEFAULT") 1485 autotemp = None 1486 if self._match_text_seq("AUTOTEMP"): 1487 autotemp = self._parse_schema() 1488 1489 return self.expression( 1490 exp.BlockCompressionProperty, 1491 always=always, 1492 manual=manual, 1493 never=never, 1494 default=default, 1495 autotemp=autotemp, 1496 ) 1497 1498 def _parse_withisolatedloading(self) -> exp.Expression: 1499 no = self._match_text_seq("NO") 1500 
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parses a Teradata LOCKING clause: kind, target, FOR/IN, lock type, OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry a table/database name.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parses PARTITION BY <expr>, ... into a list (empty when absent)."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.Expression:
        """Parses [AND [NO] STATISTICS] after WITH [NO] DATA."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_noprimaryindex(self) -> exp.Expression:
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parses CREATE ... LIKE <table> [INCLUDING|EXCLUDING <option>]..."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parses a RETURNS clause: either a scalar type or TABLE[<...>]/TABLE(...)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self, global_: bool = False) -> exp.Expression:
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parses INSERT [OVERWRITE] [LOCAL DIRECTORY|INTO TABLE] ..."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. sqlite INSERT OR REPLACE/IGNORE/...
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parses ON CONFLICT ... / ON DUPLICATE KEY ... after an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parses a Hive ROW FORMAT clause (SERDE or DELIMITED)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
1752 kwargs["map_keys"] = self._parse_string() 1753 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1754 kwargs["lines"] = self._parse_string() 1755 if self._match_text_seq("NULL", "DEFINED", "AS"): 1756 kwargs["null"] = self._parse_string() 1757 1758 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1759 1760 def _parse_load_data(self) -> exp.Expression: 1761 local = self._match(TokenType.LOCAL) 1762 self._match_text_seq("INPATH") 1763 inpath = self._parse_string() 1764 overwrite = self._match(TokenType.OVERWRITE) 1765 self._match_pair(TokenType.INTO, TokenType.TABLE) 1766 1767 return self.expression( 1768 exp.LoadData, 1769 this=self._parse_table(schema=True), 1770 local=local, 1771 overwrite=overwrite, 1772 inpath=inpath, 1773 partition=self._parse_partition(), 1774 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1775 serde=self._match_text_seq("SERDE") and self._parse_string(), 1776 ) 1777 1778 def _parse_delete(self) -> exp.Expression: 1779 self._match(TokenType.FROM) 1780 1781 return self.expression( 1782 exp.Delete, 1783 this=self._parse_table(), 1784 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1785 where=self._parse_where(), 1786 returning=self._parse_returning(), 1787 ) 1788 1789 def _parse_update(self) -> exp.Expression: 1790 return self.expression( 1791 exp.Update, 1792 **{ # type: ignore 1793 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1794 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1795 "from": self._parse_from(modifiers=True), 1796 "where": self._parse_where(), 1797 "returning": self._parse_returning(), 1798 }, 1799 ) 1800 1801 def _parse_uncache(self) -> exp.Expression: 1802 if not self._match(TokenType.TABLE): 1803 self.raise_error("Expecting TABLE after UNCACHE") 1804 1805 return self.expression( 1806 exp.Uncache, 1807 exists=self._parse_exists(), 1808 
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parses CACHE [LAZY] TABLE <name> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # Stored as a flat [key, value] pair.
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parses one VALUES row, always returning an exp.Tuple."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parses a SELECT / WITH / VALUES / parenthesized query expression."""
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. BigQuery SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parses a WITH [RECURSIVE] clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.Expression:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parses [AS] alias [(col, ...)], returning None when neither is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # No columns inside the parens: the paren belongs to something else, rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attaches WHERE/GROUP/ORDER/... modifiers to a modifiable expression."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        """Parses a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is kept verbatim: scan to the matching close paren and
            # capture the raw SQL slice.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
                self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        define = (
self._parse_csv( 2105 lambda: self.expression( 2106 exp.Alias, 2107 alias=self._parse_id_var(any_token=True), 2108 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2109 ) 2110 ) 2111 if self._match_text_seq("DEFINE") 2112 else None 2113 ) 2114 2115 self._match_r_paren() 2116 2117 return self.expression( 2118 exp.MatchRecognize, 2119 partition_by=partition, 2120 order=order, 2121 measures=measures, 2122 rows=rows, 2123 after=after, 2124 pattern=pattern, 2125 define=define, 2126 alias=self._parse_table_alias(), 2127 ) 2128 2129 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2130 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2131 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2132 2133 if outer_apply or cross_apply: 2134 this = self._parse_select(table=True) 2135 view = None 2136 outer = not cross_apply 2137 elif self._match(TokenType.LATERAL): 2138 this = self._parse_select(table=True) 2139 view = self._match(TokenType.VIEW) 2140 outer = self._match(TokenType.OUTER) 2141 else: 2142 return None 2143 2144 if not this: 2145 this = self._parse_function() or self._parse_id_var(any_token=False) 2146 while self._match(TokenType.DOT): 2147 this = exp.Dot( 2148 this=this, 2149 expression=self._parse_function() or self._parse_id_var(any_token=False), 2150 ) 2151 2152 table_alias: t.Optional[exp.Expression] 2153 2154 if view: 2155 table = self._parse_id_var(any_token=False) 2156 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2157 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2158 else: 2159 table_alias = self._parse_table_alias() 2160 2161 expression = self.expression( 2162 exp.Lateral, 2163 this=this, 2164 view=view, 2165 outer=outer, 2166 alias=table_alias, 2167 ) 2168 2169 return expression 2170 2171 def _parse_join_side_and_kind( 2172 self, 2173 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2174 return ( 2175 
self._match(TokenType.NATURAL) and self._prev, 2176 self._match_set(self.JOIN_SIDES) and self._prev, 2177 self._match_set(self.JOIN_KINDS) and self._prev, 2178 ) 2179 2180 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2181 if self._match(TokenType.COMMA): 2182 return self.expression(exp.Join, this=self._parse_table()) 2183 2184 index = self._index 2185 natural, side, kind = self._parse_join_side_and_kind() 2186 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2187 join = self._match(TokenType.JOIN) 2188 2189 if not skip_join_token and not join: 2190 self._retreat(index) 2191 kind = None 2192 natural = None 2193 side = None 2194 2195 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2196 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2197 2198 if not skip_join_token and not join and not outer_apply and not cross_apply: 2199 return None 2200 2201 if outer_apply: 2202 side = Token(TokenType.LEFT, "LEFT") 2203 2204 kwargs: t.Dict[ 2205 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2206 ] = {"this": self._parse_table()} 2207 2208 if natural: 2209 kwargs["natural"] = True 2210 if side: 2211 kwargs["side"] = side.text 2212 if kind: 2213 kwargs["kind"] = kind.text 2214 if hint: 2215 kwargs["hint"] = hint 2216 2217 if self._match(TokenType.ON): 2218 kwargs["on"] = self._parse_conjunction() 2219 elif self._match(TokenType.USING): 2220 kwargs["using"] = self._parse_wrapped_id_vars() 2221 2222 return self.expression(exp.Join, **kwargs) # type: ignore 2223 2224 def _parse_index(self) -> exp.Expression: 2225 index = self._parse_id_var() 2226 self._match(TokenType.ON) 2227 self._match(TokenType.TABLE) # hive 2228 2229 return self.expression( 2230 exp.Index, 2231 this=index, 2232 table=self.expression(exp.Table, this=self._parse_id_var()), 2233 columns=self._parse_expression(), 2234 ) 2235 2236 def _parse_create_table_index(self) -> 
t.Optional[exp.Expression]: 2237 unique = self._match(TokenType.UNIQUE) 2238 primary = self._match_text_seq("PRIMARY") 2239 amp = self._match_text_seq("AMP") 2240 if not self._match(TokenType.INDEX): 2241 return None 2242 index = self._parse_id_var() 2243 columns = None 2244 if self._match(TokenType.L_PAREN, advance=False): 2245 columns = self._parse_wrapped_csv(self._parse_column) 2246 return self.expression( 2247 exp.Index, 2248 this=index, 2249 columns=columns, 2250 unique=unique, 2251 primary=primary, 2252 amp=amp, 2253 ) 2254 2255 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2256 return ( 2257 (not schema and self._parse_function()) 2258 or self._parse_id_var(any_token=False) 2259 or self._parse_string_as_identifier() 2260 or self._parse_placeholder() 2261 ) 2262 2263 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2264 catalog = None 2265 db = None 2266 table = self._parse_table_part(schema=schema) 2267 2268 while self._match(TokenType.DOT): 2269 if catalog: 2270 # This allows nesting the table in arbitrarily many dot expressions if needed 2271 table = self.expression( 2272 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2273 ) 2274 else: 2275 catalog = db 2276 db = table 2277 table = self._parse_table_part(schema=schema) 2278 2279 if not table: 2280 self.raise_error(f"Expected table name but got {self._curr}") 2281 2282 return self.expression( 2283 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2284 ) 2285 2286 def _parse_table( 2287 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2288 ) -> t.Optional[exp.Expression]: 2289 lateral = self._parse_lateral() 2290 if lateral: 2291 return lateral 2292 2293 unnest = self._parse_unnest() 2294 if unnest: 2295 return unnest 2296 2297 values = self._parse_derived_table_values() 2298 if values: 2299 return values 2300 2301 subquery = self._parse_select(table=True) 2302 if 
subquery: 2303 if not subquery.args.get("pivots"): 2304 subquery.set("pivots", self._parse_pivots()) 2305 return subquery 2306 2307 this = self._parse_table_parts(schema=schema) 2308 2309 if schema: 2310 return self._parse_schema(this=this) 2311 2312 if self.alias_post_tablesample: 2313 table_sample = self._parse_table_sample() 2314 2315 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2316 if alias: 2317 this.set("alias", alias) 2318 2319 if not this.args.get("pivots"): 2320 this.set("pivots", self._parse_pivots()) 2321 2322 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2323 this.set( 2324 "hints", 2325 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2326 ) 2327 self._match_r_paren() 2328 2329 if not self.alias_post_tablesample: 2330 table_sample = self._parse_table_sample() 2331 2332 if table_sample: 2333 table_sample.set("this", this) 2334 this = table_sample 2335 2336 return this 2337 2338 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2339 if not self._match(TokenType.UNNEST): 2340 return None 2341 2342 expressions = self._parse_wrapped_csv(self._parse_type) 2343 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2344 alias = self._parse_table_alias() 2345 2346 if alias and self.unnest_column_only: 2347 if alias.args.get("columns"): 2348 self.raise_error("Unexpected extra column alias in unnest.") 2349 alias.set("columns", [alias.this]) 2350 alias.set("this", None) 2351 2352 offset = None 2353 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2354 self._match(TokenType.ALIAS) 2355 offset = self._parse_id_var() or exp.Identifier(this="offset") 2356 2357 return self.expression( 2358 exp.Unnest, 2359 expressions=expressions, 2360 ordinality=ordinality, 2361 alias=alias, 2362 offset=offset, 2363 ) 2364 2365 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2366 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2367 if 
not is_derived and not self._match(TokenType.VALUES): 2368 return None 2369 2370 expressions = self._parse_csv(self._parse_value) 2371 2372 if is_derived: 2373 self._match_r_paren() 2374 2375 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2376 2377 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2378 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2379 as_modifier and self._match_text_seq("USING", "SAMPLE") 2380 ): 2381 return None 2382 2383 bucket_numerator = None 2384 bucket_denominator = None 2385 bucket_field = None 2386 percent = None 2387 rows = None 2388 size = None 2389 seed = None 2390 2391 kind = ( 2392 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2393 ) 2394 method = self._parse_var(tokens=(TokenType.ROW,)) 2395 2396 self._match(TokenType.L_PAREN) 2397 2398 num = self._parse_number() 2399 2400 if self._match(TokenType.BUCKET): 2401 bucket_numerator = self._parse_number() 2402 self._match(TokenType.OUT_OF) 2403 bucket_denominator = bucket_denominator = self._parse_number() 2404 self._match(TokenType.ON) 2405 bucket_field = self._parse_field() 2406 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2407 percent = num 2408 elif self._match(TokenType.ROWS): 2409 rows = num 2410 else: 2411 size = num 2412 2413 self._match(TokenType.R_PAREN) 2414 2415 if self._match(TokenType.L_PAREN): 2416 method = self._parse_var() 2417 seed = self._match(TokenType.COMMA) and self._parse_number() 2418 self._match_r_paren() 2419 elif self._match_texts(("SEED", "REPEATABLE")): 2420 seed = self._parse_wrapped(self._parse_number) 2421 2422 return self.expression( 2423 exp.TableSample, 2424 method=method, 2425 bucket_numerator=bucket_numerator, 2426 bucket_denominator=bucket_denominator, 2427 bucket_field=bucket_field, 2428 percent=percent, 2429 rows=rows, 2430 size=size, 2431 seed=seed, 2432 kind=kind, 2433 ) 2434 2435 def 
_parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2436 return list(iter(self._parse_pivot, None)) 2437 2438 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2439 index = self._index 2440 2441 if self._match(TokenType.PIVOT): 2442 unpivot = False 2443 elif self._match(TokenType.UNPIVOT): 2444 unpivot = True 2445 else: 2446 return None 2447 2448 expressions = [] 2449 field = None 2450 2451 if not self._match(TokenType.L_PAREN): 2452 self._retreat(index) 2453 return None 2454 2455 if unpivot: 2456 expressions = self._parse_csv(self._parse_column) 2457 else: 2458 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2459 2460 if not expressions: 2461 self.raise_error("Failed to parse PIVOT's aggregation list") 2462 2463 if not self._match(TokenType.FOR): 2464 self.raise_error("Expecting FOR") 2465 2466 value = self._parse_column() 2467 2468 if not self._match(TokenType.IN): 2469 self.raise_error("Expecting IN") 2470 2471 field = self._parse_in(value) 2472 2473 self._match_r_paren() 2474 2475 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2476 2477 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2478 pivot.set("alias", self._parse_table_alias()) 2479 2480 if not unpivot: 2481 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2482 2483 columns: t.List[exp.Expression] = [] 2484 for fld in pivot.args["field"].expressions: 2485 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2486 for name in names: 2487 if self.PREFIXED_PIVOT_COLUMNS: 2488 name = f"{name}_{field_name}" if name else field_name 2489 else: 2490 name = f"{field_name}_{name}" if name else field_name 2491 2492 columns.append(exp.to_identifier(name)) 2493 2494 pivot.set("columns", columns) 2495 2496 return pivot 2497 2498 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2499 return [agg.alias for agg in 
aggregations] 2500 2501 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]: 2502 if not skip_where_token and not self._match(TokenType.WHERE): 2503 return None 2504 2505 return self.expression( 2506 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2507 ) 2508 2509 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]: 2510 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2511 return None 2512 2513 elements = defaultdict(list) 2514 2515 while True: 2516 expressions = self._parse_csv(self._parse_conjunction) 2517 if expressions: 2518 elements["expressions"].extend(expressions) 2519 2520 grouping_sets = self._parse_grouping_sets() 2521 if grouping_sets: 2522 elements["grouping_sets"].extend(grouping_sets) 2523 2524 rollup = None 2525 cube = None 2526 totals = None 2527 2528 with_ = self._match(TokenType.WITH) 2529 if self._match(TokenType.ROLLUP): 2530 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2531 elements["rollup"].extend(ensure_list(rollup)) 2532 2533 if self._match(TokenType.CUBE): 2534 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2535 elements["cube"].extend(ensure_list(cube)) 2536 2537 if self._match_text_seq("TOTALS"): 2538 totals = True 2539 elements["totals"] = True # type: ignore 2540 2541 if not (grouping_sets or rollup or cube or totals): 2542 break 2543 2544 return self.expression(exp.Group, **elements) # type: ignore 2545 2546 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2547 if not self._match(TokenType.GROUPING_SETS): 2548 return None 2549 2550 return self._parse_wrapped_csv(self._parse_grouping_set) 2551 2552 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2553 if self._match(TokenType.L_PAREN): 2554 grouping_set = self._parse_csv(self._parse_column) 2555 self._match_r_paren() 2556 return self.expression(exp.Tuple, expressions=grouping_set) 2557 2558 return 
self._parse_column() 2559 2560 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2561 if not skip_having_token and not self._match(TokenType.HAVING): 2562 return None 2563 return self.expression(exp.Having, this=self._parse_conjunction()) 2564 2565 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2566 if not self._match(TokenType.QUALIFY): 2567 return None 2568 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2569 2570 def _parse_order( 2571 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2572 ) -> t.Optional[exp.Expression]: 2573 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2574 return this 2575 2576 return self.expression( 2577 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2578 ) 2579 2580 def _parse_sort( 2581 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2582 ) -> t.Optional[exp.Expression]: 2583 if not self._match(token_type): 2584 return None 2585 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2586 2587 def _parse_ordered(self) -> exp.Expression: 2588 this = self._parse_conjunction() 2589 self._match(TokenType.ASC) 2590 is_desc = self._match(TokenType.DESC) 2591 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2592 is_nulls_last = self._match(TokenType.NULLS_LAST) 2593 desc = is_desc or False 2594 asc = not desc 2595 nulls_first = is_nulls_first or False 2596 explicitly_null_ordered = is_nulls_first or is_nulls_last 2597 if ( 2598 not explicitly_null_ordered 2599 and ( 2600 (asc and self.null_ordering == "nulls_are_small") 2601 or (desc and self.null_ordering != "nulls_are_small") 2602 ) 2603 and self.null_ordering != "nulls_are_last" 2604 ): 2605 nulls_first = True 2606 2607 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2608 2609 def _parse_limit( 2610 self, this: t.Optional[exp.Expression] = None, top: bool = False 2611 ) -> 
t.Optional[exp.Expression]: 2612 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2613 limit_paren = self._match(TokenType.L_PAREN) 2614 limit_exp = self.expression( 2615 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2616 ) 2617 2618 if limit_paren: 2619 self._match_r_paren() 2620 2621 return limit_exp 2622 2623 if self._match(TokenType.FETCH): 2624 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2625 direction = self._prev.text if direction else "FIRST" 2626 2627 count = self._parse_number() 2628 percent = self._match(TokenType.PERCENT) 2629 2630 self._match_set((TokenType.ROW, TokenType.ROWS)) 2631 2632 only = self._match(TokenType.ONLY) 2633 with_ties = self._match_text_seq("WITH", "TIES") 2634 2635 if only and with_ties: 2636 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2637 2638 return self.expression( 2639 exp.Fetch, 2640 direction=direction, 2641 count=count, 2642 percent=percent, 2643 with_ties=with_ties, 2644 ) 2645 2646 return this 2647 2648 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2649 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2650 return this 2651 2652 count = self._parse_number() 2653 self._match_set((TokenType.ROW, TokenType.ROWS)) 2654 return self.expression(exp.Offset, this=this, expression=count) 2655 2656 def _parse_locks(self) -> t.List[exp.Expression]: 2657 # Lists are invariant, so we need to use a type hint here 2658 locks: t.List[exp.Expression] = [] 2659 2660 while True: 2661 if self._match_text_seq("FOR", "UPDATE"): 2662 update = True 2663 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2664 "LOCK", "IN", "SHARE", "MODE" 2665 ): 2666 update = False 2667 else: 2668 break 2669 2670 expressions = None 2671 if self._match_text_seq("OF"): 2672 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2673 2674 wait: t.Optional[bool | exp.Expression] 
= None 2675 if self._match_text_seq("NOWAIT"): 2676 wait = True 2677 elif self._match_text_seq("WAIT"): 2678 wait = self._parse_primary() 2679 elif self._match_text_seq("SKIP", "LOCKED"): 2680 wait = False 2681 2682 locks.append( 2683 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2684 ) 2685 2686 return locks 2687 2688 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2689 if not self._match_set(self.SET_OPERATIONS): 2690 return this 2691 2692 token_type = self._prev.token_type 2693 2694 if token_type == TokenType.UNION: 2695 expression = exp.Union 2696 elif token_type == TokenType.EXCEPT: 2697 expression = exp.Except 2698 else: 2699 expression = exp.Intersect 2700 2701 return self.expression( 2702 expression, 2703 this=this, 2704 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2705 expression=self._parse_set_operations(self._parse_select(nested=True)), 2706 ) 2707 2708 def _parse_expression(self) -> t.Optional[exp.Expression]: 2709 return self._parse_alias(self._parse_conjunction()) 2710 2711 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2712 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2713 2714 def _parse_equality(self) -> t.Optional[exp.Expression]: 2715 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2716 2717 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2718 return self._parse_tokens(self._parse_range, self.COMPARISON) 2719 2720 def _parse_range(self) -> t.Optional[exp.Expression]: 2721 this = self._parse_bitwise() 2722 negate = self._match(TokenType.NOT) 2723 2724 if self._match_set(self.RANGE_PARSERS): 2725 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2726 if not expression: 2727 return this 2728 2729 this = expression 2730 elif self._match(TokenType.ISNULL): 2731 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2732 2733 # Postgres supports ISNULL 
and NOTNULL for conditions. 2734 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2735 if self._match(TokenType.NOTNULL): 2736 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2737 this = self.expression(exp.Not, this=this) 2738 2739 if negate: 2740 this = self.expression(exp.Not, this=this) 2741 2742 if self._match(TokenType.IS): 2743 this = self._parse_is(this) 2744 2745 return this 2746 2747 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2748 index = self._index - 1 2749 negate = self._match(TokenType.NOT) 2750 if self._match(TokenType.DISTINCT_FROM): 2751 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2752 return self.expression(klass, this=this, expression=self._parse_expression()) 2753 2754 expression = self._parse_null() or self._parse_boolean() 2755 if not expression: 2756 self._retreat(index) 2757 return None 2758 2759 this = self.expression(exp.Is, this=this, expression=expression) 2760 return self.expression(exp.Not, this=this) if negate else this 2761 2762 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2763 unnest = self._parse_unnest() 2764 if unnest: 2765 this = self.expression(exp.In, this=this, unnest=unnest) 2766 elif self._match(TokenType.L_PAREN): 2767 expressions = self._parse_csv(self._parse_select_or_expression) 2768 2769 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2770 this = self.expression(exp.In, this=this, query=expressions[0]) 2771 else: 2772 this = self.expression(exp.In, this=this, expressions=expressions) 2773 2774 self._match_r_paren(this) 2775 else: 2776 this = self.expression(exp.In, this=this, field=self._parse_field()) 2777 2778 return this 2779 2780 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2781 low = self._parse_bitwise() 2782 self._match(TokenType.AND) 2783 high = self._parse_bitwise() 2784 return self.expression(exp.Between, this=this, low=low, high=high) 2785 2786 def 
_parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2787 if not self._match(TokenType.ESCAPE): 2788 return this 2789 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2790 2791 def _parse_interval(self) -> t.Optional[exp.Expression]: 2792 if not self._match(TokenType.INTERVAL): 2793 return None 2794 2795 this = self._parse_primary() or self._parse_term() 2796 unit = self._parse_function() or self._parse_var() 2797 2798 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2799 # each INTERVAL expression into this canonical form so it's easy to transpile 2800 if this and isinstance(this, exp.Literal): 2801 if this.is_number: 2802 this = exp.Literal.string(this.name) 2803 2804 # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year' 2805 parts = this.name.split() 2806 if not unit and len(parts) <= 2: 2807 this = exp.Literal.string(seq_get(parts, 0)) 2808 unit = self.expression(exp.Var, this=seq_get(parts, 1)) 2809 2810 return self.expression(exp.Interval, this=this, unit=unit) 2811 2812 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2813 this = self._parse_term() 2814 2815 while True: 2816 if self._match_set(self.BITWISE): 2817 this = self.expression( 2818 self.BITWISE[self._prev.token_type], 2819 this=this, 2820 expression=self._parse_term(), 2821 ) 2822 elif self._match_pair(TokenType.LT, TokenType.LT): 2823 this = self.expression( 2824 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2825 ) 2826 elif self._match_pair(TokenType.GT, TokenType.GT): 2827 this = self.expression( 2828 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2829 ) 2830 else: 2831 break 2832 2833 return this 2834 2835 def _parse_term(self) -> t.Optional[exp.Expression]: 2836 return self._parse_tokens(self._parse_factor, self.TERM) 2837 2838 def _parse_factor(self) -> t.Optional[exp.Expression]: 2839 return self._parse_tokens(self._parse_unary, 
self.FACTOR) 2840 2841 def _parse_unary(self) -> t.Optional[exp.Expression]: 2842 if self._match_set(self.UNARY_PARSERS): 2843 return self.UNARY_PARSERS[self._prev.token_type](self) 2844 return self._parse_at_time_zone(self._parse_type()) 2845 2846 def _parse_type(self) -> t.Optional[exp.Expression]: 2847 interval = self._parse_interval() 2848 if interval: 2849 return interval 2850 2851 index = self._index 2852 data_type = self._parse_types(check_func=True) 2853 this = self._parse_column() 2854 2855 if data_type: 2856 if isinstance(this, exp.Literal): 2857 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 2858 if parser: 2859 return parser(self, this, data_type) 2860 return self.expression(exp.Cast, this=this, to=data_type) 2861 if not data_type.expressions: 2862 self._retreat(index) 2863 return self._parse_column() 2864 return data_type 2865 2866 return this 2867 2868 def _parse_type_size(self) -> t.Optional[exp.Expression]: 2869 this = self._parse_type() 2870 if not this: 2871 return None 2872 2873 return self.expression( 2874 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 2875 ) 2876 2877 def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]: 2878 index = self._index 2879 2880 prefix = self._match_text_seq("SYSUDTLIB", ".") 2881 2882 if not self._match_set(self.TYPE_TOKENS): 2883 return None 2884 2885 type_token = self._prev.token_type 2886 2887 if type_token == TokenType.PSEUDO_TYPE: 2888 return self.expression(exp.PseudoType, this=self._prev.text) 2889 2890 nested = type_token in self.NESTED_TYPE_TOKENS 2891 is_struct = type_token == TokenType.STRUCT 2892 expressions = None 2893 maybe_func = False 2894 2895 if self._match(TokenType.L_PAREN): 2896 if is_struct: 2897 expressions = self._parse_csv(self._parse_struct_types) 2898 elif nested: 2899 expressions = self._parse_csv(self._parse_types) 2900 else: 2901 expressions = self._parse_csv(self._parse_type_size) 2902 2903 if not expressions or not 
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type expression (e.g. INT, ARRAY<INT>, DECIMAL(10, 2),
        TIMESTAMP WITH TIME ZONE, INT[]), or return None and rewind when the
        tokens turn out not to form a type.

        Args:
            check_func: when True, a type name followed by a string literal is
                treated as a possible function call rather than a type, and
                parsing is rolled back.
        """
        index = self._index

        # Teradata allows types to be qualified with the SYSUDTLIB schema.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all - rewind entirely.
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call spelled like a type.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Postgres-style array suffix: INT[], INT[][], ...
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone "[" means this was an index expression, not a type.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic-style nesting: ARRAY<INT>, STRUCT<a INT, ...>
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH/WITHOUT TIME ZONE variants onto dedicated types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Followed by a string literal, so this was a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: ``name [:] type [constraints]``."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AtTimeZone when an AT TIME ZONE clause follows."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
self._parse_star() 3027 or self._parse_function(anonymous=True) 3028 or self._parse_id_var() 3029 ) 3030 3031 if isinstance(field, exp.Func): 3032 # bigquery allows function calls like x.y.count(...) 3033 # SAFE.SUBSTR(...) 3034 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3035 this = self._replace_columns_with_dots(this) 3036 3037 if op: 3038 this = op(self, this, field) 3039 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3040 this = self.expression( 3041 exp.Column, 3042 this=field, 3043 table=this.this, 3044 db=this.args.get("table"), 3045 catalog=this.args.get("db"), 3046 ) 3047 else: 3048 this = self.expression(exp.Dot, this=this, expression=field) 3049 this = self._parse_bracket(this) 3050 3051 return this 3052 3053 def _parse_primary(self) -> t.Optional[exp.Expression]: 3054 if self._match_set(self.PRIMARY_PARSERS): 3055 token_type = self._prev.token_type 3056 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3057 3058 if token_type == TokenType.STRING: 3059 expressions = [primary] 3060 while self._match(TokenType.STRING): 3061 expressions.append(exp.Literal.string(self._prev.text)) 3062 if len(expressions) > 1: 3063 return self.expression(exp.Concat, expressions=expressions) 3064 return primary 3065 3066 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3067 return exp.Literal.number(f"0.{self._prev.text}") 3068 3069 if self._match(TokenType.L_PAREN): 3070 comments = self._prev_comments 3071 query = self._parse_select() 3072 3073 if query: 3074 expressions = [query] 3075 else: 3076 expressions = self._parse_csv(self._parse_expression) 3077 3078 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3079 3080 if isinstance(this, exp.Subqueryable): 3081 this = self._parse_set_operations( 3082 self._parse_subquery(this=this, parse_alias=False) 3083 ) 3084 elif len(expressions) > 1: 3085 this = self.expression(exp.Tuple, expressions=expressions) 3086 else: 3087 
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: optional override for the name -> builder mapping
                (defaults to ``self.FUNCTIONS``).
            anonymous: when True, skip the dedicated parsers/builders and always
                produce an ``exp.Anonymous`` call node.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No "(" follows, so this can only be a parenless builtin such as
            # CURRENT_DATE; anything else is not a function call.
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the "("

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) / ANY(WITH ...): wrap the subquery.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown (or forced-anonymous) function name.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a function/UDF definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Parse a national character string literal (N'...')."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally qualified as kind.name."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr or x -> expr), falling back to an
        ordinary expression (possibly DISTINCT-prefixed) when no lambda arrow
        follows the candidate parameter list."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all - rewind and parse as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if isinstance(this, exp.EQ):
            # Treat the LHS of "name = value" arguments as a variable, not a column.
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a schema: a parenthesized list of column defs / constraints.

        First speculatively tries to parse a nested SELECT; if that succeeds the
        caller's `this` is returned untouched. The token index is always rewound
        after the attempt (see the `finally`), so the speculation is side-effect
        free. NOTE(review): the broad `except Exception: pass` is deliberate
        best-effort backtracking here, not error handling.
        """
        index = self._index

        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints that follow a column name; returns the
        bare name unchanged when neither is present."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... parameters."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint, wrapped or single-expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY with its optional
        (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / [NO] CYCLE) options."""
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE LENGTH column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed constraints are delegated to
        ``_parse_unnamed_constraint``."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint keyword from `constraints` (default: all known ones)
        and dispatch to its registered parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, with or without a wrapped column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON <event> <action>,
        NOT ENFORCED, DEFERRABLE, ...) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse REFERENCES <table>[(<cols>)] plus its key-constraint options.

        Args:
            match: when True, require (and consume) the REFERENCES keyword.
        """
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (<cols>) [REFERENCES ...] [ON DELETE/UPDATE <action>]*."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, either as a column constraint (optionally ASC/DESC)
        or as a table constraint with a wrapped column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts and {...} struct literals, recursively, and
        apply the dialect's array index offset to subscripts."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Normalize the index to the internal 0-based convention.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice when a ":" follows (array slicing syntax)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either the function form IF(cond, t, f) or the statement
        form IF cond THEN t [ELSE f] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        # Parse EXTRACT(<part> FROM <expr>); some dialects use a comma instead of FROM.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects Cast vs TryCast. The comma
        form CAST(expr, 'type-string') becomes CastToStrType."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG (with optional DISTINCT, ORDER BY and WITHIN GROUP
        variants) into a GroupConcat node."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(a, b) into a (Try)Cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the remaining args in (search, result) pairs; a trailing unpaired
        # arg, if any, is the default (see the return below).
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search values may themselves be NULL at runtime, so
                # match either by equality or by both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse a JSON_OBJECT entry: [KEY] <key> [:|VALUE] <value>."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its NULL handling, key-uniqueness,
        RETURNING, FORMAT JSON and ENCODING modifiers."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG/LN; 2-arg order and 1-arg meaning are dialect-dependent
        (``LOG_BASE_FIRST`` / ``LOG_DEFAULTS_TO_LN``)."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL's MATCH(<cols>) AGAINST(<string> [search modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL's OPENJSON(expr [, path]) [WITH (<column defs>)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause column: <name> <type> [<path>] [AS JSON]
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style calls into StrPosition; handles both the
        `POSITION(needle IN haystack)` form and the comma-separated form, whose
        argument order depends on `haystack_first`."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into a JoinHint node."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(<chars> FROM <target>): the first expression was the char set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls when the modifier follows."""
        if self._match(TokenType.IGNORE_NULLS):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match(TokenType.RESPECT_NULLS):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse the window suffix of a function call: FILTER (...), WITHIN GROUP,
        IGNORE/RESPECT NULLS and OVER (...); `alias` switches to parsing a named
        window definition (`name AS (...)`) instead of an OVER clause."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame spec: ROWS|RANGE [BETWEEN] <start> [AND <end>]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame boundary: UNBOUNDED/CURRENT ROW or an expression,
        plus its PRECEDING/FOLLOWING side."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias for `this`; `explicit` requires the AS keyword.
        A parenthesized list becomes an Aliases node."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier or identifier-like variable.

        Args:
            any_token: accept any non-reserved token as the identifier.
            tokens: token types to accept instead of the default ID_VAR_TOKENS.
            prefix_tokens: token types whose text is glued onto the front of
                the identifier.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal and return it as a quoted identifier."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a number literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a Var from a VAR token, any non-reserved token (if `any_token`),
        or one of `tokens`; falls back to a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it's a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse either a Var or a string literal."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally brace-wrapped (e.g. @{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Try the registered placeholder parsers; rewinds one token if the
        matched parser declines to produce a node."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if
self._match(TokenType.L_PAREN, advance=False): 4075 return self._parse_wrapped_csv(self._parse_column) 4076 return self._parse_csv(self._parse_column) 4077 4078 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4079 if not self._match(TokenType.REPLACE): 4080 return None 4081 if self._match(TokenType.L_PAREN, advance=False): 4082 return self._parse_wrapped_csv(self._parse_expression) 4083 return self._parse_csv(self._parse_expression) 4084 4085 def _parse_csv( 4086 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4087 ) -> t.List[t.Optional[exp.Expression]]: 4088 parse_result = parse_method() 4089 items = [parse_result] if parse_result is not None else [] 4090 4091 while self._match(sep): 4092 self._add_comments(parse_result) 4093 parse_result = parse_method() 4094 if parse_result is not None: 4095 items.append(parse_result) 4096 4097 return items 4098 4099 def _parse_tokens( 4100 self, parse_method: t.Callable, expressions: t.Dict 4101 ) -> t.Optional[exp.Expression]: 4102 this = parse_method() 4103 4104 while self._match_set(expressions): 4105 this = self.expression( 4106 expressions[self._prev.token_type], 4107 this=this, 4108 comments=self._prev_comments, 4109 expression=parse_method(), 4110 ) 4111 4112 return this 4113 4114 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4115 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4116 4117 def _parse_wrapped_csv( 4118 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4119 ) -> t.List[t.Optional[exp.Expression]]: 4120 return self._parse_wrapped( 4121 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4122 ) 4123 4124 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4125 wrapped = self._match(TokenType.L_PAREN) 4126 if not wrapped and not optional: 4127 self.raise_error("Expecting (") 4128 parse_result = parse_method() 
4129 if wrapped: 4130 self._match_r_paren() 4131 return parse_result 4132 4133 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 4134 return self._parse_select() or self._parse_set_operations(self._parse_expression()) 4135 4136 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4137 return self._parse_set_operations( 4138 self._parse_select(nested=True, parse_subquery_alias=False) 4139 ) 4140 4141 def _parse_transaction(self) -> exp.Expression: 4142 this = None 4143 if self._match_texts(self.TRANSACTION_KIND): 4144 this = self._prev.text 4145 4146 self._match_texts({"TRANSACTION", "WORK"}) 4147 4148 modes = [] 4149 while True: 4150 mode = [] 4151 while self._match(TokenType.VAR): 4152 mode.append(self._prev.text) 4153 4154 if mode: 4155 modes.append(" ".join(mode)) 4156 if not self._match(TokenType.COMMA): 4157 break 4158 4159 return self.expression(exp.Transaction, this=this, modes=modes) 4160 4161 def _parse_commit_or_rollback(self) -> exp.Expression: 4162 chain = None 4163 savepoint = None 4164 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4165 4166 self._match_texts({"TRANSACTION", "WORK"}) 4167 4168 if self._match_text_seq("TO"): 4169 self._match_text_seq("SAVEPOINT") 4170 savepoint = self._parse_id_var() 4171 4172 if self._match(TokenType.AND): 4173 chain = not self._match_text_seq("NO") 4174 self._match_text_seq("CHAIN") 4175 4176 if is_rollback: 4177 return self.expression(exp.Rollback, savepoint=savepoint) 4178 return self.expression(exp.Commit, chain=chain) 4179 4180 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4181 if not self._match_text_seq("ADD"): 4182 return None 4183 4184 self._match(TokenType.COLUMN) 4185 exists_column = self._parse_exists(not_=True) 4186 expression = self._parse_column_def(self._parse_field(any_token=True)) 4187 4188 if expression: 4189 expression.set("exists", exists_column) 4190 4191 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 
4192 if self._match_texts(("FIRST", "AFTER")): 4193 position = self._prev.text 4194 column_position = self.expression( 4195 exp.ColumnPosition, this=self._parse_column(), position=position 4196 ) 4197 expression.set("position", column_position) 4198 4199 return expression 4200 4201 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4202 drop = self._match(TokenType.DROP) and self._parse_drop() 4203 if drop and not isinstance(drop, exp.Command): 4204 drop.set("kind", drop.args.get("kind", "COLUMN")) 4205 return drop 4206 4207 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4208 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4209 return self.expression( 4210 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4211 ) 4212 4213 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4214 this = None 4215 kind = self._prev.token_type 4216 4217 if kind == TokenType.CONSTRAINT: 4218 this = self._parse_id_var() 4219 4220 if self._match_text_seq("CHECK"): 4221 expression = self._parse_wrapped(self._parse_conjunction) 4222 enforced = self._match_text_seq("ENFORCED") 4223 4224 return self.expression( 4225 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4226 ) 4227 4228 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4229 expression = self._parse_foreign_key() 4230 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4231 expression = self._parse_primary_key() 4232 else: 4233 expression = None 4234 4235 return self.expression(exp.AddConstraint, this=this, expression=expression) 4236 4237 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4238 index = self._index - 1 4239 4240 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4241 return self._parse_csv(self._parse_add_constraint) 4242 4243 self._retreat(index) 4244 return self._parse_csv(self._parse_add_column) 4245 4246 def 
_parse_alter_table_alter(self) -> exp.Expression: 4247 self._match(TokenType.COLUMN) 4248 column = self._parse_field(any_token=True) 4249 4250 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4251 return self.expression(exp.AlterColumn, this=column, drop=True) 4252 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4253 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4254 4255 self._match_text_seq("SET", "DATA") 4256 return self.expression( 4257 exp.AlterColumn, 4258 this=column, 4259 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4260 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4261 using=self._match(TokenType.USING) and self._parse_conjunction(), 4262 ) 4263 4264 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4265 index = self._index - 1 4266 4267 partition_exists = self._parse_exists() 4268 if self._match(TokenType.PARTITION, advance=False): 4269 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4270 4271 self._retreat(index) 4272 return self._parse_csv(self._parse_drop_column) 4273 4274 def _parse_alter_table_rename(self) -> exp.Expression: 4275 self._match_text_seq("TO") 4276 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4277 4278 def _parse_alter(self) -> t.Optional[exp.Expression]: 4279 start = self._prev 4280 4281 if not self._match(TokenType.TABLE): 4282 return self._parse_as_command(start) 4283 4284 exists = self._parse_exists() 4285 this = self._parse_table(schema=True) 4286 4287 if self._next: 4288 self._advance() 4289 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4290 4291 if parser: 4292 actions = ensure_list(parser(self)) 4293 4294 if not self._curr: 4295 return self.expression( 4296 exp.AlterTable, 4297 this=this, 4298 exists=exists, 4299 actions=actions, 4300 ) 4301 return self._parse_as_command(start) 4302 4303 def _parse_merge(self) -> 
exp.Expression: 4304 self._match(TokenType.INTO) 4305 target = self._parse_table() 4306 4307 self._match(TokenType.USING) 4308 using = self._parse_table() 4309 4310 self._match(TokenType.ON) 4311 on = self._parse_conjunction() 4312 4313 whens = [] 4314 while self._match(TokenType.WHEN): 4315 matched = not self._match(TokenType.NOT) 4316 self._match_text_seq("MATCHED") 4317 source = ( 4318 False 4319 if self._match_text_seq("BY", "TARGET") 4320 else self._match_text_seq("BY", "SOURCE") 4321 ) 4322 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4323 4324 self._match(TokenType.THEN) 4325 4326 if self._match(TokenType.INSERT): 4327 _this = self._parse_star() 4328 if _this: 4329 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4330 else: 4331 then = self.expression( 4332 exp.Insert, 4333 this=self._parse_value(), 4334 expression=self._match(TokenType.VALUES) and self._parse_value(), 4335 ) 4336 elif self._match(TokenType.UPDATE): 4337 expressions = self._parse_star() 4338 if expressions: 4339 then = self.expression(exp.Update, expressions=expressions) 4340 else: 4341 then = self.expression( 4342 exp.Update, 4343 expressions=self._match(TokenType.SET) 4344 and self._parse_csv(self._parse_equality), 4345 ) 4346 elif self._match(TokenType.DELETE): 4347 then = self.expression(exp.Var, this=self._prev.text) 4348 else: 4349 then = None 4350 4351 whens.append( 4352 self.expression( 4353 exp.When, 4354 matched=matched, 4355 source=source, 4356 condition=condition, 4357 then=then, 4358 ) 4359 ) 4360 4361 return self.expression( 4362 exp.Merge, 4363 this=target, 4364 using=using, 4365 on=on, 4366 expressions=whens, 4367 ) 4368 4369 def _parse_show(self) -> t.Optional[exp.Expression]: 4370 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4371 if parser: 4372 return parser(self) 4373 self._advance() 4374 return self.expression(exp.Show, this=self._prev.text.upper()) 4375 4376 def 
_parse_set_item_assignment( 4377 self, kind: t.Optional[str] = None 4378 ) -> t.Optional[exp.Expression]: 4379 index = self._index 4380 4381 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4382 return self._parse_set_transaction(global_=kind == "GLOBAL") 4383 4384 left = self._parse_primary() or self._parse_id_var() 4385 4386 if not self._match_texts(("=", "TO")): 4387 self._retreat(index) 4388 return None 4389 4390 right = self._parse_statement() or self._parse_id_var() 4391 this = self.expression( 4392 exp.EQ, 4393 this=left, 4394 expression=right, 4395 ) 4396 4397 return self.expression( 4398 exp.SetItem, 4399 this=this, 4400 kind=kind, 4401 ) 4402 4403 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4404 self._match_text_seq("TRANSACTION") 4405 characteristics = self._parse_csv( 4406 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4407 ) 4408 return self.expression( 4409 exp.SetItem, 4410 expressions=characteristics, 4411 kind="TRANSACTION", 4412 **{"global": global_}, # type: ignore 4413 ) 4414 4415 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4416 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4417 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4418 4419 def _parse_set(self) -> exp.Expression: 4420 index = self._index 4421 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4422 4423 if self._curr: 4424 self._retreat(index) 4425 return self._parse_as_command(self._prev) 4426 4427 return set_ 4428 4429 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4430 for option in options: 4431 if self._match_text_seq(*option.split(" ")): 4432 return exp.Var(this=option) 4433 return None 4434 4435 def _parse_as_command(self, start: Token) -> exp.Command: 4436 while self._curr: 4437 self._advance() 4438 text = self._find_sql(start, self._prev) 4439 size 
= len(start.text) 4440 return exp.Command(this=text[:size], expression=text[size:]) 4441 4442 def _find_parser( 4443 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4444 ) -> t.Optional[t.Callable]: 4445 if not self._curr: 4446 return None 4447 4448 index = self._index 4449 this = [] 4450 while True: 4451 # The current token might be multiple words 4452 curr = self._curr.text.upper() 4453 key = curr.split(" ") 4454 this.append(curr) 4455 self._advance() 4456 result, trie = in_trie(trie, key) 4457 if result == 0: 4458 break 4459 if result == 2: 4460 subparser = parsers[" ".join(this)] 4461 return subparser 4462 self._retreat(index) 4463 return None 4464 4465 def _match(self, token_type, advance=True, expression=None): 4466 if not self._curr: 4467 return None 4468 4469 if self._curr.token_type == token_type: 4470 if advance: 4471 self._advance() 4472 self._add_comments(expression) 4473 return True 4474 4475 return None 4476 4477 def _match_set(self, types, advance=True): 4478 if not self._curr: 4479 return None 4480 4481 if self._curr.token_type in types: 4482 if advance: 4483 self._advance() 4484 return True 4485 4486 return None 4487 4488 def _match_pair(self, token_type_a, token_type_b, advance=True): 4489 if not self._curr or not self._next: 4490 return None 4491 4492 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4493 if advance: 4494 self._advance(2) 4495 return True 4496 4497 return None 4498 4499 def _match_l_paren(self, expression=None): 4500 if not self._match(TokenType.L_PAREN, expression=expression): 4501 self.raise_error("Expecting (") 4502 4503 def _match_r_paren(self, expression=None): 4504 if not self._match(TokenType.R_PAREN, expression=expression): 4505 self.raise_error("Expecting )") 4506 4507 def _match_texts(self, texts, advance=True): 4508 if self._curr and self._curr.text.upper() in texts: 4509 if advance: 4510 self._advance() 4511 return True 4512 return False 4513 4514 def _match_text_seq(self, 
*texts, advance=True): 4515 index = self._index 4516 for text in texts: 4517 if self._curr and self._curr.text.upper() == text: 4518 self._advance() 4519 else: 4520 self._retreat(index) 4521 return False 4522 4523 if not advance: 4524 self._retreat(index) 4525 4526 return True 4527 4528 def _replace_columns_with_dots(self, this): 4529 if isinstance(this, exp.Dot): 4530 exp.replace_children(this, self._replace_columns_with_dots) 4531 elif isinstance(this, exp.Column): 4532 exp.replace_children(this, self._replace_columns_with_dots) 4533 table = this.args.get("table") 4534 this = ( 4535 self.expression(exp.Dot, this=table, expression=this.this) 4536 if table 4537 else self.expression(exp.Var, this=this.name) 4538 ) 4539 elif isinstance(this, exp.Identifier): 4540 this = self.expression(exp.Var, this=this.name) 4541 return this 4542 4543 def _replace_lambda(self, node, lambda_variables): 4544 for column in node.find_all(exp.Column): 4545 if column.parts[0].name in lambda_variables: 4546 dot_or_id = column.to_dot() if column.table else column.this 4547 parent = column.parent 4548 4549 while isinstance(parent, exp.Dot): 4550 if not isinstance(parent.parent, exp.Dot): 4551 parent.replace(dot_or_id) 4552 break 4553 parent = parent.parent 4554 else: 4555 if column is node: 4556 node = dot_or_id 4557 else: 4558 column.replace(dot_or_id) 4559 return node
def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build an exp.VarMap from alternating key/value arguments.

    A single `*` argument produces an exp.StarMap instead. Arguments are
    consumed pairwise: even positions are keys, odd positions are values.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    index = 0
    while index < len(args):
        keys.append(args[index])
        values.append(args[index + 1])
        index += 2

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 63 Default: ErrorLevel.RAISE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 
TokenType.CURRENT_USER: exp.CurrentUser, 109 } 110 111 JOIN_HINTS: t.Set[str] = set() 112 113 NESTED_TYPE_TOKENS = { 114 TokenType.ARRAY, 115 TokenType.MAP, 116 TokenType.NULLABLE, 117 TokenType.STRUCT, 118 } 119 120 TYPE_TOKENS = { 121 TokenType.BIT, 122 TokenType.BOOLEAN, 123 TokenType.TINYINT, 124 TokenType.UTINYINT, 125 TokenType.SMALLINT, 126 TokenType.USMALLINT, 127 TokenType.INT, 128 TokenType.UINT, 129 TokenType.BIGINT, 130 TokenType.UBIGINT, 131 TokenType.INT128, 132 TokenType.UINT128, 133 TokenType.INT256, 134 TokenType.UINT256, 135 TokenType.FLOAT, 136 TokenType.DOUBLE, 137 TokenType.CHAR, 138 TokenType.NCHAR, 139 TokenType.VARCHAR, 140 TokenType.NVARCHAR, 141 TokenType.TEXT, 142 TokenType.MEDIUMTEXT, 143 TokenType.LONGTEXT, 144 TokenType.MEDIUMBLOB, 145 TokenType.LONGBLOB, 146 TokenType.BINARY, 147 TokenType.VARBINARY, 148 TokenType.JSON, 149 TokenType.JSONB, 150 TokenType.INTERVAL, 151 TokenType.TIME, 152 TokenType.TIMESTAMP, 153 TokenType.TIMESTAMPTZ, 154 TokenType.TIMESTAMPLTZ, 155 TokenType.DATETIME, 156 TokenType.DATETIME64, 157 TokenType.DATE, 158 TokenType.DECIMAL, 159 TokenType.BIGDECIMAL, 160 TokenType.UUID, 161 TokenType.GEOGRAPHY, 162 TokenType.GEOMETRY, 163 TokenType.HLLSKETCH, 164 TokenType.HSTORE, 165 TokenType.PSEUDO_TYPE, 166 TokenType.SUPER, 167 TokenType.SERIAL, 168 TokenType.SMALLSERIAL, 169 TokenType.BIGSERIAL, 170 TokenType.XML, 171 TokenType.UNIQUEIDENTIFIER, 172 TokenType.MONEY, 173 TokenType.SMALLMONEY, 174 TokenType.ROWVERSION, 175 TokenType.IMAGE, 176 TokenType.VARIANT, 177 TokenType.OBJECT, 178 TokenType.INET, 179 *NESTED_TYPE_TOKENS, 180 } 181 182 SUBQUERY_PREDICATES = { 183 TokenType.ANY: exp.Any, 184 TokenType.ALL: exp.All, 185 TokenType.EXISTS: exp.Exists, 186 TokenType.SOME: exp.Any, 187 } 188 189 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 190 191 DB_CREATABLES = { 192 TokenType.DATABASE, 193 TokenType.SCHEMA, 194 TokenType.TABLE, 195 TokenType.VIEW, 196 } 197 198 CREATABLES = { 199 
TokenType.COLUMN, 200 TokenType.FUNCTION, 201 TokenType.INDEX, 202 TokenType.PROCEDURE, 203 *DB_CREATABLES, 204 } 205 206 ID_VAR_TOKENS = { 207 TokenType.VAR, 208 TokenType.ANTI, 209 TokenType.APPLY, 210 TokenType.AUTO_INCREMENT, 211 TokenType.BEGIN, 212 TokenType.BOTH, 213 TokenType.BUCKET, 214 TokenType.CACHE, 215 TokenType.CASCADE, 216 TokenType.COLLATE, 217 TokenType.COMMAND, 218 TokenType.COMMENT, 219 TokenType.COMMIT, 220 TokenType.COMPOUND, 221 TokenType.CONSTRAINT, 222 TokenType.DEFAULT, 223 TokenType.DELETE, 224 TokenType.DESCRIBE, 225 TokenType.DIV, 226 TokenType.END, 227 TokenType.EXECUTE, 228 TokenType.ESCAPE, 229 TokenType.FALSE, 230 TokenType.FIRST, 231 TokenType.FILTER, 232 TokenType.FOLLOWING, 233 TokenType.FORMAT, 234 TokenType.FULL, 235 TokenType.IF, 236 TokenType.IS, 237 TokenType.ISNULL, 238 TokenType.INTERVAL, 239 TokenType.KEEP, 240 TokenType.LAZY, 241 TokenType.LEADING, 242 TokenType.LEFT, 243 TokenType.LOCAL, 244 TokenType.MATERIALIZED, 245 TokenType.MERGE, 246 TokenType.NATURAL, 247 TokenType.NEXT, 248 TokenType.OFFSET, 249 TokenType.ONLY, 250 TokenType.OPTIONS, 251 TokenType.ORDINALITY, 252 TokenType.OVERWRITE, 253 TokenType.PARTITION, 254 TokenType.PERCENT, 255 TokenType.PIVOT, 256 TokenType.PRAGMA, 257 TokenType.PRECEDING, 258 TokenType.RANGE, 259 TokenType.REFERENCES, 260 TokenType.RIGHT, 261 TokenType.ROW, 262 TokenType.ROWS, 263 TokenType.SEED, 264 TokenType.SEMI, 265 TokenType.SET, 266 TokenType.SETTINGS, 267 TokenType.SHOW, 268 TokenType.SORTKEY, 269 TokenType.TEMPORARY, 270 TokenType.TOP, 271 TokenType.TRAILING, 272 TokenType.TRUE, 273 TokenType.UNBOUNDED, 274 TokenType.UNIQUE, 275 TokenType.UNLOGGED, 276 TokenType.UNPIVOT, 277 TokenType.VOLATILE, 278 TokenType.WINDOW, 279 *CREATABLES, 280 *SUBQUERY_PREDICATES, 281 *TYPE_TOKENS, 282 *NO_PAREN_FUNCTIONS, 283 } 284 285 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 286 287 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 288 TokenType.APPLY, 289 TokenType.FULL, 290 TokenType.LEFT, 291 
TokenType.LOCK, 292 TokenType.NATURAL, 293 TokenType.OFFSET, 294 TokenType.RIGHT, 295 TokenType.WINDOW, 296 } 297 298 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 299 300 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 301 302 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 303 304 FUNC_TOKENS = { 305 TokenType.COMMAND, 306 TokenType.CURRENT_DATE, 307 TokenType.CURRENT_DATETIME, 308 TokenType.CURRENT_TIMESTAMP, 309 TokenType.CURRENT_TIME, 310 TokenType.CURRENT_USER, 311 TokenType.FILTER, 312 TokenType.FIRST, 313 TokenType.FORMAT, 314 TokenType.GLOB, 315 TokenType.IDENTIFIER, 316 TokenType.INDEX, 317 TokenType.ISNULL, 318 TokenType.ILIKE, 319 TokenType.LIKE, 320 TokenType.MERGE, 321 TokenType.OFFSET, 322 TokenType.PRIMARY_KEY, 323 TokenType.RANGE, 324 TokenType.REPLACE, 325 TokenType.ROW, 326 TokenType.UNNEST, 327 TokenType.VAR, 328 TokenType.LEFT, 329 TokenType.RIGHT, 330 TokenType.DATE, 331 TokenType.DATETIME, 332 TokenType.TABLE, 333 TokenType.TIMESTAMP, 334 TokenType.TIMESTAMPTZ, 335 TokenType.WINDOW, 336 *TYPE_TOKENS, 337 *SUBQUERY_PREDICATES, 338 } 339 340 CONJUNCTION = { 341 TokenType.AND: exp.And, 342 TokenType.OR: exp.Or, 343 } 344 345 EQUALITY = { 346 TokenType.EQ: exp.EQ, 347 TokenType.NEQ: exp.NEQ, 348 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 349 } 350 351 COMPARISON = { 352 TokenType.GT: exp.GT, 353 TokenType.GTE: exp.GTE, 354 TokenType.LT: exp.LT, 355 TokenType.LTE: exp.LTE, 356 } 357 358 BITWISE = { 359 TokenType.AMP: exp.BitwiseAnd, 360 TokenType.CARET: exp.BitwiseXor, 361 TokenType.PIPE: exp.BitwiseOr, 362 TokenType.DPIPE: exp.DPipe, 363 } 364 365 TERM = { 366 TokenType.DASH: exp.Sub, 367 TokenType.PLUS: exp.Add, 368 TokenType.MOD: exp.Mod, 369 TokenType.COLLATE: exp.Collate, 370 } 371 372 FACTOR = { 373 TokenType.DIV: exp.IntDiv, 374 TokenType.LR_ARROW: exp.Distance, 375 TokenType.SLASH: exp.Div, 376 TokenType.STAR: exp.Mul, 377 } 378 379 TIMESTAMPS = { 380 TokenType.TIME, 381 TokenType.TIMESTAMP, 
382 TokenType.TIMESTAMPTZ, 383 TokenType.TIMESTAMPLTZ, 384 } 385 386 SET_OPERATIONS = { 387 TokenType.UNION, 388 TokenType.INTERSECT, 389 TokenType.EXCEPT, 390 } 391 392 JOIN_SIDES = { 393 TokenType.LEFT, 394 TokenType.RIGHT, 395 TokenType.FULL, 396 } 397 398 JOIN_KINDS = { 399 TokenType.INNER, 400 TokenType.OUTER, 401 TokenType.CROSS, 402 TokenType.SEMI, 403 TokenType.ANTI, 404 } 405 406 LAMBDAS = { 407 TokenType.ARROW: lambda self, expressions: self.expression( 408 exp.Lambda, 409 this=self._replace_lambda( 410 self._parse_conjunction(), 411 {node.name for node in expressions}, 412 ), 413 expressions=expressions, 414 ), 415 TokenType.FARROW: lambda self, expressions: self.expression( 416 exp.Kwarg, 417 this=exp.Var(this=expressions[0].name), 418 expression=self._parse_conjunction(), 419 ), 420 } 421 422 COLUMN_OPERATORS = { 423 TokenType.DOT: None, 424 TokenType.DCOLON: lambda self, this, to: self.expression( 425 exp.Cast if self.STRICT_CAST else exp.TryCast, 426 this=this, 427 to=to, 428 ), 429 TokenType.ARROW: lambda self, this, path: self.expression( 430 exp.JSONExtract, 431 this=this, 432 expression=path, 433 ), 434 TokenType.DARROW: lambda self, this, path: self.expression( 435 exp.JSONExtractScalar, 436 this=this, 437 expression=path, 438 ), 439 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 440 exp.JSONBExtract, 441 this=this, 442 expression=path, 443 ), 444 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 445 exp.JSONBExtractScalar, 446 this=this, 447 expression=path, 448 ), 449 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 450 exp.JSONBContains, 451 this=this, 452 expression=key, 453 ), 454 } 455 456 EXPRESSION_PARSERS = { 457 exp.Column: lambda self: self._parse_column(), 458 exp.DataType: lambda self: self._parse_types(), 459 exp.From: lambda self: self._parse_from(), 460 exp.Group: lambda self: self._parse_group(), 461 exp.Identifier: lambda self: self._parse_id_var(), 462 exp.Lateral: lambda self: 
self._parse_lateral(), 463 exp.Join: lambda self: self._parse_join(), 464 exp.Order: lambda self: self._parse_order(), 465 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 466 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 467 exp.Lambda: lambda self: self._parse_lambda(), 468 exp.Limit: lambda self: self._parse_limit(), 469 exp.Offset: lambda self: self._parse_offset(), 470 exp.TableAlias: lambda self: self._parse_table_alias(), 471 exp.Table: lambda self: self._parse_table(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.Properties: lambda self: self._parse_properties(), 475 exp.Where: lambda self: self._parse_where(), 476 exp.Ordered: lambda self: self._parse_ordered(), 477 exp.Having: lambda self: self._parse_having(), 478 exp.With: lambda self: self._parse_with(), 479 exp.Window: lambda self: self._parse_named_window(), 480 exp.Qualify: lambda self: self._parse_qualify(), 481 exp.Returning: lambda self: self._parse_returning(), 482 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 483 } 484 485 STATEMENT_PARSERS = { 486 TokenType.ALTER: lambda self: self._parse_alter(), 487 TokenType.BEGIN: lambda self: self._parse_transaction(), 488 TokenType.CACHE: lambda self: self._parse_cache(), 489 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 490 TokenType.COMMENT: lambda self: self._parse_comment(), 491 TokenType.CREATE: lambda self: self._parse_create(), 492 TokenType.DELETE: lambda self: self._parse_delete(), 493 TokenType.DESC: lambda self: self._parse_describe(), 494 TokenType.DESCRIBE: lambda self: self._parse_describe(), 495 TokenType.DROP: lambda self: self._parse_drop(), 496 TokenType.END: lambda self: self._parse_commit_or_rollback(), 497 TokenType.INSERT: lambda self: self._parse_insert(), 498 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 499 TokenType.MERGE: lambda self: 
self._parse_merge(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            # kind is only set when one of these keywords follows USE; otherwise the
            # `and` short-circuits to a falsy value.
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.Var(this=self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    # Token that starts a unary expression -> parser producing that expression.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Primary (literal-like) token -> parser. Callbacks receive (self, token) so
    # they can read the matched token's text.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            # "except" is a Python keyword, hence the dict-splat instead of kwargs.
            **{"except": self._parse_except(), "replace": self._parse_replace()},
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    # Placeholder token -> parser. A COLON only forms a placeholder when it is
    # followed by a NUMBER or VAR token.
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operator token -> parser taking the left-hand side `this`.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keyword -> parser. Several entries (AFTER/BEFORE/JOURNAL/
    # FALLBACK/LOG/MERGEBLOCKRATIO/DATABLOCKSIZE) inspect self._prev to learn
    # which prefix keyword (NO/DUAL/DEFAULT) introduced them.
    PROPERTY_PARSERS = {
        "AFTER": lambda self: self._parse_afterjournal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BEFORE": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self.expression(
            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
        ),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
            default=self._prev.text.upper() == "DEFAULT"
        ),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self._parse_temporary(global_=True),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self: self._parse_journal(
            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
        ),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MAX": lambda self: self._parse_datablocksize(),
        "MAXIMUM": lambda self: self._parse_datablocksize(),
        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
        ),
        "MIN": lambda self: self._parse_datablocksize(),
        "MINIMUM": lambda self: self._parse_datablocksize(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_noprimaryindex(),
        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
        "ON": lambda self: self._parse_oncommit(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self._parse_temporary(global_=False),
        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parser producing the constraint expression.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        # A bare NULL is modeled as NotNull with allow_null=True.
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # Only ON UPDATE is handled; a lone ON yields a falsy result.
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraint kinds that may appear in a schema without an explicit name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs written without parentheses (e.g. CASE ... END).
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Function name -> bespoke argument parser, for functions whose argument
    # lists are not a plain comma-separated expression list.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier slot name -> parser for the corresponding trailing clause.
    # joins/laterals use iter(..., None) to collect repeated clauses until the
    # parser returns None.
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "locks": lambda self: self._parse_locks(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement scope keyword -> parser.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; subclasses may override to support SHOW statements.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Empty by default; subclasses may override to support typed literals.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that accept query modifiers (WHERE, LIMIT, ...).
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Behavior flags consulted by the parsers above.
    STRICT_CAST = True

    CONVERT_TYPE_FIRST = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        index_offset: int = 0,
        unnest_column_only: bool = False,
        alias_post_tablesample: bool = False,
        max_errors: int = 3,
        null_ordering: t.Optional[str] = None,
    ):
        # NOTE(review): the class docstring says the default error level is
        # ErrorLevel.RAISE, but this falls back to IMMEDIATE -- confirm intent.
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.index_offset = index_offset
        self.unnest_column_only = unnest_column_only
        self.alias_post_tablesample = alias_post_tablesample
        self.max_errors = max_errors
        self.null_ordering = null_ordering
        self.reset()

    def reset(self):
        """Clear all parsing state so this instance can parse a fresh token list."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The list of syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: the expression type(s) to try and parse the token list into.
            raw_tokens: the list of tokens.
            sql: the original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_collection(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")
            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Tag each failure with the type we were trying so the merged
                # error report is attributable.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)
        raise ParseError(
            f"Failed to parse into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Split the token stream on semicolons and run `parse_method` per statement."""
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # One chunk per statement; a trailing semicolon does not open a new
        # (empty) chunk.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Anything left unconsumed means the statement did not fully parse.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """
        Logs or raises any found errors, depending on the chosen error level setting.
        """
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: the expression class to instantiate.
            comments: an optional list of comments to attach to the expression.
            kwargs: the arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach the explicit comments when given, otherwise drain any comments
        # buffered from the previously consumed token.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        self.validate_expression(instance)
        return instance

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Move buffered token comments onto `expression` and clear the buffer.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(
        self, expression: exp.Expression, args: t.Optional[t.List] = None
    ) -> None:
        """
        Validates an already instantiated expression, making sure that all its mandatory arguments
        are set.

        Args:
            expression: the expression to validate.
            args: an optional list of items that was used to instantiate the expression, if it's a Func.
        """
        if self.error_level == ErrorLevel.IGNORE:
            return

        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of the original SQL spanned by `start` and `end` tokens."""
        return self.sql[start.start : end.end]

    def _advance(self, times: int = 1) -> None:
        """Move the cursor forward `times` tokens, updating _curr/_next/_prev."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)
        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Move the cursor back (or forward) to the absolute token position `index`."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap an unhandled statement as a generic Command expression.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse a COMMENT [IF EXISTS] ON <kind> <target> IS <string> statement."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev

        # Unknown target kind: preserve the raw statement as a Command.
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parse a ClickHouse MergeTree TTL clause (actions, WHERE, GROUP BY, SET)."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Each action is an expression optionally followed by a disposition.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse a single statement: dispatch tables first, then generic expressions."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse DROP; unknown targets fall back to a raw Command."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only if the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> t.Optional[exp.Expression]:
        """Parse CREATE [OR REPLACE] [UNIQUE] <kind> ... into an exp.Create."""
        start = self._prev
        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: consume TABLE so the FUNCTION token dispatches.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._match(TokenType.TABLE)

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)
            # Merge any properties parsed at this position into the running set.
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index()
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            if self._match(TokenType.COMMA):
                temp_properties = self._parse_properties(before=True)
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            temp_properties = self._parse_properties()
            if properties and temp_properties:
                properties.expressions.extend(temp_properties.expressions)
            elif temp_properties:
                properties = temp_properties

            self._match(TokenType.ALIAS)

            # exp.Properties.Location.POST_ALIAS
            if not (
                self._match(TokenType.SELECT, advance=False)
                or self._match(TokenType.WITH, advance=False)
                or self._match(TokenType.L_PAREN, advance=False)
            ):
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                temp_properties = self._parse_properties()
                if properties and temp_properties:
                    properties.expressions.extend(temp_properties.expressions)
                elif temp_properties:
                    properties = temp_properties

                indexes = []
                while True:
                    index = self._parse_create_table_index()

                    # exp.Properties.Location.POST_INDEX
                    if self._match(TokenType.PARTITION_BY, advance=False):
                        temp_properties = self._parse_properties()
                        if properties and temp_properties:
                            properties.expressions.extend(temp_properties.expressions)
                        elif temp_properties:
                            properties = temp_properties

                    if not index:
                        break
                    else:
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse one pre-schema (comma-separated) property, or None."""
        self._match(TokenType.COMMA)

        # parsers look to _prev for no/dual/default, so need to consume first
        self._match_text_seq("NO")
        self._match_text_seq("DUAL")
        self._match_text_seq("DEFAULT")

        # NOTE(review): self._curr is not checked for None here; at end of
        # input this raises AttributeError -- confirm callers guarantee a token.
        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse one post-schema property, or None if nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment.
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.Expression:
        """Parse STORED [AS] <format> or STORED AS INPUTFORMAT/OUTPUTFORMAT."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
        # Parse an optional `=` or alias keyword followed by a field value.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
        """Collect consecutive properties into one exp.Properties node, or None."""
        properties = []

        while True:
            if before:
                identified_property = self._parse_property_before()
            else:
                identified_property = self._parse_property()

            if not identified_property:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(identified_property):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no=False) -> exp.Expression:
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.Expression:
        # VOLATILE directly after CREATE/REPLACE/UNIQUE is a table property;
        # elsewhere it is treated as a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in (
            TokenType.CREATE,
            TokenType.REPLACE,
            TokenType.UNIQUE,
        ):
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
        """Parse the various WITH-prefixed properties (parenthesized list, JOURNAL, DATA, ...)."""
        self._match(TokenType.WITH)
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parse DEFINER = user@host into a DefinerProperty, or None if incomplete."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        """Parse CHECKSUM [=] ON|OFF|DEFAULT; `on` stays None when unspecified."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        """Parse MERGEBLOCKRATIO, either with an explicit `= <number>` or as NO/DEFAULT."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse [DEFAULT|MIN|MAX] DATABLOCKSIZE [= <size> [units]]."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        """Parse BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT [AUTOTEMP(...)]."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        no = self._match_text_seq("NO")
concurrent = self._match_text_seq("CONCURRENT") 1502 self._match_text_seq("ISOLATED", "LOADING") 1503 for_all = self._match_text_seq("FOR", "ALL") 1504 for_insert = self._match_text_seq("FOR", "INSERT") 1505 for_none = self._match_text_seq("FOR", "NONE") 1506 return self.expression( 1507 exp.IsolatedLoadingProperty, 1508 no=no, 1509 concurrent=concurrent, 1510 for_all=for_all, 1511 for_insert=for_insert, 1512 for_none=for_none, 1513 ) 1514 1515 def _parse_locking(self) -> exp.Expression: 1516 if self._match(TokenType.TABLE): 1517 kind = "TABLE" 1518 elif self._match(TokenType.VIEW): 1519 kind = "VIEW" 1520 elif self._match(TokenType.ROW): 1521 kind = "ROW" 1522 elif self._match_text_seq("DATABASE"): 1523 kind = "DATABASE" 1524 else: 1525 kind = None 1526 1527 if kind in ("DATABASE", "TABLE", "VIEW"): 1528 this = self._parse_table_parts() 1529 else: 1530 this = None 1531 1532 if self._match(TokenType.FOR): 1533 for_or_in = "FOR" 1534 elif self._match(TokenType.IN): 1535 for_or_in = "IN" 1536 else: 1537 for_or_in = None 1538 1539 if self._match_text_seq("ACCESS"): 1540 lock_type = "ACCESS" 1541 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1542 lock_type = "EXCLUSIVE" 1543 elif self._match_text_seq("SHARE"): 1544 lock_type = "SHARE" 1545 elif self._match_text_seq("READ"): 1546 lock_type = "READ" 1547 elif self._match_text_seq("WRITE"): 1548 lock_type = "WRITE" 1549 elif self._match_text_seq("CHECKSUM"): 1550 lock_type = "CHECKSUM" 1551 else: 1552 lock_type = None 1553 1554 override = self._match_text_seq("OVERRIDE") 1555 1556 return self.expression( 1557 exp.LockingProperty, 1558 this=this, 1559 kind=kind, 1560 for_or_in=for_or_in, 1561 lock_type=lock_type, 1562 override=override, 1563 ) 1564 1565 def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]: 1566 if self._match(TokenType.PARTITION_BY): 1567 return self._parse_csv(self._parse_conjunction) 1568 return [] 1569 1570 def _parse_partitioned_by(self) -> exp.Expression: 1571 self._match(TokenType.EQ) 
1572 return self.expression( 1573 exp.PartitionedByProperty, 1574 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1575 ) 1576 1577 def _parse_withdata(self, no=False) -> exp.Expression: 1578 if self._match_text_seq("AND", "STATISTICS"): 1579 statistics = True 1580 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1581 statistics = False 1582 else: 1583 statistics = None 1584 1585 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1586 1587 def _parse_noprimaryindex(self) -> exp.Expression: 1588 self._match_text_seq("PRIMARY", "INDEX") 1589 return exp.NoPrimaryIndexProperty() 1590 1591 def _parse_oncommit(self) -> exp.Expression: 1592 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1593 return exp.OnCommitProperty() 1594 1595 def _parse_distkey(self) -> exp.Expression: 1596 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1597 1598 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1599 table = self._parse_table(schema=True) 1600 options = [] 1601 while self._match_texts(("INCLUDING", "EXCLUDING")): 1602 this = self._prev.text.upper() 1603 id_var = self._parse_id_var() 1604 1605 if not id_var: 1606 return None 1607 1608 options.append( 1609 self.expression( 1610 exp.Property, 1611 this=this, 1612 value=exp.Var(this=id_var.this.upper()), 1613 ) 1614 ) 1615 return self.expression(exp.LikeProperty, this=table, expressions=options) 1616 1617 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1618 return self.expression( 1619 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1620 ) 1621 1622 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1623 self._match(TokenType.EQ) 1624 return self.expression( 1625 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1626 ) 1627 1628 def _parse_returns(self) -> exp.Expression: 1629 value: t.Optional[exp.Expression] 1630 is_table 
= self._match(TokenType.TABLE) 1631 1632 if is_table: 1633 if self._match(TokenType.LT): 1634 value = self.expression( 1635 exp.Schema, 1636 this="TABLE", 1637 expressions=self._parse_csv(self._parse_struct_types), 1638 ) 1639 if not self._match(TokenType.GT): 1640 self.raise_error("Expecting >") 1641 else: 1642 value = self._parse_schema(exp.Var(this="TABLE")) 1643 else: 1644 value = self._parse_types() 1645 1646 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1647 1648 def _parse_temporary(self, global_=False) -> exp.Expression: 1649 self._match(TokenType.TEMPORARY) # in case calling from "GLOBAL" 1650 return self.expression(exp.TemporaryProperty, global_=global_) 1651 1652 def _parse_describe(self) -> exp.Expression: 1653 kind = self._match_set(self.CREATABLES) and self._prev.text 1654 this = self._parse_table() 1655 1656 return self.expression(exp.Describe, this=this, kind=kind) 1657 1658 def _parse_insert(self) -> exp.Expression: 1659 overwrite = self._match(TokenType.OVERWRITE) 1660 local = self._match(TokenType.LOCAL) 1661 alternative = None 1662 1663 if self._match_text_seq("DIRECTORY"): 1664 this: t.Optional[exp.Expression] = self.expression( 1665 exp.Directory, 1666 this=self._parse_var_or_string(), 1667 local=local, 1668 row_format=self._parse_row_format(match_row=True), 1669 ) 1670 else: 1671 if self._match(TokenType.OR): 1672 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 1673 1674 self._match(TokenType.INTO) 1675 self._match(TokenType.TABLE) 1676 this = self._parse_table(schema=True) 1677 1678 return self.expression( 1679 exp.Insert, 1680 this=this, 1681 exists=self._parse_exists(), 1682 partition=self._parse_partition(), 1683 expression=self._parse_ddl_select(), 1684 conflict=self._parse_on_conflict(), 1685 returning=self._parse_returning(), 1686 overwrite=overwrite, 1687 alternative=alternative, 1688 ) 1689 1690 def _parse_on_conflict(self) -> t.Optional[exp.Expression]: 1691 conflict = 
self._match_text_seq("ON", "CONFLICT") 1692 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 1693 1694 if not (conflict or duplicate): 1695 return None 1696 1697 nothing = None 1698 expressions = None 1699 key = None 1700 constraint = None 1701 1702 if conflict: 1703 if self._match_text_seq("ON", "CONSTRAINT"): 1704 constraint = self._parse_id_var() 1705 else: 1706 key = self._parse_csv(self._parse_value) 1707 1708 self._match_text_seq("DO") 1709 if self._match_text_seq("NOTHING"): 1710 nothing = True 1711 else: 1712 self._match(TokenType.UPDATE) 1713 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 1714 1715 return self.expression( 1716 exp.OnConflict, 1717 duplicate=duplicate, 1718 expressions=expressions, 1719 nothing=nothing, 1720 key=key, 1721 constraint=constraint, 1722 ) 1723 1724 def _parse_returning(self) -> t.Optional[exp.Expression]: 1725 if not self._match(TokenType.RETURNING): 1726 return None 1727 1728 return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column)) 1729 1730 def _parse_row(self) -> t.Optional[exp.Expression]: 1731 if not self._match(TokenType.FORMAT): 1732 return None 1733 return self._parse_row_format() 1734 1735 def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]: 1736 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 1737 return None 1738 1739 if self._match_text_seq("SERDE"): 1740 return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string()) 1741 1742 self._match_text_seq("DELIMITED") 1743 1744 kwargs = {} 1745 1746 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 1747 kwargs["fields"] = self._parse_string() 1748 if self._match_text_seq("ESCAPED", "BY"): 1749 kwargs["escaped"] = self._parse_string() 1750 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 1751 kwargs["collection_items"] = self._parse_string() 1752 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 
1753 kwargs["map_keys"] = self._parse_string() 1754 if self._match_text_seq("LINES", "TERMINATED", "BY"): 1755 kwargs["lines"] = self._parse_string() 1756 if self._match_text_seq("NULL", "DEFINED", "AS"): 1757 kwargs["null"] = self._parse_string() 1758 1759 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 1760 1761 def _parse_load_data(self) -> exp.Expression: 1762 local = self._match(TokenType.LOCAL) 1763 self._match_text_seq("INPATH") 1764 inpath = self._parse_string() 1765 overwrite = self._match(TokenType.OVERWRITE) 1766 self._match_pair(TokenType.INTO, TokenType.TABLE) 1767 1768 return self.expression( 1769 exp.LoadData, 1770 this=self._parse_table(schema=True), 1771 local=local, 1772 overwrite=overwrite, 1773 inpath=inpath, 1774 partition=self._parse_partition(), 1775 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 1776 serde=self._match_text_seq("SERDE") and self._parse_string(), 1777 ) 1778 1779 def _parse_delete(self) -> exp.Expression: 1780 self._match(TokenType.FROM) 1781 1782 return self.expression( 1783 exp.Delete, 1784 this=self._parse_table(), 1785 using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()), 1786 where=self._parse_where(), 1787 returning=self._parse_returning(), 1788 ) 1789 1790 def _parse_update(self) -> exp.Expression: 1791 return self.expression( 1792 exp.Update, 1793 **{ # type: ignore 1794 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1795 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1796 "from": self._parse_from(modifiers=True), 1797 "where": self._parse_where(), 1798 "returning": self._parse_returning(), 1799 }, 1800 ) 1801 1802 def _parse_uncache(self) -> exp.Expression: 1803 if not self._match(TokenType.TABLE): 1804 self.raise_error("Expecting TABLE after UNCACHE") 1805 1806 return self.expression( 1807 exp.Uncache, 1808 exists=self._parse_exists(), 1809 
this=self._parse_table(schema=True), 1810 ) 1811 1812 def _parse_cache(self) -> exp.Expression: 1813 lazy = self._match(TokenType.LAZY) 1814 self._match(TokenType.TABLE) 1815 table = self._parse_table(schema=True) 1816 options = [] 1817 1818 if self._match(TokenType.OPTIONS): 1819 self._match_l_paren() 1820 k = self._parse_string() 1821 self._match(TokenType.EQ) 1822 v = self._parse_string() 1823 options = [k, v] 1824 self._match_r_paren() 1825 1826 self._match(TokenType.ALIAS) 1827 return self.expression( 1828 exp.Cache, 1829 this=table, 1830 lazy=lazy, 1831 options=options, 1832 expression=self._parse_select(nested=True), 1833 ) 1834 1835 def _parse_partition(self) -> t.Optional[exp.Expression]: 1836 if not self._match(TokenType.PARTITION): 1837 return None 1838 1839 return self.expression( 1840 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1841 ) 1842 1843 def _parse_value(self) -> exp.Expression: 1844 if self._match(TokenType.L_PAREN): 1845 expressions = self._parse_csv(self._parse_conjunction) 1846 self._match_r_paren() 1847 return self.expression(exp.Tuple, expressions=expressions) 1848 1849 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a CTE-prefixed statement, a plain SELECT,
        a parenthesized subquery/table, or a VALUES clause.

        Args:
            nested: allow a parenthesized nested select.
            table: allow (and prefer) a parenthesized table expression.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression wrapped in any trailing set operations, or
            None if nothing matched.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                # NOTE(review): raise_error may not raise at lenient error
                # levels, in which case the bare CTE is returned — confirm.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE style projections.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
self._retreat(index) 1979 else: 1980 columns = None 1981 1982 if not alias and not columns: 1983 return None 1984 1985 return self.expression(exp.TableAlias, this=alias, columns=columns) 1986 1987 def _parse_subquery( 1988 self, this: t.Optional[exp.Expression], parse_alias: bool = True 1989 ) -> exp.Expression: 1990 return self.expression( 1991 exp.Subquery, 1992 this=this, 1993 pivots=self._parse_pivots(), 1994 alias=self._parse_table_alias() if parse_alias else None, 1995 ) 1996 1997 def _parse_query_modifiers( 1998 self, this: t.Optional[exp.Expression] 1999 ) -> t.Optional[exp.Expression]: 2000 if isinstance(this, self.MODIFIABLES): 2001 for key, parser in self.QUERY_MODIFIER_PARSERS.items(): 2002 expression = parser(self) 2003 2004 if expression: 2005 this.set(key, expression) 2006 return this 2007 2008 def _parse_hint(self) -> t.Optional[exp.Expression]: 2009 if self._match(TokenType.HINT): 2010 hints = self._parse_csv(self._parse_function) 2011 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2012 self.raise_error("Expected */ after HINT") 2013 return self.expression(exp.Hint, expressions=hints) 2014 2015 return None 2016 2017 def _parse_into(self) -> t.Optional[exp.Expression]: 2018 if not self._match(TokenType.INTO): 2019 return None 2020 2021 temp = self._match(TokenType.TEMPORARY) 2022 unlogged = self._match(TokenType.UNLOGGED) 2023 self._match(TokenType.TABLE) 2024 2025 return self.expression( 2026 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2027 ) 2028 2029 def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]: 2030 if not self._match(TokenType.FROM): 2031 return None 2032 2033 comments = self._prev_comments 2034 this = self._parse_table() 2035 2036 return self.expression( 2037 exp.From, 2038 comments=comments, 2039 this=self._parse_query_modifiers(this) if modifiers else this, 2040 ) 2041 2042 def _parse_match_recognize(self) -> t.Optional[exp.Expression]: 2043 if not 
self._match(TokenType.MATCH_RECOGNIZE): 2044 return None 2045 2046 self._match_l_paren() 2047 2048 partition = self._parse_partition_by() 2049 order = self._parse_order() 2050 measures = ( 2051 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2052 ) 2053 2054 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2055 rows = exp.Var(this="ONE ROW PER MATCH") 2056 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2057 text = "ALL ROWS PER MATCH" 2058 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2059 text += f" SHOW EMPTY MATCHES" 2060 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2061 text += f" OMIT EMPTY MATCHES" 2062 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2063 text += f" WITH UNMATCHED ROWS" 2064 rows = exp.Var(this=text) 2065 else: 2066 rows = None 2067 2068 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2069 text = "AFTER MATCH SKIP" 2070 if self._match_text_seq("PAST", "LAST", "ROW"): 2071 text += f" PAST LAST ROW" 2072 elif self._match_text_seq("TO", "NEXT", "ROW"): 2073 text += f" TO NEXT ROW" 2074 elif self._match_text_seq("TO", "FIRST"): 2075 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2076 elif self._match_text_seq("TO", "LAST"): 2077 text += f" TO LAST {self._advance_any().text}" # type: ignore 2078 after = exp.Var(this=text) 2079 else: 2080 after = None 2081 2082 if self._match_text_seq("PATTERN"): 2083 self._match_l_paren() 2084 2085 if not self._curr: 2086 self.raise_error("Expecting )", self._curr) 2087 2088 paren = 1 2089 start = self._curr 2090 2091 while self._curr and paren > 0: 2092 if self._curr.token_type == TokenType.L_PAREN: 2093 paren += 1 2094 if self._curr.token_type == TokenType.R_PAREN: 2095 paren -= 1 2096 end = self._prev 2097 self._advance() 2098 if paren > 0: 2099 self.raise_error("Expecting )", self._curr) 2100 pattern = exp.Var(this=self._find_sql(start, end)) 2101 else: 2102 pattern = None 2103 2104 define = ( 2105 
self._parse_csv( 2106 lambda: self.expression( 2107 exp.Alias, 2108 alias=self._parse_id_var(any_token=True), 2109 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2110 ) 2111 ) 2112 if self._match_text_seq("DEFINE") 2113 else None 2114 ) 2115 2116 self._match_r_paren() 2117 2118 return self.expression( 2119 exp.MatchRecognize, 2120 partition_by=partition, 2121 order=order, 2122 measures=measures, 2123 rows=rows, 2124 after=after, 2125 pattern=pattern, 2126 define=define, 2127 alias=self._parse_table_alias(), 2128 ) 2129 2130 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2131 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2132 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2133 2134 if outer_apply or cross_apply: 2135 this = self._parse_select(table=True) 2136 view = None 2137 outer = not cross_apply 2138 elif self._match(TokenType.LATERAL): 2139 this = self._parse_select(table=True) 2140 view = self._match(TokenType.VIEW) 2141 outer = self._match(TokenType.OUTER) 2142 else: 2143 return None 2144 2145 if not this: 2146 this = self._parse_function() or self._parse_id_var(any_token=False) 2147 while self._match(TokenType.DOT): 2148 this = exp.Dot( 2149 this=this, 2150 expression=self._parse_function() or self._parse_id_var(any_token=False), 2151 ) 2152 2153 table_alias: t.Optional[exp.Expression] 2154 2155 if view: 2156 table = self._parse_id_var(any_token=False) 2157 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2158 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2159 else: 2160 table_alias = self._parse_table_alias() 2161 2162 expression = self.expression( 2163 exp.Lateral, 2164 this=this, 2165 view=view, 2166 outer=outer, 2167 alias=table_alias, 2168 ) 2169 2170 return expression 2171 2172 def _parse_join_side_and_kind( 2173 self, 2174 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2175 return ( 2176 
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a join clause: comma joins, NATURAL/side/kind prefixes,
        optional join hints, OUTER/CROSS APPLY, and ON / USING conditions.

        Args:
            skip_join_token: treat the upcoming tokens as a join even without
                an explicit JOIN keyword.

        Returns:
            An exp.Join expression, or None when no join is present.
        """
        # A bare comma is an implicit join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The NATURAL/side/kind tokens were not part of a join after all:
            # rewind the token stream and discard them.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is modeled as a LEFT join downstream.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
t.Optional[exp.Expression]: 2238 unique = self._match(TokenType.UNIQUE) 2239 primary = self._match_text_seq("PRIMARY") 2240 amp = self._match_text_seq("AMP") 2241 if not self._match(TokenType.INDEX): 2242 return None 2243 index = self._parse_id_var() 2244 columns = None 2245 if self._match(TokenType.L_PAREN, advance=False): 2246 columns = self._parse_wrapped_csv(self._parse_column) 2247 return self.expression( 2248 exp.Index, 2249 this=index, 2250 columns=columns, 2251 unique=unique, 2252 primary=primary, 2253 amp=amp, 2254 ) 2255 2256 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2257 return ( 2258 (not schema and self._parse_function()) 2259 or self._parse_id_var(any_token=False) 2260 or self._parse_string_as_identifier() 2261 or self._parse_placeholder() 2262 ) 2263 2264 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2265 catalog = None 2266 db = None 2267 table = self._parse_table_part(schema=schema) 2268 2269 while self._match(TokenType.DOT): 2270 if catalog: 2271 # This allows nesting the table in arbitrarily many dot expressions if needed 2272 table = self.expression( 2273 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2274 ) 2275 else: 2276 catalog = db 2277 db = table 2278 table = self._parse_table_part(schema=schema) 2279 2280 if not table: 2281 self.raise_error(f"Expected table name but got {self._curr}") 2282 2283 return self.expression( 2284 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2285 ) 2286 2287 def _parse_table( 2288 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2289 ) -> t.Optional[exp.Expression]: 2290 lateral = self._parse_lateral() 2291 if lateral: 2292 return lateral 2293 2294 unnest = self._parse_unnest() 2295 if unnest: 2296 return unnest 2297 2298 values = self._parse_derived_table_values() 2299 if values: 2300 return values 2301 2302 subquery = self._parse_select(table=True) 2303 if 
subquery: 2304 if not subquery.args.get("pivots"): 2305 subquery.set("pivots", self._parse_pivots()) 2306 return subquery 2307 2308 this = self._parse_table_parts(schema=schema) 2309 2310 if schema: 2311 return self._parse_schema(this=this) 2312 2313 if self.alias_post_tablesample: 2314 table_sample = self._parse_table_sample() 2315 2316 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2317 if alias: 2318 this.set("alias", alias) 2319 2320 if not this.args.get("pivots"): 2321 this.set("pivots", self._parse_pivots()) 2322 2323 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2324 this.set( 2325 "hints", 2326 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2327 ) 2328 self._match_r_paren() 2329 2330 if not self.alias_post_tablesample: 2331 table_sample = self._parse_table_sample() 2332 2333 if table_sample: 2334 table_sample.set("this", this) 2335 this = table_sample 2336 2337 return this 2338 2339 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2340 if not self._match(TokenType.UNNEST): 2341 return None 2342 2343 expressions = self._parse_wrapped_csv(self._parse_type) 2344 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2345 alias = self._parse_table_alias() 2346 2347 if alias and self.unnest_column_only: 2348 if alias.args.get("columns"): 2349 self.raise_error("Unexpected extra column alias in unnest.") 2350 alias.set("columns", [alias.this]) 2351 alias.set("this", None) 2352 2353 offset = None 2354 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2355 self._match(TokenType.ALIAS) 2356 offset = self._parse_id_var() or exp.Identifier(this="offset") 2357 2358 return self.expression( 2359 exp.Unnest, 2360 expressions=expressions, 2361 ordinality=ordinality, 2362 alias=alias, 2363 offset=offset, 2364 ) 2365 2366 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2367 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2368 if 
not is_derived and not self._match(TokenType.VALUES): 2369 return None 2370 2371 expressions = self._parse_csv(self._parse_value) 2372 2373 if is_derived: 2374 self._match_r_paren() 2375 2376 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2377 2378 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2379 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2380 as_modifier and self._match_text_seq("USING", "SAMPLE") 2381 ): 2382 return None 2383 2384 bucket_numerator = None 2385 bucket_denominator = None 2386 bucket_field = None 2387 percent = None 2388 rows = None 2389 size = None 2390 seed = None 2391 2392 kind = ( 2393 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2394 ) 2395 method = self._parse_var(tokens=(TokenType.ROW,)) 2396 2397 self._match(TokenType.L_PAREN) 2398 2399 num = self._parse_number() 2400 2401 if self._match(TokenType.BUCKET): 2402 bucket_numerator = self._parse_number() 2403 self._match(TokenType.OUT_OF) 2404 bucket_denominator = bucket_denominator = self._parse_number() 2405 self._match(TokenType.ON) 2406 bucket_field = self._parse_field() 2407 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2408 percent = num 2409 elif self._match(TokenType.ROWS): 2410 rows = num 2411 else: 2412 size = num 2413 2414 self._match(TokenType.R_PAREN) 2415 2416 if self._match(TokenType.L_PAREN): 2417 method = self._parse_var() 2418 seed = self._match(TokenType.COMMA) and self._parse_number() 2419 self._match_r_paren() 2420 elif self._match_texts(("SEED", "REPEATABLE")): 2421 seed = self._parse_wrapped(self._parse_number) 2422 2423 return self.expression( 2424 exp.TableSample, 2425 method=method, 2426 bucket_numerator=bucket_numerator, 2427 bucket_denominator=bucket_denominator, 2428 bucket_field=bucket_field, 2429 percent=percent, 2430 rows=rows, 2431 size=size, 2432 seed=seed, 2433 kind=kind, 2434 ) 2435 2436 def 
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a PIVOT or UNPIVOT clause following a table expression.

        Returns:
            An exp.Pivot node, or None (after rewinding the token stream)
            when the PIVOT/UNPIVOT keyword is not followed by ``(``.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause; undo the keyword match.
            self._retreat(index)
            return None

        # UNPIVOT lists plain columns; PIVOT lists (optionally aliased)
        # aggregation calls.
        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot of a chain may carry an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Synthesize the output column names implied by the pivot by
            # combining each aggregation name with each IN-field value; the
            # dialect flags control ordering of the combined name.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause; None if the WHERE keyword is absent (unless skipped)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and WITH TOTALS.

        Loops so that expression lists and grouping constructs may be interleaved;
        stops once an iteration matches none of the grouping constructs.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE use a bare flag; otherwise a column list follows.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( ... ); None when the keyword is absent."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized tuple of columns or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause; None if the HAVING keyword is absent (unless skipped)."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause; None if the QUALIFY keyword is absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged if ORDER BY is absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse a sort-like clause introduced by `token_type` (e.g. SORT BY/CLUSTER BY)."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY term with optional ASC/DESC and NULLS FIRST/LAST.

        When null ordering isn't explicit, `nulls_first` is derived from the
        parser's `null_ordering` setting so transpilation can stay explicit.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    # Parses LIMIT/TOP or FETCH; returns `this` unchanged when neither is present.
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) ->
t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # TOP takes a plain number; LIMIT accepts a full term, optionally parenthesized.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            # ANSI FETCH { FIRST | NEXT } n [PERCENT] { ROW | ROWS } { ONLY | WITH TIES }
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS] (COMMA form included); `this` unchanged otherwise."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Expression]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        each with optional OF <tables> and NOWAIT / WAIT n / SKIP LOCKED."""
        # Lists are invariant, so we need to use a type hint here
        locks: t.List[exp.Expression] = []

        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes three states: True=NOWAIT, expression=WAIT n, False=SKIP LOCKED
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains, recursing on the right-hand side."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full expression, including a trailing alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        # Precedence chain: conjunction -> equality -> comparison -> range -> bitwise ...
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), NOT-negation,
        and the ISNULL/NOTNULL/IS forms."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] {DISTINCT FROM | NULL | TRUE | FALSE};
        retreats and returns None when nothing recognizable follows IS."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right-hand side of IN: UNNEST(...), a (sub)query or value list,
        or a bare field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A single subqueryable becomes `query`, otherwise keep the value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse BETWEEN low AND high."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def
_parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause when the ESCAPE keyword follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal; None when the INTERVAL keyword is absent."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse left-associative bitwise operators, plus << and >> spelled as LT LT / GT GT."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator (via UNARY_PARSERS) or fall through to a typed operand."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL, a cast of the form `<type> <literal/column>`, or a plain column.

        Retreats when what looked like a type (without parenthesized args) is better
        re-parsed as a column reference.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse one sized type argument, e.g. the `30` in VARCHAR(30) (with optional unit)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type into `exp.DataType`.

        With `check_func=True`, ambiguous names followed by a string literal are
        rejected (retreat + None) so they can be re-parsed as function calls.
        """
        index = self._index

        # Teradata-specific schema prefix for UDTs.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        # Postgres-style array suffix: TYPE[] (possibly repeated for nesting).
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        # A lone `[` means this wasn't a type after all (e.g. an index expression).
        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Canonicalize WITH/WITHOUT TIME ZONE variants onto specific types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value =
exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # A string literal right after a parenthesized "type" means it was
            # actually a function call, e.g. DATE('...') — reject as a type.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `name type` or `name: type` as a column def."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE when the phrase follows."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted/bracketed/cast) column reference.

        Successive COLUMN_OPERATORS (e.g. `.`, `::`) are folded in; dotted parts
        shift table/db/catalog one position each.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` — the right-hand side must be a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: what was the column becomes the table, etc.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, implicit string concat, `.N` float
        shorthand, or a parenthesized expression/tuple/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate (SQL standard behavior).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(self._parse_expression)

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary literal, function call, or identifier (in that order)."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Handles paren-less functions, dedicated FUNCTION_PARSERS, subquery
        predicates (EXISTS/ANY/...), known functions (validated), and otherwise
        falls back to `exp.Anonymous`. With `anonymous=True`, always builds an
        anonymous call even for known names.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # skip the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous,
this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one UDF parameter as a column definition (name + optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Parse a national string literal (N'...')."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter, optionally dot-qualified by a kind prefix."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (`x -> ...` / `(x, y) -> ...`) or fall back to an
        argument expression (DISTINCT list, select, or plain expression)."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all — re-parse from the start as a regular argument.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`.

        First speculatively tries a nested SELECT; if one parses, `this` is returned
        untouched (the retreat in `finally` restores the token position either way).
        """
        index = self._index

        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: identifier, optional type, then constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTOINCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint with either wrapped or bare arguments."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY with its
        optional (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / CYCLE) options."""
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        # Teradata INLINE LENGTH constraint; "INLINE" was already consumed by the caller.
        self._match_text_seq("LENGTH")
        return
self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of NOT {NULL | CASESPECIFIC}; None if neither follows."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table constraint; unnamed ones delegate to `_parse_unnamed_constraint`."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint by dispatching on CONSTRAINT_PARSERS keywords."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE, either as a bare column constraint or with a column list."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions,
        DEFERRABLE, MATCH FULL, ...) as raw strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON (DELETE/UPDATE) is captured verbatim.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause (table, optional column list, options)."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint with its ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY, either as a column constraint or with a key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` after `this`: struct literal, array literal, or
        subscript (with index-offset adjustment). Recurses for chained brackets."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indices by the dialect's array index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Extend `this` into a slice when a colon follows (e.g. `a[1:2]`)."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression (simple or searched), requiring a closing END."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF either as a function call `IF(cond, a, b)` or as the
        statement form `IF cond THEN a [ELSE b] END`."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        # The date part may arrive as a function, a plain var or a type keyword.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the inside of CAST(expr AS type); `strict` selects Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type-string') variant.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit CHARACTER SET.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style arguments into exp.GroupConcat."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) / CONVERT(expr, type) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # Two-argument form: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing odd arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL must be compared with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Unknown operand: match on equality OR both sides being NULL.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        """Parse one `[KEY] k [:|VALUE] v` pair inside JSON_OBJECT(...)."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) arguments including NULL handling and
        UNIQUE KEYS / RETURNING / FORMAT / ENCODING modifiers."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False
        # Optional trailing KEYS keyword after WITH/WITHOUT UNIQUE.
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        """Parse LOG(...) arguments, honoring the dialect's base-argument
        order and its single-argument default (LN vs LOG)."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL-style MATCH(cols) AGAINST (string [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One `name type [path] [AS JSON]` entry of the WITH clause.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE arguments into exp.StrPosition; `haystack_first`
        flips the needle/haystack argument order for the comma form."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        """Parse a join hint's table list into exp.JoinHint."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING arguments, including the FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([position] [chars FROM] expr [COLLATE ...])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            this = self._parse_bitwise()
        else:
            # Single-argument form: the first expression is the trim target.
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a trailing WINDOW clause's named window definitions, if present."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the modifier follows."""
        if self._match(TokenType.IGNORE_NULLS):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match(TokenType.RESPECT_NULLS):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse optional FILTER / WITHIN GROUP / OVER window suffixes of `this`;
        with `alias=True`, parse a named WINDOW clause entry instead."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword: there is no window suffix at all.
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame endpoint (UNBOUNDED/CURRENT ROW/expr plus
        PRECEDING/FOLLOWING side) into a {"value", "side"} dict."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or wrapped alias list) after `this`.

        With `explicit=True`, only an AS-prefixed alias is accepted.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases: x AS (a, b, ...).
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an exp.Identifier.

        Accepts quoted identifiers, then (optionally prefixed) bare tokens;
        `tokens` overrides the default set of allowed token types.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a string literal as a quoted identifier (or None)."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any of `tokens`, or any token at all when
        `any_token`) into exp.Var, falling back to a placeholder."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """Consume and return the current token unless it is a reserved keyword."""
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a var, or a string literal if no var is present."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse a NULL literal, if present."""
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal, if present."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token, if present."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        """Parse a parameter reference, optionally brace-wrapped (e.g. @{x})."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token via PLACEHOLDER_PARSERS; rewinds if the
        registered parser produces nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse an EXCEPT (col, ...) column-exclusion list, if present."""
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a REPLACE (expr, ...) column-replacement list, if present."""
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_csv(self._parse_expression)

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list with `parse_method`, dropping Nones."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments collected at the separator to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a run of binary operators from `expressions`
        (token type -> expression class) over `parse_method` operands."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list with `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside (optionally required) parentheses."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
        """Parse either a SELECT or a plain expression (with set operations)."""
        return self._parse_select() or self._parse_set_operations(self._parse_expression())

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CREATE TABLE AS)."""
        return self._parse_set_operations(
            self._parse_select(nested=True, parse_subquery_alias=False)
        )

    def _parse_transaction(self) -> exp.Expression:
        """Parse BEGIN/START ... [TRANSACTION|WORK] [modes] into exp.Transaction."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            # Each mode is a run of VAR tokens joined by spaces.
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Expression:
        """Parse the tail of COMMIT / ROLLBACK (savepoint, AND [NO] CHAIN).

        NOTE: the chain flag is only emitted for COMMIT; ROLLBACK keeps just
        its savepoint.
        """
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword itself was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)
        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ADD [COLUMN] [IF NOT EXISTS] <column def>."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        # FIRST / AFTER column-position modifier (Databricks/MySQL style).
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... DROP [COLUMN] via _parse_drop, defaulting kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
        """Parse ALTER TABLE ... DROP PARTITION (one or more partition specs)."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one ADD [CONSTRAINT name] CHECK/FOREIGN KEY/PRIMARY KEY item."""
        this = None
        # The constraint keyword was consumed by _parse_alter_table_add.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE ADD as either constraints or column additions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse column additions instead.
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse ALTER TABLE ... ALTER [COLUMN] variants (DROP/SET DEFAULT, type change)."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # Remaining form: [SET DATA] TYPE ... [COLLATE ...] [USING ...].
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse ALTER TABLE DROP as partition drops or column drops."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.Expression:
        """Parse ALTER TABLE ... RENAME TO <table>."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE; anything unrecognized falls back to a raw Command."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only a fully-consumed statement becomes a structured AlterTable.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Expression:
        """Parse MERGE INTO target USING source ON cond WHEN ... THEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, False otherwise.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT * shorthand.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE * shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via the dialect's SHOW_PARSERS trie, defaulting to a bare exp.Show."""
        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` SET item; `kind` is an
        optional GLOBAL/SESSION prefix already consumed by the caller."""
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment; rewind so the caller can try other SET forms.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item via the SET_PARSERS trie, else a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        """Parse SET ...; falls back to a raw Command if tokens remain unconsumed."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        """Return an exp.Var for the first multi-word option that matches, else None."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement and wrap it as a raw exp.Command.

        The command name is sliced off the front of the raw SQL using the
        length of the `start` token's text.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens to find a multi-word keyword parser.

        Rewinds to the starting position and returns None on no match.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                break
            if result == 2:
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Return True (and advance, attaching comments to `expression`) if the
        current token is `token_type`; None otherwise."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Return True (and advance) if the current token's type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Return True (and advance past both) if the next two tokens match a then b."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression=None):
        """Require a "(" token or raise a parse error."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression=None):
        """Require a ")" token or raise a parse error."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Return True (and advance) if the current token's upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        """Return True if the upcoming tokens spell out `texts` in order.

        Rewinds fully on a miss; with `advance=False` it also rewinds on a hit
        (pure lookahead).
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains
        (used where column syntax actually denotes nested field access)."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        """Replace Column references to `lambda_variables` inside `node` with
        plain identifiers / dot chains, returning the (possibly new) root."""
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Not inside a Dot chain: replace the column (or the root) directly.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces
a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
810 def __init__( 811 self, 812 error_level: t.Optional[ErrorLevel] = None, 813 error_message_context: int = 100, 814 index_offset: int = 0, 815 unnest_column_only: bool = False, 816 alias_post_tablesample: bool = False, 817 max_errors: int = 3, 818 null_ordering: t.Optional[str] = None, 819 ): 820 self.error_level = error_level or ErrorLevel.IMMEDIATE 821 self.error_message_context = error_message_context 822 self.index_offset = index_offset 823 self.unnest_column_only = unnest_column_only 824 self.alias_post_tablesample = alias_post_tablesample 825 self.max_errors = max_errors 826 self.null_ordering = null_ordering 827 self.reset()
839 def parse( 840 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 841 ) -> t.List[t.Optional[exp.Expression]]: 842 """ 843 Parses a list of tokens and returns a list of syntax trees, one tree 844 per parsed SQL statement. 845 846 Args: 847 raw_tokens: the list of tokens. 848 sql: the original SQL string, used to produce helpful debug messages. 849 850 Returns: 851 The list of syntax trees. 852 """ 853 return self._parse( 854 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 855 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
857 def parse_into( 858 self, 859 expression_types: exp.IntoType, 860 raw_tokens: t.List[Token], 861 sql: t.Optional[str] = None, 862 ) -> t.List[t.Optional[exp.Expression]]: 863 """ 864 Parses a list of tokens into a given Expression type. If a collection of Expression 865 types is given instead, this method will try to parse the token list into each one 866 of them, stopping at the first for which the parsing succeeds. 867 868 Args: 869 expression_types: the expression type(s) to try and parse the token list into. 870 raw_tokens: the list of tokens. 871 sql: the original SQL string, used to produce helpful debug messages. 872 873 Returns: 874 The target Expression. 875 """ 876 errors = [] 877 for expression_type in ensure_collection(expression_types): 878 parser = self.EXPRESSION_PARSERS.get(expression_type) 879 if not parser: 880 raise TypeError(f"No parser registered for {expression_type}") 881 try: 882 return self._parse(parser, raw_tokens, sql) 883 except ParseError as e: 884 e.errors[0]["into_expression"] = expression_type 885 errors.append(e) 886 raise ParseError( 887 f"Failed to parse into {expression_types}", 888 errors=merge_errors(errors), 889 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
925 def check_errors(self) -> None: 926 """ 927 Logs or raises any found errors, depending on the chosen error level setting. 928 """ 929 if self.error_level == ErrorLevel.WARN: 930 for error in self.errors: 931 logger.error(str(error)) 932 elif self.error_level == ErrorLevel.RAISE and self.errors: 933 raise ParseError( 934 concat_messages(self.errors, self.max_errors), 935 errors=merge_errors(self.errors), 936 )
Logs or raises any found errors, depending on the chosen error level setting.
938 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 939 """ 940 Appends an error in the list of recorded errors or raises it, depending on the chosen 941 error level setting. 942 """ 943 token = token or self._curr or self._prev or Token.string("") 944 start = token.start 945 end = token.end 946 start_context = self.sql[max(start - self.error_message_context, 0) : start] 947 highlight = self.sql[start:end] 948 end_context = self.sql[end : end + self.error_message_context] 949 950 error = ParseError.new( 951 f"{message}. Line {token.line}, Col: {token.col}.\n" 952 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 953 description=message, 954 line=token.line, 955 col=token.col, 956 start_context=start_context, 957 highlight=highlight, 958 end_context=end_context, 959 ) 960 961 if self.error_level == ErrorLevel.IMMEDIATE: 962 raise error 963 964 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
966 def expression( 967 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 968 ) -> E: 969 """ 970 Creates a new, validated Expression. 971 972 Args: 973 exp_class: the expression class to instantiate. 974 comments: an optional list of comments to attach to the expression. 975 kwargs: the arguments to set for the expression along with their respective values. 976 977 Returns: 978 The target expression. 979 """ 980 instance = exp_class(**kwargs) 981 instance.add_comments(comments) if comments else self._add_comments(instance) 982 self.validate_expression(instance) 983 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
990 def validate_expression( 991 self, expression: exp.Expression, args: t.Optional[t.List] = None 992 ) -> None: 993 """ 994 Validates an already instantiated expression, making sure that all its mandatory arguments 995 are set. 996 997 Args: 998 expression: the expression to validate. 999 args: an optional list of items that was used to instantiate the expression, if it's a Func. 1000 """ 1001 if self.error_level == ErrorLevel.IGNORE: 1002 return 1003 1004 for error_message in expression.error_messages(args): 1005 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.