sqlglot.dialects.bigquery
"""BigQuery dialect for sqlglot: module-level transpilation helpers and the
``BigQuery`` Dialect with its Tokenizer, Parser and Generator overrides."""

from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    """Render a VALUES clause used as a derived table as UNNEST([STRUCT(...), ...]).

    VALUES outside a FROM/JOIN is rendered normally; otherwise each tuple becomes
    a STRUCT whose field names come from the alias columns (or ``_c0``, ``_c1``, ...).
    """
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    """Render a RETURNS property; a Schema argument is emitted as ``name <columns>``."""
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    """Render CREATE; a FUNCTION that RETURNS a table becomes a TABLE FUNCTION,
    and a Subquery/Literal function body is unwrapped to its inner expression."""
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    """Replace GROUP BY expressions that match an aliased projection with the alias."""
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                # Integer group keys are ordinal references and are left alone
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    """Build StrToTime from PARSE_TIMESTAMP(fmt, value[, zone]) — note the swapped arg order."""
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    """Build a Timestamp node, flagging it as timezone-aware (BigQuery TIMESTAMP has a tz)."""
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    """DATE(y, m, d) parses to DateFromParts; any other arity parses to Date."""
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    """Render ArrayContains as EXISTS(SELECT 1 FROM UNNEST(arr) AS _col WHERE _col = value)."""
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TsOrDsAdd as DATE_ADD after casting the operand appropriately."""
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TsOrDsDiff as DATE_DIFF, casting both operands to TIMESTAMP first."""
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime via TIMESTAMP_SECONDS / TIMESTAMP_MILLIS / TIMESTAMP_MICROS,
    falling back to dividing by POW(10, scale) for non-standard scales."""
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    """TIME(x) -> TsOrDsToTime; TIME(h, m, s) -> TimeFromParts; otherwise kept anonymous."""
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E*S": "%S.%f",
        "%E6S": "%S.%f",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        """Lowercase identifiers that BigQuery treats case-insensitively, leaving
        UDF names and qualified/quoted table names untouched."""
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        # DIV and VALUES are handled by dedicated parser logic in this dialect
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        # Bracket accessor name -> (index offset, SAFE_ variant)
        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            """Parse a BigQuery scripting ``FOR ... IN ... DO ...`` statement."""
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            """Parse one table-name part, handling dashes and numeric-leading names."""
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            """Parse a (possibly dashed/quoted) multi-part table name into a Table node."""
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(this=this, db=db, catalog=catalog)
                table.meta["quoted_table"] = True

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...
        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            """Parse JSON_OBJECT, normalizing the two-array signature into key/value pairs."""
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            """Parse bracket access, folding OFFSET/ORDINAL/SAFE_* wrappers into
            the Bracket node's ``offset``/``safe`` args."""
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    # Unwrap e.g. OFFSET(i) -> i now that its semantics are recorded
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.Mod: rename_func("MOD"),
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            """Render TimeToStr as FORMAT_DATE(fmt, value), unwrapping TsOrDsToDate."""
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            """Render TryCast as SAFE_CAST."""
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            """Render ARRAY(subquery) with parentheses; other arrays inline as [..]."""
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Query):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render bracket access, mapping offset/safe args back to
            OFFSET/ORDINAL/SAFE_* wrappers and dot access for struct string keys."""
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            """UNNEST in an IN clause needs no extra parentheses in BigQuery."""
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            """BigQuery spells table/column properties as OPTIONS (...)."""
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            """FOR SYSTEM_TIME AS OF: BigQuery uses SYSTEM_TIME rather than TIMESTAMP."""
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
230class BigQuery(Dialect): 231 WEEK_OFFSET = -1 232 UNNEST_COLUMN_ONLY = True 233 SUPPORTS_USER_DEFINED_TYPES = False 234 SUPPORTS_SEMI_ANTI_JOIN = False 235 LOG_BASE_FIRST = False 236 237 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 238 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 239 240 # bigquery udfs are case sensitive 241 NORMALIZE_FUNCTIONS = False 242 243 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 244 TIME_MAPPING = { 245 "%D": "%m/%d/%y", 246 "%E*S": "%S.%f", 247 "%E6S": "%S.%f", 248 } 249 250 ESCAPE_SEQUENCES = { 251 "\\a": "\a", 252 "\\b": "\b", 253 "\\f": "\f", 254 "\\n": "\n", 255 "\\r": "\r", 256 "\\t": "\t", 257 "\\v": "\v", 258 } 259 260 FORMAT_MAPPING = { 261 "DD": "%d", 262 "MM": "%m", 263 "MON": "%b", 264 "MONTH": "%B", 265 "YYYY": "%Y", 266 "YY": "%y", 267 "HH": "%I", 268 "HH12": "%I", 269 "HH24": "%H", 270 "MI": "%M", 271 "SS": "%S", 272 "SSSSS": "%f", 273 "TZH": "%z", 274 } 275 276 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 277 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 278 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 279 280 def normalize_identifier(self, expression: E) -> E: 281 if isinstance(expression, exp.Identifier): 282 parent = expression.parent 283 while isinstance(parent, exp.Dot): 284 parent = parent.parent 285 286 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 287 # by default. The following check uses a heuristic to detect tables based on whether 288 # they are qualified. This should generally be correct, because tables in BigQuery 289 # must be qualified with at least a dataset, unless @@dataset_id is set. 
290 case_sensitive = ( 291 isinstance(parent, exp.UserDefinedFunction) 292 or ( 293 isinstance(parent, exp.Table) 294 and parent.db 295 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 296 ) 297 or expression.meta.get("is_table") 298 ) 299 if not case_sensitive: 300 expression.set("this", expression.this.lower()) 301 302 return expression 303 304 class Tokenizer(tokens.Tokenizer): 305 QUOTES = ["'", '"', '"""', "'''"] 306 COMMENTS = ["--", "#", ("/*", "*/")] 307 IDENTIFIERS = ["`"] 308 STRING_ESCAPES = ["\\"] 309 310 HEX_STRINGS = [("0x", ""), ("0X", "")] 311 312 BYTE_STRINGS = [ 313 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 314 ] 315 316 RAW_STRINGS = [ 317 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 318 ] 319 320 KEYWORDS = { 321 **tokens.Tokenizer.KEYWORDS, 322 "ANY TYPE": TokenType.VARIANT, 323 "BEGIN": TokenType.COMMAND, 324 "BEGIN TRANSACTION": TokenType.BEGIN, 325 "BYTES": TokenType.BINARY, 326 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 327 "DATETIME": TokenType.TIMESTAMP, 328 "DECLARE": TokenType.COMMAND, 329 "ELSEIF": TokenType.COMMAND, 330 "EXCEPTION": TokenType.COMMAND, 331 "FLOAT64": TokenType.DOUBLE, 332 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 333 "MODEL": TokenType.MODEL, 334 "NOT DETERMINISTIC": TokenType.VOLATILE, 335 "RECORD": TokenType.STRUCT, 336 "TIMESTAMP": TokenType.TIMESTAMPTZ, 337 } 338 KEYWORDS.pop("DIV") 339 KEYWORDS.pop("VALUES") 340 341 class Parser(parser.Parser): 342 PREFIXED_PIVOT_COLUMNS = True 343 LOG_DEFAULTS_TO_LN = True 344 SUPPORTS_IMPLICIT_UNNEST = True 345 346 FUNCTIONS = { 347 **parser.Parser.FUNCTIONS, 348 "DATE": _build_date, 349 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 350 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 351 "DATE_TRUNC": lambda args: exp.DateTrunc( 352 unit=exp.Literal.string(str(seq_get(args, 1))), 353 this=seq_get(args, 0), 354 ), 355 "DATETIME_ADD": 
build_date_delta_with_interval(exp.DatetimeAdd), 356 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 357 "DIV": binary_from_function(exp.IntDiv), 358 "FORMAT_DATE": lambda args: exp.TimeToStr( 359 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 360 ), 361 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 362 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 363 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 364 ), 365 "MD5": exp.MD5Digest.from_arg_list, 366 "TO_HEX": _build_to_hex, 367 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 368 [seq_get(args, 1), seq_get(args, 0)] 369 ), 370 "PARSE_TIMESTAMP": _build_parse_timestamp, 371 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 372 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 373 this=seq_get(args, 0), 374 expression=seq_get(args, 1), 375 position=seq_get(args, 2), 376 occurrence=seq_get(args, 3), 377 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 378 ), 379 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 380 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 381 "SPLIT": lambda args: exp.Split( 382 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 383 this=seq_get(args, 0), 384 expression=seq_get(args, 1) or exp.Literal.string(","), 385 ), 386 "TIME": _build_time, 387 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 388 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 389 "TIMESTAMP": _build_timestamp, 390 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 391 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 392 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 393 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 394 ), 395 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 396 this=seq_get(args, 0), 
scale=exp.UnixToTime.MILLIS 397 ), 398 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 399 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 400 } 401 402 FUNCTION_PARSERS = { 403 **parser.Parser.FUNCTION_PARSERS, 404 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 405 } 406 FUNCTION_PARSERS.pop("TRIM") 407 408 NO_PAREN_FUNCTIONS = { 409 **parser.Parser.NO_PAREN_FUNCTIONS, 410 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 411 } 412 413 NESTED_TYPE_TOKENS = { 414 *parser.Parser.NESTED_TYPE_TOKENS, 415 TokenType.TABLE, 416 } 417 418 PROPERTY_PARSERS = { 419 **parser.Parser.PROPERTY_PARSERS, 420 "NOT DETERMINISTIC": lambda self: self.expression( 421 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 422 ), 423 "OPTIONS": lambda self: self._parse_with_property(), 424 } 425 426 CONSTRAINT_PARSERS = { 427 **parser.Parser.CONSTRAINT_PARSERS, 428 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 429 } 430 431 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 432 RANGE_PARSERS.pop(TokenType.OVERLAPS) 433 434 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 435 436 STATEMENT_PARSERS = { 437 **parser.Parser.STATEMENT_PARSERS, 438 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 439 TokenType.END: lambda self: self._parse_as_command(self._prev), 440 TokenType.FOR: lambda self: self._parse_for_in(), 441 } 442 443 BRACKET_OFFSETS = { 444 "OFFSET": (0, False), 445 "ORDINAL": (1, False), 446 "SAFE_OFFSET": (0, True), 447 "SAFE_ORDINAL": (1, True), 448 } 449 450 def _parse_for_in(self) -> exp.ForIn: 451 this = self._parse_range() 452 self._match_text_seq("DO") 453 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 454 455 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 456 this = super()._parse_table_part(schema=schema) or self._parse_number() 457 458 # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 459 if isinstance(this, exp.Identifier): 460 table_name = this.name 461 while self._match(TokenType.DASH, advance=False) and self._next: 462 text = "" 463 while self._curr and self._curr.token_type != TokenType.DOT: 464 self._advance() 465 text += self._prev.text 466 table_name += text 467 468 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 469 elif isinstance(this, exp.Literal): 470 table_name = this.name 471 472 if self._is_connected() and self._parse_var(any_token=True): 473 table_name += self._prev.text 474 475 this = exp.Identifier(this=table_name, quoted=True) 476 477 return this 478 479 def _parse_table_parts( 480 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 481 ) -> exp.Table: 482 table = super()._parse_table_parts( 483 schema=schema, is_db_reference=is_db_reference, wildcard=True 484 ) 485 486 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 487 if not table.catalog: 488 if table.db: 489 parts = table.db.split(".") 490 if len(parts) == 2 and not table.args["db"].quoted: 491 table.set("catalog", exp.Identifier(this=parts[0])) 492 table.set("db", exp.Identifier(this=parts[1])) 493 else: 494 parts = table.name.split(".") 495 if len(parts) == 2 and not table.this.quoted: 496 table.set("db", exp.Identifier(this=parts[0])) 497 table.set("this", exp.Identifier(this=parts[1])) 498 499 if any("." in p.name for p in table.parts): 500 catalog, db, this, *rest = ( 501 exp.to_identifier(p, quoted=True) 502 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 503 ) 504 505 if rest and this: 506 this = exp.Dot.build([this, *rest]) # type: ignore 507 508 table = exp.Table(this=this, db=db, catalog=catalog) 509 table.meta["quoted_table"] = True 510 511 return table 512 513 @t.overload 514 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 
515 516 @t.overload 517 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 518 519 def _parse_json_object(self, agg=False): 520 json_object = super()._parse_json_object() 521 array_kv_pair = seq_get(json_object.expressions, 0) 522 523 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 524 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 525 if ( 526 array_kv_pair 527 and isinstance(array_kv_pair.this, exp.Array) 528 and isinstance(array_kv_pair.expression, exp.Array) 529 ): 530 keys = array_kv_pair.this.expressions 531 values = array_kv_pair.expression.expressions 532 533 json_object.set( 534 "expressions", 535 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 536 ) 537 538 return json_object 539 540 def _parse_bracket( 541 self, this: t.Optional[exp.Expression] = None 542 ) -> t.Optional[exp.Expression]: 543 bracket = super()._parse_bracket(this) 544 545 if this is bracket: 546 return bracket 547 548 if isinstance(bracket, exp.Bracket): 549 for expression in bracket.expressions: 550 name = expression.name.upper() 551 552 if name not in self.BRACKET_OFFSETS: 553 break 554 555 offset, safe = self.BRACKET_OFFSETS[name] 556 bracket.set("offset", offset) 557 bracket.set("safe", safe) 558 expression.replace(expression.expressions[0]) 559 560 return bracket 561 562 class Generator(generator.Generator): 563 EXPLICIT_UNION = True 564 INTERVAL_ALLOWS_PLURAL_FORM = False 565 JOIN_HINTS = False 566 QUERY_HINTS = False 567 TABLE_HINTS = False 568 LIMIT_FETCH = "LIMIT" 569 RENAME_TABLE_WITH_DB = False 570 NVL2_SUPPORTED = False 571 UNNEST_WITH_ORDINALITY = False 572 COLLATE_IS_FUNC = True 573 LIMIT_ONLY_LITERALS = True 574 SUPPORTS_TABLE_ALIAS_COLUMNS = False 575 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 576 JSON_KEY_VALUE_PAIR_SEP = "," 577 NULL_ORDERING_SUPPORTED = False 578 IGNORE_NULLS_IN_FUNC = True 579 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 580 
CAN_IMPLEMENT_ARRAY_ANY = True 581 SUPPORTS_TO_NUMBER = False 582 NAMED_PLACEHOLDER_TOKEN = "@" 583 584 TRANSFORMS = { 585 **generator.Generator.TRANSFORMS, 586 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 587 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 588 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 589 exp.ArrayContains: _array_contains_sql, 590 exp.ArrayFilter: filter_array_using_unnest, 591 exp.ArraySize: rename_func("ARRAY_LENGTH"), 592 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 593 exp.CollateProperty: lambda self, e: ( 594 f"DEFAULT COLLATE {self.sql(e, 'this')}" 595 if e.args.get("default") 596 else f"COLLATE {self.sql(e, 'this')}" 597 ), 598 exp.Commit: lambda *_: "COMMIT TRANSACTION", 599 exp.CountIf: rename_func("COUNTIF"), 600 exp.Create: _create_sql, 601 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 602 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 603 exp.DateDiff: lambda self, e: self.func( 604 "DATE_DIFF", e.this, e.expression, e.unit or "DAY" 605 ), 606 exp.DateFromParts: rename_func("DATE"), 607 exp.DateStrToDate: datestrtodate_sql, 608 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 609 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 610 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 611 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 612 exp.FromTimeZone: lambda self, e: self.func( 613 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 614 ), 615 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 616 exp.GroupConcat: rename_func("STRING_AGG"), 617 exp.Hex: rename_func("TO_HEX"), 618 exp.If: if_sql(false_value="NULL"), 619 exp.ILike: no_ilike_sql, 620 exp.IntDiv: rename_func("DIV"), 621 exp.JSONFormat: rename_func("TO_JSON_STRING"), 622 exp.Max: max_or_greatest, 623 exp.Mod: rename_func("MOD"), 624 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 625 exp.MD5Digest: 
rename_func("MD5"), 626 exp.Min: min_or_least, 627 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 628 exp.RegexpExtract: lambda self, e: self.func( 629 "REGEXP_EXTRACT", 630 e.this, 631 e.expression, 632 e.args.get("position"), 633 e.args.get("occurrence"), 634 ), 635 exp.RegexpReplace: regexp_replace_sql, 636 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 637 exp.ReturnsProperty: _returnsproperty_sql, 638 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 639 exp.Select: transforms.preprocess( 640 [ 641 transforms.explode_to_unnest(), 642 _unqualify_unnest, 643 transforms.eliminate_distinct_on, 644 _alias_ordered_group, 645 transforms.eliminate_semi_and_anti_joins, 646 ] 647 ), 648 exp.SHA2: lambda self, e: self.func( 649 "SHA256" if e.text("length") == "256" else "SHA512", e.this 650 ), 651 exp.StabilityProperty: lambda self, e: ( 652 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 653 ), 654 exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this), 655 exp.StrToTime: lambda self, e: self.func( 656 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 657 ), 658 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 659 exp.TimeFromParts: rename_func("TIME"), 660 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 661 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 662 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 663 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 664 exp.TimeStrToTime: timestrtotime_sql, 665 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 666 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 667 exp.TsOrDsAdd: _ts_or_ds_add_sql, 668 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 669 exp.TsOrDsToTime: rename_func("TIME"), 670 exp.Unhex: rename_func("FROM_HEX"), 671 exp.UnixDate: rename_func("UNIX_DATE"), 672 exp.UnixToTime: _unix_to_time_sql, 673 exp.Values: _derived_table_values_to_unnest, 674 exp.VariancePop: 
rename_func("VAR_POP"), 675 } 676 677 SUPPORTED_JSON_PATH_PARTS = { 678 exp.JSONPathKey, 679 exp.JSONPathRoot, 680 exp.JSONPathSubscript, 681 } 682 683 TYPE_MAPPING = { 684 **generator.Generator.TYPE_MAPPING, 685 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 686 exp.DataType.Type.BIGINT: "INT64", 687 exp.DataType.Type.BINARY: "BYTES", 688 exp.DataType.Type.BOOLEAN: "BOOL", 689 exp.DataType.Type.CHAR: "STRING", 690 exp.DataType.Type.DECIMAL: "NUMERIC", 691 exp.DataType.Type.DOUBLE: "FLOAT64", 692 exp.DataType.Type.FLOAT: "FLOAT64", 693 exp.DataType.Type.INT: "INT64", 694 exp.DataType.Type.NCHAR: "STRING", 695 exp.DataType.Type.NVARCHAR: "STRING", 696 exp.DataType.Type.SMALLINT: "INT64", 697 exp.DataType.Type.TEXT: "STRING", 698 exp.DataType.Type.TIMESTAMP: "DATETIME", 699 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 700 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 701 exp.DataType.Type.TINYINT: "INT64", 702 exp.DataType.Type.VARBINARY: "BYTES", 703 exp.DataType.Type.VARCHAR: "STRING", 704 exp.DataType.Type.VARIANT: "ANY TYPE", 705 } 706 707 PROPERTIES_LOCATION = { 708 **generator.Generator.PROPERTIES_LOCATION, 709 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 710 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 711 } 712 713 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 714 RESERVED_KEYWORDS = { 715 *generator.Generator.RESERVED_KEYWORDS, 716 "all", 717 "and", 718 "any", 719 "array", 720 "as", 721 "asc", 722 "assert_rows_modified", 723 "at", 724 "between", 725 "by", 726 "case", 727 "cast", 728 "collate", 729 "contains", 730 "create", 731 "cross", 732 "cube", 733 "current", 734 "default", 735 "define", 736 "desc", 737 "distinct", 738 "else", 739 "end", 740 "enum", 741 "escape", 742 "except", 743 "exclude", 744 "exists", 745 "extract", 746 "false", 747 "fetch", 748 "following", 749 "for", 750 "from", 751 "full", 752 "group", 753 "grouping", 754 "groups", 755 "hash", 756 "having", 757 "if", 758 
"ignore", 759 "in", 760 "inner", 761 "intersect", 762 "interval", 763 "into", 764 "is", 765 "join", 766 "lateral", 767 "left", 768 "like", 769 "limit", 770 "lookup", 771 "merge", 772 "natural", 773 "new", 774 "no", 775 "not", 776 "null", 777 "nulls", 778 "of", 779 "on", 780 "or", 781 "order", 782 "outer", 783 "over", 784 "partition", 785 "preceding", 786 "proto", 787 "qualify", 788 "range", 789 "recursive", 790 "respect", 791 "right", 792 "rollup", 793 "rows", 794 "select", 795 "set", 796 "some", 797 "struct", 798 "tablesample", 799 "then", 800 "to", 801 "treat", 802 "true", 803 "unbounded", 804 "union", 805 "unnest", 806 "using", 807 "when", 808 "where", 809 "window", 810 "with", 811 "within", 812 } 813 814 def table_parts(self, expression: exp.Table) -> str: 815 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 816 # we need to make sure the correct quoting is used in each case. 817 # 818 # For example, if there is a CTE x that clashes with a schema name, then the former will 819 # return the table y in that schema, whereas the latter will return the CTE's y column: 820 # 821 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 822 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 823 if expression.meta.get("quoted_table"): 824 table_parts = ".".join(p.name for p in expression.parts) 825 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 826 827 return super().table_parts(expression) 828 829 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 830 this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression 831 return self.func("FORMAT_DATE", self.format_time(expression), this.this) 832 833 def eq_sql(self, expression: exp.EQ) -> str: 834 # Operands of = cannot be NULL in BigQuery 835 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 836 if not isinstance(expression.parent, exp.Update): 837 return 
"NULL" 838 839 return self.binary(expression, "=") 840 841 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 842 parent = expression.parent 843 844 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 845 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 846 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 847 return self.func( 848 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 849 ) 850 851 return super().attimezone_sql(expression) 852 853 def trycast_sql(self, expression: exp.TryCast) -> str: 854 return self.cast_sql(expression, safe_prefix="SAFE_") 855 856 def array_sql(self, expression: exp.Array) -> str: 857 first_arg = seq_get(expression.expressions, 0) 858 if isinstance(first_arg, exp.Query): 859 return f"ARRAY{self.wrap(self.sql(first_arg))}" 860 861 return inline_array_sql(self, expression) 862 863 def bracket_sql(self, expression: exp.Bracket) -> str: 864 this = expression.this 865 expressions = expression.expressions 866 867 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 868 arg = expressions[0] 869 if arg.type is None: 870 from sqlglot.optimizer.annotate_types import annotate_types 871 872 arg = annotate_types(arg) 873 874 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 875 # BQ doesn't support bracket syntax with string values for structs 876 return f"{self.sql(this)}.{arg.name}" 877 878 expressions_sql = self.expressions(expression, flat=True) 879 offset = expression.args.get("offset") 880 881 if offset == 0: 882 expressions_sql = f"OFFSET({expressions_sql})" 883 elif offset == 1: 884 expressions_sql = f"ORDINAL({expressions_sql})" 885 elif offset is not None: 886 self.unsupported(f"Unsupported array offset: {offset}") 887 888 if expression.args.get("safe"): 889 expressions_sql = f"SAFE_{expressions_sql}" 890 891 return f"{self.sql(this)}[{expressions_sql}]" 892 893 def 
in_unnest_op(self, expression: exp.Unnest) -> str: 894 return self.sql(expression) 895 896 def except_op(self, expression: exp.Except) -> str: 897 if not expression.args.get("distinct"): 898 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 899 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 900 901 def intersect_op(self, expression: exp.Intersect) -> str: 902 if not expression.args.get("distinct"): 903 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 904 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 905 906 def with_properties(self, properties: exp.Properties) -> str: 907 return self.properties(properties, prefix=self.seg("OPTIONS")) 908 909 def version_sql(self, expression: exp.Version) -> str: 910 if expression.name == "TIMESTAMP": 911 expression.set("this", "SYSTEM_TIME") 912 return super().version_sql(expression)
Specifies the first day of the week for DATE_TRUNC with the WEEK date part. Defaults to 0 (Monday); -1 means the week starts on Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, or None. None means the two-argument form of LOG is not supported by the dialect.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disable function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Mapping of an unescaped escape sequence to the corresponding character.
Mapping used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If this mapping is empty, the corresponding trie is constructed from TIME_MAPPING instead.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
280 def normalize_identifier(self, expression: E) -> E: 281 if isinstance(expression, exp.Identifier): 282 parent = expression.parent 283 while isinstance(parent, exp.Dot): 284 parent = parent.parent 285 286 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 287 # by default. The following check uses a heuristic to detect tables based on whether 288 # they are qualified. This should generally be correct, because tables in BigQuery 289 # must be qualified with at least a dataset, unless @@dataset_id is set. 290 case_sensitive = ( 291 isinstance(parent, exp.UserDefinedFunction) 292 or ( 293 isinstance(parent, exp.Table) 294 and parent.db 295 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 296 ) 297 or expression.meta.get("is_table") 298 ) 299 if not case_sensitive: 300 expression.set("this", expression.this.lower()) 301 302 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, identifiers may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
304 class Tokenizer(tokens.Tokenizer): 305 QUOTES = ["'", '"', '"""', "'''"] 306 COMMENTS = ["--", "#", ("/*", "*/")] 307 IDENTIFIERS = ["`"] 308 STRING_ESCAPES = ["\\"] 309 310 HEX_STRINGS = [("0x", ""), ("0X", "")] 311 312 BYTE_STRINGS = [ 313 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 314 ] 315 316 RAW_STRINGS = [ 317 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 318 ] 319 320 KEYWORDS = { 321 **tokens.Tokenizer.KEYWORDS, 322 "ANY TYPE": TokenType.VARIANT, 323 "BEGIN": TokenType.COMMAND, 324 "BEGIN TRANSACTION": TokenType.BEGIN, 325 "BYTES": TokenType.BINARY, 326 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 327 "DATETIME": TokenType.TIMESTAMP, 328 "DECLARE": TokenType.COMMAND, 329 "ELSEIF": TokenType.COMMAND, 330 "EXCEPTION": TokenType.COMMAND, 331 "FLOAT64": TokenType.DOUBLE, 332 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 333 "MODEL": TokenType.MODEL, 334 "NOT DETERMINISTIC": TokenType.VOLATILE, 335 "RECORD": TokenType.STRUCT, 336 "TIMESTAMP": TokenType.TIMESTAMPTZ, 337 } 338 KEYWORDS.pop("DIV") 339 KEYWORDS.pop("VALUES")
Inherited Members
341 class Parser(parser.Parser): 342 PREFIXED_PIVOT_COLUMNS = True 343 LOG_DEFAULTS_TO_LN = True 344 SUPPORTS_IMPLICIT_UNNEST = True 345 346 FUNCTIONS = { 347 **parser.Parser.FUNCTIONS, 348 "DATE": _build_date, 349 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 350 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 351 "DATE_TRUNC": lambda args: exp.DateTrunc( 352 unit=exp.Literal.string(str(seq_get(args, 1))), 353 this=seq_get(args, 0), 354 ), 355 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 356 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 357 "DIV": binary_from_function(exp.IntDiv), 358 "FORMAT_DATE": lambda args: exp.TimeToStr( 359 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 360 ), 361 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 362 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 363 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 364 ), 365 "MD5": exp.MD5Digest.from_arg_list, 366 "TO_HEX": _build_to_hex, 367 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 368 [seq_get(args, 1), seq_get(args, 0)] 369 ), 370 "PARSE_TIMESTAMP": _build_parse_timestamp, 371 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 372 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 373 this=seq_get(args, 0), 374 expression=seq_get(args, 1), 375 position=seq_get(args, 2), 376 occurrence=seq_get(args, 3), 377 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 378 ), 379 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 380 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 381 "SPLIT": lambda args: exp.Split( 382 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 383 this=seq_get(args, 0), 384 expression=seq_get(args, 1) or exp.Literal.string(","), 385 ), 386 "TIME": _build_time, 387 "TIME_ADD": 
build_date_delta_with_interval(exp.TimeAdd), 388 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 389 "TIMESTAMP": _build_timestamp, 390 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 391 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 392 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 393 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 394 ), 395 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 396 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 397 ), 398 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 399 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 400 } 401 402 FUNCTION_PARSERS = { 403 **parser.Parser.FUNCTION_PARSERS, 404 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 405 } 406 FUNCTION_PARSERS.pop("TRIM") 407 408 NO_PAREN_FUNCTIONS = { 409 **parser.Parser.NO_PAREN_FUNCTIONS, 410 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 411 } 412 413 NESTED_TYPE_TOKENS = { 414 *parser.Parser.NESTED_TYPE_TOKENS, 415 TokenType.TABLE, 416 } 417 418 PROPERTY_PARSERS = { 419 **parser.Parser.PROPERTY_PARSERS, 420 "NOT DETERMINISTIC": lambda self: self.expression( 421 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 422 ), 423 "OPTIONS": lambda self: self._parse_with_property(), 424 } 425 426 CONSTRAINT_PARSERS = { 427 **parser.Parser.CONSTRAINT_PARSERS, 428 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 429 } 430 431 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 432 RANGE_PARSERS.pop(TokenType.OVERLAPS) 433 434 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 435 436 STATEMENT_PARSERS = { 437 **parser.Parser.STATEMENT_PARSERS, 438 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 439 TokenType.END: lambda self: self._parse_as_command(self._prev), 440 TokenType.FOR: lambda self: self._parse_for_in(), 441 } 442 443 BRACKET_OFFSETS = { 444 "OFFSET": (0, False), 445 "ORDINAL": (1, False), 
446 "SAFE_OFFSET": (0, True), 447 "SAFE_ORDINAL": (1, True), 448 } 449 450 def _parse_for_in(self) -> exp.ForIn: 451 this = self._parse_range() 452 self._match_text_seq("DO") 453 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 454 455 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 456 this = super()._parse_table_part(schema=schema) or self._parse_number() 457 458 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 459 if isinstance(this, exp.Identifier): 460 table_name = this.name 461 while self._match(TokenType.DASH, advance=False) and self._next: 462 text = "" 463 while self._curr and self._curr.token_type != TokenType.DOT: 464 self._advance() 465 text += self._prev.text 466 table_name += text 467 468 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 469 elif isinstance(this, exp.Literal): 470 table_name = this.name 471 472 if self._is_connected() and self._parse_var(any_token=True): 473 table_name += self._prev.text 474 475 this = exp.Identifier(this=table_name, quoted=True) 476 477 return this 478 479 def _parse_table_parts( 480 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 481 ) -> exp.Table: 482 table = super()._parse_table_parts( 483 schema=schema, is_db_reference=is_db_reference, wildcard=True 484 ) 485 486 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 487 if not table.catalog: 488 if table.db: 489 parts = table.db.split(".") 490 if len(parts) == 2 and not table.args["db"].quoted: 491 table.set("catalog", exp.Identifier(this=parts[0])) 492 table.set("db", exp.Identifier(this=parts[1])) 493 else: 494 parts = table.name.split(".") 495 if len(parts) == 2 and not table.this.quoted: 496 table.set("db", exp.Identifier(this=parts[0])) 497 table.set("this", exp.Identifier(this=parts[1])) 498 499 if any("." 
in p.name for p in table.parts): 500 catalog, db, this, *rest = ( 501 exp.to_identifier(p, quoted=True) 502 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 503 ) 504 505 if rest and this: 506 this = exp.Dot.build([this, *rest]) # type: ignore 507 508 table = exp.Table(this=this, db=db, catalog=catalog) 509 table.meta["quoted_table"] = True 510 511 return table 512 513 @t.overload 514 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 515 516 @t.overload 517 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 518 519 def _parse_json_object(self, agg=False): 520 json_object = super()._parse_json_object() 521 array_kv_pair = seq_get(json_object.expressions, 0) 522 523 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 524 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 525 if ( 526 array_kv_pair 527 and isinstance(array_kv_pair.this, exp.Array) 528 and isinstance(array_kv_pair.expression, exp.Array) 529 ): 530 keys = array_kv_pair.this.expressions 531 values = array_kv_pair.expression.expressions 532 533 json_object.set( 534 "expressions", 535 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 536 ) 537 538 return json_object 539 540 def _parse_bracket( 541 self, this: t.Optional[exp.Expression] = None 542 ) -> t.Optional[exp.Expression]: 543 bracket = super()._parse_bracket(this) 544 545 if this is bracket: 546 return bracket 547 548 if isinstance(bracket, exp.Bracket): 549 for expression in bracket.expressions: 550 name = expression.name.upper() 551 552 if name not in self.BRACKET_OFFSETS: 553 break 554 555 offset, safe = self.BRACKET_OFFSETS[name] 556 bracket.set("offset", offset) 557 bracket.set("safe", safe) 558 expression.replace(expression.expressions[0]) 559 560 return bracket
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates BigQuery (GoogleSQL) text from a SQLGlot syntax tree.

    Dialect-specific behavior is expressed through the capability flags below,
    the TRANSFORMS/TYPE_MAPPING tables, and a handful of overridden *_sql methods.
    """

    # Capability/behavior flags consumed by the base generator.
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"

    # Per-expression rendering overrides; entries replace/extend the base table.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.Mod: rename_func("MOD"),
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    # JSON path node types BigQuery can express natively.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    # Canonical type names -> BigQuery type names.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    # Where each property clause is emitted in CREATE statements.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all",
        "and",
        "any",
        "array",
        "as",
        "asc",
        "assert_rows_modified",
        "at",
        "between",
        "by",
        "case",
        "cast",
        "collate",
        "contains",
        "create",
        "cross",
        "cube",
        "current",
        "default",
        "define",
        "desc",
        "distinct",
        "else",
        "end",
        "enum",
        "escape",
        "except",
        "exclude",
        "exists",
        "extract",
        "false",
        "fetch",
        "following",
        "for",
        "from",
        "full",
        "group",
        "grouping",
        "groups",
        "hash",
        "having",
        "if",
        "ignore",
        "in",
        "inner",
        "intersect",
        "interval",
        "into",
        "is",
        "join",
        "lateral",
        "left",
        "like",
        "limit",
        "lookup",
        "merge",
        "natural",
        "new",
        "no",
        "not",
        "null",
        "nulls",
        "of",
        "on",
        "or",
        "order",
        "outer",
        "over",
        "partition",
        "preceding",
        "proto",
        "qualify",
        "range",
        "recursive",
        "respect",
        "right",
        "rollup",
        "rows",
        "select",
        "set",
        "some",
        "struct",
        "tablesample",
        "then",
        "to",
        "treat",
        "true",
        "unbounded",
        "union",
        "unnest",
        "using",
        "when",
        "where",
        "window",
        "with",
        "within",
    }

    def table_parts(self, expression: exp.Table) -> str:
        """Render a table name, re-quoting fully-quoted dotted names as one identifier."""
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        """Render TimeToStr as FORMAT_DATE, unwrapping an inner TsOrDsToDate if present."""
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        """Render `=`, collapsing comparisons against NULL (outside UPDATE SET) to NULL."""
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        """Render AT TIME ZONE via TIMESTAMP(DATETIME(..)), except inside CAST(.. AS STRING)."""
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        """Render TRY_CAST as BigQuery's SAFE_CAST."""
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def array_sql(self, expression: exp.Array) -> str:
        """Render arrays; a subquery argument uses the ARRAY(<query>) form."""
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Query):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render subscripts with BigQuery's OFFSET/ORDINAL/SAFE_* wrappers.

        String keys on STRUCT values fall back to dot access, since BigQuery
        doesn't support bracket syntax there.
        """
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        """Render the UNNEST operand of IN without extra parentheses."""
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        """Render EXCEPT; BigQuery only supports the DISTINCT variant."""
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        """Render INTERSECT; BigQuery only supports the DISTINCT variant."""
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        """Render table/view properties using BigQuery's OPTIONS clause."""
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        """Render time-travel clauses, mapping TIMESTAMP to FOR SYSTEM_TIME AS OF."""
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
814 def table_parts(self, expression: exp.Table) -> str: 815 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 816 # we need to make sure the correct quoting is used in each case. 817 # 818 # For example, if there is a CTE x that clashes with a schema name, then the former will 819 # return the table y in that schema, whereas the latter will return the CTE's y column: 820 # 821 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 822 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 823 if expression.meta.get("quoted_table"): 824 table_parts = ".".join(p.name for p in expression.parts) 825 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 826 827 return super().table_parts(expression)
841 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 842 parent = expression.parent 843 844 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 845 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 846 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 847 return self.func( 848 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 849 ) 850 851 return super().attimezone_sql(expression)
863 def bracket_sql(self, expression: exp.Bracket) -> str: 864 this = expression.this 865 expressions = expression.expressions 866 867 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 868 arg = expressions[0] 869 if arg.type is None: 870 from sqlglot.optimizer.annotate_types import annotate_types 871 872 arg = annotate_types(arg) 873 874 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 875 # BQ doesn't support bracket syntax with string values for structs 876 return f"{self.sql(this)}.{arg.name}" 877 878 expressions_sql = self.expressions(expression, flat=True) 879 offset = expression.args.get("offset") 880 881 if offset == 0: 882 expressions_sql = f"OFFSET({expressions_sql})" 883 elif offset == 1: 884 expressions_sql = f"ORDINAL({expressions_sql})" 885 elif offset is not None: 886 self.unsupported(f"Unsupported array offset: {offset}") 887 888 if expression.args.get("safe"): 889 expressions_sql = f"SAFE_{expressions_sql}" 890 891 return f"{self.sql(this)}[{expressions_sql}]"
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql