sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import flatten, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return build_formatted_time(exp.StrToTime, "snowflake")(args)
        return exp.UnixToTime(this=first_arg, scale=second_arg)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if is_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return build_formatted_time(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIMESTAMP": _build_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            this = super()._parse_column_ops(this)

            casts = []
            json_path = []

            while self._match(TokenType.COLON):
                path = super()._parse_column_ops(self._parse_field(any_token=True))

                # The cast :: operator has a lower precedence than the extraction operator :, so
                # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
                while isinstance(path, exp.Cast):
                    casts.append(path.to)
                    path = path.this

                if path:
                    json_path.append(path.sql(dialect="snowflake", copy=False))

            if json_path:
                this = self.expression(
                    exp.JSONExtract,
                    this=this,
                    expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
                )

                while casts:
                    this = self.expression(exp.Cast, this=this, to=casts.pop())

            return this

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.

NULL_ORDERING
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
PREFER_CTE_ALIAS_COLUMN
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
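A minimal sketch of this flag in action, assuming sqlglot's optimizer entry point sqlglot.optimizer.qualify.qualify (the rewrite above is applied during qualification, not during plain transpilation):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    expression = sqlglot.parse_one(sql, read="snowflake")

    # Qualification pushes the CTE alias column `c` onto the SUM(a) projection.
    print(qualify(expression, dialect="snowflake").sql(dialect="snowflake"))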
TIME_MAPPING
Associates this dialect's time formats with their equivalent Python strftime formats.
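For instance, a Snowflake format string can be carried over to another dialect through this mapping (a sketch; the exact output shape depends on the target dialect's own format tokens):

    import sqlglot

    # 'yyyy-mm-dd' is mapped through Python strftime ('%Y-%m-%d') and then
    # into the target dialect's format conventions.
    print(sqlglot.transpile(
        "SELECT TO_TIMESTAMP('2020-01-01', 'yyyy-mm-dd')",
        read="snowflake",
        write="duckdb",
    )[0])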
def quote_identifier(self, expression: E, identify: bool = True) -> E:
    # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
    # unquoted DUAL keyword in a special way and does not map it to a user-defined table
    if (
        isinstance(expression, exp.Identifier)
        and isinstance(expression.parent, exp.Table)
        and expression.name.lower() == "dual"
    ):
        return expression  # type: ignore

    return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
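A quick illustration of the DUAL carve-out (a sketch; identify=True forces quoting of every other identifier):

    import sqlglot

    # DUAL stays bare even under forced quoting...
    print(sqlglot.transpile("SELECT * FROM DUAL", read="snowflake", write="snowflake", identify=True)[0])
    # ...while a regular table name is quoted as usual.
    print(sqlglot.transpile("SELECT * FROM my_table", read="snowflake", write="snowflake", identify=True)[0])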
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Parser(parser.Parser):
    IDENTIFY_PIVOT_STRINGS = True

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARRAYAGG": exp.ArrayAgg.from_arg_list,
        "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "BITXOR": binary_from_function(exp.BitwiseXor),
        "BIT_XOR": binary_from_function(exp.BitwiseXor),
        "BOOLXOR": binary_from_function(exp.Xor),
        "CONVERT_TIMEZONE": _build_convert_timezone,
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=_map_date_part(seq_get(args, 0)),
        ),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "FLATTEN": exp.Explode.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
        ),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
        "TO_NUMBER": lambda args: exp.ToNumber(
            this=seq_get(args, 0),
            format=seq_get(args, 1),
            precision=seq_get(args, 2),
            scale=seq_get(args, 3),
        ),
        "TO_TIMESTAMP": _build_to_timestamp,
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
        "SWAP": lambda self: self._parse_alter_table_swap(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCATION": lambda self: self._parse_location(),
    }

    SHOW_PARSERS = {
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = super()._parse_column_ops(this)

        casts = []
        json_path = []

        while self._match(TokenType.COLON):
            path = super()._parse_column_ops(self._parse_field(any_token=True))

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if path:
                json_path.append(path.sql(dialect="snowflake", copy=False))

        if json_path:
            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))),
            )

            while casts:
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = _map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_at_before(self, table: exp.Table) -> exp.Table:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        if self._match_texts(("AT", "BEFORE")):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                when = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
                table.set("when", when)
            else:
                self._retreat(index)

        return table

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            self._match(TokenType.L_PAREN)
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return self._parse_at_before(table)

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_alter_table_swap(self) -> exp.SwapTable:
        self._match_text_seq("WITH")
        return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

    def _parse_location(self) -> exp.LocationProperty:
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_location_path(self) -> exp.Var:
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator.
        while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
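Two of the custom parsing paths above, shown end to end (a sketch; the printed SQL is approximate and may vary slightly across sqlglot versions):

    import sqlglot

    # _parse_column_ops: the ':' extraction operator becomes GET_PATH, and a
    # trailing '::' cast is lifted above the extraction.
    # Expected (approximately): SELECT CAST(GET_PATH(col, 'a.b') AS INT) FROM t
    print(sqlglot.transpile("SELECT col:a.b::INT FROM t", read="snowflake", write="snowflake")[0])

    # _parse_show_snowflake: SHOW statements round-trip through an exp.Show node.
    print(sqlglot.transpile("SHOW TERSE TABLES LIKE '%x%' IN SCHEMA db1.s1", read="snowflake", write="snowflake")[0])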
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- USABLES
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "CHAR VARYING": TokenType.VARCHAR,
        "CHARACTER VARYING": TokenType.VARCHAR,
        "EXCLUDE": TokenType.EXCEPT,
        "ILIKE ANY": TokenType.ILIKE_ANY,
        "LIKE ANY": TokenType.LIKE_ANY,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.COMMAND,
        "REMOVE": TokenType.COMMAND,
        "RENAME": TokenType.REPLACE,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
        "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TIMESTAMPNTZ": TokenType.TIMESTAMP,
        "TOP": TokenType.TOP,
    }

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
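The keyword remappings above take effect before parsing; for example, Snowflake's MINUS set operator is tokenized as EXCEPT, so it generates as EXCEPT in other dialects (a sketch):

    import sqlglot

    # MINUS maps to TokenType.EXCEPT; expected output: SELECT 1 EXCEPT SELECT 2
    print(sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="duckdb")[0])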
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: inline_array_sql,
        exp.ArrayConcat: rename_func("ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DataType: _datatype_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: rename_func("DATE_PART"),
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GroupConcat: rename_func("LISTAGG"),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.StrPosition: lambda self, e: self.func(
            "POSITION", e.args.get("substr"), e.this, e.args.get("position")
        ),
        exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
        exp.Stuff: rename_func("INSERT"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: self.func(
            "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
        ),
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
    }

    STAR_MAPPING = {
        "except": "EXCLUDE",
        "replace": "RENAME",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value)

        if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
            return super().trycast_sql(expression)

        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            seq_get(unnest_alias.columns if unnest_alias else [], 0)
            or exp.to_identifier("value"),
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        return f"{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

    def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
        # Other dialects don't support all of the following parameters, so we need to
        # generate default values as necessary to ensure the transpilation is correct
        group = expression.args.get("group")
        parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
        occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
        position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

        return self.func(
            "REGEXP_SUBSTR",
            expression.this,
            expression.expression,
            position,
            occurrence,
            parameters,
            group,
        )

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT with All is not supported in Snowflake")
        return super().except_op(expression)

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT with All is not supported in Snowflake")
        return super().intersect_op(expression)

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""
        return f"AUTOINCREMENT{start}{increment}"

    def swaptable_sql(self, expression: exp.SwapTable) -> str:
        this = self.sql(expression, "this")
        return f"SWAP WITH {this}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
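A few of the TRANSFORMS and generator methods above in action when targeting Snowflake (a sketch; exact output may differ slightly between sqlglot versions):

    import sqlglot

    # exp.If -> IFF(...), per if_sql(name="IFF", false_value="NULL").
    # Expected (approximately): SELECT IFF(x > 0, 1, NULL)
    print(sqlglot.transpile("SELECT IF(x > 0, 1, NULL)", read="duckdb", write="snowflake")[0])

    # exp.Unnest -> TABLE(FLATTEN(INPUT => ...)) with the canonical column list
    # (seq, key, path, index, value, this), per unnest_sql above.
    print(sqlglot.transpile("SELECT * FROM UNNEST(x)", read="duckdb", write="snowflake")[0])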
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql