sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18logger = logging.getLogger("sqlglot") 19 20OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 21 22 23def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 24 if len(args) == 1 and args[0].is_star: 25 return exp.StarMap(this=args[0]) 26 27 keys = [] 28 values = [] 29 for i in range(0, len(args), 2): 30 keys.append(args[i]) 31 values.append(args[i + 1]) 32 33 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 34 35 36def build_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 50 # Default argument order is base, expression 51 this = seq_get(args, 0) 52 expression = seq_get(args, 1) 53 54 if expression: 55 if not dialect.LOG_BASE_FIRST: 56 this, expression = expression, this 57 return exp.Log(this=this, expression=expression) 58 59 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 60 61 62def 
build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 63 def _builder(args: t.List, dialect: Dialect) -> E: 64 expression = expr_type( 65 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 66 ) 67 if len(args) > 2 and expr_type is exp.JSONExtract: 68 expression.set("expressions", args[2:]) 69 70 return expression 71 72 return _builder 73 74 75class _Parser(type): 76 def __new__(cls, clsname, bases, attrs): 77 klass = super().__new__(cls, clsname, bases, attrs) 78 79 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 80 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 81 82 return klass 83 84 85class Parser(metaclass=_Parser): 86 """ 87 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 88 89 Args: 90 error_level: The desired error level. 91 Default: ErrorLevel.IMMEDIATE 92 error_message_context: The amount of context to capture from a query string when displaying 93 the error message (in number of characters). 94 Default: 100 95 max_errors: Maximum number of error messages to include in a raised ParseError. 96 This is only relevant if error_level is ErrorLevel.RAISE. 
97 Default: 3 98 """ 99 100 FUNCTIONS: t.Dict[str, t.Callable] = { 101 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 102 "CONCAT": lambda args, dialect: exp.Concat( 103 expressions=args, 104 safe=not dialect.STRICT_STRING_CONCAT, 105 coalesce=dialect.CONCAT_COALESCE, 106 ), 107 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 108 expressions=args, 109 safe=not dialect.STRICT_STRING_CONCAT, 110 coalesce=dialect.CONCAT_COALESCE, 111 ), 112 "DATE_TO_DATE_STR": lambda args: exp.Cast( 113 this=seq_get(args, 0), 114 to=exp.DataType(this=exp.DataType.Type.TEXT), 115 ), 116 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 117 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 118 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 119 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 120 "LIKE": build_like, 121 "LOG": build_logarithm, 122 "TIME_TO_TIME_STR": lambda args: exp.Cast( 123 this=seq_get(args, 0), 124 to=exp.DataType(this=exp.DataType.Type.TEXT), 125 ), 126 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 127 this=exp.Cast( 128 this=seq_get(args, 0), 129 to=exp.DataType(this=exp.DataType.Type.TEXT), 130 ), 131 start=exp.Literal.number(1), 132 length=exp.Literal.number(10), 133 ), 134 "VAR_MAP": build_var_map, 135 } 136 137 NO_PAREN_FUNCTIONS = { 138 TokenType.CURRENT_DATE: exp.CurrentDate, 139 TokenType.CURRENT_DATETIME: exp.CurrentDate, 140 TokenType.CURRENT_TIME: exp.CurrentTime, 141 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 142 TokenType.CURRENT_USER: exp.CurrentUser, 143 } 144 145 STRUCT_TYPE_TOKENS = { 146 TokenType.NESTED, 147 TokenType.STRUCT, 148 } 149 150 NESTED_TYPE_TOKENS = { 151 TokenType.ARRAY, 152 TokenType.LOWCARDINALITY, 153 TokenType.MAP, 154 TokenType.NULLABLE, 155 *STRUCT_TYPE_TOKENS, 156 } 157 158 ENUM_TYPE_TOKENS = { 159 TokenType.ENUM, 160 TokenType.ENUM8, 161 TokenType.ENUM16, 162 } 163 164 
AGGREGATE_TYPE_TOKENS = { 165 TokenType.AGGREGATEFUNCTION, 166 TokenType.SIMPLEAGGREGATEFUNCTION, 167 } 168 169 TYPE_TOKENS = { 170 TokenType.BIT, 171 TokenType.BOOLEAN, 172 TokenType.TINYINT, 173 TokenType.UTINYINT, 174 TokenType.SMALLINT, 175 TokenType.USMALLINT, 176 TokenType.INT, 177 TokenType.UINT, 178 TokenType.BIGINT, 179 TokenType.UBIGINT, 180 TokenType.INT128, 181 TokenType.UINT128, 182 TokenType.INT256, 183 TokenType.UINT256, 184 TokenType.MEDIUMINT, 185 TokenType.UMEDIUMINT, 186 TokenType.FIXEDSTRING, 187 TokenType.FLOAT, 188 TokenType.DOUBLE, 189 TokenType.CHAR, 190 TokenType.NCHAR, 191 TokenType.VARCHAR, 192 TokenType.NVARCHAR, 193 TokenType.BPCHAR, 194 TokenType.TEXT, 195 TokenType.MEDIUMTEXT, 196 TokenType.LONGTEXT, 197 TokenType.MEDIUMBLOB, 198 TokenType.LONGBLOB, 199 TokenType.BINARY, 200 TokenType.VARBINARY, 201 TokenType.JSON, 202 TokenType.JSONB, 203 TokenType.INTERVAL, 204 TokenType.TINYBLOB, 205 TokenType.TINYTEXT, 206 TokenType.TIME, 207 TokenType.TIMETZ, 208 TokenType.TIMESTAMP, 209 TokenType.TIMESTAMP_S, 210 TokenType.TIMESTAMP_MS, 211 TokenType.TIMESTAMP_NS, 212 TokenType.TIMESTAMPTZ, 213 TokenType.TIMESTAMPLTZ, 214 TokenType.DATETIME, 215 TokenType.DATETIME64, 216 TokenType.DATE, 217 TokenType.DATE32, 218 TokenType.INT4RANGE, 219 TokenType.INT4MULTIRANGE, 220 TokenType.INT8RANGE, 221 TokenType.INT8MULTIRANGE, 222 TokenType.NUMRANGE, 223 TokenType.NUMMULTIRANGE, 224 TokenType.TSRANGE, 225 TokenType.TSMULTIRANGE, 226 TokenType.TSTZRANGE, 227 TokenType.TSTZMULTIRANGE, 228 TokenType.DATERANGE, 229 TokenType.DATEMULTIRANGE, 230 TokenType.DECIMAL, 231 TokenType.UDECIMAL, 232 TokenType.BIGDECIMAL, 233 TokenType.UUID, 234 TokenType.GEOGRAPHY, 235 TokenType.GEOMETRY, 236 TokenType.HLLSKETCH, 237 TokenType.HSTORE, 238 TokenType.PSEUDO_TYPE, 239 TokenType.SUPER, 240 TokenType.SERIAL, 241 TokenType.SMALLSERIAL, 242 TokenType.BIGSERIAL, 243 TokenType.XML, 244 TokenType.YEAR, 245 TokenType.UNIQUEIDENTIFIER, 246 TokenType.USERDEFINED, 247 
TokenType.MONEY, 248 TokenType.SMALLMONEY, 249 TokenType.ROWVERSION, 250 TokenType.IMAGE, 251 TokenType.VARIANT, 252 TokenType.OBJECT, 253 TokenType.OBJECT_IDENTIFIER, 254 TokenType.INET, 255 TokenType.IPADDRESS, 256 TokenType.IPPREFIX, 257 TokenType.IPV4, 258 TokenType.IPV6, 259 TokenType.UNKNOWN, 260 TokenType.NULL, 261 TokenType.NAME, 262 *ENUM_TYPE_TOKENS, 263 *NESTED_TYPE_TOKENS, 264 *AGGREGATE_TYPE_TOKENS, 265 } 266 267 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 268 TokenType.BIGINT: TokenType.UBIGINT, 269 TokenType.INT: TokenType.UINT, 270 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 271 TokenType.SMALLINT: TokenType.USMALLINT, 272 TokenType.TINYINT: TokenType.UTINYINT, 273 TokenType.DECIMAL: TokenType.UDECIMAL, 274 } 275 276 SUBQUERY_PREDICATES = { 277 TokenType.ANY: exp.Any, 278 TokenType.ALL: exp.All, 279 TokenType.EXISTS: exp.Exists, 280 TokenType.SOME: exp.Any, 281 } 282 283 RESERVED_TOKENS = { 284 *Tokenizer.SINGLE_TOKENS.values(), 285 TokenType.SELECT, 286 } 287 288 DB_CREATABLES = { 289 TokenType.DATABASE, 290 TokenType.SCHEMA, 291 TokenType.TABLE, 292 TokenType.VIEW, 293 TokenType.MODEL, 294 TokenType.DICTIONARY, 295 TokenType.SEQUENCE, 296 TokenType.STORAGE_INTEGRATION, 297 } 298 299 CREATABLES = { 300 TokenType.COLUMN, 301 TokenType.CONSTRAINT, 302 TokenType.FUNCTION, 303 TokenType.INDEX, 304 TokenType.PROCEDURE, 305 TokenType.FOREIGN_KEY, 306 *DB_CREATABLES, 307 } 308 309 # Tokens that can represent identifiers 310 ID_VAR_TOKENS = { 311 TokenType.VAR, 312 TokenType.ANTI, 313 TokenType.APPLY, 314 TokenType.ASC, 315 TokenType.AUTO_INCREMENT, 316 TokenType.BEGIN, 317 TokenType.BPCHAR, 318 TokenType.CACHE, 319 TokenType.CASE, 320 TokenType.COLLATE, 321 TokenType.COMMAND, 322 TokenType.COMMENT, 323 TokenType.COMMIT, 324 TokenType.CONSTRAINT, 325 TokenType.DEFAULT, 326 TokenType.DELETE, 327 TokenType.DESC, 328 TokenType.DESCRIBE, 329 TokenType.DICTIONARY, 330 TokenType.DIV, 331 TokenType.END, 332 TokenType.EXECUTE, 333 TokenType.ESCAPE, 334 TokenType.FALSE, 335 
TokenType.FIRST, 336 TokenType.FILTER, 337 TokenType.FINAL, 338 TokenType.FORMAT, 339 TokenType.FULL, 340 TokenType.IS, 341 TokenType.ISNULL, 342 TokenType.INTERVAL, 343 TokenType.KEEP, 344 TokenType.KILL, 345 TokenType.LEFT, 346 TokenType.LOAD, 347 TokenType.MERGE, 348 TokenType.NATURAL, 349 TokenType.NEXT, 350 TokenType.OFFSET, 351 TokenType.OPERATOR, 352 TokenType.ORDINALITY, 353 TokenType.OVERLAPS, 354 TokenType.OVERWRITE, 355 TokenType.PARTITION, 356 TokenType.PERCENT, 357 TokenType.PIVOT, 358 TokenType.PRAGMA, 359 TokenType.RANGE, 360 TokenType.RECURSIVE, 361 TokenType.REFERENCES, 362 TokenType.REFRESH, 363 TokenType.REPLACE, 364 TokenType.RIGHT, 365 TokenType.ROW, 366 TokenType.ROWS, 367 TokenType.SEMI, 368 TokenType.SET, 369 TokenType.SETTINGS, 370 TokenType.SHOW, 371 TokenType.TEMPORARY, 372 TokenType.TOP, 373 TokenType.TRUE, 374 TokenType.TRUNCATE, 375 TokenType.UNIQUE, 376 TokenType.UNPIVOT, 377 TokenType.UPDATE, 378 TokenType.USE, 379 TokenType.VOLATILE, 380 TokenType.WINDOW, 381 *CREATABLES, 382 *SUBQUERY_PREDICATES, 383 *TYPE_TOKENS, 384 *NO_PAREN_FUNCTIONS, 385 } 386 387 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 388 389 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 390 TokenType.ANTI, 391 TokenType.APPLY, 392 TokenType.ASOF, 393 TokenType.FULL, 394 TokenType.LEFT, 395 TokenType.LOCK, 396 TokenType.NATURAL, 397 TokenType.OFFSET, 398 TokenType.RIGHT, 399 TokenType.SEMI, 400 TokenType.WINDOW, 401 } 402 403 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 404 405 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 406 407 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 408 409 FUNC_TOKENS = { 410 TokenType.COLLATE, 411 TokenType.COMMAND, 412 TokenType.CURRENT_DATE, 413 TokenType.CURRENT_DATETIME, 414 TokenType.CURRENT_TIMESTAMP, 415 TokenType.CURRENT_TIME, 416 TokenType.CURRENT_USER, 417 TokenType.FILTER, 418 TokenType.FIRST, 419 TokenType.FORMAT, 420 TokenType.GLOB, 421 TokenType.IDENTIFIER, 422 TokenType.INDEX, 423 TokenType.ISNULL, 
424 TokenType.ILIKE, 425 TokenType.INSERT, 426 TokenType.LIKE, 427 TokenType.MERGE, 428 TokenType.OFFSET, 429 TokenType.PRIMARY_KEY, 430 TokenType.RANGE, 431 TokenType.REPLACE, 432 TokenType.RLIKE, 433 TokenType.ROW, 434 TokenType.UNNEST, 435 TokenType.VAR, 436 TokenType.LEFT, 437 TokenType.RIGHT, 438 TokenType.SEQUENCE, 439 TokenType.DATE, 440 TokenType.DATETIME, 441 TokenType.TABLE, 442 TokenType.TIMESTAMP, 443 TokenType.TIMESTAMPTZ, 444 TokenType.TRUNCATE, 445 TokenType.WINDOW, 446 TokenType.XOR, 447 *TYPE_TOKENS, 448 *SUBQUERY_PREDICATES, 449 } 450 451 CONJUNCTION = { 452 TokenType.AND: exp.And, 453 TokenType.OR: exp.Or, 454 } 455 456 EQUALITY = { 457 TokenType.COLON_EQ: exp.PropertyEQ, 458 TokenType.EQ: exp.EQ, 459 TokenType.NEQ: exp.NEQ, 460 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 461 } 462 463 COMPARISON = { 464 TokenType.GT: exp.GT, 465 TokenType.GTE: exp.GTE, 466 TokenType.LT: exp.LT, 467 TokenType.LTE: exp.LTE, 468 } 469 470 BITWISE = { 471 TokenType.AMP: exp.BitwiseAnd, 472 TokenType.CARET: exp.BitwiseXor, 473 TokenType.PIPE: exp.BitwiseOr, 474 } 475 476 TERM = { 477 TokenType.DASH: exp.Sub, 478 TokenType.PLUS: exp.Add, 479 TokenType.MOD: exp.Mod, 480 TokenType.COLLATE: exp.Collate, 481 } 482 483 FACTOR = { 484 TokenType.DIV: exp.IntDiv, 485 TokenType.LR_ARROW: exp.Distance, 486 TokenType.SLASH: exp.Div, 487 TokenType.STAR: exp.Mul, 488 } 489 490 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 491 492 TIMES = { 493 TokenType.TIME, 494 TokenType.TIMETZ, 495 } 496 497 TIMESTAMPS = { 498 TokenType.TIMESTAMP, 499 TokenType.TIMESTAMPTZ, 500 TokenType.TIMESTAMPLTZ, 501 *TIMES, 502 } 503 504 SET_OPERATIONS = { 505 TokenType.UNION, 506 TokenType.INTERSECT, 507 TokenType.EXCEPT, 508 } 509 510 JOIN_METHODS = { 511 TokenType.ASOF, 512 TokenType.NATURAL, 513 TokenType.POSITIONAL, 514 } 515 516 JOIN_SIDES = { 517 TokenType.LEFT, 518 TokenType.RIGHT, 519 TokenType.FULL, 520 } 521 522 JOIN_KINDS = { 523 TokenType.INNER, 524 TokenType.OUTER, 525 TokenType.CROSS, 
526 TokenType.SEMI, 527 TokenType.ANTI, 528 } 529 530 JOIN_HINTS: t.Set[str] = set() 531 532 LAMBDAS = { 533 TokenType.ARROW: lambda self, expressions: self.expression( 534 exp.Lambda, 535 this=self._replace_lambda( 536 self._parse_conjunction(), 537 {node.name for node in expressions}, 538 ), 539 expressions=expressions, 540 ), 541 TokenType.FARROW: lambda self, expressions: self.expression( 542 exp.Kwarg, 543 this=exp.var(expressions[0].name), 544 expression=self._parse_conjunction(), 545 ), 546 } 547 548 COLUMN_OPERATORS = { 549 TokenType.DOT: None, 550 TokenType.DCOLON: lambda self, this, to: self.expression( 551 exp.Cast if self.STRICT_CAST else exp.TryCast, 552 this=this, 553 to=to, 554 ), 555 TokenType.ARROW: lambda self, this, path: self.expression( 556 exp.JSONExtract, 557 this=this, 558 expression=self.dialect.to_json_path(path), 559 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 560 ), 561 TokenType.DARROW: lambda self, this, path: self.expression( 562 exp.JSONExtractScalar, 563 this=this, 564 expression=self.dialect.to_json_path(path), 565 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 566 ), 567 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 568 exp.JSONBExtract, 569 this=this, 570 expression=path, 571 ), 572 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 573 exp.JSONBExtractScalar, 574 this=this, 575 expression=path, 576 ), 577 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 578 exp.JSONBContains, 579 this=this, 580 expression=key, 581 ), 582 } 583 584 EXPRESSION_PARSERS = { 585 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 586 exp.Column: lambda self: self._parse_column(), 587 exp.Condition: lambda self: self._parse_conjunction(), 588 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 589 exp.Expression: lambda self: self._parse_expression(), 590 exp.From: lambda self: self._parse_from(), 591 exp.Group: lambda self: self._parse_group(), 
592 exp.Having: lambda self: self._parse_having(), 593 exp.Identifier: lambda self: self._parse_id_var(), 594 exp.Join: lambda self: self._parse_join(), 595 exp.Lambda: lambda self: self._parse_lambda(), 596 exp.Lateral: lambda self: self._parse_lateral(), 597 exp.Limit: lambda self: self._parse_limit(), 598 exp.Offset: lambda self: self._parse_offset(), 599 exp.Order: lambda self: self._parse_order(), 600 exp.Ordered: lambda self: self._parse_ordered(), 601 exp.Properties: lambda self: self._parse_properties(), 602 exp.Qualify: lambda self: self._parse_qualify(), 603 exp.Returning: lambda self: self._parse_returning(), 604 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 605 exp.Table: lambda self: self._parse_table_parts(), 606 exp.TableAlias: lambda self: self._parse_table_alias(), 607 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 608 exp.Where: lambda self: self._parse_where(), 609 exp.Window: lambda self: self._parse_named_window(), 610 exp.With: lambda self: self._parse_with(), 611 "JOIN_TYPE": lambda self: self._parse_join_parts(), 612 } 613 614 STATEMENT_PARSERS = { 615 TokenType.ALTER: lambda self: self._parse_alter(), 616 TokenType.BEGIN: lambda self: self._parse_transaction(), 617 TokenType.CACHE: lambda self: self._parse_cache(), 618 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 619 TokenType.COMMENT: lambda self: self._parse_comment(), 620 TokenType.CREATE: lambda self: self._parse_create(), 621 TokenType.DELETE: lambda self: self._parse_delete(), 622 TokenType.DESC: lambda self: self._parse_describe(), 623 TokenType.DESCRIBE: lambda self: self._parse_describe(), 624 TokenType.DROP: lambda self: self._parse_drop(), 625 TokenType.INSERT: lambda self: self._parse_insert(), 626 TokenType.KILL: lambda self: self._parse_kill(), 627 TokenType.LOAD: lambda self: self._parse_load(), 628 TokenType.MERGE: lambda self: self._parse_merge(), 629 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 
630 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 631 TokenType.REFRESH: lambda self: self._parse_refresh(), 632 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 633 TokenType.SET: lambda self: self._parse_set(), 634 TokenType.UNCACHE: lambda self: self._parse_uncache(), 635 TokenType.UPDATE: lambda self: self._parse_update(), 636 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 637 TokenType.USE: lambda self: self.expression( 638 exp.Use, 639 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 640 this=self._parse_table(schema=False), 641 ), 642 } 643 644 UNARY_PARSERS = { 645 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 646 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 647 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 648 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 649 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 650 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 651 } 652 653 STRING_PARSERS = { 654 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 655 exp.RawString, this=token.text 656 ), 657 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 658 exp.National, this=token.text 659 ), 660 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 661 TokenType.STRING: lambda self, token: self.expression( 662 exp.Literal, this=token.text, is_string=True 663 ), 664 TokenType.UNICODE_STRING: lambda self, token: self.expression( 665 exp.UnicodeString, 666 this=token.text, 667 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 668 ), 669 } 670 671 NUMERIC_PARSERS = { 672 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 673 
TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 674 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 675 TokenType.NUMBER: lambda self, token: self.expression( 676 exp.Literal, this=token.text, is_string=False 677 ), 678 } 679 680 PRIMARY_PARSERS = { 681 **STRING_PARSERS, 682 **NUMERIC_PARSERS, 683 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 684 TokenType.NULL: lambda self, _: self.expression(exp.Null), 685 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 686 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 687 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 688 TokenType.STAR: lambda self, _: self.expression( 689 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 690 ), 691 } 692 693 PLACEHOLDER_PARSERS = { 694 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 695 TokenType.PARAMETER: lambda self: self._parse_parameter(), 696 TokenType.COLON: lambda self: ( 697 self.expression(exp.Placeholder, this=self._prev.text) 698 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 699 else None 700 ), 701 } 702 703 RANGE_PARSERS = { 704 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 705 TokenType.GLOB: binary_range_parser(exp.Glob), 706 TokenType.ILIKE: binary_range_parser(exp.ILike), 707 TokenType.IN: lambda self, this: self._parse_in(this), 708 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 709 TokenType.IS: lambda self, this: self._parse_is(this), 710 TokenType.LIKE: binary_range_parser(exp.Like), 711 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 712 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 713 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 714 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 715 } 716 717 PROPERTY_PARSERS: t.Dict[str, 
t.Callable] = { 718 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 719 "AUTO": lambda self: self._parse_auto_property(), 720 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 721 "BACKUP": lambda self: self.expression( 722 exp.BackupProperty, this=self._parse_var(any_token=True) 723 ), 724 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 725 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 726 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 727 "CHECKSUM": lambda self: self._parse_checksum(), 728 "CLUSTER BY": lambda self: self._parse_cluster(), 729 "CLUSTERED": lambda self: self._parse_clustered_by(), 730 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 731 exp.CollateProperty, **kwargs 732 ), 733 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 734 "CONTAINS": lambda self: self._parse_contains_property(), 735 "COPY": lambda self: self._parse_copy_property(), 736 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 737 "DEFINER": lambda self: self._parse_definer(), 738 "DETERMINISTIC": lambda self: self.expression( 739 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 740 ), 741 "DISTKEY": lambda self: self._parse_distkey(), 742 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 743 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 744 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 745 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 746 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 747 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 748 "FREESPACE": lambda self: self._parse_freespace(), 749 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 750 "HEAP": lambda self: 
self.expression(exp.HeapProperty), 751 "IMMUTABLE": lambda self: self.expression( 752 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 753 ), 754 "INHERITS": lambda self: self.expression( 755 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 756 ), 757 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 758 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 759 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 760 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 761 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 762 "LIKE": lambda self: self._parse_create_like(), 763 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 764 "LOCK": lambda self: self._parse_locking(), 765 "LOCKING": lambda self: self._parse_locking(), 766 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 767 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 768 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 769 "MODIFIES": lambda self: self._parse_modifies_property(), 770 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 771 "NO": lambda self: self._parse_no_property(), 772 "ON": lambda self: self._parse_on_property(), 773 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 774 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 775 "PARTITION": lambda self: self._parse_partitioned_of(), 776 "PARTITION BY": lambda self: self._parse_partitioned_by(), 777 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 778 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 779 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 780 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 781 "READS": lambda self: self._parse_reads_property(), 782 "REMOTE": lambda 
self: self._parse_remote_with_connection(), 783 "RETURNS": lambda self: self._parse_returns(), 784 "ROW": lambda self: self._parse_row(), 785 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 786 "SAMPLE": lambda self: self.expression( 787 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 788 ), 789 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 790 "SETTINGS": lambda self: self.expression( 791 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 792 ), 793 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 794 "SORTKEY": lambda self: self._parse_sortkey(), 795 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 796 "STABLE": lambda self: self.expression( 797 exp.StabilityProperty, this=exp.Literal.string("STABLE") 798 ), 799 "STORED": lambda self: self._parse_stored(), 800 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 801 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 802 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 803 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 804 "TO": lambda self: self._parse_to_table(), 805 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 806 "TRANSFORM": lambda self: self.expression( 807 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 808 ), 809 "TTL": lambda self: self._parse_ttl(), 810 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 811 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 812 "VOLATILE": lambda self: self._parse_volatile_property(), 813 "WITH": lambda self: self._parse_with_property(), 814 } 815 816 CONSTRAINT_PARSERS = { 817 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 818 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 819 "CASESPECIFIC": lambda self: 
self.expression(exp.CaseSpecificColumnConstraint, not_=False), 820 "CHARACTER SET": lambda self: self.expression( 821 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 822 ), 823 "CHECK": lambda self: self.expression( 824 exp.CheckColumnConstraint, 825 this=self._parse_wrapped(self._parse_conjunction), 826 enforced=self._match_text_seq("ENFORCED"), 827 ), 828 "COLLATE": lambda self: self.expression( 829 exp.CollateColumnConstraint, this=self._parse_var() 830 ), 831 "COMMENT": lambda self: self.expression( 832 exp.CommentColumnConstraint, this=self._parse_string() 833 ), 834 "COMPRESS": lambda self: self._parse_compress(), 835 "CLUSTERED": lambda self: self.expression( 836 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 837 ), 838 "NONCLUSTERED": lambda self: self.expression( 839 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 840 ), 841 "DEFAULT": lambda self: self.expression( 842 exp.DefaultColumnConstraint, this=self._parse_bitwise() 843 ), 844 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 845 "EXCLUDE": lambda self: self.expression( 846 exp.ExcludeColumnConstraint, this=self._parse_index_params() 847 ), 848 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 849 "FORMAT": lambda self: self.expression( 850 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 851 ), 852 "GENERATED": lambda self: self._parse_generated_as_identity(), 853 "IDENTITY": lambda self: self._parse_auto_increment(), 854 "INLINE": lambda self: self._parse_inline(), 855 "LIKE": lambda self: self._parse_create_like(), 856 "NOT": lambda self: self._parse_not_constraint(), 857 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 858 "ON": lambda self: ( 859 self._match(TokenType.UPDATE) 860 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 861 ) 862 or self.expression(exp.OnProperty, 
this=self._parse_id_var()), 863 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 864 "PERIOD": lambda self: self._parse_period_for_system_time(), 865 "PRIMARY KEY": lambda self: self._parse_primary_key(), 866 "REFERENCES": lambda self: self._parse_references(match=False), 867 "TITLE": lambda self: self.expression( 868 exp.TitleColumnConstraint, this=self._parse_var_or_string() 869 ), 870 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 871 "UNIQUE": lambda self: self._parse_unique(), 872 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 873 "WITH": lambda self: self.expression( 874 exp.Properties, expressions=self._parse_wrapped_properties() 875 ), 876 } 877 878 ALTER_PARSERS = { 879 "ADD": lambda self: self._parse_alter_table_add(), 880 "ALTER": lambda self: self._parse_alter_table_alter(), 881 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 882 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 883 "DROP": lambda self: self._parse_alter_table_drop(), 884 "RENAME": lambda self: self._parse_alter_table_rename(), 885 } 886 887 SCHEMA_UNNAMED_CONSTRAINTS = { 888 "CHECK", 889 "EXCLUDE", 890 "FOREIGN KEY", 891 "LIKE", 892 "PERIOD", 893 "PRIMARY KEY", 894 "UNIQUE", 895 } 896 897 NO_PAREN_FUNCTION_PARSERS = { 898 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 899 "CASE": lambda self: self._parse_case(), 900 "IF": lambda self: self._parse_if(), 901 "NEXT": lambda self: self._parse_next_value_for(), 902 } 903 904 INVALID_FUNC_NAME_TOKENS = { 905 TokenType.IDENTIFIER, 906 TokenType.STRING, 907 } 908 909 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 910 911 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 912 913 FUNCTION_PARSERS = { 914 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 915 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 916 "DECODE": lambda 
self: self._parse_decode(), 917 "EXTRACT": lambda self: self._parse_extract(), 918 "JSON_OBJECT": lambda self: self._parse_json_object(), 919 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 920 "JSON_TABLE": lambda self: self._parse_json_table(), 921 "MATCH": lambda self: self._parse_match_against(), 922 "OPENJSON": lambda self: self._parse_open_json(), 923 "POSITION": lambda self: self._parse_position(), 924 "PREDICT": lambda self: self._parse_predict(), 925 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 926 "STRING_AGG": lambda self: self._parse_string_agg(), 927 "SUBSTRING": lambda self: self._parse_substring(), 928 "TRIM": lambda self: self._parse_trim(), 929 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 930 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 931 } 932 933 QUERY_MODIFIER_PARSERS = { 934 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 935 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 936 TokenType.WHERE: lambda self: ("where", self._parse_where()), 937 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 938 TokenType.HAVING: lambda self: ("having", self._parse_having()), 939 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 940 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 941 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 942 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 943 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 944 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 945 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 946 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 947 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 948 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 949 
TokenType.CLUSTER_BY: lambda self: ( 950 "cluster", 951 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 952 ), 953 TokenType.DISTRIBUTE_BY: lambda self: ( 954 "distribute", 955 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 956 ), 957 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 958 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 959 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 960 } 961 962 SET_PARSERS = { 963 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 964 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 965 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 966 "TRANSACTION": lambda self: self._parse_set_transaction(), 967 } 968 969 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 970 971 TYPE_LITERAL_PARSERS = { 972 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 973 } 974 975 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 976 977 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 978 979 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 980 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 981 "ISOLATION": ( 982 ("LEVEL", "REPEATABLE", "READ"), 983 ("LEVEL", "READ", "COMMITTED"), 984 ("LEVEL", "READ", "UNCOMITTED"), 985 ("LEVEL", "SERIALIZABLE"), 986 ), 987 "READ": ("WRITE", "ONLY"), 988 } 989 990 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 991 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 992 ) 993 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 994 995 CREATE_SEQUENCE: OPTIONS_TYPE = { 996 "SCALE": ("EXTEND", "NOEXTEND"), 997 "SHARD": ("EXTEND", "NOEXTEND"), 998 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 999 **dict.fromkeys( 1000 ( 1001 "SESSION", 1002 "GLOBAL", 1003 "KEEP", 1004 "NOKEEP", 1005 "ORDER", 1006 "NOORDER", 1007 "NOCACHE", 1008 "CYCLE", 
1009 "NOCYCLE", 1010 "NOMINVALUE", 1011 "NOMAXVALUE", 1012 "NOSCALE", 1013 "NOSHARD", 1014 ), 1015 tuple(), 1016 ), 1017 } 1018 1019 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1020 1021 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1022 1023 CLONE_KEYWORDS = {"CLONE", "COPY"} 1024 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1025 1026 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1027 1028 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1029 1030 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1031 1032 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1033 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1034 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1035 1036 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1037 1038 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1039 1040 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1041 1042 DISTINCT_TOKENS = {TokenType.DISTINCT} 1043 1044 NULL_TOKENS = {TokenType.NULL} 1045 1046 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1047 1048 STRICT_CAST = True 1049 1050 PREFIXED_PIVOT_COLUMNS = False 1051 IDENTIFY_PIVOT_STRINGS = False 1052 1053 LOG_DEFAULTS_TO_LN = False 1054 1055 # Whether ADD is present for each column added by ALTER TABLE 1056 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1057 1058 # Whether the table sample clause expects CSV syntax 1059 TABLESAMPLE_CSV = False 1060 1061 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1062 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1063 1064 # Whether the TRIM function expects the characters to trim as its first argument 1065 TRIM_PATTERN_FIRST = False 1066 1067 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1068 STRING_ALIASES = False 1069 1070 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1071 MODIFIERS_ATTACHED_TO_UNION = True 1072 UNION_MODIFIERS = {"order", "limit", "offset"} 1073 1074 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1075 NO_PAREN_IF_COMMANDS = True 1076 1077 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1078 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1079 1080 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1081 # If this is True and '(' is not found, the keyword will be treated as an identifier 1082 VALUES_FOLLOWED_BY_PAREN = True 1083 1084 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1085 SUPPORTS_IMPLICIT_UNNEST = False 1086 1087 __slots__ = ( 1088 "error_level", 1089 "error_message_context", 1090 "max_errors", 1091 "dialect", 1092 "sql", 1093 "errors", 1094 "_tokens", 1095 "_index", 1096 "_curr", 1097 "_next", 1098 "_prev", 1099 "_prev_comments", 1100 ) 1101 1102 # Autofilled 1103 SHOW_TRIE: t.Dict = {} 1104 SET_TRIE: t.Dict = {} 1105 1106 def __init__( 1107 self, 1108 error_level: t.Optional[ErrorLevel] = None, 1109 error_message_context: int = 100, 1110 max_errors: int = 3, 1111 dialect: DialectType = None, 1112 ): 1113 from sqlglot.dialects import Dialect 1114 1115 self.error_level = error_level or ErrorLevel.IMMEDIATE 1116 self.error_message_context = error_message_context 1117 self.max_errors = max_errors 1118 self.dialect = Dialect.get_or_raise(dialect) 1119 self.reset() 1120 1121 def reset(self): 1122 self.sql = "" 1123 self.errors = [] 1124 self._tokens = [] 1125 self._index = 0 1126 self._curr = None 1127 self._next = None 1128 self._prev = None 1129 self._prev_comments = None 1130 1131 def parse( 1132 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1133 ) -> t.List[t.Optional[exp.Expression]]: 1134 """ 1135 Parses a list of tokens and returns a list of syntax trees, one tree 1136 per parsed SQL statement. 1137 1138 Args: 1139 raw_tokens: The list of tokens. 1140 sql: The original SQL string, used to produce helpful debug messages. 1141 1142 Returns: 1143 The list of the produced syntax trees. 1144 """ 1145 return self._parse( 1146 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1147 ) 1148 1149 def parse_into( 1150 self, 1151 expression_types: exp.IntoType, 1152 raw_tokens: t.List[Token], 1153 sql: t.Optional[str] = None, 1154 ) -> t.List[t.Optional[exp.Expression]]: 1155 """ 1156 Parses a list of tokens into a given Expression type. 
If a collection of Expression 1157 types is given instead, this method will try to parse the token list into each one 1158 of them, stopping at the first for which the parsing succeeds. 1159 1160 Args: 1161 expression_types: The expression type(s) to try and parse the token list into. 1162 raw_tokens: The list of tokens. 1163 sql: The original SQL string, used to produce helpful debug messages. 1164 1165 Returns: 1166 The target Expression. 1167 """ 1168 errors = [] 1169 for expression_type in ensure_list(expression_types): 1170 parser = self.EXPRESSION_PARSERS.get(expression_type) 1171 if not parser: 1172 raise TypeError(f"No parser registered for {expression_type}") 1173 1174 try: 1175 return self._parse(parser, raw_tokens, sql) 1176 except ParseError as e: 1177 e.errors[0]["into_expression"] = expression_type 1178 errors.append(e) 1179 1180 raise ParseError( 1181 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1182 errors=merge_errors(errors), 1183 ) from errors[-1] 1184 1185 def _parse( 1186 self, 1187 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1188 raw_tokens: t.List[Token], 1189 sql: t.Optional[str] = None, 1190 ) -> t.List[t.Optional[exp.Expression]]: 1191 self.reset() 1192 self.sql = sql or "" 1193 1194 total = len(raw_tokens) 1195 chunks: t.List[t.List[Token]] = [[]] 1196 1197 for i, token in enumerate(raw_tokens): 1198 if token.token_type == TokenType.SEMICOLON: 1199 if i < total - 1: 1200 chunks.append([]) 1201 else: 1202 chunks[-1].append(token) 1203 1204 expressions = [] 1205 1206 for tokens in chunks: 1207 self._index = -1 1208 self._tokens = tokens 1209 self._advance() 1210 1211 expressions.append(parse_method(self)) 1212 1213 if self._index < len(self._tokens): 1214 self.raise_error("Invalid expression / Unexpected token") 1215 1216 self.check_errors() 1217 1218 return expressions 1219 1220 def check_errors(self) -> None: 1221 """Logs or raises any found errors, depending on the chosen error level setting.""" 
1222 if self.error_level == ErrorLevel.WARN: 1223 for error in self.errors: 1224 logger.error(str(error)) 1225 elif self.error_level == ErrorLevel.RAISE and self.errors: 1226 raise ParseError( 1227 concat_messages(self.errors, self.max_errors), 1228 errors=merge_errors(self.errors), 1229 ) 1230 1231 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1232 """ 1233 Appends an error in the list of recorded errors or raises it, depending on the chosen 1234 error level setting. 1235 """ 1236 token = token or self._curr or self._prev or Token.string("") 1237 start = token.start 1238 end = token.end + 1 1239 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1240 highlight = self.sql[start:end] 1241 end_context = self.sql[end : end + self.error_message_context] 1242 1243 error = ParseError.new( 1244 f"{message}. Line {token.line}, Col: {token.col}.\n" 1245 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1246 description=message, 1247 line=token.line, 1248 col=token.col, 1249 start_context=start_context, 1250 highlight=highlight, 1251 end_context=end_context, 1252 ) 1253 1254 if self.error_level == ErrorLevel.IMMEDIATE: 1255 raise error 1256 1257 self.errors.append(error) 1258 1259 def expression( 1260 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1261 ) -> E: 1262 """ 1263 Creates a new, validated Expression. 1264 1265 Args: 1266 exp_class: The expression class to instantiate. 1267 comments: An optional list of comments to attach to the expression. 1268 kwargs: The arguments to set for the expression along with their respective values. 1269 1270 Returns: 1271 The target expression. 
1272 """ 1273 instance = exp_class(**kwargs) 1274 instance.add_comments(comments) if comments else self._add_comments(instance) 1275 return self.validate_expression(instance) 1276 1277 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1278 if expression and self._prev_comments: 1279 expression.add_comments(self._prev_comments) 1280 self._prev_comments = None 1281 1282 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1283 """ 1284 Validates an Expression, making sure that all its mandatory arguments are set. 1285 1286 Args: 1287 expression: The expression to validate. 1288 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1289 1290 Returns: 1291 The validated expression. 1292 """ 1293 if self.error_level != ErrorLevel.IGNORE: 1294 for error_message in expression.error_messages(args): 1295 self.raise_error(error_message) 1296 1297 return expression 1298 1299 def _find_sql(self, start: Token, end: Token) -> str: 1300 return self.sql[start.start : end.end + 1] 1301 1302 def _is_connected(self) -> bool: 1303 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1304 1305 def _advance(self, times: int = 1) -> None: 1306 self._index += times 1307 self._curr = seq_get(self._tokens, self._index) 1308 self._next = seq_get(self._tokens, self._index + 1) 1309 1310 if self._index > 0: 1311 self._prev = self._tokens[self._index - 1] 1312 self._prev_comments = self._prev.comments 1313 else: 1314 self._prev = None 1315 self._prev_comments = None 1316 1317 def _retreat(self, index: int) -> None: 1318 if index != self._index: 1319 self._advance(index - self._index) 1320 1321 def _warn_unsupported(self) -> None: 1322 if len(self._tokens) <= 1: 1323 return 1324 1325 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1326 # interested in emitting a warning for the one being currently processed. 
1327 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1328 1329 logger.warning( 1330 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1331 ) 1332 1333 def _parse_command(self) -> exp.Command: 1334 self._warn_unsupported() 1335 return self.expression( 1336 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1337 ) 1338 1339 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1340 start = self._prev 1341 exists = self._parse_exists() if allow_exists else None 1342 1343 self._match(TokenType.ON) 1344 1345 kind = self._match_set(self.CREATABLES) and self._prev 1346 if not kind: 1347 return self._parse_as_command(start) 1348 1349 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1350 this = self._parse_user_defined_function(kind=kind.token_type) 1351 elif kind.token_type == TokenType.TABLE: 1352 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1353 elif kind.token_type == TokenType.COLUMN: 1354 this = self._parse_column() 1355 else: 1356 this = self._parse_id_var() 1357 1358 self._match(TokenType.IS) 1359 1360 return self.expression( 1361 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1362 ) 1363 1364 def _parse_to_table( 1365 self, 1366 ) -> exp.ToTableProperty: 1367 table = self._parse_table_parts(schema=True) 1368 return self.expression(exp.ToTableProperty, this=table) 1369 1370 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1371 def _parse_ttl(self) -> exp.Expression: 1372 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1373 this = self._parse_bitwise() 1374 1375 if self._match_text_seq("DELETE"): 1376 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1377 if self._match_text_seq("RECOMPRESS"): 1378 return self.expression( 1379 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1380 ) 
1381 if self._match_text_seq("TO", "DISK"): 1382 return self.expression( 1383 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1384 ) 1385 if self._match_text_seq("TO", "VOLUME"): 1386 return self.expression( 1387 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1388 ) 1389 1390 return this 1391 1392 expressions = self._parse_csv(_parse_ttl_action) 1393 where = self._parse_where() 1394 group = self._parse_group() 1395 1396 aggregates = None 1397 if group and self._match(TokenType.SET): 1398 aggregates = self._parse_csv(self._parse_set_item) 1399 1400 return self.expression( 1401 exp.MergeTreeTTL, 1402 expressions=expressions, 1403 where=where, 1404 group=group, 1405 aggregates=aggregates, 1406 ) 1407 1408 def _parse_statement(self) -> t.Optional[exp.Expression]: 1409 if self._curr is None: 1410 return None 1411 1412 if self._match_set(self.STATEMENT_PARSERS): 1413 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1414 1415 if self._match_set(Tokenizer.COMMANDS): 1416 return self._parse_command() 1417 1418 expression = self._parse_expression() 1419 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1420 return self._parse_query_modifiers(expression) 1421 1422 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1423 start = self._prev 1424 temporary = self._match(TokenType.TEMPORARY) 1425 materialized = self._match_text_seq("MATERIALIZED") 1426 1427 kind = self._match_set(self.CREATABLES) and self._prev.text 1428 if not kind: 1429 return self._parse_as_command(start) 1430 1431 return self.expression( 1432 exp.Drop, 1433 comments=start.comments, 1434 exists=exists or self._parse_exists(), 1435 this=self._parse_table( 1436 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1437 ), 1438 kind=kind, 1439 temporary=temporary, 1440 materialized=materialized, 1441 cascade=self._match_text_seq("CASCADE"), 1442 
constraints=self._match_text_seq("CONSTRAINTS"), 1443 purge=self._match_text_seq("PURGE"), 1444 ) 1445 1446 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1447 return ( 1448 self._match_text_seq("IF") 1449 and (not not_ or self._match(TokenType.NOT)) 1450 and self._match(TokenType.EXISTS) 1451 ) 1452 1453 def _parse_create(self) -> exp.Create | exp.Command: 1454 # Note: this can't be None because we've matched a statement parser 1455 start = self._prev 1456 comments = self._prev_comments 1457 1458 replace = ( 1459 start.token_type == TokenType.REPLACE 1460 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1461 or self._match_pair(TokenType.OR, TokenType.ALTER) 1462 ) 1463 1464 unique = self._match(TokenType.UNIQUE) 1465 1466 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1467 self._advance() 1468 1469 properties = None 1470 create_token = self._match_set(self.CREATABLES) and self._prev 1471 1472 if not create_token: 1473 # exp.Properties.Location.POST_CREATE 1474 properties = self._parse_properties() 1475 create_token = self._match_set(self.CREATABLES) and self._prev 1476 1477 if not properties or not create_token: 1478 return self._parse_as_command(start) 1479 1480 exists = self._parse_exists(not_=True) 1481 this = None 1482 expression: t.Optional[exp.Expression] = None 1483 indexes = None 1484 no_schema_binding = None 1485 begin = None 1486 end = None 1487 clone = None 1488 1489 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1490 nonlocal properties 1491 if properties and temp_props: 1492 properties.expressions.extend(temp_props.expressions) 1493 elif temp_props: 1494 properties = temp_props 1495 1496 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1497 this = self._parse_user_defined_function(kind=create_token.token_type) 1498 1499 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1500 extend_props(self._parse_properties()) 1501 1502 
expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1503 1504 if not expression: 1505 if self._match(TokenType.COMMAND): 1506 expression = self._parse_as_command(self._prev) 1507 else: 1508 begin = self._match(TokenType.BEGIN) 1509 return_ = self._match_text_seq("RETURN") 1510 1511 if self._match(TokenType.STRING, advance=False): 1512 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1513 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1514 expression = self._parse_string() 1515 extend_props(self._parse_properties()) 1516 else: 1517 expression = self._parse_statement() 1518 1519 end = self._match_text_seq("END") 1520 1521 if return_: 1522 expression = self.expression(exp.Return, this=expression) 1523 elif create_token.token_type == TokenType.INDEX: 1524 this = self._parse_index(index=self._parse_id_var()) 1525 elif create_token.token_type in self.DB_CREATABLES: 1526 table_parts = self._parse_table_parts( 1527 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1528 ) 1529 1530 # exp.Properties.Location.POST_NAME 1531 self._match(TokenType.COMMA) 1532 extend_props(self._parse_properties(before=True)) 1533 1534 this = self._parse_schema(this=table_parts) 1535 1536 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1537 extend_props(self._parse_properties()) 1538 1539 self._match(TokenType.ALIAS) 1540 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1541 # exp.Properties.Location.POST_ALIAS 1542 extend_props(self._parse_properties()) 1543 1544 if create_token.token_type == TokenType.SEQUENCE: 1545 expression = self._parse_types() 1546 extend_props(self._parse_properties()) 1547 else: 1548 expression = self._parse_ddl_select() 1549 1550 if create_token.token_type == TokenType.TABLE: 1551 # exp.Properties.Location.POST_EXPRESSION 1552 extend_props(self._parse_properties()) 1553 1554 indexes = [] 1555 while True: 
1556 index = self._parse_index() 1557 1558 # exp.Properties.Location.POST_INDEX 1559 extend_props(self._parse_properties()) 1560 1561 if not index: 1562 break 1563 else: 1564 self._match(TokenType.COMMA) 1565 indexes.append(index) 1566 elif create_token.token_type == TokenType.VIEW: 1567 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1568 no_schema_binding = True 1569 1570 shallow = self._match_text_seq("SHALLOW") 1571 1572 if self._match_texts(self.CLONE_KEYWORDS): 1573 copy = self._prev.text.lower() == "copy" 1574 clone = self.expression( 1575 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1576 ) 1577 1578 if self._curr: 1579 return self._parse_as_command(start) 1580 1581 return self.expression( 1582 exp.Create, 1583 comments=comments, 1584 this=this, 1585 kind=create_token.text.upper(), 1586 replace=replace, 1587 unique=unique, 1588 expression=expression, 1589 exists=exists, 1590 properties=properties, 1591 indexes=indexes, 1592 no_schema_binding=no_schema_binding, 1593 begin=begin, 1594 end=end, 1595 clone=clone, 1596 ) 1597 1598 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1599 seq = exp.SequenceProperties() 1600 1601 options = [] 1602 index = self._index 1603 1604 while self._curr: 1605 if self._match_text_seq("INCREMENT"): 1606 self._match_text_seq("BY") 1607 self._match_text_seq("=") 1608 seq.set("increment", self._parse_term()) 1609 elif self._match_text_seq("MINVALUE"): 1610 seq.set("minvalue", self._parse_term()) 1611 elif self._match_text_seq("MAXVALUE"): 1612 seq.set("maxvalue", self._parse_term()) 1613 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1614 self._match_text_seq("=") 1615 seq.set("start", self._parse_term()) 1616 elif self._match_text_seq("CACHE"): 1617 # T-SQL allows empty CACHE which is initialized dynamically 1618 seq.set("cache", self._parse_number() or True) 1619 elif self._match_text_seq("OWNED", "BY"): 1620 # "OWNED BY NONE" is the 
default 1621 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1622 else: 1623 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1624 if opt: 1625 options.append(opt) 1626 else: 1627 break 1628 1629 seq.set("options", options if options else None) 1630 return None if self._index == index else seq 1631 1632 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1633 # only used for teradata currently 1634 self._match(TokenType.COMMA) 1635 1636 kwargs = { 1637 "no": self._match_text_seq("NO"), 1638 "dual": self._match_text_seq("DUAL"), 1639 "before": self._match_text_seq("BEFORE"), 1640 "default": self._match_text_seq("DEFAULT"), 1641 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1642 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1643 "after": self._match_text_seq("AFTER"), 1644 "minimum": self._match_texts(("MIN", "MINIMUM")), 1645 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1646 } 1647 1648 if self._match_texts(self.PROPERTY_PARSERS): 1649 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1650 try: 1651 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1652 except TypeError: 1653 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1654 1655 return None 1656 1657 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1658 return self._parse_wrapped_csv(self._parse_property) 1659 1660 def _parse_property(self) -> t.Optional[exp.Expression]: 1661 if self._match_texts(self.PROPERTY_PARSERS): 1662 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1663 1664 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1665 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1666 1667 if self._match_text_seq("COMPOUND", "SORTKEY"): 1668 return self._parse_sortkey(compound=True) 1669 1670 if self._match_text_seq("SQL", "SECURITY"): 1671 return self.expression(exp.SqlSecurityProperty, 
definer=self._match_text_seq("DEFINER")) 1672 1673 index = self._index 1674 key = self._parse_column() 1675 1676 if not self._match(TokenType.EQ): 1677 self._retreat(index) 1678 return self._parse_sequence_properties() 1679 1680 return self.expression( 1681 exp.Property, 1682 this=key.to_dot() if isinstance(key, exp.Column) else key, 1683 value=self._parse_column() or self._parse_var(any_token=True), 1684 ) 1685 1686 def _parse_stored(self) -> exp.FileFormatProperty: 1687 self._match(TokenType.ALIAS) 1688 1689 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1690 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1691 1692 return self.expression( 1693 exp.FileFormatProperty, 1694 this=( 1695 self.expression( 1696 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1697 ) 1698 if input_format or output_format 1699 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1700 ), 1701 ) 1702 1703 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1704 self._match(TokenType.EQ) 1705 self._match(TokenType.ALIAS) 1706 return self.expression(exp_class, this=self._parse_field(), **kwargs) 1707 1708 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1709 properties = [] 1710 while True: 1711 if before: 1712 prop = self._parse_property_before() 1713 else: 1714 prop = self._parse_property() 1715 if not prop: 1716 break 1717 for p in ensure_list(prop): 1718 properties.append(p) 1719 1720 if properties: 1721 return self.expression(exp.Properties, expressions=properties) 1722 1723 return None 1724 1725 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1726 return self.expression( 1727 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1728 ) 1729 1730 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1731 if 
self._index >= 2: 1732 pre_volatile_token = self._tokens[self._index - 2] 1733 else: 1734 pre_volatile_token = None 1735 1736 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1737 return exp.VolatileProperty() 1738 1739 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1740 1741 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1742 self._match_pair(TokenType.EQ, TokenType.ON) 1743 1744 prop = self.expression(exp.WithSystemVersioningProperty) 1745 if self._match(TokenType.L_PAREN): 1746 self._match_text_seq("HISTORY_TABLE", "=") 1747 prop.set("this", self._parse_table_parts()) 1748 1749 if self._match(TokenType.COMMA): 1750 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1751 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1752 1753 self._match_r_paren() 1754 1755 return prop 1756 1757 def _parse_with_property( 1758 self, 1759 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1760 if self._match(TokenType.L_PAREN, advance=False): 1761 return self._parse_wrapped_properties() 1762 1763 if self._match_text_seq("JOURNAL"): 1764 return self._parse_withjournaltable() 1765 1766 if self._match_text_seq("DATA"): 1767 return self._parse_withdata(no=False) 1768 elif self._match_text_seq("NO", "DATA"): 1769 return self._parse_withdata(no=True) 1770 1771 if not self._next: 1772 return None 1773 1774 return self._parse_withisolatedloading() 1775 1776 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1777 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1778 self._match(TokenType.EQ) 1779 1780 user = self._parse_id_var() 1781 self._match(TokenType.PARAMETER) 1782 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1783 1784 if not user or not host: 1785 return None 1786 1787 return exp.DefinerProperty(this=f"{user}@{host}") 1788 1789 def _parse_withjournaltable(self) -> 
exp.WithJournalTableProperty: 1790 self._match(TokenType.TABLE) 1791 self._match(TokenType.EQ) 1792 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1793 1794 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1795 return self.expression(exp.LogProperty, no=no) 1796 1797 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1798 return self.expression(exp.JournalProperty, **kwargs) 1799 1800 def _parse_checksum(self) -> exp.ChecksumProperty: 1801 self._match(TokenType.EQ) 1802 1803 on = None 1804 if self._match(TokenType.ON): 1805 on = True 1806 elif self._match_text_seq("OFF"): 1807 on = False 1808 1809 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1810 1811 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1812 return self.expression( 1813 exp.Cluster, 1814 expressions=( 1815 self._parse_wrapped_csv(self._parse_ordered) 1816 if wrapped 1817 else self._parse_csv(self._parse_ordered) 1818 ), 1819 ) 1820 1821 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1822 self._match_text_seq("BY") 1823 1824 self._match_l_paren() 1825 expressions = self._parse_csv(self._parse_column) 1826 self._match_r_paren() 1827 1828 if self._match_text_seq("SORTED", "BY"): 1829 self._match_l_paren() 1830 sorted_by = self._parse_csv(self._parse_ordered) 1831 self._match_r_paren() 1832 else: 1833 sorted_by = None 1834 1835 self._match(TokenType.INTO) 1836 buckets = self._parse_number() 1837 self._match_text_seq("BUCKETS") 1838 1839 return self.expression( 1840 exp.ClusteredByProperty, 1841 expressions=expressions, 1842 sorted_by=sorted_by, 1843 buckets=buckets, 1844 ) 1845 1846 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1847 if not self._match_text_seq("GRANTS"): 1848 self._retreat(self._index - 1) 1849 return None 1850 1851 return self.expression(exp.CopyGrantsProperty) 1852 1853 def _parse_freespace(self) -> exp.FreespaceProperty: 1854 
    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        """Parse a MERGEBLOCKRATIO property: either `= <number> [PERCENT]` or the
        bare NO/DEFAULT flag form (flags are supplied by the caller)."""
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        """Parse a DATABLOCKSIZE property: optional `=`, a number, and an optional
        unit keyword (BYTES/KBYTES/KILOBYTES). The DEFAULT/MINIMUM/MAXIMUM flags
        come from the caller, which has already consumed those keywords."""
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parse a BLOCKCOMPRESSION property: one of ALWAYS/MANUAL/NEVER/DEFAULT,
        optionally followed by AUTOTEMP with a schema of options."""
        self._match(TokenType.EQ)
        # Each _match_text_seq both tests and consumes; at most one of these
        # should match for well-formed input.
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parse a WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]
        property (the leading WITH was consumed by the caller)."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING property: an optional object kind (DATABASE/TABLE/VIEW/ROW),
        an optional object name, FOR/IN, a lock type, and an optional OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects (not ROW) carry a dotted name after the kind keyword.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            # EXCL is normalized to its long form.
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        """Parse `PARTITION BY <expr>, ...`; returns [] when the clause is absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: `IN (...)`, `FROM (...) TO (...)`, or
        `WITH (MODULUS n, REMAINDER m)`. Raises via raise_error otherwise."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not ordinary expressions.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")

            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse `PARTITION OF <table> {DEFAULT | FOR VALUES <bound-spec>}`.

        The caller has already consumed PARTITION; if OF does not follow, the
        cursor is retreated one token and None is returned so other PARTITION
        forms can be tried.
        """
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        """Parse a PARTITIONED BY property: either a column schema or a single
        (possibly bracketed) field expression."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parse the tail of WITH [NO] DATA, i.e. an optional
        `AND [NO] STATISTICS`; `no` reflects what the caller already consumed."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `CONTAINS SQL` (CONTAINS already consumed); None if SQL absent."""
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `MODIFIES SQL DATA` (MODIFIES already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        """Parse NO-prefixed properties: NO PRIMARY INDEX or NO SQL."""
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON-prefixed properties: ON COMMIT PRESERVE/DELETE ROWS, or a
        generic ON <identifier>[(...)] property."""
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        """Parse `READS SQL DATA` (READS already consumed)."""
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        """Parse a DISTKEY property: a parenthesized identifier."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [INCLUDING|EXCLUDING <option>]...`.

        Returns None if an INCLUDING/EXCLUDING keyword is not followed by an
        identifier (tokens consumed so far are not rewound — best-effort).
        """
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        """Parse a SORTKEY property: a parenthesized identifier list; `compound`
        records whether the caller saw COMPOUND."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        """Parse `CHARACTER SET [=] <name>`; `default` marks a DEFAULT prefix
        consumed by the caller."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        """Parse `REMOTE WITH CONNECTION <connection-name>` (REMOTE consumed)."""
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS property: either TABLE<...> / TABLE(...) or a type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # Angle-bracketed form: RETURNS TABLE<col type, ...>
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement: optional creatable kind, optional
        EXTENDED, a table, and trailing properties."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )
    def _parse_insert(self) -> exp.Insert:
        """Parse an INSERT statement, including OVERWRITE, IGNORE, OR <alt>,
        [LOCAL] DIRECTORY targets, ON CONFLICT/DUPLICATE KEY, and RETURNING."""
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE ... (per INSERT_ALTERNATIVES)
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        # NOTE: keyword arguments below are evaluated left-to-right, which is
        # exactly the order the clauses are consumed from the token stream.
        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may legally appear before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse a KILL statement: optional CONNECTION/QUERY kind plus an id."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` or `ON DUPLICATE KEY ...`; None if neither."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Conflict target: either ON CONSTRAINT <name> or a key-column list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        # Only the UPDATE action carries a SET assignment list.
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse a RETURNING clause with an optional INTO target."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse `ROW FORMAT ...` when ROW was already consumed by the caller."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse a ROW FORMAT clause: SERDE '<class>' [WITH SERDEPROPERTIES (...)]
        or DELIMITED with its optional FIELDS/ESCAPED/COLLECTION/MAP/LINES/NULL
        sub-clauses. With match_row=True, ROW FORMAT itself must be present."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each sub-clause is optional; only matched ones populate kwargs.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse `LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...`;
        any other LOAD form falls back to an opaque Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement, including USING, RETURNING and LIMIT."""
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            # RETURNING may appear before or after the WHERE clause.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse an UPDATE statement: target table, SET list, and the optional
        FROM/WHERE/RETURNING/ORDER/LIMIT tail."""
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse `UNCACHE TABLE [IF EXISTS] <table>`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <table> [OPTIONS ('k' = 'v')] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            # Stored as a flat [key, value] pair.
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        """Parse `PARTITION (<expr>, ...)`; None when PARTITION is absent."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        """Parse one VALUES row: either a parenthesized expression list or a
        single bare expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])
    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: optional CTEs, the SELECT core, wrapped
        subqueries/pivots, bare VALUES, or a leading-FROM query.

        Args:
            nested: allow a parenthesized nested select.
            table: when parsing a table context, allow a wrapped table reference.
            parse_subquery_alias: whether to consume an alias on a wrapped subquery.
            parse_set_operation: whether to fold trailing UNION/INTERSECT/EXCEPT.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE (BigQuery-style kinds).
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTE list."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # Tolerate both `, cte` and a redundant repeated WITH between CTEs.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        """Parse a single CTE: `<alias> [(cols)] AS (<statement>)`."""
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] <alias> [(col, ...)]`; None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If the parens held no column list, rewind — they belong elsewhere.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap `this` in a Subquery node with trailing pivots and optional alias."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite joins against previously-seen table aliases into explicit
        UNNEST calls (for dialects with implicit unnesting)."""
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        # `refs` accumulates names that joined columns may legally refer to.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and registered query modifiers (WHERE, GROUP
        BY, LIMIT, ...) to a query or table expression."""
        if isinstance(this, (exp.Query, exp.Table)):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may have parsed an inline offset (and LIMIT BY
                            # expressions) that belong on a dedicated Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint comment `/*+ ... */` into a Hint node."""
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None
    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse `INTO [TEMPORARY|UNLOGGED] [TABLE] <table>`."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; with skip_from_token=True the FROM keyword is
        assumed to have been consumed already."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE clause: PARTITION BY / ORDER BY / MEASURES /
        rows-per-match / AFTER MATCH SKIP / PATTERN / DEFINE, plus an alias."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is free-form regex-like syntax, so it is captured as
            # raw SQL between balanced parens rather than parsed into nodes.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY. Returns None when no such
        construct starts at the cursor."""
        # cross_apply: True = CROSS APPLY, False = OUTER APPLY, None = LATERAL.
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: an unnest, function call, or (dotted) identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional (method, side, kind) tokens preceding JOIN."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a JOIN (including comma joins and CROSS/OUTER APPLY), with its
        ON/USING condition. Returns None if no join starts at the cursor."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The consumed method/side/kind tokens weren't a join after all.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Handle nested join chains where the condition follows an inner join.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name
        (e.g. in index column definitions)."""
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter tail of CREATE INDEX: USING, column list,
        INCLUDE, PARTITION BY, WITH storage options, tablespace, and WHERE."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )
    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition.

        When `index` is given (the name was parsed upstream), expect
        `ON [TABLE] <table>`; otherwise expect
        `[UNIQUE] [PRIMARY] [AMP] INDEX <name>` with no table.
        """
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table-level hints: T-SQL `WITH (...)` hints or MySQL-style
        USE/FORCE/IGNORE INDEX hints. Returns None when there are none."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one component of a (possibly dotted) table name. In schema
        context, function calls are not allowed as name parts."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a dotted table reference into a Table node with catalog/db
        parts, an optional trailing wildcard, and any pivots.

        With is_db_reference=True the parsed parts name a database rather than
        a table, so the parts are shifted up one level.
        """
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
if lateral: 3025 return lateral 3026 3027 unnest = self._parse_unnest() 3028 if unnest: 3029 return unnest 3030 3031 values = self._parse_derived_table_values() 3032 if values: 3033 return values 3034 3035 subquery = self._parse_select(table=True) 3036 if subquery: 3037 if not subquery.args.get("pivots"): 3038 subquery.set("pivots", self._parse_pivots()) 3039 return subquery 3040 3041 bracket = parse_bracket and self._parse_bracket(None) 3042 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3043 3044 only = self._match(TokenType.ONLY) 3045 3046 this = t.cast( 3047 exp.Expression, 3048 bracket 3049 or self._parse_bracket( 3050 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3051 ), 3052 ) 3053 3054 if only: 3055 this.set("only", only) 3056 3057 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3058 self._match_text_seq("*") 3059 3060 if schema: 3061 return self._parse_schema(this=this) 3062 3063 version = self._parse_version() 3064 3065 if version: 3066 this.set("version", version) 3067 3068 if self.dialect.ALIAS_POST_TABLESAMPLE: 3069 table_sample = self._parse_table_sample() 3070 3071 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3072 if alias: 3073 this.set("alias", alias) 3074 3075 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3076 return self.expression( 3077 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3078 ) 3079 3080 this.set("hints", self._parse_table_hints()) 3081 3082 if not this.args.get("pivots"): 3083 this.set("pivots", self._parse_pivots()) 3084 3085 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3086 table_sample = self._parse_table_sample() 3087 3088 if table_sample: 3089 table_sample.set("this", this) 3090 this = table_sample 3091 3092 if joins: 3093 for join in iter(self._parse_join, None): 3094 this.append("joins", join) 3095 3096 if self._match_pair(TokenType.WITH, 
TokenType.ORDINALITY): 3097 this.set("ordinality", True) 3098 this.set("alias", self._parse_table_alias()) 3099 3100 return this 3101 3102 def _parse_version(self) -> t.Optional[exp.Version]: 3103 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3104 this = "TIMESTAMP" 3105 elif self._match(TokenType.VERSION_SNAPSHOT): 3106 this = "VERSION" 3107 else: 3108 return None 3109 3110 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3111 kind = self._prev.text.upper() 3112 start = self._parse_bitwise() 3113 self._match_texts(("TO", "AND")) 3114 end = self._parse_bitwise() 3115 expression: t.Optional[exp.Expression] = self.expression( 3116 exp.Tuple, expressions=[start, end] 3117 ) 3118 elif self._match_text_seq("CONTAINED", "IN"): 3119 kind = "CONTAINED IN" 3120 expression = self.expression( 3121 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3122 ) 3123 elif self._match(TokenType.ALL): 3124 kind = "ALL" 3125 expression = None 3126 else: 3127 self._match_text_seq("AS", "OF") 3128 kind = "AS OF" 3129 expression = self._parse_type() 3130 3131 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3132 3133 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3134 if not self._match(TokenType.UNNEST): 3135 return None 3136 3137 expressions = self._parse_wrapped_csv(self._parse_equality) 3138 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3139 3140 alias = self._parse_table_alias() if with_alias else None 3141 3142 if alias: 3143 if self.dialect.UNNEST_COLUMN_ONLY: 3144 if alias.args.get("columns"): 3145 self.raise_error("Unexpected extra column alias in unnest.") 3146 3147 alias.set("columns", [alias.this]) 3148 alias.set("this", None) 3149 3150 columns = alias.args.get("columns") or [] 3151 if offset and len(expressions) < len(columns): 3152 offset = columns.pop() 3153 3154 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3155 self._match(TokenType.ALIAS) 3156 
offset = self._parse_id_var( 3157 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3158 ) or exp.to_identifier("offset") 3159 3160 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3161 3162 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3163 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3164 if not is_derived and not self._match_text_seq("VALUES"): 3165 return None 3166 3167 expressions = self._parse_csv(self._parse_value) 3168 alias = self._parse_table_alias() 3169 3170 if is_derived: 3171 self._match_r_paren() 3172 3173 return self.expression( 3174 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3175 ) 3176 3177 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3178 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3179 as_modifier and self._match_text_seq("USING", "SAMPLE") 3180 ): 3181 return None 3182 3183 bucket_numerator = None 3184 bucket_denominator = None 3185 bucket_field = None 3186 percent = None 3187 size = None 3188 seed = None 3189 3190 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3191 matched_l_paren = self._match(TokenType.L_PAREN) 3192 3193 if self.TABLESAMPLE_CSV: 3194 num = None 3195 expressions = self._parse_csv(self._parse_primary) 3196 else: 3197 expressions = None 3198 num = ( 3199 self._parse_factor() 3200 if self._match(TokenType.NUMBER, advance=False) 3201 else self._parse_primary() or self._parse_placeholder() 3202 ) 3203 3204 if self._match_text_seq("BUCKET"): 3205 bucket_numerator = self._parse_number() 3206 self._match_text_seq("OUT", "OF") 3207 bucket_denominator = bucket_denominator = self._parse_number() 3208 self._match(TokenType.ON) 3209 bucket_field = self._parse_field() 3210 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3211 percent = num 3212 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3213 size = num 3214 
else: 3215 percent = num 3216 3217 if matched_l_paren: 3218 self._match_r_paren() 3219 3220 if self._match(TokenType.L_PAREN): 3221 method = self._parse_var(upper=True) 3222 seed = self._match(TokenType.COMMA) and self._parse_number() 3223 self._match_r_paren() 3224 elif self._match_texts(("SEED", "REPEATABLE")): 3225 seed = self._parse_wrapped(self._parse_number) 3226 3227 return self.expression( 3228 exp.TableSample, 3229 expressions=expressions, 3230 method=method, 3231 bucket_numerator=bucket_numerator, 3232 bucket_denominator=bucket_denominator, 3233 bucket_field=bucket_field, 3234 percent=percent, 3235 size=size, 3236 seed=seed, 3237 ) 3238 3239 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3240 return list(iter(self._parse_pivot, None)) or None 3241 3242 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3243 return list(iter(self._parse_join, None)) or None 3244 3245 # https://duckdb.org/docs/sql/statements/pivot 3246 def _parse_simplified_pivot(self) -> exp.Pivot: 3247 def _parse_on() -> t.Optional[exp.Expression]: 3248 this = self._parse_bitwise() 3249 return self._parse_in(this) if self._match(TokenType.IN) else this 3250 3251 this = self._parse_table() 3252 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3253 using = self._match(TokenType.USING) and self._parse_csv( 3254 lambda: self._parse_alias(self._parse_function()) 3255 ) 3256 group = self._parse_group() 3257 return self.expression( 3258 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3259 ) 3260 3261 def _parse_pivot_in(self) -> exp.In: 3262 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3263 this = self._parse_conjunction() 3264 3265 self._match(TokenType.ALIAS) 3266 alias = self._parse_field() 3267 if alias: 3268 return self.expression(exp.PivotAlias, this=this, alias=alias) 3269 3270 return this 3271 3272 value = self._parse_column() 3273 3274 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3275 
self.raise_error("Expecting IN (") 3276 3277 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3278 3279 self._match_r_paren() 3280 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3281 3282 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3283 index = self._index 3284 include_nulls = None 3285 3286 if self._match(TokenType.PIVOT): 3287 unpivot = False 3288 elif self._match(TokenType.UNPIVOT): 3289 unpivot = True 3290 3291 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3292 if self._match_text_seq("INCLUDE", "NULLS"): 3293 include_nulls = True 3294 elif self._match_text_seq("EXCLUDE", "NULLS"): 3295 include_nulls = False 3296 else: 3297 return None 3298 3299 expressions = [] 3300 3301 if not self._match(TokenType.L_PAREN): 3302 self._retreat(index) 3303 return None 3304 3305 if unpivot: 3306 expressions = self._parse_csv(self._parse_column) 3307 else: 3308 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3309 3310 if not expressions: 3311 self.raise_error("Failed to parse PIVOT's aggregation list") 3312 3313 if not self._match(TokenType.FOR): 3314 self.raise_error("Expecting FOR") 3315 3316 field = self._parse_pivot_in() 3317 3318 self._match_r_paren() 3319 3320 pivot = self.expression( 3321 exp.Pivot, 3322 expressions=expressions, 3323 field=field, 3324 unpivot=unpivot, 3325 include_nulls=include_nulls, 3326 ) 3327 3328 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3329 pivot.set("alias", self._parse_table_alias()) 3330 3331 if not unpivot: 3332 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3333 3334 columns: t.List[exp.Expression] = [] 3335 for fld in pivot.args["field"].expressions: 3336 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3337 for name in names: 3338 if self.PREFIXED_PIVOT_COLUMNS: 3339 name = f"{name}_{field_name}" if name else 
field_name 3340 else: 3341 name = f"{field_name}_{name}" if name else field_name 3342 3343 columns.append(exp.to_identifier(name)) 3344 3345 pivot.set("columns", columns) 3346 3347 return pivot 3348 3349 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3350 return [agg.alias for agg in aggregations] 3351 3352 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3353 if not skip_where_token and not self._match(TokenType.PREWHERE): 3354 return None 3355 3356 return self.expression( 3357 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3358 ) 3359 3360 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3361 if not skip_where_token and not self._match(TokenType.WHERE): 3362 return None 3363 3364 return self.expression( 3365 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3366 ) 3367 3368 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3369 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3370 return None 3371 3372 elements = defaultdict(list) 3373 3374 if self._match(TokenType.ALL): 3375 return self.expression(exp.Group, all=True) 3376 3377 while True: 3378 expressions = self._parse_csv(self._parse_conjunction) 3379 if expressions: 3380 elements["expressions"].extend(expressions) 3381 3382 grouping_sets = self._parse_grouping_sets() 3383 if grouping_sets: 3384 elements["grouping_sets"].extend(grouping_sets) 3385 3386 rollup = None 3387 cube = None 3388 totals = None 3389 3390 index = self._index 3391 with_ = self._match(TokenType.WITH) 3392 if self._match(TokenType.ROLLUP): 3393 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3394 elements["rollup"].extend(ensure_list(rollup)) 3395 3396 if self._match(TokenType.CUBE): 3397 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3398 elements["cube"].extend(ensure_list(cube)) 3399 3400 if 
self._match_text_seq("TOTALS"): 3401 totals = True 3402 elements["totals"] = True # type: ignore 3403 3404 if not (grouping_sets or rollup or cube or totals): 3405 if with_: 3406 self._retreat(index) 3407 break 3408 3409 return self.expression(exp.Group, **elements) # type: ignore 3410 3411 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3412 if not self._match(TokenType.GROUPING_SETS): 3413 return None 3414 3415 return self._parse_wrapped_csv(self._parse_grouping_set) 3416 3417 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3418 if self._match(TokenType.L_PAREN): 3419 grouping_set = self._parse_csv(self._parse_column) 3420 self._match_r_paren() 3421 return self.expression(exp.Tuple, expressions=grouping_set) 3422 3423 return self._parse_column() 3424 3425 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3426 if not skip_having_token and not self._match(TokenType.HAVING): 3427 return None 3428 return self.expression(exp.Having, this=self._parse_conjunction()) 3429 3430 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3431 if not self._match(TokenType.QUALIFY): 3432 return None 3433 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3434 3435 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3436 if skip_start_token: 3437 start = None 3438 elif self._match(TokenType.START_WITH): 3439 start = self._parse_conjunction() 3440 else: 3441 return None 3442 3443 self._match(TokenType.CONNECT_BY) 3444 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3445 exp.Prior, this=self._parse_bitwise() 3446 ) 3447 connect = self._parse_conjunction() 3448 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3449 3450 if not start and self._match(TokenType.START_WITH): 3451 start = self._parse_conjunction() 3452 3453 return self.expression(exp.Connect, start=start, connect=connect) 3454 3455 def _parse_name_as_expression(self) -> exp.Alias: 3456 
return self.expression( 3457 exp.Alias, 3458 alias=self._parse_id_var(any_token=True), 3459 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3460 ) 3461 3462 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3463 if self._match_text_seq("INTERPOLATE"): 3464 return self._parse_wrapped_csv(self._parse_name_as_expression) 3465 return None 3466 3467 def _parse_order( 3468 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3469 ) -> t.Optional[exp.Expression]: 3470 siblings = None 3471 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3472 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3473 return this 3474 3475 siblings = True 3476 3477 return self.expression( 3478 exp.Order, 3479 this=this, 3480 expressions=self._parse_csv(self._parse_ordered), 3481 interpolate=self._parse_interpolate(), 3482 siblings=siblings, 3483 ) 3484 3485 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3486 if not self._match(token): 3487 return None 3488 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3489 3490 def _parse_ordered( 3491 self, parse_method: t.Optional[t.Callable] = None 3492 ) -> t.Optional[exp.Ordered]: 3493 this = parse_method() if parse_method else self._parse_conjunction() 3494 if not this: 3495 return None 3496 3497 asc = self._match(TokenType.ASC) 3498 desc = self._match(TokenType.DESC) or (asc and False) 3499 3500 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3501 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3502 3503 nulls_first = is_nulls_first or False 3504 explicitly_null_ordered = is_nulls_first or is_nulls_last 3505 3506 if ( 3507 not explicitly_null_ordered 3508 and ( 3509 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3510 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3511 ) 3512 and self.dialect.NULL_ORDERING != "nulls_are_last" 3513 ): 3514 nulls_first = True 3515 3516 
if self._match_text_seq("WITH", "FILL"): 3517 with_fill = self.expression( 3518 exp.WithFill, 3519 **{ # type: ignore 3520 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3521 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3522 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3523 }, 3524 ) 3525 else: 3526 with_fill = None 3527 3528 return self.expression( 3529 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3530 ) 3531 3532 def _parse_limit( 3533 self, this: t.Optional[exp.Expression] = None, top: bool = False 3534 ) -> t.Optional[exp.Expression]: 3535 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3536 comments = self._prev_comments 3537 if top: 3538 limit_paren = self._match(TokenType.L_PAREN) 3539 expression = self._parse_term() if limit_paren else self._parse_number() 3540 3541 if limit_paren: 3542 self._match_r_paren() 3543 else: 3544 expression = self._parse_term() 3545 3546 if self._match(TokenType.COMMA): 3547 offset = expression 3548 expression = self._parse_term() 3549 else: 3550 offset = None 3551 3552 limit_exp = self.expression( 3553 exp.Limit, 3554 this=this, 3555 expression=expression, 3556 offset=offset, 3557 comments=comments, 3558 expressions=self._parse_limit_by(), 3559 ) 3560 3561 return limit_exp 3562 3563 if self._match(TokenType.FETCH): 3564 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3565 direction = self._prev.text.upper() if direction else "FIRST" 3566 3567 count = self._parse_field(tokens=self.FETCH_TOKENS) 3568 percent = self._match(TokenType.PERCENT) 3569 3570 self._match_set((TokenType.ROW, TokenType.ROWS)) 3571 3572 only = self._match_text_seq("ONLY") 3573 with_ties = self._match_text_seq("WITH", "TIES") 3574 3575 if only and with_ties: 3576 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3577 3578 return self.expression( 3579 exp.Fetch, 3580 direction=direction, 3581 count=count, 3582 percent=percent, 
3583 with_ties=with_ties, 3584 ) 3585 3586 return this 3587 3588 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3589 if not self._match(TokenType.OFFSET): 3590 return this 3591 3592 count = self._parse_term() 3593 self._match_set((TokenType.ROW, TokenType.ROWS)) 3594 3595 return self.expression( 3596 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3597 ) 3598 3599 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3600 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3601 3602 def _parse_locks(self) -> t.List[exp.Lock]: 3603 locks = [] 3604 while True: 3605 if self._match_text_seq("FOR", "UPDATE"): 3606 update = True 3607 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3608 "LOCK", "IN", "SHARE", "MODE" 3609 ): 3610 update = False 3611 else: 3612 break 3613 3614 expressions = None 3615 if self._match_text_seq("OF"): 3616 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3617 3618 wait: t.Optional[bool | exp.Expression] = None 3619 if self._match_text_seq("NOWAIT"): 3620 wait = True 3621 elif self._match_text_seq("WAIT"): 3622 wait = self._parse_primary() 3623 elif self._match_text_seq("SKIP", "LOCKED"): 3624 wait = False 3625 3626 locks.append( 3627 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3628 ) 3629 3630 return locks 3631 3632 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3633 while this and self._match_set(self.SET_OPERATIONS): 3634 token_type = self._prev.token_type 3635 3636 if token_type == TokenType.UNION: 3637 operation = exp.Union 3638 elif token_type == TokenType.EXCEPT: 3639 operation = exp.Except 3640 else: 3641 operation = exp.Intersect 3642 3643 comments = self._prev.comments 3644 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3645 by_name = self._match_text_seq("BY", "NAME") 3646 
expression = self._parse_select(nested=True, parse_set_operation=False) 3647 3648 this = self.expression( 3649 operation, 3650 comments=comments, 3651 this=this, 3652 distinct=distinct, 3653 by_name=by_name, 3654 expression=expression, 3655 ) 3656 3657 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3658 expression = this.expression 3659 3660 if expression: 3661 for arg in self.UNION_MODIFIERS: 3662 expr = expression.args.get(arg) 3663 if expr: 3664 this.set(arg, expr.pop()) 3665 3666 return this 3667 3668 def _parse_expression(self) -> t.Optional[exp.Expression]: 3669 return self._parse_alias(self._parse_conjunction()) 3670 3671 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3672 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3673 3674 def _parse_equality(self) -> t.Optional[exp.Expression]: 3675 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3676 3677 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3678 return self._parse_tokens(self._parse_range, self.COMPARISON) 3679 3680 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3681 this = this or self._parse_bitwise() 3682 negate = self._match(TokenType.NOT) 3683 3684 if self._match_set(self.RANGE_PARSERS): 3685 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3686 if not expression: 3687 return this 3688 3689 this = expression 3690 elif self._match(TokenType.ISNULL): 3691 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3692 3693 # Postgres supports ISNULL and NOTNULL for conditions. 
3694 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3695 if self._match(TokenType.NOTNULL): 3696 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3697 this = self.expression(exp.Not, this=this) 3698 3699 if negate: 3700 this = self.expression(exp.Not, this=this) 3701 3702 if self._match(TokenType.IS): 3703 this = self._parse_is(this) 3704 3705 return this 3706 3707 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3708 index = self._index - 1 3709 negate = self._match(TokenType.NOT) 3710 3711 if self._match_text_seq("DISTINCT", "FROM"): 3712 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3713 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3714 3715 expression = self._parse_null() or self._parse_boolean() 3716 if not expression: 3717 self._retreat(index) 3718 return None 3719 3720 this = self.expression(exp.Is, this=this, expression=expression) 3721 return self.expression(exp.Not, this=this) if negate else this 3722 3723 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3724 unnest = self._parse_unnest(with_alias=False) 3725 if unnest: 3726 this = self.expression(exp.In, this=this, unnest=unnest) 3727 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3728 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3729 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3730 3731 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3732 this = self.expression(exp.In, this=this, query=expressions[0]) 3733 else: 3734 this = self.expression(exp.In, this=this, expressions=expressions) 3735 3736 if matched_l_paren: 3737 self._match_r_paren(this) 3738 elif not self._match(TokenType.R_BRACKET, expression=this): 3739 self.raise_error("Expecting ]") 3740 else: 3741 this = self.expression(exp.In, this=this, field=self._parse_field()) 3742 3743 return this 3744 3745 def 
_parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3746 low = self._parse_bitwise() 3747 self._match(TokenType.AND) 3748 high = self._parse_bitwise() 3749 return self.expression(exp.Between, this=this, low=low, high=high) 3750 3751 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3752 if not self._match(TokenType.ESCAPE): 3753 return this 3754 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3755 3756 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3757 index = self._index 3758 3759 if not self._match(TokenType.INTERVAL) and match_interval: 3760 return None 3761 3762 if self._match(TokenType.STRING, advance=False): 3763 this = self._parse_primary() 3764 else: 3765 this = self._parse_term() 3766 3767 if not this or ( 3768 isinstance(this, exp.Column) 3769 and not this.table 3770 and not this.this.quoted 3771 and this.name.upper() == "IS" 3772 ): 3773 self._retreat(index) 3774 return None 3775 3776 unit = self._parse_function() or ( 3777 not self._match(TokenType.ALIAS, advance=False) 3778 and self._parse_var(any_token=True, upper=True) 3779 ) 3780 3781 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3782 # each INTERVAL expression into this canonical form so it's easy to transpile 3783 if this and this.is_number: 3784 this = exp.Literal.string(this.name) 3785 elif this and this.is_string: 3786 parts = this.name.split() 3787 3788 if len(parts) == 2: 3789 if unit: 3790 # This is not actually a unit, it's something else (e.g. 
a "window side") 3791 unit = None 3792 self._retreat(self._index - 1) 3793 3794 this = exp.Literal.string(parts[0]) 3795 unit = self.expression(exp.Var, this=parts[1].upper()) 3796 3797 return self.expression(exp.Interval, this=this, unit=unit) 3798 3799 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3800 this = self._parse_term() 3801 3802 while True: 3803 if self._match_set(self.BITWISE): 3804 this = self.expression( 3805 self.BITWISE[self._prev.token_type], 3806 this=this, 3807 expression=self._parse_term(), 3808 ) 3809 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3810 this = self.expression( 3811 exp.DPipe, 3812 this=this, 3813 expression=self._parse_term(), 3814 safe=not self.dialect.STRICT_STRING_CONCAT, 3815 ) 3816 elif self._match(TokenType.DQMARK): 3817 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3818 elif self._match_pair(TokenType.LT, TokenType.LT): 3819 this = self.expression( 3820 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3821 ) 3822 elif self._match_pair(TokenType.GT, TokenType.GT): 3823 this = self.expression( 3824 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3825 ) 3826 else: 3827 break 3828 3829 return this 3830 3831 def _parse_term(self) -> t.Optional[exp.Expression]: 3832 return self._parse_tokens(self._parse_factor, self.TERM) 3833 3834 def _parse_factor(self) -> t.Optional[exp.Expression]: 3835 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3836 this = parse_method() 3837 3838 while self._match_set(self.FACTOR): 3839 this = self.expression( 3840 self.FACTOR[self._prev.token_type], 3841 this=this, 3842 comments=self._prev_comments, 3843 expression=parse_method(), 3844 ) 3845 if isinstance(this, exp.Div): 3846 this.args["typed"] = self.dialect.TYPED_DIVISION 3847 this.args["safe"] = self.dialect.SAFE_DIVISION 3848 3849 return this 3850 3851 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3852 
return self._parse_tokens(self._parse_unary, self.EXPONENT) 3853 3854 def _parse_unary(self) -> t.Optional[exp.Expression]: 3855 if self._match_set(self.UNARY_PARSERS): 3856 return self.UNARY_PARSERS[self._prev.token_type](self) 3857 return self._parse_at_time_zone(self._parse_type()) 3858 3859 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3860 interval = parse_interval and self._parse_interval() 3861 if interval: 3862 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3863 while True: 3864 index = self._index 3865 self._match(TokenType.PLUS) 3866 3867 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3868 self._retreat(index) 3869 break 3870 3871 interval = self.expression( # type: ignore 3872 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3873 ) 3874 3875 return interval 3876 3877 index = self._index 3878 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3879 this = self._parse_column() 3880 3881 if data_type: 3882 if isinstance(this, exp.Literal): 3883 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3884 if parser: 3885 return parser(self, this, data_type) 3886 return self.expression(exp.Cast, this=this, to=data_type) 3887 if not data_type.expressions: 3888 self._retreat(index) 3889 return self._parse_column() 3890 return self._parse_column_ops(data_type) 3891 3892 return this and self._parse_column_ops(this) 3893 3894 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3895 this = self._parse_type() 3896 if not this: 3897 return None 3898 3899 return self.expression( 3900 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3901 ) 3902 3903 def _parse_types( 3904 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3905 ) -> t.Optional[exp.Expression]: 3906 index = self._index 3907 3908 prefix = self._match_text_seq("SYSUDTLIB", ".") 3909 3910 if 
not self._match_set(self.TYPE_TOKENS): 3911 identifier = allow_identifiers and self._parse_id_var( 3912 any_token=False, tokens=(TokenType.VAR,) 3913 ) 3914 if identifier: 3915 tokens = self.dialect.tokenize(identifier.name) 3916 3917 if len(tokens) != 1: 3918 self.raise_error("Unexpected identifier", self._prev) 3919 3920 if tokens[0].token_type in self.TYPE_TOKENS: 3921 self._prev = tokens[0] 3922 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 3923 type_name = identifier.name 3924 3925 while self._match(TokenType.DOT): 3926 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3927 3928 return exp.DataType.build(type_name, udt=True) 3929 else: 3930 self._retreat(self._index - 1) 3931 return None 3932 else: 3933 return None 3934 3935 type_token = self._prev.token_type 3936 3937 if type_token == TokenType.PSEUDO_TYPE: 3938 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 3939 3940 if type_token == TokenType.OBJECT_IDENTIFIER: 3941 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 3942 3943 nested = type_token in self.NESTED_TYPE_TOKENS 3944 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3945 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 3946 expressions = None 3947 maybe_func = False 3948 3949 if self._match(TokenType.L_PAREN): 3950 if is_struct: 3951 expressions = self._parse_csv(self._parse_struct_types) 3952 elif nested: 3953 expressions = self._parse_csv( 3954 lambda: self._parse_types( 3955 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3956 ) 3957 ) 3958 elif type_token in self.ENUM_TYPE_TOKENS: 3959 expressions = self._parse_csv(self._parse_equality) 3960 elif is_aggregate: 3961 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 3962 any_token=False, tokens=(TokenType.VAR,) 3963 ) 3964 if not func_or_ident or not self._match(TokenType.COMMA): 3965 return None 3966 expressions = self._parse_csv( 3967 lambda: self._parse_types( 3968 
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Malformed type arguments: restore position, this wasn't a type
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call; remember to disambiguate below
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a INT>
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional trailing value list, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE modifiers; these can't be function calls
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL DAY TO SECOND
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            # Disambiguate TYPE(...) from a function call: a type here must be followed
            # by a string literal (a typed literal); otherwise treat it as a function.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                # NOTE: raise_error may not raise under lenient error levels, hence the fallback
                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][] -> ARRAY<ARRAY<INT>>
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field, e.g. `name: INT` or a bare type when type_required."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # No type was attached to the field; re-parse the whole thing as a type
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrap `this` in AT TIME ZONE <expr> if the clause is present
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, promoting a bare identifier to an exp.Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by "(" can be a regular identifier in some dialects
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (::cast, dot access, brackets, JSON ops) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: <expr>::<type>
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
4115 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4116 this = self._replace_columns_with_dots(this) 4117 4118 if op: 4119 this = op(self, this, field) 4120 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4121 this = self.expression( 4122 exp.Column, 4123 this=field, 4124 table=this.this, 4125 db=this.args.get("table"), 4126 catalog=this.args.get("db"), 4127 ) 4128 else: 4129 this = self.expression(exp.Dot, this=this, expression=field) 4130 this = self._parse_bracket(this) 4131 return this 4132 4133 def _parse_primary(self) -> t.Optional[exp.Expression]: 4134 if self._match_set(self.PRIMARY_PARSERS): 4135 token_type = self._prev.token_type 4136 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4137 4138 if token_type == TokenType.STRING: 4139 expressions = [primary] 4140 while self._match(TokenType.STRING): 4141 expressions.append(exp.Literal.string(self._prev.text)) 4142 4143 if len(expressions) > 1: 4144 return self.expression(exp.Concat, expressions=expressions) 4145 4146 return primary 4147 4148 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4149 return exp.Literal.number(f"0.{self._prev.text}") 4150 4151 if self._match(TokenType.L_PAREN): 4152 comments = self._prev_comments 4153 query = self._parse_select() 4154 4155 if query: 4156 expressions = [query] 4157 else: 4158 expressions = self._parse_expressions() 4159 4160 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4161 4162 if isinstance(this, exp.UNWRAPPED_QUERIES): 4163 this = self._parse_set_operations( 4164 self._parse_subquery(this=this, parse_alias=False) 4165 ) 4166 elif isinstance(this, exp.Subquery): 4167 this = self._parse_subquery( 4168 this=self._parse_set_operations(this), parse_alias=False 4169 ) 4170 elif len(expressions) > 1: 4171 this = self.expression(exp.Tuple, expressions=expressions) 4172 else: 4173 this = self.expression(exp.Paren, this=this) 4174 4175 if this: 4176 
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        # A field is a primary expression, a function call, or an identifier, in that
        # order of preference.
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation at the current position, or return None.

        `functions` overrides the dialect's function registry; `anonymous` forces an
        exp.Anonymous node instead of a typed function expression; `optional_parens`
        enables paren-less functions like CURRENT_DATE.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Builders may optionally accept the dialect as a keyword argument
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Preserve the original (unnormalized) function name spelling
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into exp.PropertyEQ nodes."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=exp.to_identifier(e.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap a Column key down to its identifier
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # e.g. MySQL charset introducers: _utf8'abc'; without a following literal the
        # token is just an identifier
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        # e.g. @@GLOBAL.sql_mode -> kind=GLOBAL, this=sql_mode
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. x -> x + 1) or fall back to a select/expression argument."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            # Possible multi-arg lambda head: (x, y) -> ...
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse as a regular argument expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column/constraint definitions) attached to `this`."""
        index = self._index

        if not self.errors:
            # First probe for a nested SELECT; errors raised during the probe are discarded
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints following a column name, producing exp.ColumnDef."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column: <name> AS <expr> [PERSISTED] [NOT NULL]
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # Transform column: <name> <type> AS (<expr>)
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            # Nothing column-def-like followed the name; return it unchanged
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT args."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        # AUTO must be followed by REFRESH to form an AutoRefreshProperty
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        # COMPRESS (a, b, ...) or COMPRESS <expr>
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | <expr>}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>) — a computed expression, not identity options
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric shorthand: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        # INLINE [LENGTH] <expr>
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # Constraints introduced by NOT: NOT NULL / NOT CASESPECIFIC / NOT FOR REPLICATION
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column-level constraint, optionally named via CONSTRAINT <name>."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # Table-level constraint; unnamed unless prefixed with CONSTRAINT <name>
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        # Collect consecutive constraints (or constraint-like function calls, e.g. CHECK(...))
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        # A quoted identifier can never start a constraint keyword
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        # UNIQUE [KEY] [(cols)] [USING <index_type>] [ON CONFLICT ...]
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key constraint options as raw strings (ON <event> <action>, etc.)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON is the event (e.g. DELETE/UPDATE)
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        # REFERENCES <table> [<options>]; `match=False` assumes REFERENCES was consumed
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE <action>]..."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Single-token action, e.g. CASCADE / RESTRICT
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        # PERIOD FOR SYSTEM_TIME (start_col, end_col)
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as a column constraint or a table-level key with columns."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list: this is a column-level constraint
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts / array literals and {...} struct literals after `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize the index against the dialect's base offset
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Brackets can chain, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [<operand>] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            # Tolerate `ELSE interval END` being misparsed as an interval whose unit is END
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or the keyword form IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # A statement-leading paren-less IF is treated as an opaque command
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        # NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(<part> FROM <expr>) (or the comma-separated variant)."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this,
 expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(<expr> AS <type> [FORMAT <fmt>]); `strict` picks Cast vs TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-arg form CAST(expr, 'type string')
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST ... FORMAT on a temporal type is equivalent to STR_TO_DATE/STR_TO_TIME
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT arguments, including WITHIN GROUP ordering."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # DECODE treats NULL = NULL as a match, so test IS NULL explicitly
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR on both sides being NULL
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        # [KEY] <key> {: | VALUE} <value>
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # Wrap `this` in FormatJson when followed by FORMAT JSON
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson,
            this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL).
        # Returns the matched clause as a plain string, or None if absent.
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments: `*` or KEY/VALUE pairs,
        then the optional NULL-handling, UNIQUE KEYS, RETURNING and ENCODING clauses.
        """
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        # NULL ON NULL / ABSENT ON NULL
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH UNIQUE [KEYS] / WITHOUT UNIQUE [KEYS]; None when neither appears.
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            # NESTED columns carry a sub-schema instead of a name/type.
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
5036 kind=kind, 5037 path=path, 5038 nested_schema=nested_schema, 5039 ) 5040 5041 def _parse_json_schema(self) -> exp.JSONSchema: 5042 self._match_text_seq("COLUMNS") 5043 return self.expression( 5044 exp.JSONSchema, 5045 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5046 ) 5047 5048 def _parse_json_table(self) -> exp.JSONTable: 5049 this = self._parse_format_json(self._parse_bitwise()) 5050 path = self._match(TokenType.COMMA) and self._parse_string() 5051 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5052 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5053 schema = self._parse_json_schema() 5054 5055 return exp.JSONTable( 5056 this=this, 5057 schema=schema, 5058 path=path, 5059 error_handling=error_handling, 5060 empty_handling=empty_handling, 5061 ) 5062 5063 def _parse_match_against(self) -> exp.MatchAgainst: 5064 expressions = self._parse_csv(self._parse_column) 5065 5066 self._match_text_seq(")", "AGAINST", "(") 5067 5068 this = self._parse_string() 5069 5070 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5071 modifier = "IN NATURAL LANGUAGE MODE" 5072 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5073 modifier = f"{modifier} WITH QUERY EXPANSION" 5074 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5075 modifier = "IN BOOLEAN MODE" 5076 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5077 modifier = "WITH QUERY EXPANSION" 5078 else: 5079 modifier = None 5080 5081 return self.expression( 5082 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5083 ) 5084 5085 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5086 def _parse_open_json(self) -> exp.OpenJSON: 5087 this = self._parse_bitwise() 5088 path = self._match(TokenType.COMMA) and self._parse_string() 5089 5090 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5091 this = self._parse_field(any_token=True) 5092 kind 
= self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        # WITH (<column defs>) only applies after the closing paren of OPENJSON(...).
        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/STRPOS-style arguments.

        Handles both POSITION(<substr> IN <haystack>) and the comma-separated
        form; `haystack_first` flips which comma argument is the haystack.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        # Optional third argument is the start position.
        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        # Parses PREDICT(MODEL <table>, TABLE <table>[, <params struct>]).
        # NOTE(review): presumably BigQuery ML.PREDICT-style syntax — confirm against caller.
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        # Hint name is normalized to upper case; arguments are table references.
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
args.append(self._parse_bitwise()) 5152 if self._match(TokenType.FOR): 5153 args.append(self._parse_bitwise()) 5154 5155 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5156 5157 def _parse_trim(self) -> exp.Trim: 5158 # https://www.w3resource.com/sql/character-functions/trim.php 5159 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5160 5161 position = None 5162 collation = None 5163 expression = None 5164 5165 if self._match_texts(self.TRIM_TYPES): 5166 position = self._prev.text.upper() 5167 5168 this = self._parse_bitwise() 5169 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5170 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5171 expression = self._parse_bitwise() 5172 5173 if invert_order: 5174 this, expression = expression, this 5175 5176 if self._match(TokenType.COLLATE): 5177 collation = self._parse_bitwise() 5178 5179 return self.expression( 5180 exp.Trim, this=this, position=position, expression=expression, collation=collation 5181 ) 5182 5183 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5184 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5185 5186 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5187 return self._parse_window(self._parse_id_var(), alias=True) 5188 5189 def _parse_respect_or_ignore_nulls( 5190 self, this: t.Optional[exp.Expression] 5191 ) -> t.Optional[exp.Expression]: 5192 if self._match_text_seq("IGNORE", "NULLS"): 5193 return self.expression(exp.IgnoreNulls, this=this) 5194 if self._match_text_seq("RESPECT", "NULLS"): 5195 return self.expression(exp.RespectNulls, this=this) 5196 return this 5197 5198 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5199 if self._match(TokenType.HAVING): 5200 self._match_texts(("MAX", "MIN")) 5201 max = self._prev.text.upper() != "MIN" 5202 return self.expression( 5203 exp.HavingMax, this=this, 
expression=self._parse_column(), max=max 5204 ) 5205 5206 return this 5207 5208 def _parse_window( 5209 self, this: t.Optional[exp.Expression], alias: bool = False 5210 ) -> t.Optional[exp.Expression]: 5211 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5212 self._match(TokenType.WHERE) 5213 this = self.expression( 5214 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5215 ) 5216 self._match_r_paren() 5217 5218 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5219 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5220 if self._match_text_seq("WITHIN", "GROUP"): 5221 order = self._parse_wrapped(self._parse_order) 5222 this = self.expression(exp.WithinGroup, this=this, expression=order) 5223 5224 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5225 # Some dialects choose to implement and some do not. 5226 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5227 5228 # There is some code above in _parse_lambda that handles 5229 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5230 5231 # The below changes handle 5232 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5233 5234 # Oracle allows both formats 5235 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5236 # and Snowflake chose to do the same for familiarity 5237 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5238 if isinstance(this, exp.AggFunc): 5239 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5240 5241 if ignore_respect and ignore_respect is not this: 5242 ignore_respect.replace(ignore_respect.this) 5243 this = self.expression(ignore_respect.__class__, this=this) 5244 5245 this = self._parse_respect_or_ignore_nulls(this) 5246 5247 # bigquery select from window x AS (partition by ...) 
5248 if alias: 5249 over = None 5250 self._match(TokenType.ALIAS) 5251 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5252 return this 5253 else: 5254 over = self._prev.text.upper() 5255 5256 if not self._match(TokenType.L_PAREN): 5257 return self.expression( 5258 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5259 ) 5260 5261 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5262 5263 first = self._match(TokenType.FIRST) 5264 if self._match_text_seq("LAST"): 5265 first = False 5266 5267 partition, order = self._parse_partition_and_order() 5268 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5269 5270 if kind: 5271 self._match(TokenType.BETWEEN) 5272 start = self._parse_window_spec() 5273 self._match(TokenType.AND) 5274 end = self._parse_window_spec() 5275 5276 spec = self.expression( 5277 exp.WindowSpec, 5278 kind=kind, 5279 start=start["value"], 5280 start_side=start["side"], 5281 end=end["value"], 5282 end_side=end["side"], 5283 ) 5284 else: 5285 spec = None 5286 5287 self._match_r_paren() 5288 5289 window = self.expression( 5290 exp.Window, 5291 this=this, 5292 partition_by=partition, 5293 order=order, 5294 spec=spec, 5295 alias=window_alias, 5296 over=over, 5297 first=first, 5298 ) 5299 5300 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
        # Oracle's FIRST/LAST: another window may directly follow this one.
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        # Convenience wrapper: PARTITION BY list plus the optional ORDER BY.
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one side of a window frame: UNBOUNDED / CURRENT ROW / <expr>,
        followed by an optional side keyword (e.g. PRECEDING / FOLLOWING).
        """
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias for `this`.

        With `explicit=True`, only an alias introduced by AS is accepted.
        Supports both single aliases and parenthesized alias lists.
        """
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Alias list: expr AS (a, b, ...)
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        # Prefer a proper (quoted) identifier if one is next in the stream.
        identifier = self._parse_identifier()

        if identifier:
            return identifier
        # Otherwise accept any token (when allowed) or one of the permitted
        # identifier-like token types, producing an unquoted Identifier.
        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        # Dispatch on the registered string token parsers; placeholders also qualify.
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        # A string literal used where an identifier is expected ('alias').
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        # Dispatch on the registered numeric token parsers; placeholders also qualify.
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        # IDENTIFIER tokens come from quoted names, hence quoted=True.
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a variable/keyword-like token into exp.Var.

        `any_token` accepts any non-reserved token; `tokens` widens the accepted
        token types; `upper` upper-cases the resulting text.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        # Consume the current token unless it is reserved; returns the consumed token.
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> 
t.Optional[exp.Expression]: 5417 return self._parse_primary() or self._parse_var(any_token=True) 5418 5419 def _parse_null(self) -> t.Optional[exp.Expression]: 5420 if self._match_set(self.NULL_TOKENS): 5421 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5422 return self._parse_placeholder() 5423 5424 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5425 if self._match(TokenType.TRUE): 5426 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5427 if self._match(TokenType.FALSE): 5428 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5429 return self._parse_placeholder() 5430 5431 def _parse_star(self) -> t.Optional[exp.Expression]: 5432 if self._match(TokenType.STAR): 5433 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5434 return self._parse_placeholder() 5435 5436 def _parse_parameter(self) -> exp.Parameter: 5437 self._match(TokenType.L_BRACE) 5438 this = self._parse_identifier() or self._parse_primary_or_var() 5439 expression = self._match(TokenType.COLON) and ( 5440 self._parse_identifier() or self._parse_primary_or_var() 5441 ) 5442 self._match(TokenType.R_BRACE) 5443 return self.expression(exp.Parameter, this=this, expression=expression) 5444 5445 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5446 if self._match_set(self.PLACEHOLDER_PARSERS): 5447 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5448 if placeholder: 5449 return placeholder 5450 self._advance(-1) 5451 return None 5452 5453 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5454 if not self._match(TokenType.EXCEPT): 5455 return None 5456 if self._match(TokenType.L_PAREN, advance=False): 5457 return self._parse_wrapped_csv(self._parse_column) 5458 5459 except_column = self._parse_column() 5460 return [except_column] if except_column else None 5461 5462 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5463 if not self._match(TokenType.REPLACE): 5464 return None 5465 if 
self._match(TokenType.L_PAREN, advance=False): 5466 return self._parse_wrapped_csv(self._parse_expression) 5467 5468 replace_expression = self._parse_expression() 5469 return [replace_expression] if replace_expression else None 5470 5471 def _parse_csv( 5472 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5473 ) -> t.List[exp.Expression]: 5474 parse_result = parse_method() 5475 items = [parse_result] if parse_result is not None else [] 5476 5477 while self._match(sep): 5478 self._add_comments(parse_result) 5479 parse_result = parse_method() 5480 if parse_result is not None: 5481 items.append(parse_result) 5482 5483 return items 5484 5485 def _parse_tokens( 5486 self, parse_method: t.Callable, expressions: t.Dict 5487 ) -> t.Optional[exp.Expression]: 5488 this = parse_method() 5489 5490 while self._match_set(expressions): 5491 this = self.expression( 5492 expressions[self._prev.token_type], 5493 this=this, 5494 comments=self._prev_comments, 5495 expression=parse_method(), 5496 ) 5497 5498 return this 5499 5500 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5501 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5502 5503 def _parse_wrapped_csv( 5504 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5505 ) -> t.List[exp.Expression]: 5506 return self._parse_wrapped( 5507 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5508 ) 5509 5510 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5511 wrapped = self._match(TokenType.L_PAREN) 5512 if not wrapped and not optional: 5513 self.raise_error("Expecting (") 5514 parse_result = parse_method() 5515 if wrapped: 5516 self._match_r_paren() 5517 return parse_result 5518 5519 def _parse_expressions(self) -> t.List[exp.Expression]: 5520 return self._parse_csv(self._parse_expression) 5521 5522 def _parse_select_or_expression(self, alias: bool = False) -> 
t.Optional[exp.Expression]: 5523 return self._parse_select() or self._parse_set_operations( 5524 self._parse_expression() if alias else self._parse_conjunction() 5525 ) 5526 5527 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5528 return self._parse_query_modifiers( 5529 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5530 ) 5531 5532 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5533 this = None 5534 if self._match_texts(self.TRANSACTION_KIND): 5535 this = self._prev.text 5536 5537 self._match_texts(("TRANSACTION", "WORK")) 5538 5539 modes = [] 5540 while True: 5541 mode = [] 5542 while self._match(TokenType.VAR): 5543 mode.append(self._prev.text) 5544 5545 if mode: 5546 modes.append(" ".join(mode)) 5547 if not self._match(TokenType.COMMA): 5548 break 5549 5550 return self.expression(exp.Transaction, this=this, modes=modes) 5551 5552 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5553 chain = None 5554 savepoint = None 5555 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5556 5557 self._match_texts(("TRANSACTION", "WORK")) 5558 5559 if self._match_text_seq("TO"): 5560 self._match_text_seq("SAVEPOINT") 5561 savepoint = self._parse_id_var() 5562 5563 if self._match(TokenType.AND): 5564 chain = not self._match_text_seq("NO") 5565 self._match_text_seq("CHAIN") 5566 5567 if is_rollback: 5568 return self.expression(exp.Rollback, savepoint=savepoint) 5569 5570 return self.expression(exp.Commit, chain=chain) 5571 5572 def _parse_refresh(self) -> exp.Refresh: 5573 self._match(TokenType.TABLE) 5574 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5575 5576 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5577 if not self._match_text_seq("ADD"): 5578 return None 5579 5580 self._match(TokenType.COLUMN) 5581 exists_column = self._parse_exists(not_=True) 5582 expression = self._parse_field_def() 5583 5584 if expression: 5585 
expression.set("exists", exists_column) 5586 5587 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5588 if self._match_texts(("FIRST", "AFTER")): 5589 position = self._prev.text 5590 column_position = self.expression( 5591 exp.ColumnPosition, this=self._parse_column(), position=position 5592 ) 5593 expression.set("position", column_position) 5594 5595 return expression 5596 5597 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5598 drop = self._match(TokenType.DROP) and self._parse_drop() 5599 if drop and not isinstance(drop, exp.Command): 5600 drop.set("kind", drop.args.get("kind", "COLUMN")) 5601 return drop 5602 5603 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5604 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5605 return self.expression( 5606 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5607 ) 5608 5609 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5610 index = self._index - 1 5611 5612 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5613 return self._parse_csv( 5614 lambda: self.expression( 5615 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5616 ) 5617 ) 5618 5619 self._retreat(index) 5620 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5621 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5622 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5623 5624 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5625 self._match(TokenType.COLUMN) 5626 column = self._parse_field(any_token=True) 5627 5628 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5629 return self.expression(exp.AlterColumn, this=column, drop=True) 5630 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5631 return self.expression(exp.AlterColumn, this=column, 
default=self._parse_conjunction()) 5632 if self._match(TokenType.COMMENT): 5633 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5634 5635 self._match_text_seq("SET", "DATA") 5636 return self.expression( 5637 exp.AlterColumn, 5638 this=column, 5639 dtype=self._match_text_seq("TYPE") and self._parse_types(), 5640 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5641 using=self._match(TokenType.USING) and self._parse_conjunction(), 5642 ) 5643 5644 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5645 index = self._index - 1 5646 5647 partition_exists = self._parse_exists() 5648 if self._match(TokenType.PARTITION, advance=False): 5649 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5650 5651 self._retreat(index) 5652 return self._parse_csv(self._parse_drop_column) 5653 5654 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5655 if self._match(TokenType.COLUMN): 5656 exists = self._parse_exists() 5657 old_column = self._parse_column() 5658 to = self._match_text_seq("TO") 5659 new_column = self._parse_column() 5660 5661 if old_column is None or to is None or new_column is None: 5662 return None 5663 5664 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5665 5666 self._match_text_seq("TO") 5667 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5668 5669 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5670 start = self._prev 5671 5672 if not self._match(TokenType.TABLE): 5673 return self._parse_as_command(start) 5674 5675 exists = self._parse_exists() 5676 only = self._match_text_seq("ONLY") 5677 this = self._parse_table(schema=True) 5678 5679 if self._next: 5680 self._advance() 5681 5682 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5683 if parser: 5684 actions = ensure_list(parser(self)) 5685 options = 
self._parse_csv(self._parse_property) 5686 5687 if not self._curr and actions: 5688 return self.expression( 5689 exp.AlterTable, 5690 this=this, 5691 exists=exists, 5692 actions=actions, 5693 only=only, 5694 options=options, 5695 ) 5696 5697 return self._parse_as_command(start) 5698 5699 def _parse_merge(self) -> exp.Merge: 5700 self._match(TokenType.INTO) 5701 target = self._parse_table() 5702 5703 if target and self._match(TokenType.ALIAS, advance=False): 5704 target.set("alias", self._parse_table_alias()) 5705 5706 self._match(TokenType.USING) 5707 using = self._parse_table() 5708 5709 self._match(TokenType.ON) 5710 on = self._parse_conjunction() 5711 5712 return self.expression( 5713 exp.Merge, 5714 this=target, 5715 using=using, 5716 on=on, 5717 expressions=self._parse_when_matched(), 5718 ) 5719 5720 def _parse_when_matched(self) -> t.List[exp.When]: 5721 whens = [] 5722 5723 while self._match(TokenType.WHEN): 5724 matched = not self._match(TokenType.NOT) 5725 self._match_text_seq("MATCHED") 5726 source = ( 5727 False 5728 if self._match_text_seq("BY", "TARGET") 5729 else self._match_text_seq("BY", "SOURCE") 5730 ) 5731 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5732 5733 self._match(TokenType.THEN) 5734 5735 if self._match(TokenType.INSERT): 5736 _this = self._parse_star() 5737 if _this: 5738 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5739 else: 5740 then = self.expression( 5741 exp.Insert, 5742 this=self._parse_value(), 5743 expression=self._match_text_seq("VALUES") and self._parse_value(), 5744 ) 5745 elif self._match(TokenType.UPDATE): 5746 expressions = self._parse_star() 5747 if expressions: 5748 then = self.expression(exp.Update, expressions=expressions) 5749 else: 5750 then = self.expression( 5751 exp.Update, 5752 expressions=self._match(TokenType.SET) 5753 and self._parse_csv(self._parse_equality), 5754 ) 5755 elif self._match(TokenType.DELETE): 5756 then = self.expression(exp.Var, 
this=self._prev.text) 5757 else: 5758 then = None 5759 5760 whens.append( 5761 self.expression( 5762 exp.When, 5763 matched=matched, 5764 source=source, 5765 condition=condition, 5766 then=then, 5767 ) 5768 ) 5769 return whens 5770 5771 def _parse_show(self) -> t.Optional[exp.Expression]: 5772 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5773 if parser: 5774 return parser(self) 5775 return self._parse_as_command(self._prev) 5776 5777 def _parse_set_item_assignment( 5778 self, kind: t.Optional[str] = None 5779 ) -> t.Optional[exp.Expression]: 5780 index = self._index 5781 5782 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 5783 return self._parse_set_transaction(global_=kind == "GLOBAL") 5784 5785 left = self._parse_primary() or self._parse_id_var() 5786 assignment_delimiter = self._match_texts(("=", "TO")) 5787 5788 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5789 self._retreat(index) 5790 return None 5791 5792 right = self._parse_statement() or self._parse_id_var() 5793 this = self.expression(exp.EQ, this=left, expression=right) 5794 5795 return self.expression(exp.SetItem, this=this, kind=kind) 5796 5797 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5798 self._match_text_seq("TRANSACTION") 5799 characteristics = self._parse_csv( 5800 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5801 ) 5802 return self.expression( 5803 exp.SetItem, 5804 expressions=characteristics, 5805 kind="TRANSACTION", 5806 **{"global": global_}, # type: ignore 5807 ) 5808 5809 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5810 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5811 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5812 5813 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5814 index = self._index 5815 set_ = self.expression( 5816 exp.Set, 
            expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        # If anything is left unconsumed, this wasn't a SET we understand:
        # rewind and fall back to an opaque Command.
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a possibly multi-word option into exp.Var.

        `options` maps a leading keyword to its valid continuation keyword
        sequences. On an unknown option (or no continuation matching), either
        raises or rewinds and returns None, per `raise_unmatched`.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # for/else: no continuation matched. An unknown leading keyword
            # yields continuations is None; both cases are failures.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Unsupported-syntax fallback: swallow the rest of the statement and
        # keep it verbatim inside an exp.Command, warning once.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        # Parses <this>(<kind>[(<key> <value>, ...)]) into a DictProperty.
        # NOTE(review): looks like ClickHouse dictionary DDL — confirm with callers.
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                # Stop when neither a key nor a value could be parsed.
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        # Parses (MIN <expr> MAX <expr>) or (MAX <expr>) — continues past this chunk.
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
if has_min: 5891 min = self._parse_var() or self._parse_primary() 5892 self._match_text_seq("MAX") 5893 max = self._parse_var() or self._parse_primary() 5894 else: 5895 max = self._parse_var() or self._parse_primary() 5896 min = exp.Literal.number(0) 5897 self._match_r_paren() 5898 return self.expression(exp.DictRange, this=this, min=min, max=max) 5899 5900 def _parse_comprehension( 5901 self, this: t.Optional[exp.Expression] 5902 ) -> t.Optional[exp.Comprehension]: 5903 index = self._index 5904 expression = self._parse_column() 5905 if not self._match(TokenType.IN): 5906 self._retreat(index - 1) 5907 return None 5908 iterator = self._parse_column() 5909 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5910 return self.expression( 5911 exp.Comprehension, 5912 this=this, 5913 expression=expression, 5914 iterator=iterator, 5915 condition=condition, 5916 ) 5917 5918 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 5919 if self._match(TokenType.HEREDOC_STRING): 5920 return self.expression(exp.Heredoc, this=self._prev.text) 5921 5922 if not self._match_text_seq("$"): 5923 return None 5924 5925 tags = ["$"] 5926 tag_text = None 5927 5928 if self._is_connected(): 5929 self._advance() 5930 tags.append(self._prev.text.upper()) 5931 else: 5932 self.raise_error("No closing $ found") 5933 5934 if tags[-1] != "$": 5935 if self._is_connected() and self._match_text_seq("$"): 5936 tag_text = tags[-1] 5937 tags.append("$") 5938 else: 5939 self.raise_error("No closing $ found") 5940 5941 heredoc_start = self._curr 5942 5943 while self._curr: 5944 if self._match_text_seq(*tags, advance=False): 5945 this = self._find_sql(heredoc_start, self._prev) 5946 self._advance(len(tags)) 5947 return self.expression(exp.Heredoc, this=this, tag=tag_text) 5948 5949 self._advance() 5950 5951 self.raise_error(f"No closing {''.join(tags)} found") 5952 return None 5953 5954 def _find_parser( 5955 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5956 ) -> 
t.Optional[t.Callable]: 5957 if not self._curr: 5958 return None 5959 5960 index = self._index 5961 this = [] 5962 while True: 5963 # The current token might be multiple words 5964 curr = self._curr.text.upper() 5965 key = curr.split(" ") 5966 this.append(curr) 5967 5968 self._advance() 5969 result, trie = in_trie(trie, key) 5970 if result == TrieResult.FAILED: 5971 break 5972 5973 if result == TrieResult.EXISTS: 5974 subparser = parsers[" ".join(this)] 5975 return subparser 5976 5977 self._retreat(index) 5978 return None 5979 5980 def _match(self, token_type, advance=True, expression=None): 5981 if not self._curr: 5982 return None 5983 5984 if self._curr.token_type == token_type: 5985 if advance: 5986 self._advance() 5987 self._add_comments(expression) 5988 return True 5989 5990 return None 5991 5992 def _match_set(self, types, advance=True): 5993 if not self._curr: 5994 return None 5995 5996 if self._curr.token_type in types: 5997 if advance: 5998 self._advance() 5999 return True 6000 6001 return None 6002 6003 def _match_pair(self, token_type_a, token_type_b, advance=True): 6004 if not self._curr or not self._next: 6005 return None 6006 6007 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6008 if advance: 6009 self._advance(2) 6010 return True 6011 6012 return None 6013 6014 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6015 if not self._match(TokenType.L_PAREN, expression=expression): 6016 self.raise_error("Expecting (") 6017 6018 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6019 if not self._match(TokenType.R_PAREN, expression=expression): 6020 self.raise_error("Expecting )") 6021 6022 def _match_texts(self, texts, advance=True): 6023 if self._curr and self._curr.text.upper() in texts: 6024 if advance: 6025 self._advance() 6026 return True 6027 return None 6028 6029 def _match_text_seq(self, *texts, advance=True): 6030 index = self._index 6031 for text in 
texts: 6032 if self._curr and self._curr.text.upper() == text: 6033 self._advance() 6034 else: 6035 self._retreat(index) 6036 return None 6037 6038 if not advance: 6039 self._retreat(index) 6040 6041 return True 6042 6043 @t.overload 6044 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ... 6045 6046 @t.overload 6047 def _replace_columns_with_dots( 6048 self, this: t.Optional[exp.Expression] 6049 ) -> t.Optional[exp.Expression]: ... 6050 6051 def _replace_columns_with_dots(self, this): 6052 if isinstance(this, exp.Dot): 6053 exp.replace_children(this, self._replace_columns_with_dots) 6054 elif isinstance(this, exp.Column): 6055 exp.replace_children(this, self._replace_columns_with_dots) 6056 table = this.args.get("table") 6057 this = ( 6058 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 6059 ) 6060 6061 return this 6062 6063 def _replace_lambda( 6064 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6065 ) -> t.Optional[exp.Expression]: 6066 if not node: 6067 return node 6068 6069 for column in node.find_all(exp.Column): 6070 if column.parts[0].name in lambda_variables: 6071 dot_or_id = column.to_dot() if column.table else column.this 6072 parent = column.parent 6073 6074 while isinstance(parent, exp.Dot): 6075 if not isinstance(parent.parent, exp.Dot): 6076 parent.replace(dot_or_id) 6077 break 6078 parent = parent.parent 6079 else: 6080 if column is node: 6081 node = dot_or_id 6082 else: 6083 column.replace(dot_or_id) 6084 return node 6085 6086 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6087 start = self._prev 6088 6089 # Not to be confused with TRUNCATE(number, decimals) function call 6090 if self._match(TokenType.L_PAREN): 6091 self._retreat(self._index - 2) 6092 return self._parse_function() 6093 6094 # Clickhouse supports TRUNCATE DATABASE as well 6095 is_database = self._match(TokenType.DATABASE) 6096 6097 self._match(TokenType.TABLE) 
6098 6099 exists = self._parse_exists(not_=False) 6100 6101 expressions = self._parse_csv( 6102 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6103 ) 6104 6105 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6106 6107 if self._match_text_seq("RESTART", "IDENTITY"): 6108 identity = "RESTART" 6109 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6110 identity = "CONTINUE" 6111 else: 6112 identity = None 6113 6114 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6115 option = self._prev.text 6116 else: 6117 option = None 6118 6119 partition = self._parse_partition() 6120 6121 # Fallback case 6122 if self._curr: 6123 return self._parse_as_command(start) 6124 6125 return self.expression( 6126 exp.TruncateTable, 6127 expressions=expressions, 6128 is_database=is_database, 6129 exists=exists, 6130 cluster=cluster, 6131 identity=identity, 6132 option=option, 6133 partition=partition, 6134 ) 6135 6136 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6137 this = self._parse_ordered(self._parse_opclass) 6138 6139 if not self._match(TokenType.WITH): 6140 return this 6141 6142 op = self._parse_var(any_token=True) 6143 6144 return self.expression(exp.WithOperator, this=this, op=op)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat argument list.

    A single star argument produces a StarMap; otherwise the arguments are
    interpreted as alternating key/value pairs and packed into a VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Pair up the alternating key, value, key, value, ... arguments.
    pairs = [(args[i], args[i + 1]) for i in range(0, len(args), 2)]
    keys = [key for key, _ in pairs]
    values = [value for _, value in pairs]

    return exp.VarMap(
        keys=exp.array(*keys, copy=False),
        values=exp.array(*values, copy=False),
    )
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log (or Ln) expression from LOG(...) call arguments.

    With two arguments, the default order is (base, expression); dialects
    where LOG_BASE_FIRST is false use the reverse order. With one argument,
    the result is Ln when the dialect defaults LOG to the natural logarithm.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if second:
        if dialect.LOG_BASE_FIRST:
            return exp.Log(this=first, expression=second)
        return exp.Log(this=second, expression=first)

    # Single-argument form.
    log_cls = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
    return log_cls(this=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs `expr_type` from (json, path, *rest) args,
    converting the second argument into a dialect-specific JSON path."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=seq_get(args, 0), expression=json_path)

        # Only JSONExtract carries any trailing arguments through.
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
86class Parser(metaclass=_Parser): 87 """ 88 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 89 90 Args: 91 error_level: The desired error level. 92 Default: ErrorLevel.IMMEDIATE 93 error_message_context: The amount of context to capture from a query string when displaying 94 the error message (in number of characters). 95 Default: 100 96 max_errors: Maximum number of error messages to include in a raised ParseError. 97 This is only relevant if error_level is ErrorLevel.RAISE. 98 Default: 3 99 """ 100 101 FUNCTIONS: t.Dict[str, t.Callable] = { 102 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 103 "CONCAT": lambda args, dialect: exp.Concat( 104 expressions=args, 105 safe=not dialect.STRICT_STRING_CONCAT, 106 coalesce=dialect.CONCAT_COALESCE, 107 ), 108 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 109 expressions=args, 110 safe=not dialect.STRICT_STRING_CONCAT, 111 coalesce=dialect.CONCAT_COALESCE, 112 ), 113 "DATE_TO_DATE_STR": lambda args: exp.Cast( 114 this=seq_get(args, 0), 115 to=exp.DataType(this=exp.DataType.Type.TEXT), 116 ), 117 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 118 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 119 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 120 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 121 "LIKE": build_like, 122 "LOG": build_logarithm, 123 "TIME_TO_TIME_STR": lambda args: exp.Cast( 124 this=seq_get(args, 0), 125 to=exp.DataType(this=exp.DataType.Type.TEXT), 126 ), 127 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 128 this=exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 start=exp.Literal.number(1), 133 length=exp.Literal.number(10), 134 ), 135 "VAR_MAP": build_var_map, 136 } 137 138 NO_PAREN_FUNCTIONS = { 139 TokenType.CURRENT_DATE: exp.CurrentDate, 140 
TokenType.CURRENT_DATETIME: exp.CurrentDate, 141 TokenType.CURRENT_TIME: exp.CurrentTime, 142 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 143 TokenType.CURRENT_USER: exp.CurrentUser, 144 } 145 146 STRUCT_TYPE_TOKENS = { 147 TokenType.NESTED, 148 TokenType.STRUCT, 149 } 150 151 NESTED_TYPE_TOKENS = { 152 TokenType.ARRAY, 153 TokenType.LOWCARDINALITY, 154 TokenType.MAP, 155 TokenType.NULLABLE, 156 *STRUCT_TYPE_TOKENS, 157 } 158 159 ENUM_TYPE_TOKENS = { 160 TokenType.ENUM, 161 TokenType.ENUM8, 162 TokenType.ENUM16, 163 } 164 165 AGGREGATE_TYPE_TOKENS = { 166 TokenType.AGGREGATEFUNCTION, 167 TokenType.SIMPLEAGGREGATEFUNCTION, 168 } 169 170 TYPE_TOKENS = { 171 TokenType.BIT, 172 TokenType.BOOLEAN, 173 TokenType.TINYINT, 174 TokenType.UTINYINT, 175 TokenType.SMALLINT, 176 TokenType.USMALLINT, 177 TokenType.INT, 178 TokenType.UINT, 179 TokenType.BIGINT, 180 TokenType.UBIGINT, 181 TokenType.INT128, 182 TokenType.UINT128, 183 TokenType.INT256, 184 TokenType.UINT256, 185 TokenType.MEDIUMINT, 186 TokenType.UMEDIUMINT, 187 TokenType.FIXEDSTRING, 188 TokenType.FLOAT, 189 TokenType.DOUBLE, 190 TokenType.CHAR, 191 TokenType.NCHAR, 192 TokenType.VARCHAR, 193 TokenType.NVARCHAR, 194 TokenType.BPCHAR, 195 TokenType.TEXT, 196 TokenType.MEDIUMTEXT, 197 TokenType.LONGTEXT, 198 TokenType.MEDIUMBLOB, 199 TokenType.LONGBLOB, 200 TokenType.BINARY, 201 TokenType.VARBINARY, 202 TokenType.JSON, 203 TokenType.JSONB, 204 TokenType.INTERVAL, 205 TokenType.TINYBLOB, 206 TokenType.TINYTEXT, 207 TokenType.TIME, 208 TokenType.TIMETZ, 209 TokenType.TIMESTAMP, 210 TokenType.TIMESTAMP_S, 211 TokenType.TIMESTAMP_MS, 212 TokenType.TIMESTAMP_NS, 213 TokenType.TIMESTAMPTZ, 214 TokenType.TIMESTAMPLTZ, 215 TokenType.DATETIME, 216 TokenType.DATETIME64, 217 TokenType.DATE, 218 TokenType.DATE32, 219 TokenType.INT4RANGE, 220 TokenType.INT4MULTIRANGE, 221 TokenType.INT8RANGE, 222 TokenType.INT8MULTIRANGE, 223 TokenType.NUMRANGE, 224 TokenType.NUMMULTIRANGE, 225 TokenType.TSRANGE, 226 
TokenType.TSMULTIRANGE, 227 TokenType.TSTZRANGE, 228 TokenType.TSTZMULTIRANGE, 229 TokenType.DATERANGE, 230 TokenType.DATEMULTIRANGE, 231 TokenType.DECIMAL, 232 TokenType.UDECIMAL, 233 TokenType.BIGDECIMAL, 234 TokenType.UUID, 235 TokenType.GEOGRAPHY, 236 TokenType.GEOMETRY, 237 TokenType.HLLSKETCH, 238 TokenType.HSTORE, 239 TokenType.PSEUDO_TYPE, 240 TokenType.SUPER, 241 TokenType.SERIAL, 242 TokenType.SMALLSERIAL, 243 TokenType.BIGSERIAL, 244 TokenType.XML, 245 TokenType.YEAR, 246 TokenType.UNIQUEIDENTIFIER, 247 TokenType.USERDEFINED, 248 TokenType.MONEY, 249 TokenType.SMALLMONEY, 250 TokenType.ROWVERSION, 251 TokenType.IMAGE, 252 TokenType.VARIANT, 253 TokenType.OBJECT, 254 TokenType.OBJECT_IDENTIFIER, 255 TokenType.INET, 256 TokenType.IPADDRESS, 257 TokenType.IPPREFIX, 258 TokenType.IPV4, 259 TokenType.IPV6, 260 TokenType.UNKNOWN, 261 TokenType.NULL, 262 TokenType.NAME, 263 *ENUM_TYPE_TOKENS, 264 *NESTED_TYPE_TOKENS, 265 *AGGREGATE_TYPE_TOKENS, 266 } 267 268 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 269 TokenType.BIGINT: TokenType.UBIGINT, 270 TokenType.INT: TokenType.UINT, 271 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 272 TokenType.SMALLINT: TokenType.USMALLINT, 273 TokenType.TINYINT: TokenType.UTINYINT, 274 TokenType.DECIMAL: TokenType.UDECIMAL, 275 } 276 277 SUBQUERY_PREDICATES = { 278 TokenType.ANY: exp.Any, 279 TokenType.ALL: exp.All, 280 TokenType.EXISTS: exp.Exists, 281 TokenType.SOME: exp.Any, 282 } 283 284 RESERVED_TOKENS = { 285 *Tokenizer.SINGLE_TOKENS.values(), 286 TokenType.SELECT, 287 } 288 289 DB_CREATABLES = { 290 TokenType.DATABASE, 291 TokenType.SCHEMA, 292 TokenType.TABLE, 293 TokenType.VIEW, 294 TokenType.MODEL, 295 TokenType.DICTIONARY, 296 TokenType.SEQUENCE, 297 TokenType.STORAGE_INTEGRATION, 298 } 299 300 CREATABLES = { 301 TokenType.COLUMN, 302 TokenType.CONSTRAINT, 303 TokenType.FUNCTION, 304 TokenType.INDEX, 305 TokenType.PROCEDURE, 306 TokenType.FOREIGN_KEY, 307 *DB_CREATABLES, 308 } 309 310 # Tokens that can represent identifiers 311 
ID_VAR_TOKENS = { 312 TokenType.VAR, 313 TokenType.ANTI, 314 TokenType.APPLY, 315 TokenType.ASC, 316 TokenType.AUTO_INCREMENT, 317 TokenType.BEGIN, 318 TokenType.BPCHAR, 319 TokenType.CACHE, 320 TokenType.CASE, 321 TokenType.COLLATE, 322 TokenType.COMMAND, 323 TokenType.COMMENT, 324 TokenType.COMMIT, 325 TokenType.CONSTRAINT, 326 TokenType.DEFAULT, 327 TokenType.DELETE, 328 TokenType.DESC, 329 TokenType.DESCRIBE, 330 TokenType.DICTIONARY, 331 TokenType.DIV, 332 TokenType.END, 333 TokenType.EXECUTE, 334 TokenType.ESCAPE, 335 TokenType.FALSE, 336 TokenType.FIRST, 337 TokenType.FILTER, 338 TokenType.FINAL, 339 TokenType.FORMAT, 340 TokenType.FULL, 341 TokenType.IS, 342 TokenType.ISNULL, 343 TokenType.INTERVAL, 344 TokenType.KEEP, 345 TokenType.KILL, 346 TokenType.LEFT, 347 TokenType.LOAD, 348 TokenType.MERGE, 349 TokenType.NATURAL, 350 TokenType.NEXT, 351 TokenType.OFFSET, 352 TokenType.OPERATOR, 353 TokenType.ORDINALITY, 354 TokenType.OVERLAPS, 355 TokenType.OVERWRITE, 356 TokenType.PARTITION, 357 TokenType.PERCENT, 358 TokenType.PIVOT, 359 TokenType.PRAGMA, 360 TokenType.RANGE, 361 TokenType.RECURSIVE, 362 TokenType.REFERENCES, 363 TokenType.REFRESH, 364 TokenType.REPLACE, 365 TokenType.RIGHT, 366 TokenType.ROW, 367 TokenType.ROWS, 368 TokenType.SEMI, 369 TokenType.SET, 370 TokenType.SETTINGS, 371 TokenType.SHOW, 372 TokenType.TEMPORARY, 373 TokenType.TOP, 374 TokenType.TRUE, 375 TokenType.TRUNCATE, 376 TokenType.UNIQUE, 377 TokenType.UNPIVOT, 378 TokenType.UPDATE, 379 TokenType.USE, 380 TokenType.VOLATILE, 381 TokenType.WINDOW, 382 *CREATABLES, 383 *SUBQUERY_PREDICATES, 384 *TYPE_TOKENS, 385 *NO_PAREN_FUNCTIONS, 386 } 387 388 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 389 390 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 391 TokenType.ANTI, 392 TokenType.APPLY, 393 TokenType.ASOF, 394 TokenType.FULL, 395 TokenType.LEFT, 396 TokenType.LOCK, 397 TokenType.NATURAL, 398 TokenType.OFFSET, 399 TokenType.RIGHT, 400 TokenType.SEMI, 401 TokenType.WINDOW, 402 } 403 404 
COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 405 406 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 407 408 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 409 410 FUNC_TOKENS = { 411 TokenType.COLLATE, 412 TokenType.COMMAND, 413 TokenType.CURRENT_DATE, 414 TokenType.CURRENT_DATETIME, 415 TokenType.CURRENT_TIMESTAMP, 416 TokenType.CURRENT_TIME, 417 TokenType.CURRENT_USER, 418 TokenType.FILTER, 419 TokenType.FIRST, 420 TokenType.FORMAT, 421 TokenType.GLOB, 422 TokenType.IDENTIFIER, 423 TokenType.INDEX, 424 TokenType.ISNULL, 425 TokenType.ILIKE, 426 TokenType.INSERT, 427 TokenType.LIKE, 428 TokenType.MERGE, 429 TokenType.OFFSET, 430 TokenType.PRIMARY_KEY, 431 TokenType.RANGE, 432 TokenType.REPLACE, 433 TokenType.RLIKE, 434 TokenType.ROW, 435 TokenType.UNNEST, 436 TokenType.VAR, 437 TokenType.LEFT, 438 TokenType.RIGHT, 439 TokenType.SEQUENCE, 440 TokenType.DATE, 441 TokenType.DATETIME, 442 TokenType.TABLE, 443 TokenType.TIMESTAMP, 444 TokenType.TIMESTAMPTZ, 445 TokenType.TRUNCATE, 446 TokenType.WINDOW, 447 TokenType.XOR, 448 *TYPE_TOKENS, 449 *SUBQUERY_PREDICATES, 450 } 451 452 CONJUNCTION = { 453 TokenType.AND: exp.And, 454 TokenType.OR: exp.Or, 455 } 456 457 EQUALITY = { 458 TokenType.COLON_EQ: exp.PropertyEQ, 459 TokenType.EQ: exp.EQ, 460 TokenType.NEQ: exp.NEQ, 461 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 462 } 463 464 COMPARISON = { 465 TokenType.GT: exp.GT, 466 TokenType.GTE: exp.GTE, 467 TokenType.LT: exp.LT, 468 TokenType.LTE: exp.LTE, 469 } 470 471 BITWISE = { 472 TokenType.AMP: exp.BitwiseAnd, 473 TokenType.CARET: exp.BitwiseXor, 474 TokenType.PIPE: exp.BitwiseOr, 475 } 476 477 TERM = { 478 TokenType.DASH: exp.Sub, 479 TokenType.PLUS: exp.Add, 480 TokenType.MOD: exp.Mod, 481 TokenType.COLLATE: exp.Collate, 482 } 483 484 FACTOR = { 485 TokenType.DIV: exp.IntDiv, 486 TokenType.LR_ARROW: exp.Distance, 487 TokenType.SLASH: exp.Div, 488 TokenType.STAR: exp.Mul, 489 } 490 491 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 492 
493 TIMES = { 494 TokenType.TIME, 495 TokenType.TIMETZ, 496 } 497 498 TIMESTAMPS = { 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMPTZ, 501 TokenType.TIMESTAMPLTZ, 502 *TIMES, 503 } 504 505 SET_OPERATIONS = { 506 TokenType.UNION, 507 TokenType.INTERSECT, 508 TokenType.EXCEPT, 509 } 510 511 JOIN_METHODS = { 512 TokenType.ASOF, 513 TokenType.NATURAL, 514 TokenType.POSITIONAL, 515 } 516 517 JOIN_SIDES = { 518 TokenType.LEFT, 519 TokenType.RIGHT, 520 TokenType.FULL, 521 } 522 523 JOIN_KINDS = { 524 TokenType.INNER, 525 TokenType.OUTER, 526 TokenType.CROSS, 527 TokenType.SEMI, 528 TokenType.ANTI, 529 } 530 531 JOIN_HINTS: t.Set[str] = set() 532 533 LAMBDAS = { 534 TokenType.ARROW: lambda self, expressions: self.expression( 535 exp.Lambda, 536 this=self._replace_lambda( 537 self._parse_conjunction(), 538 {node.name for node in expressions}, 539 ), 540 expressions=expressions, 541 ), 542 TokenType.FARROW: lambda self, expressions: self.expression( 543 exp.Kwarg, 544 this=exp.var(expressions[0].name), 545 expression=self._parse_conjunction(), 546 ), 547 } 548 549 COLUMN_OPERATORS = { 550 TokenType.DOT: None, 551 TokenType.DCOLON: lambda self, this, to: self.expression( 552 exp.Cast if self.STRICT_CAST else exp.TryCast, 553 this=this, 554 to=to, 555 ), 556 TokenType.ARROW: lambda self, this, path: self.expression( 557 exp.JSONExtract, 558 this=this, 559 expression=self.dialect.to_json_path(path), 560 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 561 ), 562 TokenType.DARROW: lambda self, this, path: self.expression( 563 exp.JSONExtractScalar, 564 this=this, 565 expression=self.dialect.to_json_path(path), 566 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 567 ), 568 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 569 exp.JSONBExtract, 570 this=this, 571 expression=path, 572 ), 573 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 574 exp.JSONBExtractScalar, 575 this=this, 576 expression=path, 577 ), 578 TokenType.PLACEHOLDER: lambda 
self, this, key: self.expression( 579 exp.JSONBContains, 580 this=this, 581 expression=key, 582 ), 583 } 584 585 EXPRESSION_PARSERS = { 586 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 587 exp.Column: lambda self: self._parse_column(), 588 exp.Condition: lambda self: self._parse_conjunction(), 589 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 590 exp.Expression: lambda self: self._parse_expression(), 591 exp.From: lambda self: self._parse_from(), 592 exp.Group: lambda self: self._parse_group(), 593 exp.Having: lambda self: self._parse_having(), 594 exp.Identifier: lambda self: self._parse_id_var(), 595 exp.Join: lambda self: self._parse_join(), 596 exp.Lambda: lambda self: self._parse_lambda(), 597 exp.Lateral: lambda self: self._parse_lateral(), 598 exp.Limit: lambda self: self._parse_limit(), 599 exp.Offset: lambda self: self._parse_offset(), 600 exp.Order: lambda self: self._parse_order(), 601 exp.Ordered: lambda self: self._parse_ordered(), 602 exp.Properties: lambda self: self._parse_properties(), 603 exp.Qualify: lambda self: self._parse_qualify(), 604 exp.Returning: lambda self: self._parse_returning(), 605 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 606 exp.Table: lambda self: self._parse_table_parts(), 607 exp.TableAlias: lambda self: self._parse_table_alias(), 608 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 609 exp.Where: lambda self: self._parse_where(), 610 exp.Window: lambda self: self._parse_named_window(), 611 exp.With: lambda self: self._parse_with(), 612 "JOIN_TYPE": lambda self: self._parse_join_parts(), 613 } 614 615 STATEMENT_PARSERS = { 616 TokenType.ALTER: lambda self: self._parse_alter(), 617 TokenType.BEGIN: lambda self: self._parse_transaction(), 618 TokenType.CACHE: lambda self: self._parse_cache(), 619 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 620 TokenType.COMMENT: lambda self: self._parse_comment(), 621 
TokenType.CREATE: lambda self: self._parse_create(), 622 TokenType.DELETE: lambda self: self._parse_delete(), 623 TokenType.DESC: lambda self: self._parse_describe(), 624 TokenType.DESCRIBE: lambda self: self._parse_describe(), 625 TokenType.DROP: lambda self: self._parse_drop(), 626 TokenType.INSERT: lambda self: self._parse_insert(), 627 TokenType.KILL: lambda self: self._parse_kill(), 628 TokenType.LOAD: lambda self: self._parse_load(), 629 TokenType.MERGE: lambda self: self._parse_merge(), 630 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 631 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 632 TokenType.REFRESH: lambda self: self._parse_refresh(), 633 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 634 TokenType.SET: lambda self: self._parse_set(), 635 TokenType.UNCACHE: lambda self: self._parse_uncache(), 636 TokenType.UPDATE: lambda self: self._parse_update(), 637 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 638 TokenType.USE: lambda self: self.expression( 639 exp.Use, 640 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 641 this=self._parse_table(schema=False), 642 ), 643 } 644 645 UNARY_PARSERS = { 646 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 647 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 648 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 649 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 650 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 651 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 652 } 653 654 STRING_PARSERS = { 655 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 656 exp.RawString, this=token.text 657 ), 658 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 659 
exp.National, this=token.text 660 ), 661 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 662 TokenType.STRING: lambda self, token: self.expression( 663 exp.Literal, this=token.text, is_string=True 664 ), 665 TokenType.UNICODE_STRING: lambda self, token: self.expression( 666 exp.UnicodeString, 667 this=token.text, 668 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 669 ), 670 } 671 672 NUMERIC_PARSERS = { 673 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 674 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 675 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 676 TokenType.NUMBER: lambda self, token: self.expression( 677 exp.Literal, this=token.text, is_string=False 678 ), 679 } 680 681 PRIMARY_PARSERS = { 682 **STRING_PARSERS, 683 **NUMERIC_PARSERS, 684 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 685 TokenType.NULL: lambda self, _: self.expression(exp.Null), 686 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 687 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 688 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 689 TokenType.STAR: lambda self, _: self.expression( 690 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 691 ), 692 } 693 694 PLACEHOLDER_PARSERS = { 695 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 696 TokenType.PARAMETER: lambda self: self._parse_parameter(), 697 TokenType.COLON: lambda self: ( 698 self.expression(exp.Placeholder, this=self._prev.text) 699 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 700 else None 701 ), 702 } 703 704 RANGE_PARSERS = { 705 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 706 TokenType.GLOB: binary_range_parser(exp.Glob), 707 
TokenType.ILIKE: binary_range_parser(exp.ILike), 708 TokenType.IN: lambda self, this: self._parse_in(this), 709 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 710 TokenType.IS: lambda self, this: self._parse_is(this), 711 TokenType.LIKE: binary_range_parser(exp.Like), 712 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 713 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 714 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 715 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 716 } 717 718 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 719 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 720 "AUTO": lambda self: self._parse_auto_property(), 721 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 722 "BACKUP": lambda self: self.expression( 723 exp.BackupProperty, this=self._parse_var(any_token=True) 724 ), 725 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 726 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 727 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 728 "CHECKSUM": lambda self: self._parse_checksum(), 729 "CLUSTER BY": lambda self: self._parse_cluster(), 730 "CLUSTERED": lambda self: self._parse_clustered_by(), 731 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 732 exp.CollateProperty, **kwargs 733 ), 734 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 735 "CONTAINS": lambda self: self._parse_contains_property(), 736 "COPY": lambda self: self._parse_copy_property(), 737 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 738 "DEFINER": lambda self: self._parse_definer(), 739 "DETERMINISTIC": lambda self: self.expression( 740 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 741 ), 742 "DISTKEY": lambda self: self._parse_distkey(), 743 "DISTSTYLE": lambda self: 
self._parse_property_assignment(exp.DistStyleProperty), 744 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 745 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 746 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 747 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 748 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 749 "FREESPACE": lambda self: self._parse_freespace(), 750 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 751 "HEAP": lambda self: self.expression(exp.HeapProperty), 752 "IMMUTABLE": lambda self: self.expression( 753 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 754 ), 755 "INHERITS": lambda self: self.expression( 756 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 757 ), 758 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 759 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 760 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 761 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 762 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 763 "LIKE": lambda self: self._parse_create_like(), 764 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 765 "LOCK": lambda self: self._parse_locking(), 766 "LOCKING": lambda self: self._parse_locking(), 767 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 768 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 769 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 770 "MODIFIES": lambda self: self._parse_modifies_property(), 771 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 772 "NO": lambda self: self._parse_no_property(), 773 "ON": lambda self: self._parse_on_property(), 774 "ORDER BY": lambda self: 
self._parse_order(skip_order_token=True), 775 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 776 "PARTITION": lambda self: self._parse_partitioned_of(), 777 "PARTITION BY": lambda self: self._parse_partitioned_by(), 778 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 779 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 780 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 781 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 782 "READS": lambda self: self._parse_reads_property(), 783 "REMOTE": lambda self: self._parse_remote_with_connection(), 784 "RETURNS": lambda self: self._parse_returns(), 785 "ROW": lambda self: self._parse_row(), 786 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 787 "SAMPLE": lambda self: self.expression( 788 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 789 ), 790 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 791 "SETTINGS": lambda self: self.expression( 792 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 793 ), 794 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 795 "SORTKEY": lambda self: self._parse_sortkey(), 796 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 797 "STABLE": lambda self: self.expression( 798 exp.StabilityProperty, this=exp.Literal.string("STABLE") 799 ), 800 "STORED": lambda self: self._parse_stored(), 801 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 802 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 803 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 804 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 805 "TO": lambda self: self._parse_to_table(), 806 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 807 "TRANSFORM": lambda self: self.expression( 808 
exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 809 ), 810 "TTL": lambda self: self._parse_ttl(), 811 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 812 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 813 "VOLATILE": lambda self: self._parse_volatile_property(), 814 "WITH": lambda self: self._parse_with_property(), 815 } 816 817 CONSTRAINT_PARSERS = { 818 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 819 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 820 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 821 "CHARACTER SET": lambda self: self.expression( 822 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 823 ), 824 "CHECK": lambda self: self.expression( 825 exp.CheckColumnConstraint, 826 this=self._parse_wrapped(self._parse_conjunction), 827 enforced=self._match_text_seq("ENFORCED"), 828 ), 829 "COLLATE": lambda self: self.expression( 830 exp.CollateColumnConstraint, this=self._parse_var() 831 ), 832 "COMMENT": lambda self: self.expression( 833 exp.CommentColumnConstraint, this=self._parse_string() 834 ), 835 "COMPRESS": lambda self: self._parse_compress(), 836 "CLUSTERED": lambda self: self.expression( 837 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 838 ), 839 "NONCLUSTERED": lambda self: self.expression( 840 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 841 ), 842 "DEFAULT": lambda self: self.expression( 843 exp.DefaultColumnConstraint, this=self._parse_bitwise() 844 ), 845 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 846 "EXCLUDE": lambda self: self.expression( 847 exp.ExcludeColumnConstraint, this=self._parse_index_params() 848 ), 849 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 850 "FORMAT": lambda self: self.expression( 851 exp.DateFormatColumnConstraint, 
this=self._parse_var_or_string() 852 ), 853 "GENERATED": lambda self: self._parse_generated_as_identity(), 854 "IDENTITY": lambda self: self._parse_auto_increment(), 855 "INLINE": lambda self: self._parse_inline(), 856 "LIKE": lambda self: self._parse_create_like(), 857 "NOT": lambda self: self._parse_not_constraint(), 858 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 859 "ON": lambda self: ( 860 self._match(TokenType.UPDATE) 861 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 862 ) 863 or self.expression(exp.OnProperty, this=self._parse_id_var()), 864 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 865 "PERIOD": lambda self: self._parse_period_for_system_time(), 866 "PRIMARY KEY": lambda self: self._parse_primary_key(), 867 "REFERENCES": lambda self: self._parse_references(match=False), 868 "TITLE": lambda self: self.expression( 869 exp.TitleColumnConstraint, this=self._parse_var_or_string() 870 ), 871 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 872 "UNIQUE": lambda self: self._parse_unique(), 873 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 874 "WITH": lambda self: self.expression( 875 exp.Properties, expressions=self._parse_wrapped_properties() 876 ), 877 } 878 879 ALTER_PARSERS = { 880 "ADD": lambda self: self._parse_alter_table_add(), 881 "ALTER": lambda self: self._parse_alter_table_alter(), 882 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 883 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 884 "DROP": lambda self: self._parse_alter_table_drop(), 885 "RENAME": lambda self: self._parse_alter_table_rename(), 886 } 887 888 SCHEMA_UNNAMED_CONSTRAINTS = { 889 "CHECK", 890 "EXCLUDE", 891 "FOREIGN KEY", 892 "LIKE", 893 "PERIOD", 894 "PRIMARY KEY", 895 "UNIQUE", 896 } 897 898 NO_PAREN_FUNCTION_PARSERS = { 899 "ANY": lambda self: 
self.expression(exp.Any, this=self._parse_bitwise()), 900 "CASE": lambda self: self._parse_case(), 901 "IF": lambda self: self._parse_if(), 902 "NEXT": lambda self: self._parse_next_value_for(), 903 } 904 905 INVALID_FUNC_NAME_TOKENS = { 906 TokenType.IDENTIFIER, 907 TokenType.STRING, 908 } 909 910 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 911 912 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 913 914 FUNCTION_PARSERS = { 915 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 916 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 917 "DECODE": lambda self: self._parse_decode(), 918 "EXTRACT": lambda self: self._parse_extract(), 919 "JSON_OBJECT": lambda self: self._parse_json_object(), 920 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 921 "JSON_TABLE": lambda self: self._parse_json_table(), 922 "MATCH": lambda self: self._parse_match_against(), 923 "OPENJSON": lambda self: self._parse_open_json(), 924 "POSITION": lambda self: self._parse_position(), 925 "PREDICT": lambda self: self._parse_predict(), 926 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 927 "STRING_AGG": lambda self: self._parse_string_agg(), 928 "SUBSTRING": lambda self: self._parse_substring(), 929 "TRIM": lambda self: self._parse_trim(), 930 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 931 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 932 } 933 934 QUERY_MODIFIER_PARSERS = { 935 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 936 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 937 TokenType.WHERE: lambda self: ("where", self._parse_where()), 938 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 939 TokenType.HAVING: lambda self: ("having", self._parse_having()), 940 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 941 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 942 
TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 943 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 944 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 945 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 946 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 947 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 948 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 949 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 950 TokenType.CLUSTER_BY: lambda self: ( 951 "cluster", 952 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 953 ), 954 TokenType.DISTRIBUTE_BY: lambda self: ( 955 "distribute", 956 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 957 ), 958 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 959 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 960 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 961 } 962 963 SET_PARSERS = { 964 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 965 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 966 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 967 "TRANSACTION": lambda self: self._parse_set_transaction(), 968 } 969 970 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 971 972 TYPE_LITERAL_PARSERS = { 973 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 974 } 975 976 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 977 978 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 979 980 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 981 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 982 "ISOLATION": ( 983 ("LEVEL", "REPEATABLE", "READ"), 984 ("LEVEL", "READ", "COMMITTED"), 985 ("LEVEL", "READ", 
"UNCOMITTED"), 986 ("LEVEL", "SERIALIZABLE"), 987 ), 988 "READ": ("WRITE", "ONLY"), 989 } 990 991 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 992 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 993 ) 994 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 995 996 CREATE_SEQUENCE: OPTIONS_TYPE = { 997 "SCALE": ("EXTEND", "NOEXTEND"), 998 "SHARD": ("EXTEND", "NOEXTEND"), 999 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1000 **dict.fromkeys( 1001 ( 1002 "SESSION", 1003 "GLOBAL", 1004 "KEEP", 1005 "NOKEEP", 1006 "ORDER", 1007 "NOORDER", 1008 "NOCACHE", 1009 "CYCLE", 1010 "NOCYCLE", 1011 "NOMINVALUE", 1012 "NOMAXVALUE", 1013 "NOSCALE", 1014 "NOSHARD", 1015 ), 1016 tuple(), 1017 ), 1018 } 1019 1020 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1021 1022 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1023 1024 CLONE_KEYWORDS = {"CLONE", "COPY"} 1025 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1026 1027 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1028 1029 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1030 1031 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1032 1033 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1034 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1035 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1036 1037 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1038 1039 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1040 1041 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1042 1043 DISTINCT_TOKENS = {TokenType.DISTINCT} 1044 1045 NULL_TOKENS = {TokenType.NULL} 1046 1047 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1048 1049 STRICT_CAST = True 1050 1051 PREFIXED_PIVOT_COLUMNS = False 1052 IDENTIFY_PIVOT_STRINGS = False 1053 1054 LOG_DEFAULTS_TO_LN = 
False 1055 1056 # Whether ADD is present for each column added by ALTER TABLE 1057 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1058 1059 # Whether the table sample clause expects CSV syntax 1060 TABLESAMPLE_CSV = False 1061 1062 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1063 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1064 1065 # Whether the TRIM function expects the characters to trim as its first argument 1066 TRIM_PATTERN_FIRST = False 1067 1068 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1069 STRING_ALIASES = False 1070 1071 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1072 MODIFIERS_ATTACHED_TO_UNION = True 1073 UNION_MODIFIERS = {"order", "limit", "offset"} 1074 1075 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1076 NO_PAREN_IF_COMMANDS = True 1077 1078 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1079 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1080 1081 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1082 # If this is True and '(' is not found, the keyword will be treated as an identifier 1083 VALUES_FOLLOWED_BY_PAREN = True 1084 1085 # Whether implicit unnesting is supported, e.g. 
    # e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """
        Args:
            error_level: What to do when an error is found; defaults to IMMEDIATE (raise).
            error_message_context: How many characters of SQL context to include in error messages.
            max_errors: Maximum number of error messages concatenated into a raised ParseError.
            dialect: The dialect (name, class or instance) whose conventions should be used.
        """
        # Imported here to avoid a circular import at module load time.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        # Clears all per-parse state so the instance can be reused across calls.
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type.
        If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this attempt was for, then try the next one.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed: raise a combined error chained to the last attempt.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        # Shared driver for parse() and parse_into(): splits the token stream into
        # semicolon-separated chunks and runs `parse_method` once per chunk.
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # Separator tokens are dropped; a trailing semicolon does not
                # open an empty final chunk.
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            # _advance() moves from -1 onto the first token of the chunk.
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        # WARN: log each accumulated error; RAISE: raise one combined ParseError.
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The ANSI escapes underline the offending span within its SQL context.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
1273 """ 1274 instance = exp_class(**kwargs) 1275 instance.add_comments(comments) if comments else self._add_comments(instance) 1276 return self.validate_expression(instance) 1277 1278 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1279 if expression and self._prev_comments: 1280 expression.add_comments(self._prev_comments) 1281 self._prev_comments = None 1282 1283 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1284 """ 1285 Validates an Expression, making sure that all its mandatory arguments are set. 1286 1287 Args: 1288 expression: The expression to validate. 1289 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1290 1291 Returns: 1292 The validated expression. 1293 """ 1294 if self.error_level != ErrorLevel.IGNORE: 1295 for error_message in expression.error_messages(args): 1296 self.raise_error(error_message) 1297 1298 return expression 1299 1300 def _find_sql(self, start: Token, end: Token) -> str: 1301 return self.sql[start.start : end.end + 1] 1302 1303 def _is_connected(self) -> bool: 1304 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1305 1306 def _advance(self, times: int = 1) -> None: 1307 self._index += times 1308 self._curr = seq_get(self._tokens, self._index) 1309 self._next = seq_get(self._tokens, self._index + 1) 1310 1311 if self._index > 0: 1312 self._prev = self._tokens[self._index - 1] 1313 self._prev_comments = self._prev.comments 1314 else: 1315 self._prev = None 1316 self._prev_comments = None 1317 1318 def _retreat(self, index: int) -> None: 1319 if index != self._index: 1320 self._advance(index - self._index) 1321 1322 def _warn_unsupported(self) -> None: 1323 if len(self._tokens) <= 1: 1324 return 1325 1326 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1327 # interested in emitting a warning for the one being currently processed. 
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        # Fallback path: wrap the rest of the statement in an opaque Command node.
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # Parses COMMENT [IF EXISTS] ON <kind> <name> IS <string>.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown object kind: degrade to an opaque Command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL entry: an expression optionally followed by an action keyword.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        # Keyword-driven dispatch (SELECT, CREATE, DROP, ...).
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise try a bare expression, then a full SELECT, with modifiers.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            ),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the whole sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION drives the parse.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merges newly parsed properties into the running Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        # Trailing [SHALLOW] CLONE/COPY <table> clause (e.g. Snowflake/Databricks).
        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Anything left unconsumed means we couldn't fully parse the statement.
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Consumes CREATE SEQUENCE options; returns None if nothing was consumed.
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the
                # NOTE(review): tail of _parse_sequence_properties — its head precedes
                # this chunk. "OWNED BY NONE" clears ownership, otherwise a column follows.
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # If no tokens were consumed, there were no sequence properties at all.
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect the optional modifier keywords that may precede a property name;
        # each _match_* consumes the token(s) only when present.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the truthy modifiers; a parser that doesn't accept one
                # of them raises TypeError, which we surface as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        # Parses a parenthesized, comma-separated property list.
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # Dispatch table first: known property keywords have dedicated parsers.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Fall back to a generic `key = value` property; if there is no `=`, rewind
        # and try sequence properties instead.
        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_column() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        # Parses STORED [AS] <format> | STORED [AS] INPUTFORMAT '...' OUTPUTFORMAT '...'
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Parses `[= | AS] <field>` into the given property expression class.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Accumulates consecutive properties until one fails to parse.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield several properties (e.g. wrapped lists).
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        # Teradata [NO] FALLBACK [PROTECTION]
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is ambiguous: a table property (when preceded by e.g. CREATE/TABLE
        # tokens — see PRE_VOLATILE_TOKENS) vs. a function stability marker.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        # T-SQL style: SYSTEM_VERSIONING = ON (HISTORY_TABLE = t, DATA_CONSISTENCY_CHECK = x)
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # Dispatches the various WITH ... property forms.
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        # DEFINER = user@host
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        # WITH JOURNAL TABLE = <table>
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        # [NO] LOG
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        # Modifier flags (no/dual/before/after/local) come from _parse_property_before.
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF | DEFAULT
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY <ordered exprs>, optionally parenthesized.
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # Hive: CLUSTERED BY (cols) [SORTED BY (ordered)] INTO n BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # COPY GRANTS; rewind the already-consumed COPY token when GRANTS is absent.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        # FREESPACE = n [PERCENT]
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        # MERGEBLOCKRATIO = n [PERCENT] | [NO | DEFAULT] MERGEBLOCKRATIO
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        # [MIN | MAX | DEFAULT] DATABLOCKSIZE [= n [BYTES | KBYTES | KILOBYTES]]
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        # BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT | AUTOTEMP(...)
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # Teradata LOCKING <kind> [<target>] (FOR | IN) <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locking has no named target; the others may name one.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # PARTITION BY <exprs>; empty list when the clause is absent.
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        # PostgreSQL partition bounds: IN (...), FROM (...) TO (...), or
        # WITH (MODULUS n, REMAINDER m).
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # PARTITION OF <parent> { DEFAULT | FOR VALUES <bound spec> }
        if not self._match_text_seq("OF"):
            # PARTITION was consumed by the caller; rewind it.
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        # PARTITIONED BY (schema) or PARTITIONED BY <field>[...]
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # CONTAINS SQL
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # MODIFIES SQL DATA
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        # NO PRIMARY INDEX | NO SQL
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT PRESERVE|DELETE ROWS, otherwise a generic ON <schema> property.
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        # READS SQL DATA
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        # Redshift DISTKEY(col)
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE <table> [INCLUDING | EXCLUDING <option>]*
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        # Redshift [COMPOUND] SORTKEY (cols)
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        # [DEFAULT] CHARACTER SET [=] <charset>
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        # BigQuery ML: REMOTE WITH CONNECTION <connection>
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # RETURNS <type> | RETURNS TABLE [<...>] | RETURNS TABLE (schema)
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        # DESCRIBE [<creatable kind>] [EXTENDED] <table> [properties]
        kind = self._match_set(self.CREATABLES) and self._prev.text
        extended = self._match_text_seq("EXTENDED")
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, extended=extended, kind=kind, expressions=expressions
        )
    def _parse_insert(self) -> exp.Insert:
        # INSERT [OVERWRITE] [IGNORE] [OR <alt>] [INTO] <target | DIRECTORY> ...
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [row format]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite INSERT OR REPLACE / IGNORE / ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        # KILL [CONNECTION | QUERY] <id>
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        # Postgres ON CONFLICT ... / MySQL ON DUPLICATE KEY ...
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        # RETURNING <exprs> [INTO <target>]
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # ROW was already consumed by the caller; expect FORMAT next.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        # Hive ROW FORMAT SERDE '...' [WITH SERDEPROPERTIES (...)] or
        # ROW FORMAT DELIMITED [FIELDS/ESCAPED/COLLECTION/MAP/LINES/NULL clauses]
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        # Hive LOAD DATA [LOCAL] INPATH '...' [OVERWRITE] INTO TABLE ...; anything
        # else falls back to an opaque Command.
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        # UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] [RETURNING ...] ...
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        # Spark UNCACHE TABLE [IF EXISTS] <table>
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        # Spark CACHE [LAZY] TABLE <t> [OPTIONS ('k' = 'v')] [AS] <select>
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # PARTITION (<exprs>)
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        # One row of a VALUES clause.
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        # <alias> [(cols)] AS (<statement>)
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty column list means the paren wasn't an alias column list;
            # rewind to before it.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        # Rewrites comma/cross "joins" whose first name part references a previously
        # seen table alias into explicit UNNEST nodes (e.g. BigQuery implicit unnest).
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an inline OFFSET; hoist it to its own node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                # LIMIT ... BY expressions belong on the Offset node.
                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        # Oracle-style /*+ ... */ hint comment; tokenized as HINT ... STAR SLASH.
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        # SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like token soup; capture its raw SQL text by
            # tracking paren depth rather than parsing it.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        # CROSS APPLY / OUTER APPLY / LATERAL [VIEW [OUTER]]
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # e.g. LATERAL UNNEST(...), LATERAL some_udtf(...), or a bare name,
            # possibly dotted.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        # (method, side, kind), e.g. (HASH, LEFT, OUTER); each part is optional.
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        # A bare comma is an implicit join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join; rewind everything consumed above.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Handle nested joins like `a JOIN b JOIN c ON ...` where the ON/USING
            # belongs to the inner join; rewind if no condition follows.
            index = self._index
            join = self._parse_join()

            if join and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif join and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                join = None
                self._retreat(index)

            kwargs["this"].set("joins", [join] if join else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        # Postgres index operator class: <expr> [<opclass table parts>]
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        # NOTE(review): this method continues past the end of this chunk.
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
self._parse_where() 2890 2891 return self.expression( 2892 exp.IndexParameters, 2893 using=using, 2894 columns=columns, 2895 include=include, 2896 partition_by=partition_by, 2897 where=where, 2898 with_storage=with_storage, 2899 tablespace=tablespace, 2900 ) 2901 2902 def _parse_index( 2903 self, 2904 index: t.Optional[exp.Expression] = None, 2905 ) -> t.Optional[exp.Index]: 2906 if index: 2907 unique = None 2908 primary = None 2909 amp = None 2910 2911 self._match(TokenType.ON) 2912 self._match(TokenType.TABLE) # hive 2913 table = self._parse_table_parts(schema=True) 2914 else: 2915 unique = self._match(TokenType.UNIQUE) 2916 primary = self._match_text_seq("PRIMARY") 2917 amp = self._match_text_seq("AMP") 2918 2919 if not self._match(TokenType.INDEX): 2920 return None 2921 2922 index = self._parse_id_var() 2923 table = None 2924 2925 params = self._parse_index_params() 2926 2927 return self.expression( 2928 exp.Index, 2929 this=index, 2930 table=table, 2931 unique=unique, 2932 primary=primary, 2933 amp=amp, 2934 params=params, 2935 ) 2936 2937 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2938 hints: t.List[exp.Expression] = [] 2939 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2940 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2941 hints.append( 2942 self.expression( 2943 exp.WithTableHint, 2944 expressions=self._parse_csv( 2945 lambda: self._parse_function() or self._parse_var(any_token=True) 2946 ), 2947 ) 2948 ) 2949 self._match_r_paren() 2950 else: 2951 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2952 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2953 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2954 2955 self._match_texts(("INDEX", "KEY")) 2956 if self._match(TokenType.FOR): 2957 hint.set("target", self._advance_any() and self._prev.text.upper()) 2958 2959 hint.set("expressions", self._parse_wrapped_id_vars()) 2960 hints.append(hint) 2961 
        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly catalog/db-qualified) table name into an exp.Table."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            # Shift parts left: the last component names a database, not a table
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table factor: lateral, unnest, VALUES, subquery or plain table name."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        # Some dialects (e.g. Hive) place the alias after TABLESAMPLE
        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal table clauses (FOR SYSTEM_TIME / FOR VERSION AS OF etc.)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse an UNNEST(...) table expression, with optional alias and offset."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                # e.g. BigQuery: the alias names the produced column, not the table
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The extra column alias names the ordinality/offset column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
offset = self._parse_id_var( 3158 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3159 ) or exp.to_identifier("offset") 3160 3161 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3162 3163 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3164 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3165 if not is_derived and not self._match_text_seq("VALUES"): 3166 return None 3167 3168 expressions = self._parse_csv(self._parse_value) 3169 alias = self._parse_table_alias() 3170 3171 if is_derived: 3172 self._match_r_paren() 3173 3174 return self.expression( 3175 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3176 ) 3177 3178 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3179 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3180 as_modifier and self._match_text_seq("USING", "SAMPLE") 3181 ): 3182 return None 3183 3184 bucket_numerator = None 3185 bucket_denominator = None 3186 bucket_field = None 3187 percent = None 3188 size = None 3189 seed = None 3190 3191 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3192 matched_l_paren = self._match(TokenType.L_PAREN) 3193 3194 if self.TABLESAMPLE_CSV: 3195 num = None 3196 expressions = self._parse_csv(self._parse_primary) 3197 else: 3198 expressions = None 3199 num = ( 3200 self._parse_factor() 3201 if self._match(TokenType.NUMBER, advance=False) 3202 else self._parse_primary() or self._parse_placeholder() 3203 ) 3204 3205 if self._match_text_seq("BUCKET"): 3206 bucket_numerator = self._parse_number() 3207 self._match_text_seq("OUT", "OF") 3208 bucket_denominator = bucket_denominator = self._parse_number() 3209 self._match(TokenType.ON) 3210 bucket_field = self._parse_field() 3211 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3212 percent = num 3213 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3214 size = num 3215 
else: 3216 percent = num 3217 3218 if matched_l_paren: 3219 self._match_r_paren() 3220 3221 if self._match(TokenType.L_PAREN): 3222 method = self._parse_var(upper=True) 3223 seed = self._match(TokenType.COMMA) and self._parse_number() 3224 self._match_r_paren() 3225 elif self._match_texts(("SEED", "REPEATABLE")): 3226 seed = self._parse_wrapped(self._parse_number) 3227 3228 return self.expression( 3229 exp.TableSample, 3230 expressions=expressions, 3231 method=method, 3232 bucket_numerator=bucket_numerator, 3233 bucket_denominator=bucket_denominator, 3234 bucket_field=bucket_field, 3235 percent=percent, 3236 size=size, 3237 seed=seed, 3238 ) 3239 3240 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3241 return list(iter(self._parse_pivot, None)) or None 3242 3243 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 3244 return list(iter(self._parse_join, None)) or None 3245 3246 # https://duckdb.org/docs/sql/statements/pivot 3247 def _parse_simplified_pivot(self) -> exp.Pivot: 3248 def _parse_on() -> t.Optional[exp.Expression]: 3249 this = self._parse_bitwise() 3250 return self._parse_in(this) if self._match(TokenType.IN) else this 3251 3252 this = self._parse_table() 3253 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3254 using = self._match(TokenType.USING) and self._parse_csv( 3255 lambda: self._parse_alias(self._parse_function()) 3256 ) 3257 group = self._parse_group() 3258 return self.expression( 3259 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3260 ) 3261 3262 def _parse_pivot_in(self) -> exp.In: 3263 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3264 this = self._parse_conjunction() 3265 3266 self._match(TokenType.ALIAS) 3267 alias = self._parse_field() 3268 if alias: 3269 return self.expression(exp.PivotAlias, this=this, alias=alias) 3270 3271 return this 3272 3273 value = self._parse_column() 3274 3275 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3276 
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a single PIVOT/UNPIVOT clause, or None if one isn't present."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT was something else (e.g. an identifier) - rewind
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names of the pivoted relation
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Default pivot column naming: the aggregation aliases."""
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including GROUPING SETS / ROLLUP / CUBE / TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    # The WITH belonged to a following clause - rewind before it
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse GROUPING SETS ( ... )."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle's START WITH ... CONNECT BY hierarchical-query clause."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        # PRIOR is only a keyword inside CONNECT BY, so register it temporarily
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        # START WITH may also come after CONNECT BY
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_name_as_expression(self) -> exp.Alias:
        """Parse `name [AS expr]` into an Alias (used by DEFINE and INTERPOLATE)."""
        return self.expression(
            exp.Alias,
            alias=self._parse_id_var(any_token=True),
            this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
        )

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's INTERPOLATE ( ... ) modifier of ORDER BY."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY); returns `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            interpolate=self._parse_interpolate(),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for SORT BY-like clauses introduced by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term with ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_conjunction()
        if not this:
            return None

        asc = self._match(TokenType.ASC)
        # NOTE(review): `(asc and False)` is always False, so desc is just the DESC
        # match result - the extra term looks vestigial
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # Infer the null ordering from the dialect when it wasn't spelled out
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top`) or FETCH; returns `this` unchanged if absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                # MySQL's `LIMIT offset, count` form
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause; returns `this` unchanged if absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's LIMIT ... BY expressions."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE clauses."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait: True -> NOWAIT, False -> SKIP LOCKED, expression -> WAIT <n>
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold UNION/EXCEPT/INTERSECT chains into a left-deep expression tree."""
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            # Bare UNION (no ALL) means DISTINCT
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY, LIMIT) from the last
                # SELECT up to the UNION node itself
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR-level operators."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range predicates (BETWEEN, IN, LIKE, ISNULL/NOTNULL, IS ...)."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right side of an IS predicate (the IS token was already consumed)."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            # Not an IS predicate after all - rewind to before the IS token
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right side of an IN predicate: list, subquery, unnest or field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse `low AND high` of a BETWEEN (the BETWEEN token was already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node if an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, canonicalizing to INTERVAL '<value>' <unit>."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare "interval" column followed by IS - not an interval literal
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, string concat (||), ?? and shifts."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, tagging divisions with dialect semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        """Parse an interval (optionally a sum of intervals), a `<type> <literal>`
        cast form, or fall back to a column expression."""
        interval = parse_interval and self._parse_interval()
        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. `DATE '2020-01-01'` - a dialect may build a dedicated node.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Unparameterized type followed by a non-literal: reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one type parameter, e.g. the 10 in DECIMAL(10, 2)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested / parameterized) data type, returning None and
        restoring the token position on failure."""
        index = self._index

        # Teradata-style SYSUDTLIB. prefix before the type name.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if
not self._match_set(self.TYPE_TOKENS):
            # Not a known type token: maybe an identifier that re-tokenizes to a
            # type, or a user-defined type.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    # Consume a dotted UDT name, e.g. schema.my_type
                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # e.g. an aggregate state type: first arg is a function or name,
                # the rest are types.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Parenthesized form could still be a function call (see check_func below).
            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            # Angle-bracket parameterization, e.g. ARRAY<INT> / STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if self._match_text_seq("TO"):
                # e.g. INTERVAL YEAR TO MONTH
                span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
            else:
                span = None

            if span or not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))

        if maybe_func and check_func:
            # Disambiguate `T(...)` type vs function call: it's only a type if a
            # string literal follows (e.g. DECIMAL(10) '1.2').
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        # Trailing [] pairs wrap the type in ARRAY, e.g. INT[][].
        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one struct field (name [:] type), or a bare type when allowed."""
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)

        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            # No type was attached to the name - reparse the whole thing as a type.
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by a paren: treat it as a plain identifier.
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (e.g. :: casts, dots) and bracket access."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast syntax
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift name parts over: what we parsed as the column is really a
                # table/db/catalog qualifier of the new field.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal (with adjacent-string
        concatenation), a leading-dot number, or a parenthesized expression,
        tuple, or subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` style numeric literal
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: primary expression, function call, or identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation, dispatching to no-paren parsers,
        registered per-function parsers, or known builders; otherwise build
        an Anonymous function node."""
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                # e.g. EXISTS(SELECT ...) - wrap the subquery in its predicate node.
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                # Some builders are dialect-aware; inspect the signature to decide.
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    # Remember the original (un-normalized) name for generation.
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True)
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, equalities) into
        PropertyEQ nodes with identifier keys."""
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap a Column key down to its underlying identifier.
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a possibly dot-qualified UDF name plus an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); fall back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter, optionally qualified as `<kind>.<name>`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda ((x, y) -> expr), or fall back to DISTINCT / a plain
        select-or-expression with trailing modifiers."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda - rewind and parse as a regular expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized column/constraint list into a Schema."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT; errors and token position are
            # always restored by the finally block.
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the type and constraints following a column name into a ColumnDef."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            # Computed column: <name> AS <expr> [PERSISTED] [NOT NULL]
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and
self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            # Typed transform column: <name> <type> AS (<expr>)
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT options; a (start, increment) pair upgrades the
        constraint to a generated-identity one."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            # Only AUTO REFRESH is handled here - give the AUTO token back.
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY (...) | ROW ... | <expr>}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ALWAYS AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ... AS (<expr>)
                this.set("expression", self._parse_bitwise())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare positional form: IDENTITY(<start>, <increment>)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse what follows NOT: NULL, CASESPECIFIC, or FOR REPLICATION."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally CONSTRAINT-named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
this=self._parse_schema(self._parse_id_var(any_token=False)),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings,
        e.g. 'ON DELETE CASCADE', 'DEFERRABLE'."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is always None here - confirm whether the
        # referenced column list should be parsed into it, or is deliberately
        # captured by the schema parse in `this`.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY columns, REFERENCES clause, and ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any single-token action, stored verbatim (uppercased).
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            # Not the SYSTEM_TIME form - give the consumed token back.
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint or a table-level key list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list follows: this is a per-column constraint.
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return
self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse [...] or {...} after `this`: index access, array literal, or struct."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Index access: shift indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        # Brackets can chain, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                # Recover from `ELSE interval END`: END was presumably consumed as
                # the interval's operand, so treat the default as a bare column.
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true, false) or the keyword form IF cond THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A bare IF at statement start is treated as a command.
                return self._parse_as_command(self._prev)

            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            # NEXT was consumed by the caller - give it back.
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract:
        """Parse the interior of EXTRACT(<part> FROM <expr>); a comma may also
        separate the two operands."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]) into Cast/TryCast,
        or into StrToDate/StrToTime when a FORMAT clause targets a temporal type."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: cast to a type described by a string.
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Rewrite as a string-parsing function with the dialect-mapped format.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt, safe=safe
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style arguments, handling an optional leading DISTINCT."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args =
self._parse_csv(self._parse_conjunction) # type: ignore 4876 4877 index = self._index 4878 if not self._match(TokenType.R_PAREN) and args: 4879 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4880 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4881 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4882 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4883 4884 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4885 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4886 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4887 if not self._match_text_seq("WITHIN", "GROUP"): 4888 self._retreat(index) 4889 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4890 4891 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4892 order = self._parse_order(this=seq_get(args, 0)) 4893 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4894 4895 def _parse_convert( 4896 self, strict: bool, safe: t.Optional[bool] = None 4897 ) -> t.Optional[exp.Expression]: 4898 this = self._parse_bitwise() 4899 4900 if self._match(TokenType.USING): 4901 to: t.Optional[exp.Expression] = self.expression( 4902 exp.CharacterSet, this=self._parse_var() 4903 ) 4904 elif self._match(TokenType.COMMA): 4905 to = self._parse_types() 4906 else: 4907 to = None 4908 4909 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 4910 4911 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4912 """ 4913 There are generally two variants of the DECODE function: 4914 4915 - DECODE(bin, charset) 4916 - DECODE(expression, search, result [, search, 
result] ... [, default]) 4917 4918 The second variant will always be parsed into a CASE expression. Note that NULL 4919 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4920 instead of relying on pattern matching. 4921 """ 4922 args = self._parse_csv(self._parse_conjunction) 4923 4924 if len(args) < 3: 4925 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4926 4927 expression, *expressions = args 4928 if not expression: 4929 return None 4930 4931 ifs = [] 4932 for search, result in zip(expressions[::2], expressions[1::2]): 4933 if not search or not result: 4934 return None 4935 4936 if isinstance(search, exp.Literal): 4937 ifs.append( 4938 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4939 ) 4940 elif isinstance(search, exp.Null): 4941 ifs.append( 4942 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4943 ) 4944 else: 4945 cond = exp.or_( 4946 exp.EQ(this=expression.copy(), expression=search), 4947 exp.and_( 4948 exp.Is(this=expression.copy(), expression=exp.Null()), 4949 exp.Is(this=search.copy(), expression=exp.Null()), 4950 copy=False, 4951 ), 4952 copy=False, 4953 ) 4954 ifs.append(exp.If(this=cond, true=result)) 4955 4956 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4957 4958 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4959 self._match_text_seq("KEY") 4960 key = self._parse_column() 4961 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 4962 self._match_text_seq("VALUE") 4963 value = self._parse_bitwise() 4964 4965 if not key and not value: 4966 return None 4967 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4968 4969 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4970 if not this or not self._match_text_seq("FORMAT", "JSON"): 4971 return this 4972 4973 return self.expression(exp.FormatJson, 
this=this) 4974 4975 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4976 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 4977 for value in values: 4978 if self._match_text_seq(value, "ON", on): 4979 return f"{value} ON {on}" 4980 4981 return None 4982 4983 @t.overload 4984 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 4985 4986 @t.overload 4987 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 4988 4989 def _parse_json_object(self, agg=False): 4990 star = self._parse_star() 4991 expressions = ( 4992 [star] 4993 if star 4994 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4995 ) 4996 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4997 4998 unique_keys = None 4999 if self._match_text_seq("WITH", "UNIQUE"): 5000 unique_keys = True 5001 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5002 unique_keys = False 5003 5004 self._match_text_seq("KEYS") 5005 5006 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5007 self._parse_type() 5008 ) 5009 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5010 5011 return self.expression( 5012 exp.JSONObjectAgg if agg else exp.JSONObject, 5013 expressions=expressions, 5014 null_handling=null_handling, 5015 unique_keys=unique_keys, 5016 return_type=return_type, 5017 encoding=encoding, 5018 ) 5019 5020 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5021 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5022 if not self._match_text_seq("NESTED"): 5023 this = self._parse_id_var() 5024 kind = self._parse_types(allow_identifiers=False) 5025 nested = None 5026 else: 5027 this = None 5028 kind = None 5029 nested = True 5030 5031 path = self._match_text_seq("PATH") and self._parse_string() 5032 nested_schema = nested and self._parse_json_schema() 5033 5034 return self.expression( 5035 exp.JSONColumnDef, 5036 this=this, 
5037 kind=kind, 5038 path=path, 5039 nested_schema=nested_schema, 5040 ) 5041 5042 def _parse_json_schema(self) -> exp.JSONSchema: 5043 self._match_text_seq("COLUMNS") 5044 return self.expression( 5045 exp.JSONSchema, 5046 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5047 ) 5048 5049 def _parse_json_table(self) -> exp.JSONTable: 5050 this = self._parse_format_json(self._parse_bitwise()) 5051 path = self._match(TokenType.COMMA) and self._parse_string() 5052 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5053 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5054 schema = self._parse_json_schema() 5055 5056 return exp.JSONTable( 5057 this=this, 5058 schema=schema, 5059 path=path, 5060 error_handling=error_handling, 5061 empty_handling=empty_handling, 5062 ) 5063 5064 def _parse_match_against(self) -> exp.MatchAgainst: 5065 expressions = self._parse_csv(self._parse_column) 5066 5067 self._match_text_seq(")", "AGAINST", "(") 5068 5069 this = self._parse_string() 5070 5071 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5072 modifier = "IN NATURAL LANGUAGE MODE" 5073 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5074 modifier = f"{modifier} WITH QUERY EXPANSION" 5075 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5076 modifier = "IN BOOLEAN MODE" 5077 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5078 modifier = "WITH QUERY EXPANSION" 5079 else: 5080 modifier = None 5081 5082 return self.expression( 5083 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5084 ) 5085 5086 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5087 def _parse_open_json(self) -> exp.OpenJSON: 5088 this = self._parse_bitwise() 5089 path = self._match(TokenType.COMMA) and self._parse_string() 5090 5091 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5092 this = self._parse_field(any_token=True) 5093 kind 
= self._parse_types() 5094 path = self._parse_string() 5095 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5096 5097 return self.expression( 5098 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5099 ) 5100 5101 expressions = None 5102 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5103 self._match_l_paren() 5104 expressions = self._parse_csv(_parse_open_json_column_def) 5105 5106 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5107 5108 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5109 args = self._parse_csv(self._parse_bitwise) 5110 5111 if self._match(TokenType.IN): 5112 return self.expression( 5113 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5114 ) 5115 5116 if haystack_first: 5117 haystack = seq_get(args, 0) 5118 needle = seq_get(args, 1) 5119 else: 5120 needle = seq_get(args, 0) 5121 haystack = seq_get(args, 1) 5122 5123 return self.expression( 5124 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5125 ) 5126 5127 def _parse_predict(self) -> exp.Predict: 5128 self._match_text_seq("MODEL") 5129 this = self._parse_table() 5130 5131 self._match(TokenType.COMMA) 5132 self._match_text_seq("TABLE") 5133 5134 return self.expression( 5135 exp.Predict, 5136 this=this, 5137 expression=self._parse_table(), 5138 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5139 ) 5140 5141 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5142 args = self._parse_csv(self._parse_table) 5143 return exp.JoinHint(this=func_name.upper(), expressions=args) 5144 5145 def _parse_substring(self) -> exp.Substring: 5146 # Postgres supports the form: substring(string [from int] [for int]) 5147 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5148 5149 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5150 5151 if self._match(TokenType.FROM): 5152 
args.append(self._parse_bitwise()) 5153 if self._match(TokenType.FOR): 5154 args.append(self._parse_bitwise()) 5155 5156 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5157 5158 def _parse_trim(self) -> exp.Trim: 5159 # https://www.w3resource.com/sql/character-functions/trim.php 5160 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5161 5162 position = None 5163 collation = None 5164 expression = None 5165 5166 if self._match_texts(self.TRIM_TYPES): 5167 position = self._prev.text.upper() 5168 5169 this = self._parse_bitwise() 5170 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5171 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5172 expression = self._parse_bitwise() 5173 5174 if invert_order: 5175 this, expression = expression, this 5176 5177 if self._match(TokenType.COLLATE): 5178 collation = self._parse_bitwise() 5179 5180 return self.expression( 5181 exp.Trim, this=this, position=position, expression=expression, collation=collation 5182 ) 5183 5184 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5185 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5186 5187 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5188 return self._parse_window(self._parse_id_var(), alias=True) 5189 5190 def _parse_respect_or_ignore_nulls( 5191 self, this: t.Optional[exp.Expression] 5192 ) -> t.Optional[exp.Expression]: 5193 if self._match_text_seq("IGNORE", "NULLS"): 5194 return self.expression(exp.IgnoreNulls, this=this) 5195 if self._match_text_seq("RESPECT", "NULLS"): 5196 return self.expression(exp.RespectNulls, this=this) 5197 return this 5198 5199 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5200 if self._match(TokenType.HAVING): 5201 self._match_texts(("MAX", "MIN")) 5202 max = self._prev.text.upper() != "MIN" 5203 return self.expression( 5204 exp.HavingMax, this=this, 
expression=self._parse_column(), max=max 5205 ) 5206 5207 return this 5208 5209 def _parse_window( 5210 self, this: t.Optional[exp.Expression], alias: bool = False 5211 ) -> t.Optional[exp.Expression]: 5212 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5213 self._match(TokenType.WHERE) 5214 this = self.expression( 5215 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5216 ) 5217 self._match_r_paren() 5218 5219 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5220 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5221 if self._match_text_seq("WITHIN", "GROUP"): 5222 order = self._parse_wrapped(self._parse_order) 5223 this = self.expression(exp.WithinGroup, this=this, expression=order) 5224 5225 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5226 # Some dialects choose to implement and some do not. 5227 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5228 5229 # There is some code above in _parse_lambda that handles 5230 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5231 5232 # The below changes handle 5233 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5234 5235 # Oracle allows both formats 5236 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5237 # and Snowflake chose to do the same for familiarity 5238 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5239 if isinstance(this, exp.AggFunc): 5240 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5241 5242 if ignore_respect and ignore_respect is not this: 5243 ignore_respect.replace(ignore_respect.this) 5244 this = self.expression(ignore_respect.__class__, this=this) 5245 5246 this = self._parse_respect_or_ignore_nulls(this) 5247 5248 # bigquery select from window x AS (partition by ...) 
5249 if alias: 5250 over = None 5251 self._match(TokenType.ALIAS) 5252 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5253 return this 5254 else: 5255 over = self._prev.text.upper() 5256 5257 if not self._match(TokenType.L_PAREN): 5258 return self.expression( 5259 exp.Window, this=this, alias=self._parse_id_var(False), over=over 5260 ) 5261 5262 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5263 5264 first = self._match(TokenType.FIRST) 5265 if self._match_text_seq("LAST"): 5266 first = False 5267 5268 partition, order = self._parse_partition_and_order() 5269 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5270 5271 if kind: 5272 self._match(TokenType.BETWEEN) 5273 start = self._parse_window_spec() 5274 self._match(TokenType.AND) 5275 end = self._parse_window_spec() 5276 5277 spec = self.expression( 5278 exp.WindowSpec, 5279 kind=kind, 5280 start=start["value"], 5281 start_side=start["side"], 5282 end=end["value"], 5283 end_side=end["side"], 5284 ) 5285 else: 5286 spec = None 5287 5288 self._match_r_paren() 5289 5290 window = self.expression( 5291 exp.Window, 5292 this=this, 5293 partition_by=partition, 5294 order=order, 5295 spec=spec, 5296 alias=window_alias, 5297 over=over, 5298 first=first, 5299 ) 5300 5301 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
5302 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5303 return self._parse_window(window, alias=alias) 5304 5305 return window 5306 5307 def _parse_partition_and_order( 5308 self, 5309 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5310 return self._parse_partition_by(), self._parse_order() 5311 5312 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5313 self._match(TokenType.BETWEEN) 5314 5315 return { 5316 "value": ( 5317 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5318 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5319 or self._parse_bitwise() 5320 ), 5321 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5322 } 5323 5324 def _parse_alias( 5325 self, this: t.Optional[exp.Expression], explicit: bool = False 5326 ) -> t.Optional[exp.Expression]: 5327 any_token = self._match(TokenType.ALIAS) 5328 comments = self._prev_comments 5329 5330 if explicit and not any_token: 5331 return this 5332 5333 if self._match(TokenType.L_PAREN): 5334 aliases = self.expression( 5335 exp.Aliases, 5336 comments=comments, 5337 this=this, 5338 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5339 ) 5340 self._match_r_paren(aliases) 5341 return aliases 5342 5343 alias = self._parse_id_var(any_token) or ( 5344 self.STRING_ALIASES and self._parse_string_as_identifier() 5345 ) 5346 5347 if alias: 5348 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5349 column = this.this 5350 5351 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5352 if not this.comments and column and column.comments: 5353 this.comments = column.comments 5354 column.comments = None 5355 5356 return this 5357 5358 def _parse_id_var( 5359 self, 5360 any_token: bool = True, 5361 tokens: t.Optional[t.Collection[TokenType]] = None, 5362 ) -> t.Optional[exp.Expression]: 5363 identifier = self._parse_identifier() 5364 5365 if identifier: 5366 return 
identifier 5367 5368 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5369 quoted = self._prev.token_type == TokenType.STRING 5370 return exp.Identifier(this=self._prev.text, quoted=quoted) 5371 5372 return None 5373 5374 def _parse_string(self) -> t.Optional[exp.Expression]: 5375 if self._match_set(self.STRING_PARSERS): 5376 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5377 return self._parse_placeholder() 5378 5379 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5380 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5381 5382 def _parse_number(self) -> t.Optional[exp.Expression]: 5383 if self._match_set(self.NUMERIC_PARSERS): 5384 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5385 return self._parse_placeholder() 5386 5387 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5388 if self._match(TokenType.IDENTIFIER): 5389 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5390 return self._parse_placeholder() 5391 5392 def _parse_var( 5393 self, 5394 any_token: bool = False, 5395 tokens: t.Optional[t.Collection[TokenType]] = None, 5396 upper: bool = False, 5397 ) -> t.Optional[exp.Expression]: 5398 if ( 5399 (any_token and self._advance_any()) 5400 or self._match(TokenType.VAR) 5401 or (self._match_set(tokens) if tokens else False) 5402 ): 5403 return self.expression( 5404 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5405 ) 5406 return self._parse_placeholder() 5407 5408 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5409 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5410 self._advance() 5411 return self._prev 5412 return None 5413 5414 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5415 return self._parse_var() or self._parse_string() 5416 5417 def _parse_primary_or_var(self) -> 
t.Optional[exp.Expression]: 5418 return self._parse_primary() or self._parse_var(any_token=True) 5419 5420 def _parse_null(self) -> t.Optional[exp.Expression]: 5421 if self._match_set(self.NULL_TOKENS): 5422 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5423 return self._parse_placeholder() 5424 5425 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5426 if self._match(TokenType.TRUE): 5427 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5428 if self._match(TokenType.FALSE): 5429 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5430 return self._parse_placeholder() 5431 5432 def _parse_star(self) -> t.Optional[exp.Expression]: 5433 if self._match(TokenType.STAR): 5434 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5435 return self._parse_placeholder() 5436 5437 def _parse_parameter(self) -> exp.Parameter: 5438 self._match(TokenType.L_BRACE) 5439 this = self._parse_identifier() or self._parse_primary_or_var() 5440 expression = self._match(TokenType.COLON) and ( 5441 self._parse_identifier() or self._parse_primary_or_var() 5442 ) 5443 self._match(TokenType.R_BRACE) 5444 return self.expression(exp.Parameter, this=this, expression=expression) 5445 5446 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5447 if self._match_set(self.PLACEHOLDER_PARSERS): 5448 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5449 if placeholder: 5450 return placeholder 5451 self._advance(-1) 5452 return None 5453 5454 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 5455 if not self._match(TokenType.EXCEPT): 5456 return None 5457 if self._match(TokenType.L_PAREN, advance=False): 5458 return self._parse_wrapped_csv(self._parse_column) 5459 5460 except_column = self._parse_column() 5461 return [except_column] if except_column else None 5462 5463 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5464 if not self._match(TokenType.REPLACE): 5465 return None 5466 if 
self._match(TokenType.L_PAREN, advance=False): 5467 return self._parse_wrapped_csv(self._parse_expression) 5468 5469 replace_expression = self._parse_expression() 5470 return [replace_expression] if replace_expression else None 5471 5472 def _parse_csv( 5473 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5474 ) -> t.List[exp.Expression]: 5475 parse_result = parse_method() 5476 items = [parse_result] if parse_result is not None else [] 5477 5478 while self._match(sep): 5479 self._add_comments(parse_result) 5480 parse_result = parse_method() 5481 if parse_result is not None: 5482 items.append(parse_result) 5483 5484 return items 5485 5486 def _parse_tokens( 5487 self, parse_method: t.Callable, expressions: t.Dict 5488 ) -> t.Optional[exp.Expression]: 5489 this = parse_method() 5490 5491 while self._match_set(expressions): 5492 this = self.expression( 5493 expressions[self._prev.token_type], 5494 this=this, 5495 comments=self._prev_comments, 5496 expression=parse_method(), 5497 ) 5498 5499 return this 5500 5501 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5502 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5503 5504 def _parse_wrapped_csv( 5505 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5506 ) -> t.List[exp.Expression]: 5507 return self._parse_wrapped( 5508 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5509 ) 5510 5511 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5512 wrapped = self._match(TokenType.L_PAREN) 5513 if not wrapped and not optional: 5514 self.raise_error("Expecting (") 5515 parse_result = parse_method() 5516 if wrapped: 5517 self._match_r_paren() 5518 return parse_result 5519 5520 def _parse_expressions(self) -> t.List[exp.Expression]: 5521 return self._parse_csv(self._parse_expression) 5522 5523 def _parse_select_or_expression(self, alias: bool = False) -> 
t.Optional[exp.Expression]: 5524 return self._parse_select() or self._parse_set_operations( 5525 self._parse_expression() if alias else self._parse_conjunction() 5526 ) 5527 5528 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5529 return self._parse_query_modifiers( 5530 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5531 ) 5532 5533 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5534 this = None 5535 if self._match_texts(self.TRANSACTION_KIND): 5536 this = self._prev.text 5537 5538 self._match_texts(("TRANSACTION", "WORK")) 5539 5540 modes = [] 5541 while True: 5542 mode = [] 5543 while self._match(TokenType.VAR): 5544 mode.append(self._prev.text) 5545 5546 if mode: 5547 modes.append(" ".join(mode)) 5548 if not self._match(TokenType.COMMA): 5549 break 5550 5551 return self.expression(exp.Transaction, this=this, modes=modes) 5552 5553 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5554 chain = None 5555 savepoint = None 5556 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5557 5558 self._match_texts(("TRANSACTION", "WORK")) 5559 5560 if self._match_text_seq("TO"): 5561 self._match_text_seq("SAVEPOINT") 5562 savepoint = self._parse_id_var() 5563 5564 if self._match(TokenType.AND): 5565 chain = not self._match_text_seq("NO") 5566 self._match_text_seq("CHAIN") 5567 5568 if is_rollback: 5569 return self.expression(exp.Rollback, savepoint=savepoint) 5570 5571 return self.expression(exp.Commit, chain=chain) 5572 5573 def _parse_refresh(self) -> exp.Refresh: 5574 self._match(TokenType.TABLE) 5575 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5576 5577 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5578 if not self._match_text_seq("ADD"): 5579 return None 5580 5581 self._match(TokenType.COLUMN) 5582 exists_column = self._parse_exists(not_=True) 5583 expression = self._parse_field_def() 5584 5585 if expression: 5586 
        # NOTE(review): this is the tail of a method whose header lies above this
        # chunk — presumably the ALTER TABLE ... ADD COLUMN parser; confirm
        # against the full file.
        expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP column action; defaults the drop kind to COLUMN."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of an ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse an ALTER TABLE ADD action: constraints, or column definitions."""
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        # Not a constraint: rewind and parse the added column(s) instead.
        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)
        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """Parse an ALTER [COLUMN] action (DROP/SET DEFAULT, COMMENT, type change)."""
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())

        # Fall through to [SET DATA] TYPE ... [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse an ALTER TABLE DROP action: partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        """Parse RENAME [COLUMN old TO new | TO new_table]."""
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """Parse ALTER TABLE; falls back to an opaque Command when unsupported."""
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        # Dispatch on the action keyword (ADD, DROP, RENAME, ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            # Only build an AlterTable if all tokens were consumed.
            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE INTO target USING source ON condition WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        """Parse the WHEN [NOT] MATCHED [BY ...] THEN ... clauses of a MERGE."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, and the
            # (falsy) _match_text_seq result when neither is present.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via dialect parsers, else as a raw Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET item of the form `name [= | TO] value`."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET TRANSACTION with its characteristics list."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it must be passed via **kwargs.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, preferring dialect-specific parsers."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; falls back to a Command on leftover tokens."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword from the given table."""
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched: an unknown option either errors or
            # rewinds, depending on raise_unmatched.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement and wrap it as a raw Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # The leading token's text becomes the command name; the rest is payload.
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property: kind plus optional (key value, ...) settings."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range: (MIN x MAX y) or (max) with min defaulting to 0."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse `expr IN iterator [IF condition]`; rewinds if IN is absent."""
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a heredoc string, either a single token or a $tag$...$tag$ span."""
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The opening delimiter's pieces must be adjacent in the source SQL.
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the full closing tag sequence is found.
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the longest keyword-sequence match in `trie` and return its parser."""
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        # No match: restore the cursor position.
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        """Match the current token's type, optionally advancing and attaching comments."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        """Match the current token against a set of token types."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        """Match the current and next tokens' types as a pair."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require an opening parenthesis, raising an error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        """Require a closing parenthesis, raising an error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        """Match the current token's upper-cased text against a collection."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        """Match a sequence of upper-cased token texts, rewinding on failure."""
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]: ...

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite qualified Column nodes into Dot chains."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        """Replace columns that refer to lambda variables with plain identifiers/dots."""
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace at the outermost Dot that wraps this column, if any.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE | DATABASE] with its optional clauses."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by WITH <operator>."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """Initialize the parser's error-handling settings and dialect.

        Args:
            error_level: The desired error level; defaults to ErrorLevel.IMMEDIATE.
            error_message_context: Number of characters of query context shown
                in error messages.
            max_errors: Maximum number of error messages in a raised ParseError
                (only relevant for ErrorLevel.RAISE).
            dialect: The dialect (name, class, or instance) used for parsing.
        """
        # Imported locally to avoid a circular import with sqlglot.dialects.
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        # reset() initializes the mutable parsing state (token cursor, errors, ...).
        self.reset()
1132 def parse( 1133 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1134 ) -> t.List[t.Optional[exp.Expression]]: 1135 """ 1136 Parses a list of tokens and returns a list of syntax trees, one tree 1137 per parsed SQL statement. 1138 1139 Args: 1140 raw_tokens: The list of tokens. 1141 sql: The original SQL string, used to produce helpful debug messages. 1142 1143 Returns: 1144 The list of the produced syntax trees. 1145 """ 1146 return self._parse( 1147 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1148 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1150 def parse_into( 1151 self, 1152 expression_types: exp.IntoType, 1153 raw_tokens: t.List[Token], 1154 sql: t.Optional[str] = None, 1155 ) -> t.List[t.Optional[exp.Expression]]: 1156 """ 1157 Parses a list of tokens into a given Expression type. If a collection of Expression 1158 types is given instead, this method will try to parse the token list into each one 1159 of them, stopping at the first for which the parsing succeeds. 1160 1161 Args: 1162 expression_types: The expression type(s) to try and parse the token list into. 1163 raw_tokens: The list of tokens. 1164 sql: The original SQL string, used to produce helpful debug messages. 1165 1166 Returns: 1167 The target Expression. 1168 """ 1169 errors = [] 1170 for expression_type in ensure_list(expression_types): 1171 parser = self.EXPRESSION_PARSERS.get(expression_type) 1172 if not parser: 1173 raise TypeError(f"No parser registered for {expression_type}") 1174 1175 try: 1176 return self._parse(parser, raw_tokens, sql) 1177 except ParseError as e: 1178 e.errors[0]["into_expression"] = expression_type 1179 errors.append(e) 1180 1181 raise ParseError( 1182 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1183 errors=merge_errors(errors), 1184 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1221 def check_errors(self) -> None: 1222 """Logs or raises any found errors, depending on the chosen error level setting.""" 1223 if self.error_level == ErrorLevel.WARN: 1224 for error in self.errors: 1225 logger.error(str(error)) 1226 elif self.error_level == ErrorLevel.RAISE and self.errors: 1227 raise ParseError( 1228 concat_messages(self.errors, self.max_errors), 1229 errors=merge_errors(self.errors), 1230 )
Logs or raises any found errors, depending on the chosen error level setting.
1232 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1233 """ 1234 Appends an error in the list of recorded errors or raises it, depending on the chosen 1235 error level setting. 1236 """ 1237 token = token or self._curr or self._prev or Token.string("") 1238 start = token.start 1239 end = token.end + 1 1240 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1241 highlight = self.sql[start:end] 1242 end_context = self.sql[end : end + self.error_message_context] 1243 1244 error = ParseError.new( 1245 f"{message}. Line {token.line}, Col: {token.col}.\n" 1246 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1247 description=message, 1248 line=token.line, 1249 col=token.col, 1250 start_context=start_context, 1251 highlight=highlight, 1252 end_context=end_context, 1253 ) 1254 1255 if self.error_level == ErrorLevel.IMMEDIATE: 1256 raise error 1257 1258 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1260 def expression( 1261 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1262 ) -> E: 1263 """ 1264 Creates a new, validated Expression. 1265 1266 Args: 1267 exp_class: The expression class to instantiate. 1268 comments: An optional list of comments to attach to the expression. 1269 kwargs: The arguments to set for the expression along with their respective values. 1270 1271 Returns: 1272 The target expression. 1273 """ 1274 instance = exp_class(**kwargs) 1275 instance.add_comments(comments) if comments else self._add_comments(instance) 1276 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1283 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1284 """ 1285 Validates an Expression, making sure that all its mandatory arguments are set. 1286 1287 Args: 1288 expression: The expression to validate. 1289 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1290 1291 Returns: 1292 The validated expression. 1293 """ 1294 if self.error_level != ErrorLevel.IGNORE: 1295 for error_message in expression.error_messages(args): 1296 self.raise_error(error_message) 1297 1298 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.