sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
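

# Illustrative round trip through one of the builders above (a sketch that
# assumes the public `sqlglot.parse_one` API; output shown approximately):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()
#     'SELECT (a + 1) % 7'
#
# `build_mod` parenthesizes binary operands so the rendered `%` keeps the
# original grouping.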


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
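
    # Minimal usage sketch (comment only; uses this module's own imports):
    #
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> from sqlglot.parser import Parser
    #     >>> sql = "SELECT a FROM t"
    #     >>> Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql)
    #
    # With ErrorLevel.IMMEDIATE (the default) the first error raises right away;
    # with ErrorLevel.WARN errors are logged instead.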

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
        "COALESCE": lambda args: exp.Coalesce(this=seq_get(args, 0), expressions=args[1:]),
    }
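
    # Dialects customize parsing by extending this table in a Parser subclass.
    # Hypothetical sketch (MyParser and REVERSE_WORDS are illustrative, not part
    # of the library):
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "REVERSE_WORDS": lambda args: exp.Anonymous(
    #                 this="REVERSE_WORDS", expressions=args
    #             ),
    #         }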

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
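
    # These tables drive precedence climbing: each tier (disjunction ->
    # conjunction -> equality -> comparison -> bitwise -> term -> factor ->
    # exponent) parses the next, tighter tier as its operands. Illustrative
    # (output shown approximately):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("1 + 2 * 3")
    #     Add(this=..., expression=Mul(this=..., expression=...))
    #
    # i.e. the Mul binds tighter and ends up nested under the Add.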

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
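
    # Illustrative effect of COLUMN_OPERATORS (assumes the public API; default
    # dialect shown):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT a::INT, doc -> '$.x' FROM t")
    #
    # `a::INT` becomes an exp.Cast (STRICT_CAST is True here) and
    # `doc -> '$.x'` becomes an exp.JSONExtract over a JSON path.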

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
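
    # Dispatch sketch: _parse_statement picks a parser from this table based on
    # the first token; tokens in the dialect tokenizer's COMMANDS set fall back
    # to _parse_command, and anything else is parsed as a SELECT/expression.
    # Illustrative:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("CREATE TABLE t (x INT)"))
    #     <class 'sqlglot.expressions.Create'>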

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
"COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 881 "CONTAINS": lambda self: self._parse_contains_property(), 882 "COPY": lambda self: self._parse_copy_property(), 883 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 884 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 885 "DEFINER": lambda self: self._parse_definer(), 886 "DETERMINISTIC": lambda self: self.expression( 887 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 888 ), 889 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 890 "DISTKEY": lambda self: self._parse_distkey(), 891 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 892 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 893 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 894 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 895 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 896 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 897 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 898 "FREESPACE": lambda self: self._parse_freespace(), 899 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 900 "HEAP": lambda self: self.expression(exp.HeapProperty), 901 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 902 "IMMUTABLE": lambda self: self.expression( 903 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 904 ), 905 "INHERITS": lambda self: self.expression( 906 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 907 ), 908 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 909 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 910 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 911 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 912 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 913 "LIKE": lambda self: self._parse_create_like(), 914 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 915 "LOCK": lambda self: self._parse_locking(), 916 "LOCKING": lambda self: self._parse_locking(), 917 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 918 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 919 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 920 "MODIFIES": lambda self: self._parse_modifies_property(), 921 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 922 "NO": lambda self: self._parse_no_property(), 923 "ON": lambda self: self._parse_on_property(), 924 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 925 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 926 "PARTITION": lambda self: self._parse_partitioned_of(), 927 "PARTITION BY": lambda self: self._parse_partitioned_by(), 928 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 929 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 930 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 931 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 932 "READS": lambda self: self._parse_reads_property(), 933 "REMOTE": lambda self: self._parse_remote_with_connection(), 934 "RETURNS": lambda self: self._parse_returns(), 935 "STRICT": lambda self: self.expression(exp.StrictProperty), 936 
"STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 937 "ROW": lambda self: self._parse_row(), 938 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 939 "SAMPLE": lambda self: self.expression( 940 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 941 ), 942 "SECURE": lambda self: self.expression(exp.SecureProperty), 943 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 944 "SETTINGS": lambda self: self._parse_settings_property(), 945 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 946 "SORTKEY": lambda self: self._parse_sortkey(), 947 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 948 "STABLE": lambda self: self.expression( 949 exp.StabilityProperty, this=exp.Literal.string("STABLE") 950 ), 951 "STORED": lambda self: self._parse_stored(), 952 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 953 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 954 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 955 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 956 "TO": lambda self: self._parse_to_table(), 957 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 958 "TRANSFORM": lambda self: self.expression( 959 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 960 ), 961 "TTL": lambda self: self._parse_ttl(), 962 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 963 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 964 "VOLATILE": lambda self: self._parse_volatile_property(), 965 "WITH": lambda self: self._parse_with_property(), 966 } 967 968 CONSTRAINT_PARSERS = { 969 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 970 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 971 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 972 "CHARACTER SET": lambda self: self.expression( 973 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 974 ), 975 "CHECK": lambda self: self.expression( 976 exp.CheckColumnConstraint, 977 this=self._parse_wrapped(self._parse_assignment), 978 enforced=self._match_text_seq("ENFORCED"), 979 ), 980 "COLLATE": lambda self: self.expression( 981 exp.CollateColumnConstraint, 982 this=self._parse_identifier() or self._parse_column(), 983 ), 984 "COMMENT": lambda self: self.expression( 985 exp.CommentColumnConstraint, this=self._parse_string() 986 ), 987 "COMPRESS": lambda self: self._parse_compress(), 988 "CLUSTERED": lambda self: self.expression( 989 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 990 ), 991 "NONCLUSTERED": lambda self: self.expression( 992 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 993 ), 994 "DEFAULT": lambda self: self.expression( 995 exp.DefaultColumnConstraint, this=self._parse_bitwise() 996 ), 997 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 998 "EPHEMERAL": lambda self: self.expression( 999 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1000 ), 1001 "EXCLUDE": lambda self: self.expression( 1002 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1003 ), 1004 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1005 "FORMAT": lambda self: self.expression( 1006 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1007 ), 1008 
"GENERATED": lambda self: self._parse_generated_as_identity(), 1009 "IDENTITY": lambda self: self._parse_auto_increment(), 1010 "INLINE": lambda self: self._parse_inline(), 1011 "LIKE": lambda self: self._parse_create_like(), 1012 "NOT": lambda self: self._parse_not_constraint(), 1013 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1014 "ON": lambda self: ( 1015 self._match(TokenType.UPDATE) 1016 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1017 ) 1018 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1019 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1020 "PERIOD": lambda self: self._parse_period_for_system_time(), 1021 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1022 "REFERENCES": lambda self: self._parse_references(match=False), 1023 "TITLE": lambda self: self.expression( 1024 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1025 ), 1026 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1027 "UNIQUE": lambda self: self._parse_unique(), 1028 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1029 "WITH": lambda self: self.expression( 1030 exp.Properties, expressions=self._parse_wrapped_properties() 1031 ), 1032 } 1033 1034 ALTER_PARSERS = { 1035 "ADD": lambda self: self._parse_alter_table_add(), 1036 "ALTER": lambda self: self._parse_alter_table_alter(), 1037 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1038 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1039 "DROP": lambda self: self._parse_alter_table_drop(), 1040 "RENAME": lambda self: self._parse_alter_table_rename(), 1041 "SET": lambda self: self._parse_alter_table_set(), 1042 "AS": lambda self: self._parse_select(), 1043 } 1044 1045 ALTER_ALTER_PARSERS = { 1046 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1047 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1048 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1049 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1050 } 1051 1052 SCHEMA_UNNAMED_CONSTRAINTS = { 1053 "CHECK", 1054 "EXCLUDE", 1055 "FOREIGN KEY", 1056 "LIKE", 1057 "PERIOD", 1058 "PRIMARY KEY", 1059 "UNIQUE", 1060 } 1061 1062 NO_PAREN_FUNCTION_PARSERS = { 1063 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1064 "CASE": lambda self: self._parse_case(), 1065 "CONNECT_BY_ROOT": lambda self: self.expression( 1066 exp.ConnectByRoot, this=self._parse_column() 1067 ), 1068 "IF": lambda self: self._parse_if(), 1069 "NEXT": lambda self: self._parse_next_value_for(), 1070 } 1071 1072 INVALID_FUNC_NAME_TOKENS = { 1073 TokenType.IDENTIFIER, 1074 TokenType.STRING, 1075 } 1076 1077 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1078 1079 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1080 1081 FUNCTION_PARSERS = { 1082 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1083 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1084 "DECODE": lambda self: self._parse_decode(), 1085 "EXTRACT": lambda self: self._parse_extract(), 1086 "GAP_FILL": lambda self: self._parse_gap_fill(), 1087 "JSON_OBJECT": lambda self: self._parse_json_object(), 1088 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1089 "JSON_TABLE": lambda self: self._parse_json_table(), 1090 "MATCH": lambda self: self._parse_match_against(), 1091 "OPENJSON": lambda self: self._parse_open_json(), 
1092 "POSITION": lambda self: self._parse_position(), 1093 "PREDICT": lambda self: self._parse_predict(), 1094 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1095 "STRING_AGG": lambda self: self._parse_string_agg(), 1096 "SUBSTRING": lambda self: self._parse_substring(), 1097 "TRIM": lambda self: self._parse_trim(), 1098 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1099 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1100 } 1101 1102 QUERY_MODIFIER_PARSERS = { 1103 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1104 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1105 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1106 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1107 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1108 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1109 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1110 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1111 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1112 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1113 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1114 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1115 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1116 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1117 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1118 TokenType.CLUSTER_BY: lambda self: ( 1119 "cluster", 1120 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1121 ), 1122 TokenType.DISTRIBUTE_BY: lambda self: ( 1123 "distribute", 1124 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1125 ), 1126 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1127 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1128 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1129 } 1130 1131 SET_PARSERS = { 1132 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1133 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1134 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1135 "TRANSACTION": lambda self: self._parse_set_transaction(), 1136 } 1137 1138 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1139 1140 TYPE_LITERAL_PARSERS = { 1141 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1142 } 1143 1144 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1145 1146 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1147 1148 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1149 1150 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1151 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1152 "ISOLATION": ( 1153 ("LEVEL", "REPEATABLE", "READ"), 1154 ("LEVEL", "READ", "COMMITTED"), 1155 ("LEVEL", "READ", "UNCOMITTED"), 1156 ("LEVEL", "SERIALIZABLE"), 1157 ), 1158 "READ": ("WRITE", "ONLY"), 1159 } 1160 1161 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1162 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1163 ) 1164 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1165 1166 CREATE_SEQUENCE: OPTIONS_TYPE = { 1167 "SCALE": ("EXTEND", "NOEXTEND"), 1168 "SHARD": 
("EXTEND", "NOEXTEND"), 1169 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1170 **dict.fromkeys( 1171 ( 1172 "SESSION", 1173 "GLOBAL", 1174 "KEEP", 1175 "NOKEEP", 1176 "ORDER", 1177 "NOORDER", 1178 "NOCACHE", 1179 "CYCLE", 1180 "NOCYCLE", 1181 "NOMINVALUE", 1182 "NOMAXVALUE", 1183 "NOSCALE", 1184 "NOSHARD", 1185 ), 1186 tuple(), 1187 ), 1188 } 1189 1190 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1191 1192 USABLES: OPTIONS_TYPE = dict.fromkeys( 1193 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1194 ) 1195 1196 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1197 1198 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1199 "TYPE": ("EVOLUTION",), 1200 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1201 } 1202 1203 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1204 "NOT": ("ENFORCED",), 1205 "MATCH": ( 1206 "FULL", 1207 "PARTIAL", 1208 "SIMPLE", 1209 ), 1210 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1211 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1212 } 1213 1214 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1215 1216 CLONE_KEYWORDS = {"CLONE", "COPY"} 1217 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1218 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1219 1220 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1221 1222 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1223 1224 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1225 1226 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1227 1228 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1229 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1230 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1231 1232 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1233 1234 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1235 1236 ADD_CONSTRAINT_TOKENS = { 1237 TokenType.CONSTRAINT, 1238 TokenType.FOREIGN_KEY, 1239 TokenType.INDEX, 1240 TokenType.KEY, 1241 TokenType.PRIMARY_KEY, 1242 TokenType.UNIQUE, 1243 } 1244 1245 DISTINCT_TOKENS = {TokenType.DISTINCT} 1246 1247 NULL_TOKENS = {TokenType.NULL} 1248 1249 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1250 1251 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1252 1253 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1254 1255 STRICT_CAST = True 1256 1257 PREFIXED_PIVOT_COLUMNS = False 1258 IDENTIFY_PIVOT_STRINGS = False 1259 1260 LOG_DEFAULTS_TO_LN = False 1261 1262 # Whether ADD is present for each column added by ALTER TABLE 1263 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1264 1265 # Whether the table sample clause expects CSV syntax 1266 TABLESAMPLE_CSV = False 1267 1268 # The default method used for table sampling 1269 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1270 1271 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1272 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1273 1274 # Whether the TRIM function expects the characters to trim as its first argument 1275 TRIM_PATTERN_FIRST = False 1276 1277 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1278 STRING_ALIASES = False 1279 1280 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1281 MODIFIERS_ATTACHED_TO_SET_OP = True 1282 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1283 1284 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1285 NO_PAREN_IF_COMMANDS = True 1286 1287 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1288 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1289 1290 # Whether the `:` operator is used to extract a value from a VARIANT column 1291 COLON_IS_VARIANT_EXTRACT = False 1292 1293 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1294 # If this is True and '(' is not found, the keyword will be treated as an identifier 1295 VALUES_FOLLOWED_BY_PAREN = True 1296 1297 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1298 SUPPORTS_IMPLICIT_UNNEST = False 1299 1300 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1301 INTERVAL_SPANS = True 1302 1303 # Whether a PARTITION clause can follow a table reference 1304 SUPPORTS_PARTITION_SELECTION = False 1305 1306 __slots__ = ( 1307 "error_level", 1308 "error_message_context", 1309 "max_errors", 1310 "dialect", 1311 "sql", 1312 "errors", 1313 "_tokens", 1314 "_index", 1315 "_curr", 1316 "_next", 1317 "_prev", 1318 "_prev_comments", 1319 ) 1320 1321 # Autofilled 1322 SHOW_TRIE: t.Dict = {} 1323 SET_TRIE: t.Dict = {} 1324 1325 def __init__( 1326 self, 1327 error_level: t.Optional[ErrorLevel] = None, 1328 error_message_context: int = 100, 1329 max_errors: int = 3, 1330 dialect: DialectType = None, 1331 ): 1332 from sqlglot.dialects import Dialect 1333 1334 self.error_level = error_level or ErrorLevel.IMMEDIATE 1335 self.error_message_context = error_message_context 1336 self.max_errors = max_errors 1337 self.dialect = Dialect.get_or_raise(dialect) 1338 self.reset() 1339 1340 def reset(self): 1341 self.sql = "" 1342 self.errors = [] 1343 self._tokens = [] 1344 self._index = 0 1345 self._curr = None 1346 self._next = None 1347 self._prev = None 1348 self._prev_comments = None 1349 1350 def parse( 1351 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1352 ) -> t.List[t.Optional[exp.Expression]]: 1353 """ 1354 Parses a list of tokens and returns a list of syntax trees, one tree 1355 per parsed SQL statement. 1356 1357 Args: 1358 raw_tokens: The list of tokens. 1359 sql: The original SQL string, used to produce helpful debug messages. 1360 1361 Returns: 1362 The list of the produced syntax trees. 1363 """ 1364 return self._parse( 1365 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1366 ) 1367 1368 def parse_into( 1369 self, 1370 expression_types: exp.IntoType, 1371 raw_tokens: t.List[Token], 1372 sql: t.Optional[str] = None, 1373 ) -> t.List[t.Optional[exp.Expression]]: 1374 """ 1375 Parses a list of tokens into a given Expression type. If a collection of Expression 1376 types is given instead, this method will try to parse the token list into each one 1377 of them, stopping at the first for which the parsing succeeds. 1378 1379 Args: 1380 expression_types: The expression type(s) to try and parse the token list into. 

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
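
    # Illustrative: parse_into looks up EXPRESSION_PARSERS[expression_type], so
    # a bare condition can be parsed without a surrounding SELECT (sketch;
    # output shown approximately):
    #
    #     >>> from sqlglot import exp
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> Parser().parse_into(exp.Condition, Tokenizer().tokenize("a AND b"))
    #     [And(this=..., expression=...)]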
1494 """ 1495 instance = exp_class(**kwargs) 1496 instance.add_comments(comments) if comments else self._add_comments(instance) 1497 return self.validate_expression(instance) 1498 1499 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1500 if expression and self._prev_comments: 1501 expression.add_comments(self._prev_comments) 1502 self._prev_comments = None 1503 1504 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1505 """ 1506 Validates an Expression, making sure that all its mandatory arguments are set. 1507 1508 Args: 1509 expression: The expression to validate. 1510 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1511 1512 Returns: 1513 The validated expression. 1514 """ 1515 if self.error_level != ErrorLevel.IGNORE: 1516 for error_message in expression.error_messages(args): 1517 self.raise_error(error_message) 1518 1519 return expression 1520 1521 def _find_sql(self, start: Token, end: Token) -> str: 1522 return self.sql[start.start : end.end + 1] 1523 1524 def _is_connected(self) -> bool: 1525 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1526 1527 def _advance(self, times: int = 1) -> None: 1528 self._index += times 1529 self._curr = seq_get(self._tokens, self._index) 1530 self._next = seq_get(self._tokens, self._index + 1) 1531 1532 if self._index > 0: 1533 self._prev = self._tokens[self._index - 1] 1534 self._prev_comments = self._prev.comments 1535 else: 1536 self._prev = None 1537 self._prev_comments = None 1538 1539 def _retreat(self, index: int) -> None: 1540 if index != self._index: 1541 self._advance(index - self._index) 1542 1543 def _warn_unsupported(self) -> None: 1544 if len(self._tokens) <= 1: 1545 return 1546 1547 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1548 # interested in emitting a warning for the one being currently processed. 1549 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1550 1551 logger.warning( 1552 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1553 ) 1554 1555 def _parse_command(self) -> exp.Command: 1556 self._warn_unsupported() 1557 return self.expression( 1558 exp.Command, 1559 comments=self._prev_comments, 1560 this=self._prev.text.upper(), 1561 expression=self._parse_string(), 1562 ) 1563 1564 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1565 """ 1566 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
self._prev 1676 temporary = self._match(TokenType.TEMPORARY) 1677 materialized = self._match_text_seq("MATERIALIZED") 1678 1679 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1680 if not kind: 1681 return self._parse_as_command(start) 1682 1683 if_exists = exists or self._parse_exists() 1684 table = self._parse_table_parts( 1685 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1686 ) 1687 1688 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1689 1690 if self._match(TokenType.L_PAREN, advance=False): 1691 expressions = self._parse_wrapped_csv(self._parse_types) 1692 else: 1693 expressions = None 1694 1695 return self.expression( 1696 exp.Drop, 1697 comments=start.comments, 1698 exists=if_exists, 1699 this=table, 1700 expressions=expressions, 1701 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1702 temporary=temporary, 1703 materialized=materialized, 1704 cascade=self._match_text_seq("CASCADE"), 1705 constraints=self._match_text_seq("CONSTRAINTS"), 1706 purge=self._match_text_seq("PURGE"), 1707 cluster=cluster, 1708 ) 1709 1710 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1711 return ( 1712 self._match_text_seq("IF") 1713 and (not not_ or self._match(TokenType.NOT)) 1714 and self._match(TokenType.EXISTS) 1715 ) 1716 1717 def _parse_create(self) -> exp.Create | exp.Command: 1718 # Note: this can't be None because we've matched a statement parser 1719 start = self._prev 1720 comments = self._prev_comments 1721 1722 replace = ( 1723 start.token_type == TokenType.REPLACE 1724 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1725 or self._match_pair(TokenType.OR, TokenType.ALTER) 1726 ) 1727 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1728 1729 unique = self._match(TokenType.UNIQUE) 1730 1731 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1732 clustered = True 1733 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1734 "COLUMNSTORE" 1735 ): 1736 clustered = False 1737 else: 1738 clustered = None 1739 1740 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1741 self._advance() 1742 1743 properties = None 1744 create_token = self._match_set(self.CREATABLES) and self._prev 1745 1746 if not create_token: 1747 # exp.Properties.Location.POST_CREATE 1748 properties = self._parse_properties() 1749 create_token = self._match_set(self.CREATABLES) and self._prev 1750 1751 if not properties or not create_token: 1752 return self._parse_as_command(start) 1753 1754 concurrently = self._match_text_seq("CONCURRENTLY") 1755 exists = self._parse_exists(not_=True) 1756 this = None 1757 expression: t.Optional[exp.Expression] = None 1758 indexes = None 1759 no_schema_binding = None 1760 begin = None 1761 end = None 1762 clone = None 1763 1764 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1765 nonlocal properties 1766 if properties and temp_props: 1767 properties.expressions.extend(temp_props.expressions) 1768 elif temp_props: 1769 properties = temp_props 1770 1771 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1772 this = self._parse_user_defined_function(kind=create_token.token_type) 1773 1774 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1775 extend_props(self._parse_properties()) 1776 1777 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1778 extend_props(self._parse_properties()) 1779 1780 if not expression: 1781 if 
self._match(TokenType.COMMAND): 1782 expression = self._parse_as_command(self._prev) 1783 else: 1784 begin = self._match(TokenType.BEGIN) 1785 return_ = self._match_text_seq("RETURN") 1786 1787 if self._match(TokenType.STRING, advance=False): 1788 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1789 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1790 expression = self._parse_string() 1791 extend_props(self._parse_properties()) 1792 else: 1793 expression = self._parse_statement() 1794 1795 end = self._match_text_seq("END") 1796 1797 if return_: 1798 expression = self.expression(exp.Return, this=expression) 1799 elif create_token.token_type == TokenType.INDEX: 1800 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1801 if not self._match(TokenType.ON): 1802 index = self._parse_id_var() 1803 anonymous = False 1804 else: 1805 index = None 1806 anonymous = True 1807 1808 this = self._parse_index(index=index, anonymous=anonymous) 1809 elif create_token.token_type in self.DB_CREATABLES: 1810 table_parts = self._parse_table_parts( 1811 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1812 ) 1813 1814 # exp.Properties.Location.POST_NAME 1815 self._match(TokenType.COMMA) 1816 extend_props(self._parse_properties(before=True)) 1817 1818 this = self._parse_schema(this=table_parts) 1819 1820 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1821 extend_props(self._parse_properties()) 1822 1823 self._match(TokenType.ALIAS) 1824 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1825 # exp.Properties.Location.POST_ALIAS 1826 extend_props(self._parse_properties()) 1827 1828 if create_token.token_type == TokenType.SEQUENCE: 1829 expression = self._parse_types() 1830 extend_props(self._parse_properties()) 1831 else: 1832 expression = self._parse_ddl_select() 1833 1834 if create_token.token_type == TokenType.TABLE: 1835 # exp.Properties.Location.POST_EXPRESSION 1836 extend_props(self._parse_properties()) 1837 1838 indexes = [] 1839 while True: 1840 index = self._parse_index() 1841 1842 # exp.Properties.Location.POST_INDEX 1843 extend_props(self._parse_properties()) 1844 if not index: 1845 break 1846 else: 1847 self._match(TokenType.COMMA) 1848 indexes.append(index) 1849 elif create_token.token_type == TokenType.VIEW: 1850 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1851 no_schema_binding = True 1852 1853 shallow = self._match_text_seq("SHALLOW") 1854 1855 if self._match_texts(self.CLONE_KEYWORDS): 1856 copy = self._prev.text.lower() == "copy" 1857 clone = self.expression( 1858 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1859 ) 1860 1861 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1862 return self._parse_as_command(start) 1863 1864 create_kind_text = create_token.text.upper() 1865 return self.expression( 1866 exp.Create, 1867 comments=comments, 1868 this=this, 1869 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1870 replace=replace, 1871 refresh=refresh, 1872 unique=unique, 1873 expression=expression, 1874 exists=exists, 1875 properties=properties, 1876 indexes=indexes, 1877 no_schema_binding=no_schema_binding, 1878 begin=begin, 1879 end=end, 1880 clone=clone, 1881 concurrently=concurrently, 1882 clustered=clustered, 1883 ) 1884 1885 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
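# Illustrative sketch (not part of the original source): the options collected
# below end up on an exp.SequenceProperties node attached to the CREATE
# statement; exact dialect coverage may vary by sqlglot version.
#
#     import sqlglot
#     ast = sqlglot.parse_one("CREATE SEQUENCE s START WITH 1 INCREMENT BY 2 CACHE 10")
#     ast.find(sqlglot.exp.SequenceProperties)  # holds the start/increment/cache values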
1886 seq = exp.SequenceProperties() 1887 1888 options = [] 1889 index = self._index 1890 1891 while self._curr: 1892 self._match(TokenType.COMMA) 1893 if self._match_text_seq("INCREMENT"): 1894 self._match_text_seq("BY") 1895 self._match_text_seq("=") 1896 seq.set("increment", self._parse_term()) 1897 elif self._match_text_seq("MINVALUE"): 1898 seq.set("minvalue", self._parse_term()) 1899 elif self._match_text_seq("MAXVALUE"): 1900 seq.set("maxvalue", self._parse_term()) 1901 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1902 self._match_text_seq("=") 1903 seq.set("start", self._parse_term()) 1904 elif self._match_text_seq("CACHE"): 1905 # T-SQL allows empty CACHE which is initialized dynamically 1906 seq.set("cache", self._parse_number() or True) 1907 elif self._match_text_seq("OWNED", "BY"): 1908 # "OWNED BY NONE" is the default 1909 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1910 else: 1911 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1912 if opt: 1913 options.append(opt) 1914 else: 1915 break 1916 1917 seq.set("options", options if options else None) 1918 return None if self._index == index else seq 1919 1920 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1921 # Only used for Teradata currently 1922 self._match(TokenType.COMMA) 1923 1924 kwargs = { 1925 "no": self._match_text_seq("NO"), 1926 "dual": self._match_text_seq("DUAL"), 1927 "before": self._match_text_seq("BEFORE"), 1928 "default": self._match_text_seq("DEFAULT"), 1929 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1930 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1931 "after": self._match_text_seq("AFTER"), 1932 "minimum": self._match_texts(("MIN", "MINIMUM")), 1933 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1934 } 1935 1936 if self._match_texts(self.PROPERTY_PARSERS): 1937 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1938 try: 1939 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1940 except TypeError: 1941 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1942 1943 return None 1944 1945 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1946 return self._parse_wrapped_csv(self._parse_property) 1947 1948 def _parse_property(self) -> t.Optional[exp.Expression]: 1949 if self._match_texts(self.PROPERTY_PARSERS): 1950 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1951 1952 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1953 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1954 1955 if self._match_text_seq("COMPOUND", "SORTKEY"): 1956 return self._parse_sortkey(compound=True) 1957 1958 if self._match_text_seq("SQL", "SECURITY"): 1959 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1960 1961 index = self._index 1962 key = self._parse_column() 1963 1964 if not self._match(TokenType.EQ): 1965 self._retreat(index) 1966 return self._parse_sequence_properties() 1967 1968 # Transform the key into exp.Dot if it's a dotted identifier wrapped in exp.Column, or into exp.Var otherwise 1969 if isinstance(key, exp.Column): 1970 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1971 1972 value = self._parse_bitwise() or self._parse_var(any_token=True) 1973 1974 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1975 if isinstance(value, exp.Column): 1976 value = exp.var(value.name) 1977 1978 return
self.expression(exp.Property, this=key, value=value) 1979 1980 def _parse_stored(self) -> exp.FileFormatProperty: 1981 self._match(TokenType.ALIAS) 1982 1983 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1984 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1985 1986 return self.expression( 1987 exp.FileFormatProperty, 1988 this=( 1989 self.expression( 1990 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1991 ) 1992 if input_format or output_format 1993 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1994 ), 1995 ) 1996 1997 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1998 field = self._parse_field() 1999 if isinstance(field, exp.Identifier) and not field.quoted: 2000 field = exp.var(field) 2001 2002 return field 2003 2004 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2005 self._match(TokenType.EQ) 2006 self._match(TokenType.ALIAS) 2007 2008 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2009 2010 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2011 properties = [] 2012 while True: 2013 if before: 2014 prop = self._parse_property_before() 2015 else: 2016 prop = self._parse_property() 2017 if not prop: 2018 break 2019 for p in ensure_list(prop): 2020 properties.append(p) 2021 2022 if properties: 2023 return self.expression(exp.Properties, expressions=properties) 2024 2025 return None 2026 2027 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2028 return self.expression( 2029 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2030 ) 2031 2032 def _parse_settings_property(self) -> exp.SettingsProperty: 2033 return self.expression( 2034 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2035 ) 2036 2037 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2038 if self._index >= 2: 2039 pre_volatile_token = self._tokens[self._index - 2] 2040 else: 2041 pre_volatile_token = None 2042 2043 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2044 return exp.VolatileProperty() 2045 2046 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2047 2048 def _parse_retention_period(self) -> exp.Var: 2049 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2050 number = self._parse_number() 2051 number_str = f"{number} " if number else "" 2052 unit = self._parse_var(any_token=True) 2053 return exp.var(f"{number_str}{unit}") 2054 2055 def _parse_system_versioning_property( 2056 self, with_: bool = False 2057 ) -> exp.WithSystemVersioningProperty: 2058 self._match(TokenType.EQ) 2059 prop = self.expression( 2060 exp.WithSystemVersioningProperty, 2061 **{ # type: ignore 2062 "on": True, 2063 "with": with_, 2064 }, 2065 ) 2066 2067 if self._match_text_seq("OFF"): 2068 prop.set("on", False) 2069 return prop 2070 2071 self._match(TokenType.ON) 2072 if self._match(TokenType.L_PAREN): 2073 while self._curr and not self._match(TokenType.R_PAREN): 2074 if self._match_text_seq("HISTORY_TABLE", "="): 2075 prop.set("this", self._parse_table_parts()) 2076 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2077 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2078 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2079 
prop.set("retention_period", self._parse_retention_period()) 2080 2081 self._match(TokenType.COMMA) 2082 2083 return prop 2084 2085 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2086 self._match(TokenType.EQ) 2087 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2088 prop = self.expression(exp.DataDeletionProperty, on=on) 2089 2090 if self._match(TokenType.L_PAREN): 2091 while self._curr and not self._match(TokenType.R_PAREN): 2092 if self._match_text_seq("FILTER_COLUMN", "="): 2093 prop.set("filter_column", self._parse_column()) 2094 elif self._match_text_seq("RETENTION_PERIOD", "="): 2095 prop.set("retention_period", self._parse_retention_period()) 2096 2097 self._match(TokenType.COMMA) 2098 2099 return prop 2100 2101 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2102 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2103 prop = self._parse_system_versioning_property(with_=True) 2104 self._match_r_paren() 2105 return prop 2106 2107 if self._match(TokenType.L_PAREN, advance=False): 2108 return self._parse_wrapped_properties() 2109 2110 if self._match_text_seq("JOURNAL"): 2111 return self._parse_withjournaltable() 2112 2113 if self._match_texts(self.VIEW_ATTRIBUTES): 2114 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2115 2116 if self._match_text_seq("DATA"): 2117 return self._parse_withdata(no=False) 2118 elif self._match_text_seq("NO", "DATA"): 2119 return self._parse_withdata(no=True) 2120 2121 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2122 return self._parse_serde_properties(with_=True) 2123 2124 if self._match(TokenType.SCHEMA): 2125 return self.expression( 2126 exp.WithSchemaBindingProperty, 2127 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2128 ) 2129 2130 if not self._next: 2131 return None 2132 2133 return self._parse_withisolatedloading() 2134 2135 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2136 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2137 self._match(TokenType.EQ) 2138 2139 user = self._parse_id_var() 2140 self._match(TokenType.PARAMETER) 2141 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2142 2143 if not user or not host: 2144 return None 2145 2146 return exp.DefinerProperty(this=f"{user}@{host}") 2147 2148 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2149 self._match(TokenType.TABLE) 2150 self._match(TokenType.EQ) 2151 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2152 2153 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2154 return self.expression(exp.LogProperty, no=no) 2155 2156 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2157 return self.expression(exp.JournalProperty, **kwargs) 2158 2159 def _parse_checksum(self) -> exp.ChecksumProperty: 2160 self._match(TokenType.EQ) 2161 2162 on = None 2163 if self._match(TokenType.ON): 2164 on = True 2165 elif self._match_text_seq("OFF"): 2166 on = False 2167 2168 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2169 2170 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2171 return self.expression( 2172 exp.Cluster, 2173 expressions=( 2174 self._parse_wrapped_csv(self._parse_ordered) 2175 if wrapped 2176 else self._parse_csv(self._parse_ordered) 2177 ), 2178 ) 2179 2180 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2181 self._match_text_seq("BY") 2182 2183 
self._match_l_paren() 2184 expressions = self._parse_csv(self._parse_column) 2185 self._match_r_paren() 2186 2187 if self._match_text_seq("SORTED", "BY"): 2188 self._match_l_paren() 2189 sorted_by = self._parse_csv(self._parse_ordered) 2190 self._match_r_paren() 2191 else: 2192 sorted_by = None 2193 2194 self._match(TokenType.INTO) 2195 buckets = self._parse_number() 2196 self._match_text_seq("BUCKETS") 2197 2198 return self.expression( 2199 exp.ClusteredByProperty, 2200 expressions=expressions, 2201 sorted_by=sorted_by, 2202 buckets=buckets, 2203 ) 2204 2205 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2206 if not self._match_text_seq("GRANTS"): 2207 self._retreat(self._index - 1) 2208 return None 2209 2210 return self.expression(exp.CopyGrantsProperty) 2211 2212 def _parse_freespace(self) -> exp.FreespaceProperty: 2213 self._match(TokenType.EQ) 2214 return self.expression( 2215 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2216 ) 2217 2218 def _parse_mergeblockratio( 2219 self, no: bool = False, default: bool = False 2220 ) -> exp.MergeBlockRatioProperty: 2221 if self._match(TokenType.EQ): 2222 return self.expression( 2223 exp.MergeBlockRatioProperty, 2224 this=self._parse_number(), 2225 percent=self._match(TokenType.PERCENT), 2226 ) 2227 2228 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2229 2230 def _parse_datablocksize( 2231 self, 2232 default: t.Optional[bool] = None, 2233 minimum: t.Optional[bool] = None, 2234 maximum: t.Optional[bool] = None, 2235 ) -> exp.DataBlocksizeProperty: 2236 self._match(TokenType.EQ) 2237 size = self._parse_number() 2238 2239 units = None 2240 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2241 units = self._prev.text 2242 2243 return self.expression( 2244 exp.DataBlocksizeProperty, 2245 size=size, 2246 units=units, 2247 default=default, 2248 minimum=minimum, 2249 maximum=maximum, 2250 ) 2251 2252 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2253 self._match(TokenType.EQ) 2254 always = self._match_text_seq("ALWAYS") 2255 manual = self._match_text_seq("MANUAL") 2256 never = self._match_text_seq("NEVER") 2257 default = self._match_text_seq("DEFAULT") 2258 2259 autotemp = None 2260 if self._match_text_seq("AUTOTEMP"): 2261 autotemp = self._parse_schema() 2262 2263 return self.expression( 2264 exp.BlockCompressionProperty, 2265 always=always, 2266 manual=manual, 2267 never=never, 2268 default=default, 2269 autotemp=autotemp, 2270 ) 2271 2272 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2273 index = self._index 2274 no = self._match_text_seq("NO") 2275 concurrent = self._match_text_seq("CONCURRENT") 2276 2277 if not self._match_text_seq("ISOLATED", "LOADING"): 2278 self._retreat(index) 2279 return None 2280 2281 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2282 return self.expression( 2283 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2284 ) 2285 2286 def _parse_locking(self) -> exp.LockingProperty: 2287 if self._match(TokenType.TABLE): 2288 kind = "TABLE" 2289 elif self._match(TokenType.VIEW): 2290 kind = "VIEW" 2291 elif self._match(TokenType.ROW): 2292 kind = "ROW" 2293 elif self._match_text_seq("DATABASE"): 2294 kind = "DATABASE" 2295 else: 2296 kind = None 2297 2298 if kind in ("DATABASE", "TABLE", "VIEW"): 2299 this = self._parse_table_parts() 2300 else: 2301 this = None 2302 2303 if self._match(TokenType.FOR): 2304 
for_or_in = "FOR" 2305 elif self._match(TokenType.IN): 2306 for_or_in = "IN" 2307 else: 2308 for_or_in = None 2309 2310 if self._match_text_seq("ACCESS"): 2311 lock_type = "ACCESS" 2312 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2313 lock_type = "EXCLUSIVE" 2314 elif self._match_text_seq("SHARE"): 2315 lock_type = "SHARE" 2316 elif self._match_text_seq("READ"): 2317 lock_type = "READ" 2318 elif self._match_text_seq("WRITE"): 2319 lock_type = "WRITE" 2320 elif self._match_text_seq("CHECKSUM"): 2321 lock_type = "CHECKSUM" 2322 else: 2323 lock_type = None 2324 2325 override = self._match_text_seq("OVERRIDE") 2326 2327 return self.expression( 2328 exp.LockingProperty, 2329 this=this, 2330 kind=kind, 2331 for_or_in=for_or_in, 2332 lock_type=lock_type, 2333 override=override, 2334 ) 2335 2336 def _parse_partition_by(self) -> t.List[exp.Expression]: 2337 if self._match(TokenType.PARTITION_BY): 2338 return self._parse_csv(self._parse_assignment) 2339 return [] 2340 2341 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2342 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2343 if self._match_text_seq("MINVALUE"): 2344 return exp.var("MINVALUE") 2345 if self._match_text_seq("MAXVALUE"): 2346 return exp.var("MAXVALUE") 2347 return self._parse_bitwise() 2348 2349 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2350 expression = None 2351 from_expressions = None 2352 to_expressions = None 2353 2354 if self._match(TokenType.IN): 2355 this = self._parse_wrapped_csv(self._parse_bitwise) 2356 elif self._match(TokenType.FROM): 2357 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2358 self._match_text_seq("TO") 2359 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2360 elif self._match_text_seq("WITH", "(", "MODULUS"): 2361 this = self._parse_number() 2362 self._match_text_seq(",", "REMAINDER") 2363 expression = self._parse_number() 2364 self._match_r_paren() 2365 else: 2366 self.raise_error("Failed to parse partition bound spec.") 2367 2368 return self.expression( 2369 exp.PartitionBoundSpec, 2370 this=this, 2371 expression=expression, 2372 from_expressions=from_expressions, 2373 to_expressions=to_expressions, 2374 ) 2375 2376 # https://www.postgresql.org/docs/current/sql-createtable.html 2377 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2378 if not self._match_text_seq("OF"): 2379 self._retreat(self._index - 1) 2380 return None 2381 2382 this = self._parse_table(schema=True) 2383 2384 if self._match(TokenType.DEFAULT): 2385 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2386 elif self._match_text_seq("FOR", "VALUES"): 2387 expression = self._parse_partition_bound_spec() 2388 else: 2389 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2390 2391 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2392 2393 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2394 self._match(TokenType.EQ) 2395 return self.expression( 2396 exp.PartitionedByProperty, 2397 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2398 ) 2399 2400 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2401 if self._match_text_seq("AND", "STATISTICS"): 2402 statistics = True 2403 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2404 statistics = False 2405 else: 2406 statistics = None 2407 2408 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2409 2410 def 
_parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2411 if self._match_text_seq("SQL"): 2412 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2413 return None 2414 2415 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2416 if self._match_text_seq("SQL", "DATA"): 2417 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2418 return None 2419 2420 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2421 if self._match_text_seq("PRIMARY", "INDEX"): 2422 return exp.NoPrimaryIndexProperty() 2423 if self._match_text_seq("SQL"): 2424 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2425 return None 2426 2427 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2428 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2429 return exp.OnCommitProperty() 2430 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2431 return exp.OnCommitProperty(delete=True) 2432 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2433 2434 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2435 if self._match_text_seq("SQL", "DATA"): 2436 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2437 return None 2438 2439 def _parse_distkey(self) -> exp.DistKeyProperty: 2440 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2441 2442 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2443 table = self._parse_table(schema=True) 2444 2445 options = [] 2446 while self._match_texts(("INCLUDING", "EXCLUDING")): 2447 this = self._prev.text.upper() 2448 2449 id_var = self._parse_id_var() 2450 if not id_var: 2451 return None 2452 2453 options.append( 2454 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2455 ) 2456 2457 return self.expression(exp.LikeProperty, this=table, expressions=options) 2458 2459 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2460 return self.expression( 2461 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2462 ) 2463 2464 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2465 self._match(TokenType.EQ) 2466 return self.expression( 2467 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2468 ) 2469 2470 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2471 self._match_text_seq("WITH", "CONNECTION") 2472 return self.expression( 2473 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2474 ) 2475 2476 def _parse_returns(self) -> exp.ReturnsProperty: 2477 value: t.Optional[exp.Expression] 2478 null = None 2479 is_table = self._match(TokenType.TABLE) 2480 2481 if is_table: 2482 if self._match(TokenType.LT): 2483 value = self.expression( 2484 exp.Schema, 2485 this="TABLE", 2486 expressions=self._parse_csv(self._parse_struct_types), 2487 ) 2488 if not self._match(TokenType.GT): 2489 self.raise_error("Expecting >") 2490 else: 2491 value = self._parse_schema(exp.var("TABLE")) 2492 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2493 null = True 2494 value = None 2495 else: 2496 value = self._parse_types() 2497 2498 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2499 2500 def _parse_describe(self) -> exp.Describe: 2501 kind = self._match_set(self.CREATABLES) and self._prev.text 2502 style = self._match_texts(("EXTENDED", "FORMATTED", 
"HISTORY")) and self._prev.text.upper() 2503 if self._match(TokenType.DOT): 2504 style = None 2505 self._retreat(self._index - 2) 2506 this = self._parse_table(schema=True) 2507 properties = self._parse_properties() 2508 expressions = properties.expressions if properties else None 2509 partition = self._parse_partition() 2510 return self.expression( 2511 exp.Describe, 2512 this=this, 2513 style=style, 2514 kind=kind, 2515 expressions=expressions, 2516 partition=partition, 2517 ) 2518 2519 def _parse_insert(self) -> exp.Insert: 2520 comments = ensure_list(self._prev_comments) 2521 hint = self._parse_hint() 2522 overwrite = self._match(TokenType.OVERWRITE) 2523 ignore = self._match(TokenType.IGNORE) 2524 local = self._match_text_seq("LOCAL") 2525 alternative = None 2526 is_function = None 2527 2528 if self._match_text_seq("DIRECTORY"): 2529 this: t.Optional[exp.Expression] = self.expression( 2530 exp.Directory, 2531 this=self._parse_var_or_string(), 2532 local=local, 2533 row_format=self._parse_row_format(match_row=True), 2534 ) 2535 else: 2536 if self._match(TokenType.OR): 2537 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2538 2539 self._match(TokenType.INTO) 2540 comments += ensure_list(self._prev_comments) 2541 self._match(TokenType.TABLE) 2542 is_function = self._match(TokenType.FUNCTION) 2543 2544 this = ( 2545 self._parse_table(schema=True, parse_partition=True) 2546 if not is_function 2547 else self._parse_function() 2548 ) 2549 2550 returning = self._parse_returning() 2551 2552 return self.expression( 2553 exp.Insert, 2554 comments=comments, 2555 hint=hint, 2556 is_function=is_function, 2557 this=this, 2558 stored=self._match_text_seq("STORED") and self._parse_stored(), 2559 by_name=self._match_text_seq("BY", "NAME"), 2560 exists=self._parse_exists(), 2561 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2562 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2563 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2564 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2565 conflict=self._parse_on_conflict(), 2566 returning=returning or self._parse_returning(), 2567 overwrite=overwrite, 2568 alternative=alternative, 2569 ignore=ignore, 2570 ) 2571 2572 def _parse_kill(self) -> exp.Kill: 2573 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2574 2575 return self.expression( 2576 exp.Kill, 2577 this=self._parse_primary(), 2578 kind=kind, 2579 ) 2580 2581 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2582 conflict = self._match_text_seq("ON", "CONFLICT") 2583 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2584 2585 if not conflict and not duplicate: 2586 return None 2587 2588 conflict_keys = None 2589 constraint = None 2590 2591 if conflict: 2592 if self._match_text_seq("ON", "CONSTRAINT"): 2593 constraint = self._parse_id_var() 2594 elif self._match(TokenType.L_PAREN): 2595 conflict_keys = self._parse_csv(self._parse_id_var) 2596 self._match_r_paren() 2597 2598 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2599 if self._prev.token_type == TokenType.UPDATE: 2600 self._match(TokenType.SET) 2601 expressions = self._parse_csv(self._parse_equality) 2602 else: 2603 expressions = None 2604 2605 return self.expression( 2606 exp.OnConflict, 2607 duplicate=duplicate, 2608 expressions=expressions, 2609 action=action, 2610 conflict_keys=conflict_keys, 2611 
constraint=constraint, 2612 ) 2613 2614 def _parse_returning(self) -> t.Optional[exp.Returning]: 2615 if not self._match(TokenType.RETURNING): 2616 return None 2617 return self.expression( 2618 exp.Returning, 2619 expressions=self._parse_csv(self._parse_expression), 2620 into=self._match(TokenType.INTO) and self._parse_table_part(), 2621 ) 2622 2623 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2624 if not self._match(TokenType.FORMAT): 2625 return None 2626 return self._parse_row_format() 2627 2628 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2629 index = self._index 2630 with_ = with_ or self._match_text_seq("WITH") 2631 2632 if not self._match(TokenType.SERDE_PROPERTIES): 2633 self._retreat(index) 2634 return None 2635 return self.expression( 2636 exp.SerdeProperties, 2637 **{ # type: ignore 2638 "expressions": self._parse_wrapped_properties(), 2639 "with": with_, 2640 }, 2641 ) 2642 2643 def _parse_row_format( 2644 self, match_row: bool = False 2645 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2646 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2647 return None 2648 2649 if self._match_text_seq("SERDE"): 2650 this = self._parse_string() 2651 2652 serde_properties = self._parse_serde_properties() 2653 2654 return self.expression( 2655 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2656 ) 2657 2658 self._match_text_seq("DELIMITED") 2659 2660 kwargs = {} 2661 2662 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2663 kwargs["fields"] = self._parse_string() 2664 if self._match_text_seq("ESCAPED", "BY"): 2665 kwargs["escaped"] = self._parse_string() 2666 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2667 kwargs["collection_items"] = self._parse_string() 2668 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2669 kwargs["map_keys"] = self._parse_string() 2670 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2671 kwargs["lines"] = self._parse_string() 2672 if self._match_text_seq("NULL", "DEFINED", "AS"): 2673 kwargs["null"] = self._parse_string() 2674 2675 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2676 2677 def _parse_load(self) -> exp.LoadData | exp.Command: 2678 if self._match_text_seq("DATA"): 2679 local = self._match_text_seq("LOCAL") 2680 self._match_text_seq("INPATH") 2681 inpath = self._parse_string() 2682 overwrite = self._match(TokenType.OVERWRITE) 2683 self._match_pair(TokenType.INTO, TokenType.TABLE) 2684 2685 return self.expression( 2686 exp.LoadData, 2687 this=self._parse_table(schema=True), 2688 local=local, 2689 overwrite=overwrite, 2690 inpath=inpath, 2691 partition=self._parse_partition(), 2692 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2693 serde=self._match_text_seq("SERDE") and self._parse_string(), 2694 ) 2695 return self._parse_as_command(self._prev) 2696 2697 def _parse_delete(self) -> exp.Delete: 2698 # This handles MySQL's "Multiple-Table Syntax" 2699 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2700 tables = None 2701 comments = self._prev_comments 2702 if not self._match(TokenType.FROM, advance=False): 2703 tables = self._parse_csv(self._parse_table) or None 2704 2705 returning = self._parse_returning() 2706 2707 return self.expression( 2708 exp.Delete, 2709 comments=comments, 2710 tables=tables, 2711 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 
2712 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2713 where=self._parse_where(), 2714 returning=returning or self._parse_returning(), 2715 limit=self._parse_limit(), 2716 ) 2717 2718 def _parse_update(self) -> exp.Update: 2719 comments = self._prev_comments 2720 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2721 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2722 returning = self._parse_returning() 2723 return self.expression( 2724 exp.Update, 2725 comments=comments, 2726 **{ # type: ignore 2727 "this": this, 2728 "expressions": expressions, 2729 "from": self._parse_from(joins=True), 2730 "where": self._parse_where(), 2731 "returning": returning or self._parse_returning(), 2732 "order": self._parse_order(), 2733 "limit": self._parse_limit(), 2734 }, 2735 ) 2736 2737 def _parse_uncache(self) -> exp.Uncache: 2738 if not self._match(TokenType.TABLE): 2739 self.raise_error("Expecting TABLE after UNCACHE") 2740 2741 return self.expression( 2742 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2743 ) 2744 2745 def _parse_cache(self) -> exp.Cache: 2746 lazy = self._match_text_seq("LAZY") 2747 self._match(TokenType.TABLE) 2748 table = self._parse_table(schema=True) 2749 2750 options = [] 2751 if self._match_text_seq("OPTIONS"): 2752 self._match_l_paren() 2753 k = self._parse_string() 2754 self._match(TokenType.EQ) 2755 v = self._parse_string() 2756 options = [k, v] 2757 self._match_r_paren() 2758 2759 self._match(TokenType.ALIAS) 2760 return self.expression( 2761 exp.Cache, 2762 this=table, 2763 lazy=lazy, 2764 options=options, 2765 expression=self._parse_select(nested=True), 2766 ) 2767 2768 def _parse_partition(self) -> t.Optional[exp.Partition]: 2769 if not self._match(TokenType.PARTITION): 2770 return None 2771 2772 return self.expression( 2773 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2774 ) 2775 2776 def _parse_value(self) -> t.Optional[exp.Tuple]: 2777 if self._match(TokenType.L_PAREN): 2778 expressions = self._parse_csv(self._parse_expression) 2779 self._match_r_paren() 2780 return self.expression(exp.Tuple, expressions=expressions) 2781 2782 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
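# Illustrative sketch (not part of the original source): each parenthesized
# row parsed here becomes an exp.Tuple inside an exp.Values node; in the bare
# scalar case below, each value is wrapped in a single-element Tuple.
#
#     import sqlglot
#     ast = sqlglot.parse_one("SELECT * FROM (VALUES (1, 2), (3, 4)) AS t(a, b)")
#     values = ast.find(sqlglot.exp.Values)
#     [row.sql() for row in values.expressions]  # ['(1, 2)', '(3, 4)']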
2783 expression = self._parse_expression() 2784 if expression: 2785 return self.expression(exp.Tuple, expressions=[expression]) 2786 return None 2787 2788 def _parse_projections(self) -> t.List[exp.Expression]: 2789 return self._parse_expressions() 2790 2791 def _parse_select( 2792 self, 2793 nested: bool = False, 2794 table: bool = False, 2795 parse_subquery_alias: bool = True, 2796 parse_set_operation: bool = True, 2797 ) -> t.Optional[exp.Expression]: 2798 cte = self._parse_with() 2799 2800 if cte: 2801 this = self._parse_statement() 2802 2803 if not this: 2804 self.raise_error("Failed to parse any statement following CTE") 2805 return cte 2806 2807 if "with" in this.arg_types: 2808 this.set("with", cte) 2809 else: 2810 self.raise_error(f"{this.key} does not support CTE") 2811 this = cte 2812 2813 return this 2814 2815 # duckdb supports queries that lead with FROM, e.g. FROM x 2816 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2817 2818 if self._match(TokenType.SELECT): 2819 comments = self._prev_comments 2820 2821 hint = self._parse_hint() 2822 2823 if self._next and not self._next.token_type == TokenType.DOT: 2824 all_ = self._match(TokenType.ALL) 2825 distinct = self._match_set(self.DISTINCT_TOKENS) 2826 else: 2827 all_, distinct = None, None 2828 2829 kind = ( 2830 self._match(TokenType.ALIAS) 2831 and self._match_texts(("STRUCT", "VALUE")) 2832 and self._prev.text.upper() 2833 ) 2834 2835 if distinct: 2836 distinct = self.expression( 2837 exp.Distinct, 2838 on=self._parse_value() if self._match(TokenType.ON) else None, 2839 ) 2840 2841 if all_ and distinct: 2842 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2843 2844 limit = self._parse_limit(top=True) 2845 projections = self._parse_projections() 2846 2847 this = self.expression( 2848 exp.Select, 2849 kind=kind, 2850 hint=hint, 2851 distinct=distinct, 2852 expressions=projections, 2853 limit=limit, 2854 ) 2855 this.comments = comments 2856 2857 into = self._parse_into() 2858 if into: 2859 this.set("into", into) 2860 2861 if not from_: 2862 from_ = self._parse_from() 2863 2864 if from_: 2865 this.set("from", from_) 2866 2867 this = self._parse_query_modifiers(this) 2868 elif (table or nested) and self._match(TokenType.L_PAREN): 2869 if self._match(TokenType.PIVOT): 2870 this = self._parse_simplified_pivot() 2871 elif self._match(TokenType.FROM): 2872 this = exp.select("*").from_( 2873 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2874 ) 2875 else: 2876 this = ( 2877 self._parse_table() 2878 if table 2879 else self._parse_select(nested=True, parse_set_operation=False) 2880 ) 2881 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2882 2883 self._match_r_paren() 2884 2885 # We return early here so that the UNION isn't attached to the subquery by the 2886 # following call to _parse_set_operations, but instead becomes the parent node 2887 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2888 elif self._match(TokenType.VALUES, advance=False): 2889 this = self._parse_derived_table_values() 2890 elif from_: 2891 this = exp.select("*").from_(from_.this, copy=False) 2892 elif self._match(TokenType.SUMMARIZE): 2893 table = self._match(TokenType.TABLE) 2894 this = self._parse_select() or self._parse_string() or self._parse_table() 2895 return self.expression(exp.Summarize, this=this, table=table) 2896 elif self._match(TokenType.DESCRIBE): 2897 this = self._parse_describe() 2898 elif self._match_text_seq("STREAM"): 2899 this = self.expression(exp.Stream,
this=self._parse_function()) 2900 else: 2901 this = None 2902 2903 return self._parse_set_operations(this) if parse_set_operation else this 2904 2905 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2906 if not skip_with_token and not self._match(TokenType.WITH): 2907 return None 2908 2909 comments = self._prev_comments 2910 recursive = self._match(TokenType.RECURSIVE) 2911 2912 expressions = [] 2913 while True: 2914 expressions.append(self._parse_cte()) 2915 2916 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2917 break 2918 else: 2919 self._match(TokenType.WITH) 2920 2921 return self.expression( 2922 exp.With, comments=comments, expressions=expressions, recursive=recursive 2923 ) 2924 2925 def _parse_cte(self) -> exp.CTE: 2926 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2927 if not alias or not alias.this: 2928 self.raise_error("Expected CTE to have alias") 2929 2930 self._match(TokenType.ALIAS) 2931 comments = self._prev_comments 2932 2933 if self._match_text_seq("NOT", "MATERIALIZED"): 2934 materialized = False 2935 elif self._match_text_seq("MATERIALIZED"): 2936 materialized = True 2937 else: 2938 materialized = None 2939 2940 return self.expression( 2941 exp.CTE, 2942 this=self._parse_wrapped(self._parse_statement), 2943 alias=alias, 2944 materialized=materialized, 2945 comments=comments, 2946 ) 2947 2948 def _parse_table_alias( 2949 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2950 ) -> t.Optional[exp.TableAlias]: 2951 any_token = self._match(TokenType.ALIAS) 2952 alias = ( 2953 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2954 or self._parse_string_as_identifier() 2955 ) 2956 2957 index = self._index 2958 if self._match(TokenType.L_PAREN): 2959 columns = self._parse_csv(self._parse_function_parameter) 2960 self._match_r_paren() if columns else self._retreat(index) 2961 else: 2962 columns = None 2963 2964 if not alias and not columns: 2965 return None 2966 2967 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2968 2969 # We bubble up comments from the Identifier to the TableAlias 2970 if isinstance(alias, exp.Identifier): 2971 table_alias.add_comments(alias.pop_comments()) 2972 2973 return table_alias 2974 2975 def _parse_subquery( 2976 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2977 ) -> t.Optional[exp.Subquery]: 2978 if not this: 2979 return None 2980 2981 return self.expression( 2982 exp.Subquery, 2983 this=this, 2984 pivots=self._parse_pivots(), 2985 alias=self._parse_table_alias() if parse_alias else None, 2986 ) 2987 2988 def _implicit_unnests_to_explicit(self, this: E) -> E: 2989 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2990 2991 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2992 for i, join in enumerate(this.args.get("joins") or []): 2993 table = join.this 2994 normalized_table = table.copy() 2995 normalized_table.meta["maybe_column"] = True 2996 normalized_table = _norm(normalized_table, dialect=self.dialect) 2997 2998 if isinstance(table, exp.Table) and not join.args.get("on"): 2999 if normalized_table.parts[0].name in refs: 3000 table_as_column = table.to_column() 3001 unnest = exp.Unnest(expressions=[table_as_column]) 3002 3003 # Table.to_column creates a parent Alias node that we want to convert to 3004 # a TableAlias and attach to the Unnest, so it matches the parser's output 3005 if isinstance(table.args.get("alias"), 
exp.TableAlias): 3006 table_as_column.replace(table_as_column.this) 3007 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3008 3009 table.replace(unnest) 3010 3011 refs.add(normalized_table.alias_or_name) 3012 3013 return this 3014 3015 def _parse_query_modifiers( 3016 self, this: t.Optional[exp.Expression] 3017 ) -> t.Optional[exp.Expression]: 3018 if isinstance(this, (exp.Query, exp.Table)): 3019 for join in self._parse_joins(): 3020 this.append("joins", join) 3021 for lateral in iter(self._parse_lateral, None): 3022 this.append("laterals", lateral) 3023 3024 while True: 3025 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3026 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3027 key, expression = parser(self) 3028 3029 if expression: 3030 this.set(key, expression) 3031 if key == "limit": 3032 offset = expression.args.pop("offset", None) 3033 3034 if offset: 3035 offset = exp.Offset(expression=offset) 3036 this.set("offset", offset) 3037 3038 limit_by_expressions = expression.expressions 3039 expression.set("expressions", None) 3040 offset.set("expressions", limit_by_expressions) 3041 continue 3042 break 3043 3044 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3045 this = self._implicit_unnests_to_explicit(this) 3046 3047 return this 3048 3049 def _parse_hint(self) -> t.Optional[exp.Hint]: 3050 if self._match(TokenType.HINT): 3051 hints = [] 3052 for hint in iter( 3053 lambda: self._parse_csv( 3054 lambda: self._parse_function() or self._parse_var(upper=True) 3055 ), 3056 [], 3057 ): 3058 hints.extend(hint) 3059 3060 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3061 self.raise_error("Expected */ after HINT") 3062 3063 return self.expression(exp.Hint, expressions=hints) 3064 3065 return None 3066 3067 def _parse_into(self) -> t.Optional[exp.Into]: 3068 if not self._match(TokenType.INTO): 3069 return None 3070 3071 temp = self._match(TokenType.TEMPORARY) 3072 unlogged = self._match_text_seq("UNLOGGED") 3073 self._match(TokenType.TABLE) 3074 3075 return self.expression( 3076 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3077 ) 3078 3079 def _parse_from( 3080 self, joins: bool = False, skip_from_token: bool = False 3081 ) -> t.Optional[exp.From]: 3082 if not skip_from_token and not self._match(TokenType.FROM): 3083 return None 3084 3085 return self.expression( 3086 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3087 ) 3088 3089 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3090 return self.expression( 3091 exp.MatchRecognizeMeasure, 3092 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3093 this=self._parse_expression(), 3094 ) 3095 3096 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3097 if not self._match(TokenType.MATCH_RECOGNIZE): 3098 return None 3099 3100 self._match_l_paren() 3101 3102 partition = self._parse_partition_by() 3103 order = self._parse_order() 3104 3105 measures = ( 3106 self._parse_csv(self._parse_match_recognize_measure) 3107 if self._match_text_seq("MEASURES") 3108 else None 3109 ) 3110 3111 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3112 rows = exp.var("ONE ROW PER MATCH") 3113 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3114 text = "ALL ROWS PER MATCH" 3115 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3116 text += " SHOW EMPTY MATCHES" 3117 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3118 text += " 
OMIT EMPTY MATCHES" 3119 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3120 text += " WITH UNMATCHED ROWS" 3121 rows = exp.var(text) 3122 else: 3123 rows = None 3124 3125 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3126 text = "AFTER MATCH SKIP" 3127 if self._match_text_seq("PAST", "LAST", "ROW"): 3128 text += " PAST LAST ROW" 3129 elif self._match_text_seq("TO", "NEXT", "ROW"): 3130 text += " TO NEXT ROW" 3131 elif self._match_text_seq("TO", "FIRST"): 3132 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3133 elif self._match_text_seq("TO", "LAST"): 3134 text += f" TO LAST {self._advance_any().text}" # type: ignore 3135 after = exp.var(text) 3136 else: 3137 after = None 3138 3139 if self._match_text_seq("PATTERN"): 3140 self._match_l_paren() 3141 3142 if not self._curr: 3143 self.raise_error("Expecting )", self._curr) 3144 3145 paren = 1 3146 start = self._curr 3147 3148 while self._curr and paren > 0: 3149 if self._curr.token_type == TokenType.L_PAREN: 3150 paren += 1 3151 if self._curr.token_type == TokenType.R_PAREN: 3152 paren -= 1 3153 3154 end = self._prev 3155 self._advance() 3156 3157 if paren > 0: 3158 self.raise_error("Expecting )", self._curr) 3159 3160 pattern = exp.var(self._find_sql(start, end)) 3161 else: 3162 pattern = None 3163 3164 define = ( 3165 self._parse_csv(self._parse_name_as_expression) 3166 if self._match_text_seq("DEFINE") 3167 else None 3168 ) 3169 3170 self._match_r_paren() 3171 3172 return self.expression( 3173 exp.MatchRecognize, 3174 partition_by=partition, 3175 order=order, 3176 measures=measures, 3177 rows=rows, 3178 after=after, 3179 pattern=pattern, 3180 define=define, 3181 alias=self._parse_table_alias(), 3182 ) 3183 3184 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3185 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3186 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3187 cross_apply = False 3188 3189 if cross_apply is not None: 3190 this = self._parse_select(table=True) 3191 view = None 3192 outer = None 3193 elif self._match(TokenType.LATERAL): 3194 this = self._parse_select(table=True) 3195 view = self._match(TokenType.VIEW) 3196 outer = self._match(TokenType.OUTER) 3197 else: 3198 return None 3199 3200 if not this: 3201 this = ( 3202 self._parse_unnest() 3203 or self._parse_function() 3204 or self._parse_id_var(any_token=False) 3205 ) 3206 3207 while self._match(TokenType.DOT): 3208 this = exp.Dot( 3209 this=this, 3210 expression=self._parse_function() or self._parse_id_var(any_token=False), 3211 ) 3212 3213 if view: 3214 table = self._parse_id_var(any_token=False) 3215 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3216 table_alias: t.Optional[exp.TableAlias] = self.expression( 3217 exp.TableAlias, this=table, columns=columns 3218 ) 3219 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3220 # We move the alias from the lateral's child node to the lateral itself 3221 table_alias = this.args["alias"].pop() 3222 else: 3223 table_alias = self._parse_table_alias() 3224 3225 return self.expression( 3226 exp.Lateral, 3227 this=this, 3228 view=view, 3229 outer=outer, 3230 alias=table_alias, 3231 cross_apply=cross_apply, 3232 ) 3233 3234 def _parse_join_parts( 3235 self, 3236 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3237 return ( 3238 self._match_set(self.JOIN_METHODS) and self._prev, 3239 self._match_set(self.JOIN_SIDES) and self._prev, 3240 self._match_set(self.JOIN_KINDS) and 
self._prev, 3241 ) 3242 3243 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3244 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3245 this = self._parse_column() 3246 if isinstance(this, exp.Column): 3247 return this.this 3248 return this 3249 3250 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3251 3252 def _parse_join( 3253 self, skip_join_token: bool = False, parse_bracket: bool = False 3254 ) -> t.Optional[exp.Join]: 3255 if self._match(TokenType.COMMA): 3256 return self.expression(exp.Join, this=self._parse_table()) 3257 3258 index = self._index 3259 method, side, kind = self._parse_join_parts() 3260 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3261 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3262 3263 if not skip_join_token and not join: 3264 self._retreat(index) 3265 kind = None 3266 method = None 3267 side = None 3268 3269 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3270 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3271 3272 if not skip_join_token and not join and not outer_apply and not cross_apply: 3273 return None 3274 3275 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3276 3277 if method: 3278 kwargs["method"] = method.text 3279 if side: 3280 kwargs["side"] = side.text 3281 if kind: 3282 kwargs["kind"] = kind.text 3283 if hint: 3284 kwargs["hint"] = hint 3285 3286 if self._match(TokenType.MATCH_CONDITION): 3287 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3288 3289 if self._match(TokenType.ON): 3290 kwargs["on"] = self._parse_assignment() 3291 elif self._match(TokenType.USING): 3292 kwargs["using"] = self._parse_using_identifiers() 3293 elif ( 3294 not (outer_apply or cross_apply) 3295 and not isinstance(kwargs["this"], exp.Unnest) 3296 and not (kind and kind.token_type == TokenType.CROSS) 3297 ): 3298 index = self._index 3299 joins: t.Optional[list] = list(self._parse_joins()) 3300 3301 if joins and self._match(TokenType.ON): 3302 kwargs["on"] = self._parse_assignment() 3303 elif joins and self._match(TokenType.USING): 3304 kwargs["using"] = self._parse_using_identifiers() 3305 else: 3306 joins = None 3307 self._retreat(index) 3308 3309 kwargs["this"].set("joins", joins if joins else None) 3310 3311 comments = [c for token in (method, side, kind) if token for c in token.comments] 3312 return self.expression(exp.Join, comments=comments, **kwargs) 3313 3314 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3315 this = self._parse_assignment() 3316 3317 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3318 return this 3319 3320 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3321 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3322 3323 return this 3324 3325 def _parse_index_params(self) -> exp.IndexParameters: 3326 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3327 3328 if self._match(TokenType.L_PAREN, advance=False): 3329 columns = self._parse_wrapped_csv(self._parse_with_operator) 3330 else: 3331 columns = None 3332 3333 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3334 partition_by = self._parse_partition_by() 3335 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3336 tablespace = ( 3337 self._parse_var(any_token=True) 3338 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3339 else None 3340 ) 3341 where = self._parse_where() 3342 3343 on = self._parse_field() if self._match(TokenType.ON) else None 3344 3345 return self.expression( 3346 exp.IndexParameters, 3347 using=using, 3348 columns=columns, 3349 include=include, 3350 partition_by=partition_by, 3351 where=where, 3352 with_storage=with_storage, 3353 tablespace=tablespace, 3354 on=on, 3355 ) 3356 3357 def _parse_index( 3358 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3359 ) -> t.Optional[exp.Index]: 3360 if index or anonymous: 3361 unique = None 3362 primary = None 3363 amp = None 3364 3365 self._match(TokenType.ON) 3366 self._match(TokenType.TABLE) # hive 3367 table = self._parse_table_parts(schema=True) 3368 else: 3369 unique = self._match(TokenType.UNIQUE) 3370 primary = self._match_text_seq("PRIMARY") 3371 amp = self._match_text_seq("AMP") 3372 3373 if not self._match(TokenType.INDEX): 3374 return None 3375 3376 index = self._parse_id_var() 3377 table = None 3378 3379 params = self._parse_index_params() 3380 3381 return self.expression( 3382 exp.Index, 3383 this=index, 3384 table=table, 3385 unique=unique, 3386 primary=primary, 3387 amp=amp, 3388 params=params, 3389 ) 3390 3391 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3392 hints: t.List[exp.Expression] = [] 3393 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3394 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3395 hints.append( 3396 self.expression( 3397 exp.WithTableHint, 3398 expressions=self._parse_csv( 3399 lambda: self._parse_function() or self._parse_var(any_token=True) 3400 ), 3401 ) 3402 ) 3403 self._match_r_paren() 3404 else: 3405 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3406 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3407 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3408 3409 self._match_set((TokenType.INDEX, TokenType.KEY)) 3410 if self._match(TokenType.FOR): 3411 hint.set("target", self._advance_any() and self._prev.text.upper()) 3412 3413 hint.set("expressions", self._parse_wrapped_id_vars()) 3414 hints.append(hint) 3415 3416 return hints or None 3417 3418 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3419 return ( 3420 (not schema and self._parse_function(optional_parens=False)) 3421 or self._parse_id_var(any_token=False) 3422 or self._parse_string_as_identifier() 3423 or self._parse_placeholder() 3424 ) 3425 3426 def _parse_table_parts( 3427 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3428 ) -> exp.Table: 3429 catalog = None 3430 db = None 3431 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3432 3433 while self._match(TokenType.DOT): 3434 if catalog: 3435 # This allows nesting the table in arbitrarily many dot expressions if needed 3436 table = self.expression( 3437 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3438 ) 3439 else: 3440 catalog = db 3441 db = table 3442 # "" used for tsql FROM a..b case 3443 table = self._parse_table_part(schema=schema) or "" 3444 3445 if ( 3446 wildcard 3447 and self._is_connected() 3448 and (isinstance(table, exp.Identifier) or not table) 3449 and self._match(TokenType.STAR) 3450 ): 3451 if isinstance(table, exp.Identifier): 3452 table.args["this"] += "*" 3453 else: 3454 table = exp.Identifier(this="*") 3455 3456 # We bubble up comments from the Identifier to the Table 
3457 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3458 3459 if is_db_reference: 3460 catalog = db 3461 db = table 3462 table = None 3463 3464 if not table and not is_db_reference: 3465 self.raise_error(f"Expected table name but got {self._curr}") 3466 if not db and is_db_reference: 3467 self.raise_error(f"Expected database name but got {self._curr}") 3468 3469 table = self.expression( 3470 exp.Table, 3471 comments=comments, 3472 this=table, 3473 db=db, 3474 catalog=catalog, 3475 ) 3476 3477 changes = self._parse_changes() 3478 if changes: 3479 table.set("changes", changes) 3480 3481 at_before = self._parse_historical_data() 3482 if at_before: 3483 table.set("when", at_before) 3484 3485 pivots = self._parse_pivots() 3486 if pivots: 3487 table.set("pivots", pivots) 3488 3489 return table 3490 3491 def _parse_table( 3492 self, 3493 schema: bool = False, 3494 joins: bool = False, 3495 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3496 parse_bracket: bool = False, 3497 is_db_reference: bool = False, 3498 parse_partition: bool = False, 3499 ) -> t.Optional[exp.Expression]: 3500 lateral = self._parse_lateral() 3501 if lateral: 3502 return lateral 3503 3504 unnest = self._parse_unnest() 3505 if unnest: 3506 return unnest 3507 3508 values = self._parse_derived_table_values() 3509 if values: 3510 return values 3511 3512 subquery = self._parse_select(table=True) 3513 if subquery: 3514 if not subquery.args.get("pivots"): 3515 subquery.set("pivots", self._parse_pivots()) 3516 return subquery 3517 3518 bracket = parse_bracket and self._parse_bracket(None) 3519 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3520 3521 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3522 self._parse_table 3523 ) 3524 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3525 3526 only = self._match(TokenType.ONLY) 3527 3528 this = t.cast( 3529 exp.Expression, 3530 bracket 3531 or rows_from 3532 or self._parse_bracket( 3533 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3534 ), 3535 ) 3536 3537 if only: 3538 this.set("only", only) 3539 3540 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3541 self._match_text_seq("*") 3542 3543 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3544 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3545 this.set("partition", self._parse_partition()) 3546 3547 if schema: 3548 return self._parse_schema(this=this) 3549 3550 version = self._parse_version() 3551 3552 if version: 3553 this.set("version", version) 3554 3555 if self.dialect.ALIAS_POST_TABLESAMPLE: 3556 this.set("sample", self._parse_table_sample()) 3557 3558 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3559 if alias: 3560 this.set("alias", alias) 3561 3562 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3563 return self.expression( 3564 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3565 ) 3566 3567 this.set("hints", self._parse_table_hints()) 3568 3569 if not this.args.get("pivots"): 3570 this.set("pivots", self._parse_pivots()) 3571 3572 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3573 this.set("sample", self._parse_table_sample()) 3574 3575 if joins: 3576 for join in self._parse_joins(): 3577 this.append("joins", join) 3578 3579 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3580 
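        # A hedged example of this WITH ORDINALITY branch, assuming the public
        # sqlglot API: a set-returning call that is parsed as a regular table
        # factor (rather than via _parse_unnest above) should land here, e.g.
        #
        #     import sqlglot
        #
        #     sql = "SELECT * FROM generate_series(1, 3) WITH ORDINALITY AS t(v, n)"
        #     tbl = sqlglot.parse_one(sql, read="postgres").args["from"].this
        #     assert tbl.args.get("ordinality") is True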
this.set("ordinality", True) 3581 this.set("alias", self._parse_table_alias()) 3582 3583 return this 3584 3585 def _parse_version(self) -> t.Optional[exp.Version]: 3586 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3587 this = "TIMESTAMP" 3588 elif self._match(TokenType.VERSION_SNAPSHOT): 3589 this = "VERSION" 3590 else: 3591 return None 3592 3593 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3594 kind = self._prev.text.upper() 3595 start = self._parse_bitwise() 3596 self._match_texts(("TO", "AND")) 3597 end = self._parse_bitwise() 3598 expression: t.Optional[exp.Expression] = self.expression( 3599 exp.Tuple, expressions=[start, end] 3600 ) 3601 elif self._match_text_seq("CONTAINED", "IN"): 3602 kind = "CONTAINED IN" 3603 expression = self.expression( 3604 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3605 ) 3606 elif self._match(TokenType.ALL): 3607 kind = "ALL" 3608 expression = None 3609 else: 3610 self._match_text_seq("AS", "OF") 3611 kind = "AS OF" 3612 expression = self._parse_type() 3613 3614 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3615 3616 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3617 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3618 index = self._index 3619 historical_data = None 3620 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3621 this = self._prev.text.upper() 3622 kind = ( 3623 self._match(TokenType.L_PAREN) 3624 and self._match_texts(self.HISTORICAL_DATA_KIND) 3625 and self._prev.text.upper() 3626 ) 3627 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3628 3629 if expression: 3630 self._match_r_paren() 3631 historical_data = self.expression( 3632 exp.HistoricalData, this=this, kind=kind, expression=expression 3633 ) 3634 else: 3635 self._retreat(index) 3636 3637 return historical_data 3638 3639 def _parse_changes(self) -> t.Optional[exp.Changes]: 3640 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3641 return None 3642 3643 information = self._parse_var(any_token=True) 3644 self._match_r_paren() 3645 3646 return self.expression( 3647 exp.Changes, 3648 information=information, 3649 at_before=self._parse_historical_data(), 3650 end=self._parse_historical_data(), 3651 ) 3652 3653 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3654 if not self._match(TokenType.UNNEST): 3655 return None 3656 3657 expressions = self._parse_wrapped_csv(self._parse_equality) 3658 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3659 3660 alias = self._parse_table_alias() if with_alias else None 3661 3662 if alias: 3663 if self.dialect.UNNEST_COLUMN_ONLY: 3664 if alias.args.get("columns"): 3665 self.raise_error("Unexpected extra column alias in unnest.") 3666 3667 alias.set("columns", [alias.this]) 3668 alias.set("this", None) 3669 3670 columns = alias.args.get("columns") or [] 3671 if offset and len(expressions) < len(columns): 3672 offset = columns.pop() 3673 3674 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3675 self._match(TokenType.ALIAS) 3676 offset = self._parse_id_var( 3677 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3678 ) or exp.to_identifier("offset") 3679 3680 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3681 3682 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3683 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3684 if not is_derived and not ( 3685 # ClickHouse's 
`FORMAT Values` is equivalent to `VALUES`
3686            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
3687        ):
3688            return None
3689
3690        expressions = self._parse_csv(self._parse_value)
3691        alias = self._parse_table_alias()
3692
3693        if is_derived:
3694            self._match_r_paren()
3695
3696        return self.expression(
3697            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
3698        )
3699
3700    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
3701        if not self._match(TokenType.TABLE_SAMPLE) and not (
3702            as_modifier and self._match_text_seq("USING", "SAMPLE")
3703        ):
3704            return None
3705
3706        bucket_numerator = None
3707        bucket_denominator = None
3708        bucket_field = None
3709        percent = None
3710        size = None
3711        seed = None
3712
3713        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
3714        matched_l_paren = self._match(TokenType.L_PAREN)
3715
3716        if self.TABLESAMPLE_CSV:
3717            num = None
3718            expressions = self._parse_csv(self._parse_primary)
3719        else:
3720            expressions = None
3721            num = (
3722                self._parse_factor()
3723                if self._match(TokenType.NUMBER, advance=False)
3724                else self._parse_primary() or self._parse_placeholder()
3725            )
3726
3727        if self._match_text_seq("BUCKET"):
3728            bucket_numerator = self._parse_number()
3729            self._match_text_seq("OUT", "OF")
3730            bucket_denominator = self._parse_number()
3731            self._match(TokenType.ON)
3732            bucket_field = self._parse_field()
3733        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
3734            percent = num
3735        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
3736            size = num
3737        else:
3738            percent = num
3739
3740        if matched_l_paren:
3741            self._match_r_paren()
3742
3743        if self._match(TokenType.L_PAREN):
3744            method = self._parse_var(upper=True)
3745            seed = self._match(TokenType.COMMA) and self._parse_number()
3746            self._match_r_paren()
3747        elif self._match_texts(("SEED", "REPEATABLE")):
3748            seed = self._parse_wrapped(self._parse_number)
3749
3750        if not method and self.DEFAULT_SAMPLING_METHOD:
3751            method = exp.var(self.DEFAULT_SAMPLING_METHOD)
3752
3753        return self.expression(
3754            exp.TableSample,
3755            expressions=expressions,
3756            method=method,
3757            bucket_numerator=bucket_numerator,
3758            bucket_denominator=bucket_denominator,
3759            bucket_field=bucket_field,
3760            percent=percent,
3761            size=size,
3762            seed=seed,
3763        )
3764
3765    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
3766        return list(iter(self._parse_pivot, None)) or None
3767
3768    def _parse_joins(self) -> t.Iterator[exp.Join]:
3769        return iter(self._parse_join, None)
3770
3771    # https://duckdb.org/docs/sql/statements/pivot
3772    def _parse_simplified_pivot(self) -> exp.Pivot:
3773        def _parse_on() -> t.Optional[exp.Expression]:
3774            this = self._parse_bitwise()
3775            return self._parse_in(this) if self._match(TokenType.IN) else this
3776
3777        this = self._parse_table()
3778        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
3779        using = self._match(TokenType.USING) and self._parse_csv(
3780            lambda: self._parse_alias(self._parse_function())
3781        )
3782        group = self._parse_group()
3783        return self.expression(
3784            exp.Pivot, this=this, expressions=expressions, using=using, group=group
3785        )
3786
3787    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
3788        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
3789            this = self._parse_select_or_expression()
3790
3791            self._match(TokenType.ALIAS)
3792
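            # At this point the optional AS has been consumed; what follows is the
            # pivot-value alias, i.e. the "j"/"f" in an IN-list such as
            # PIVOT (SUM(amount) FOR month IN ('JAN' AS j, 'FEB' AS f)).
            # (Illustrative SQL only; exact alias syntax varies by dialect.)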
alias = self._parse_bitwise() 3793 if alias: 3794 if isinstance(alias, exp.Column) and not alias.db: 3795 alias = alias.this 3796 return self.expression(exp.PivotAlias, this=this, alias=alias) 3797 3798 return this 3799 3800 value = self._parse_column() 3801 3802 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3803 self.raise_error("Expecting IN (") 3804 3805 if self._match(TokenType.ANY): 3806 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3807 else: 3808 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3809 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3810 3811 self._match_r_paren() 3812 return expr 3813 3814 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3815 index = self._index 3816 include_nulls = None 3817 3818 if self._match(TokenType.PIVOT): 3819 unpivot = False 3820 elif self._match(TokenType.UNPIVOT): 3821 unpivot = True 3822 3823 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3824 if self._match_text_seq("INCLUDE", "NULLS"): 3825 include_nulls = True 3826 elif self._match_text_seq("EXCLUDE", "NULLS"): 3827 include_nulls = False 3828 else: 3829 return None 3830 3831 expressions = [] 3832 3833 if not self._match(TokenType.L_PAREN): 3834 self._retreat(index) 3835 return None 3836 3837 if unpivot: 3838 expressions = self._parse_csv(self._parse_column) 3839 else: 3840 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3841 3842 if not expressions: 3843 self.raise_error("Failed to parse PIVOT's aggregation list") 3844 3845 if not self._match(TokenType.FOR): 3846 self.raise_error("Expecting FOR") 3847 3848 field = self._parse_pivot_in() 3849 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3850 self._parse_bitwise 3851 ) 3852 3853 self._match_r_paren() 3854 3855 pivot = self.expression( 3856 exp.Pivot, 3857 expressions=expressions, 3858 field=field, 3859 unpivot=unpivot, 3860 include_nulls=include_nulls, 3861 default_on_null=default_on_null, 3862 ) 3863 3864 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3865 pivot.set("alias", self._parse_table_alias()) 3866 3867 if not unpivot: 3868 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3869 3870 columns: t.List[exp.Expression] = [] 3871 for fld in pivot.args["field"].expressions: 3872 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3873 for name in names: 3874 if self.PREFIXED_PIVOT_COLUMNS: 3875 name = f"{name}_{field_name}" if name else field_name 3876 else: 3877 name = f"{field_name}_{name}" if name else field_name 3878 3879 columns.append(exp.to_identifier(name)) 3880 3881 pivot.set("columns", columns) 3882 3883 return pivot 3884 3885 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3886 return [agg.alias for agg in aggregations] 3887 3888 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3889 if not skip_where_token and not self._match(TokenType.PREWHERE): 3890 return None 3891 3892 return self.expression( 3893 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3894 ) 3895 3896 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3897 if not skip_where_token and not self._match(TokenType.WHERE): 3898 return None 3899 3900 return self.expression( 3901 exp.Where, comments=self._prev_comments, 
this=self._parse_assignment() 3902 ) 3903 3904 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3905 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3906 return None 3907 3908 elements: t.Dict[str, t.Any] = defaultdict(list) 3909 3910 if self._match(TokenType.ALL): 3911 elements["all"] = True 3912 elif self._match(TokenType.DISTINCT): 3913 elements["all"] = False 3914 3915 while True: 3916 expressions = self._parse_csv( 3917 lambda: None 3918 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3919 else self._parse_assignment() 3920 ) 3921 if expressions: 3922 elements["expressions"].extend(expressions) 3923 3924 grouping_sets = self._parse_grouping_sets() 3925 if grouping_sets: 3926 elements["grouping_sets"].extend(grouping_sets) 3927 3928 rollup = None 3929 cube = None 3930 totals = None 3931 3932 index = self._index 3933 with_ = self._match(TokenType.WITH) 3934 if self._match(TokenType.ROLLUP): 3935 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3936 elements["rollup"].extend(ensure_list(rollup)) 3937 3938 if self._match(TokenType.CUBE): 3939 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3940 elements["cube"].extend(ensure_list(cube)) 3941 3942 if self._match_text_seq("TOTALS"): 3943 totals = True 3944 elements["totals"] = True # type: ignore 3945 3946 if not (grouping_sets or rollup or cube or totals): 3947 if with_: 3948 self._retreat(index) 3949 break 3950 3951 return self.expression(exp.Group, **elements) # type: ignore 3952 3953 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3954 if not self._match(TokenType.GROUPING_SETS): 3955 return None 3956 3957 return self._parse_wrapped_csv(self._parse_grouping_set) 3958 3959 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3960 if self._match(TokenType.L_PAREN): 3961 grouping_set = self._parse_csv(self._parse_column) 3962 self._match_r_paren() 3963 return self.expression(exp.Tuple, expressions=grouping_set) 3964 3965 return self._parse_column() 3966 3967 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3968 if not skip_having_token and not self._match(TokenType.HAVING): 3969 return None 3970 return self.expression(exp.Having, this=self._parse_assignment()) 3971 3972 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3973 if not self._match(TokenType.QUALIFY): 3974 return None 3975 return self.expression(exp.Qualify, this=self._parse_assignment()) 3976 3977 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3978 if skip_start_token: 3979 start = None 3980 elif self._match(TokenType.START_WITH): 3981 start = self._parse_assignment() 3982 else: 3983 return None 3984 3985 self._match(TokenType.CONNECT_BY) 3986 nocycle = self._match_text_seq("NOCYCLE") 3987 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3988 exp.Prior, this=self._parse_bitwise() 3989 ) 3990 connect = self._parse_assignment() 3991 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3992 3993 if not start and self._match(TokenType.START_WITH): 3994 start = self._parse_assignment() 3995 3996 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3997 3998 def _parse_name_as_expression(self) -> exp.Alias: 3999 return self.expression( 4000 exp.Alias, 4001 alias=self._parse_id_var(any_token=True), 4002 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4003 ) 4004 4005 def _parse_interpolate(self) -> 
t.Optional[t.List[exp.Expression]]: 4006 if self._match_text_seq("INTERPOLATE"): 4007 return self._parse_wrapped_csv(self._parse_name_as_expression) 4008 return None 4009 4010 def _parse_order( 4011 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4012 ) -> t.Optional[exp.Expression]: 4013 siblings = None 4014 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4015 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4016 return this 4017 4018 siblings = True 4019 4020 return self.expression( 4021 exp.Order, 4022 this=this, 4023 expressions=self._parse_csv(self._parse_ordered), 4024 siblings=siblings, 4025 ) 4026 4027 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4028 if not self._match(token): 4029 return None 4030 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4031 4032 def _parse_ordered( 4033 self, parse_method: t.Optional[t.Callable] = None 4034 ) -> t.Optional[exp.Ordered]: 4035 this = parse_method() if parse_method else self._parse_assignment() 4036 if not this: 4037 return None 4038 4039 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4040 this = exp.var("ALL") 4041 4042 asc = self._match(TokenType.ASC) 4043 desc = self._match(TokenType.DESC) or (asc and False) 4044 4045 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4046 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4047 4048 nulls_first = is_nulls_first or False 4049 explicitly_null_ordered = is_nulls_first or is_nulls_last 4050 4051 if ( 4052 not explicitly_null_ordered 4053 and ( 4054 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4055 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4056 ) 4057 and self.dialect.NULL_ORDERING != "nulls_are_last" 4058 ): 4059 nulls_first = True 4060 4061 if self._match_text_seq("WITH", "FILL"): 4062 with_fill = self.expression( 4063 exp.WithFill, 4064 **{ # type: ignore 4065 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4066 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4067 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4068 "interpolate": self._parse_interpolate(), 4069 }, 4070 ) 4071 else: 4072 with_fill = None 4073 4074 return self.expression( 4075 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4076 ) 4077 4078 def _parse_limit( 4079 self, 4080 this: t.Optional[exp.Expression] = None, 4081 top: bool = False, 4082 skip_limit_token: bool = False, 4083 ) -> t.Optional[exp.Expression]: 4084 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4085 comments = self._prev_comments 4086 if top: 4087 limit_paren = self._match(TokenType.L_PAREN) 4088 expression = self._parse_term() if limit_paren else self._parse_number() 4089 4090 if limit_paren: 4091 self._match_r_paren() 4092 else: 4093 expression = self._parse_term() 4094 4095 if self._match(TokenType.COMMA): 4096 offset = expression 4097 expression = self._parse_term() 4098 else: 4099 offset = None 4100 4101 limit_exp = self.expression( 4102 exp.Limit, 4103 this=this, 4104 expression=expression, 4105 offset=offset, 4106 comments=comments, 4107 expressions=self._parse_limit_by(), 4108 ) 4109 4110 return limit_exp 4111 4112 if self._match(TokenType.FETCH): 4113 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4114 direction = self._prev.text.upper() if direction else "FIRST" 4115 4116 count = self._parse_field(tokens=self.FETCH_TOKENS) 4117 percent = 
self._match(TokenType.PERCENT) 4118 4119 self._match_set((TokenType.ROW, TokenType.ROWS)) 4120 4121 only = self._match_text_seq("ONLY") 4122 with_ties = self._match_text_seq("WITH", "TIES") 4123 4124 if only and with_ties: 4125 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4126 4127 return self.expression( 4128 exp.Fetch, 4129 direction=direction, 4130 count=count, 4131 percent=percent, 4132 with_ties=with_ties, 4133 ) 4134 4135 return this 4136 4137 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4138 if not self._match(TokenType.OFFSET): 4139 return this 4140 4141 count = self._parse_term() 4142 self._match_set((TokenType.ROW, TokenType.ROWS)) 4143 4144 return self.expression( 4145 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4146 ) 4147 4148 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4149 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4150 4151 def _parse_locks(self) -> t.List[exp.Lock]: 4152 locks = [] 4153 while True: 4154 if self._match_text_seq("FOR", "UPDATE"): 4155 update = True 4156 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4157 "LOCK", "IN", "SHARE", "MODE" 4158 ): 4159 update = False 4160 else: 4161 break 4162 4163 expressions = None 4164 if self._match_text_seq("OF"): 4165 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4166 4167 wait: t.Optional[bool | exp.Expression] = None 4168 if self._match_text_seq("NOWAIT"): 4169 wait = True 4170 elif self._match_text_seq("WAIT"): 4171 wait = self._parse_primary() 4172 elif self._match_text_seq("SKIP", "LOCKED"): 4173 wait = False 4174 4175 locks.append( 4176 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4177 ) 4178 4179 return locks 4180 4181 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4182 while this and self._match_set(self.SET_OPERATIONS): 4183 token_type = self._prev.token_type 4184 4185 if token_type == TokenType.UNION: 4186 operation: t.Type[exp.SetOperation] = exp.Union 4187 elif token_type == TokenType.EXCEPT: 4188 operation = exp.Except 4189 else: 4190 operation = exp.Intersect 4191 4192 comments = self._prev.comments 4193 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4194 by_name = self._match_text_seq("BY", "NAME") 4195 expression = self._parse_select(nested=True, parse_set_operation=False) 4196 4197 this = self.expression( 4198 operation, 4199 comments=comments, 4200 this=this, 4201 distinct=distinct, 4202 by_name=by_name, 4203 expression=expression, 4204 ) 4205 4206 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4207 expression = this.expression 4208 4209 if expression: 4210 for arg in self.SET_OP_MODIFIERS: 4211 expr = expression.args.get(arg) 4212 if expr: 4213 this.set(arg, expr.pop()) 4214 4215 return this 4216 4217 def _parse_expression(self) -> t.Optional[exp.Expression]: 4218 return self._parse_alias(self._parse_assignment()) 4219 4220 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4221 this = self._parse_disjunction() 4222 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4223 # This allows us to parse <non-identifier token> := <expr> 4224 this = exp.column( 4225 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4226 ) 4227 4228 while self._match_set(self.ASSIGNMENT): 4229 this = self.expression( 4230 
self.ASSIGNMENT[self._prev.token_type], 4231 this=this, 4232 comments=self._prev_comments, 4233 expression=self._parse_assignment(), 4234 ) 4235 4236 return this 4237 4238 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4239 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4240 4241 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4242 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4243 4244 def _parse_equality(self) -> t.Optional[exp.Expression]: 4245 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4246 4247 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4248 return self._parse_tokens(self._parse_range, self.COMPARISON) 4249 4250 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4251 this = this or self._parse_bitwise() 4252 negate = self._match(TokenType.NOT) 4253 4254 if self._match_set(self.RANGE_PARSERS): 4255 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4256 if not expression: 4257 return this 4258 4259 this = expression 4260 elif self._match(TokenType.ISNULL): 4261 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4262 4263 # Postgres supports ISNULL and NOTNULL for conditions. 4264 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4265 if self._match(TokenType.NOTNULL): 4266 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4267 this = self.expression(exp.Not, this=this) 4268 4269 if negate: 4270 this = self._negate_range(this) 4271 4272 if self._match(TokenType.IS): 4273 this = self._parse_is(this) 4274 4275 return this 4276 4277 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4278 if not this: 4279 return this 4280 4281 return self.expression(exp.Not, this=this) 4282 4283 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4284 index = self._index - 1 4285 negate = self._match(TokenType.NOT) 4286 4287 if self._match_text_seq("DISTINCT", "FROM"): 4288 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4289 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4290 4291 expression = self._parse_null() or self._parse_boolean() 4292 if not expression: 4293 self._retreat(index) 4294 return None 4295 4296 this = self.expression(exp.Is, this=this, expression=expression) 4297 return self.expression(exp.Not, this=this) if negate else this 4298 4299 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4300 unnest = self._parse_unnest(with_alias=False) 4301 if unnest: 4302 this = self.expression(exp.In, this=this, unnest=unnest) 4303 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4304 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4305 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4306 4307 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4308 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4309 else: 4310 this = self.expression(exp.In, this=this, expressions=expressions) 4311 4312 if matched_l_paren: 4313 self._match_r_paren(this) 4314 elif not self._match(TokenType.R_BRACKET, expression=this): 4315 self.raise_error("Expecting ]") 4316 else: 4317 this = self.expression(exp.In, this=this, field=self._parse_field()) 4318 4319 return this 4320 4321 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4322 low = 
self._parse_bitwise() 4323 self._match(TokenType.AND) 4324 high = self._parse_bitwise() 4325 return self.expression(exp.Between, this=this, low=low, high=high) 4326 4327 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4328 if not self._match(TokenType.ESCAPE): 4329 return this 4330 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4331 4332 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4333 index = self._index 4334 4335 if not self._match(TokenType.INTERVAL) and match_interval: 4336 return None 4337 4338 if self._match(TokenType.STRING, advance=False): 4339 this = self._parse_primary() 4340 else: 4341 this = self._parse_term() 4342 4343 if not this or ( 4344 isinstance(this, exp.Column) 4345 and not this.table 4346 and not this.this.quoted 4347 and this.name.upper() == "IS" 4348 ): 4349 self._retreat(index) 4350 return None 4351 4352 unit = self._parse_function() or ( 4353 not self._match(TokenType.ALIAS, advance=False) 4354 and self._parse_var(any_token=True, upper=True) 4355 ) 4356 4357 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4358 # each INTERVAL expression into this canonical form so it's easy to transpile 4359 if this and this.is_number: 4360 this = exp.Literal.string(this.to_py()) 4361 elif this and this.is_string: 4362 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4363 if len(parts) == 1: 4364 if unit: 4365 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4366 self._retreat(self._index - 1) 4367 4368 this = exp.Literal.string(parts[0][0]) 4369 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4370 4371 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4372 unit = self.expression( 4373 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4374 ) 4375 4376 interval = self.expression(exp.Interval, this=this, unit=unit) 4377 4378 index = self._index 4379 self._match(TokenType.PLUS) 4380 4381 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4382 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4383 return self.expression( 4384 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4385 ) 4386 4387 self._retreat(index) 4388 return interval 4389 4390 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4391 this = self._parse_term() 4392 4393 while True: 4394 if self._match_set(self.BITWISE): 4395 this = self.expression( 4396 self.BITWISE[self._prev.token_type], 4397 this=this, 4398 expression=self._parse_term(), 4399 ) 4400 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4401 this = self.expression( 4402 exp.DPipe, 4403 this=this, 4404 expression=self._parse_term(), 4405 safe=not self.dialect.STRICT_STRING_CONCAT, 4406 ) 4407 elif self._match(TokenType.DQMARK): 4408 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4409 elif self._match_pair(TokenType.LT, TokenType.LT): 4410 this = self.expression( 4411 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4412 ) 4413 elif self._match_pair(TokenType.GT, TokenType.GT): 4414 this = self.expression( 4415 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4416 ) 4417 else: 4418 break 4419 4420 return this 4421 4422 def _parse_term(self) -> t.Optional[exp.Expression]: 4423 this = self._parse_factor() 4424 4425 while self._match_set(self.TERM): 4426 klass = self.TERM[self._prev.token_type] 4427 comments = self._prev_comments 4428 expression = self._parse_factor() 4429 4430 this = self.expression(klass, this=this, comments=comments, expression=expression) 4431 4432 if isinstance(this, exp.Collate): 4433 expr = this.expression 4434 4435 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4436 # fallback to Identifier / Var 4437 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4438 ident = expr.this 4439 if isinstance(ident, exp.Identifier): 4440 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4441 4442 return this 4443 4444 def _parse_factor(self) -> t.Optional[exp.Expression]: 4445 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4446 this = parse_method() 4447 4448 while self._match_set(self.FACTOR): 4449 klass = self.FACTOR[self._prev.token_type] 4450 comments = self._prev_comments 4451 expression = parse_method() 4452 4453 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4454 self._retreat(self._index - 1) 4455 return this 4456 4457 this = self.expression(klass, this=this, comments=comments, expression=expression) 4458 4459 if isinstance(this, exp.Div): 4460 this.args["typed"] = self.dialect.TYPED_DIVISION 4461 this.args["safe"] = self.dialect.SAFE_DIVISION 4462 4463 return this 4464 4465 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4466 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4467 4468 def _parse_unary(self) -> t.Optional[exp.Expression]: 4469 if self._match_set(self.UNARY_PARSERS): 4470 return self.UNARY_PARSERS[self._prev.token_type](self) 4471 return self._parse_at_time_zone(self._parse_type()) 4472 4473 def _parse_type( 4474 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4475 ) -> t.Optional[exp.Expression]: 4476 interval = parse_interval and self._parse_interval() 4477 if interval: 4478 return interval 4479 4480 index = self._index 4481 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4482 4483 # parse_types() 
returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
4484        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
4485        if isinstance(data_type, exp.Cast):
4486            # This constructor can contain ops directly after it, for instance struct unnesting:
4487            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
4488            return self._parse_column_ops(data_type)
4489
4490        if data_type:
4491            index2 = self._index
4492            this = self._parse_primary()
4493
4494            if isinstance(this, exp.Literal):
4495                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
4496                if parser:
4497                    return parser(self, this, data_type)
4498
4499                return self.expression(exp.Cast, this=this, to=data_type)
4500
4501            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
4502            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
4503            #
4504            # If the index difference here is greater than 1, that means the parser itself must have
4505            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
4506            #
4507            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
4508            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
4509            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
4510            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
4511            #
4512            # In these cases, we don't really want to return the converted type, but instead retreat
4513            # and try to parse a Column or Identifier in the section below.
4514            if data_type.expressions and index2 - index > 1:
4515                self._retreat(index2)
4516                return self._parse_column_ops(data_type)
4517
4518            self._retreat(index)
4519
4520        if fallback_to_identifier:
4521            return self._parse_id_var()
4522
4523        this = self._parse_column()
4524        return this and self._parse_column_ops(this)
4525
4526    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
4527        this = self._parse_type()
4528        if not this:
4529            return None
4530
4531        if isinstance(this, exp.Column) and not this.table:
4532            this = exp.var(this.name.upper())
4533
4534        return self.expression(
4535            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
4536        )
4537
4538    def _parse_types(
4539        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
4540    ) -> t.Optional[exp.Expression]:
4541        index = self._index
4542
4543        this: t.Optional[exp.Expression] = None
4544        prefix = self._match_text_seq("SYSUDTLIB", ".")
4545
4546        if not self._match_set(self.TYPE_TOKENS):
4547            identifier = allow_identifiers and self._parse_id_var(
4548                any_token=False, tokens=(TokenType.VAR,)
4549            )
4550            if isinstance(identifier, exp.Identifier):
4551                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))
4552
4553                if len(tokens) != 1:
4554                    self.raise_error("Unexpected identifier", self._prev)
4555
4556                if tokens[0].token_type in self.TYPE_TOKENS:
4557                    self._prev = tokens[0]
4558                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
4559                    type_name = identifier.name
4560
4561                    while self._match(TokenType.DOT):
4562                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"
4563
4564                    this = exp.DataType.build(type_name, udt=True)
4565                else:
4566                    self._retreat(self._index - 1)
4567                    return None
4568            else:
4569                return None
4570
4571        type_token = self._prev.token_type
4572
4573        if type_token == TokenType.PSEUDO_TYPE:
4574            return
self.expression(exp.PseudoType, this=self._prev.text.upper()) 4575 4576 if type_token == TokenType.OBJECT_IDENTIFIER: 4577 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4578 4579 # https://materialize.com/docs/sql/types/map/ 4580 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4581 key_type = self._parse_types( 4582 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4583 ) 4584 if not self._match(TokenType.FARROW): 4585 self._retreat(index) 4586 return None 4587 4588 value_type = self._parse_types( 4589 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4590 ) 4591 if not self._match(TokenType.R_BRACKET): 4592 self._retreat(index) 4593 return None 4594 4595 return exp.DataType( 4596 this=exp.DataType.Type.MAP, 4597 expressions=[key_type, value_type], 4598 nested=True, 4599 prefix=prefix, 4600 ) 4601 4602 nested = type_token in self.NESTED_TYPE_TOKENS 4603 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4604 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4605 expressions = None 4606 maybe_func = False 4607 4608 if self._match(TokenType.L_PAREN): 4609 if is_struct: 4610 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4611 elif nested: 4612 expressions = self._parse_csv( 4613 lambda: self._parse_types( 4614 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4615 ) 4616 ) 4617 elif type_token in self.ENUM_TYPE_TOKENS: 4618 expressions = self._parse_csv(self._parse_equality) 4619 elif is_aggregate: 4620 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4621 any_token=False, tokens=(TokenType.VAR,) 4622 ) 4623 if not func_or_ident or not self._match(TokenType.COMMA): 4624 return None 4625 expressions = self._parse_csv( 4626 lambda: self._parse_types( 4627 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4628 ) 4629 ) 4630 expressions.insert(0, func_or_ident) 4631 else: 4632 expressions = self._parse_csv(self._parse_type_size) 4633 4634 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4635 if type_token == TokenType.VECTOR and len(expressions) == 2: 4636 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4637 4638 if not expressions or not self._match(TokenType.R_PAREN): 4639 self._retreat(index) 4640 return None 4641 4642 maybe_func = True 4643 4644 values: t.Optional[t.List[exp.Expression]] = None 4645 4646 if nested and self._match(TokenType.LT): 4647 if is_struct: 4648 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4649 else: 4650 expressions = self._parse_csv( 4651 lambda: self._parse_types( 4652 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4653 ) 4654 ) 4655 4656 if not self._match(TokenType.GT): 4657 self.raise_error("Expecting >") 4658 4659 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4660 values = self._parse_csv(self._parse_assignment) 4661 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4662 4663 if type_token in self.TIMESTAMPS: 4664 if self._match_text_seq("WITH", "TIME", "ZONE"): 4665 maybe_func = False 4666 tz_type = ( 4667 exp.DataType.Type.TIMETZ 4668 if type_token in self.TIMES 4669 else exp.DataType.Type.TIMESTAMPTZ 4670 ) 4671 this = exp.DataType(this=tz_type, expressions=expressions) 4672 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4673 maybe_func = False 4674 this = 
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4675 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4676 maybe_func = False 4677 elif type_token == TokenType.INTERVAL: 4678 unit = self._parse_var(upper=True) 4679 if unit: 4680 if self._match_text_seq("TO"): 4681 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4682 4683 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4684 else: 4685 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4686 4687 if maybe_func and check_func: 4688 index2 = self._index 4689 peek = self._parse_string() 4690 4691 if not peek: 4692 self._retreat(index) 4693 return None 4694 4695 self._retreat(index2) 4696 4697 if not this: 4698 if self._match_text_seq("UNSIGNED"): 4699 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4700 if not unsigned_type_token: 4701 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4702 4703 type_token = unsigned_type_token or type_token 4704 4705 this = exp.DataType( 4706 this=exp.DataType.Type[type_token.value], 4707 expressions=expressions, 4708 nested=nested, 4709 prefix=prefix, 4710 ) 4711 4712 # Empty arrays/structs are allowed 4713 if values is not None: 4714 cls = exp.Struct if is_struct else exp.Array 4715 this = exp.cast(cls(expressions=values), this, copy=False) 4716 4717 elif expressions: 4718 this.set("expressions", expressions) 4719 4720 # https://materialize.com/docs/sql/types/list/#type-name 4721 while self._match(TokenType.LIST): 4722 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4723 4724 index = self._index 4725 4726 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4727 matched_array = self._match(TokenType.ARRAY) 4728 4729 while self._curr: 4730 datatype_token = self._prev.token_type 4731 matched_l_bracket = self._match(TokenType.L_BRACKET) 4732 if not matched_l_bracket and not matched_array: 4733 break 4734 4735 matched_array = False 4736 values = self._parse_csv(self._parse_assignment) or None 4737 if ( 4738 values 4739 and not schema 4740 and ( 4741 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4742 ) 4743 ): 4744 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4745 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4746 self._retreat(index) 4747 break 4748 4749 this = exp.DataType( 4750 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4751 ) 4752 self._match(TokenType.R_BRACKET) 4753 4754 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4755 converter = self.TYPE_CONVERTERS.get(this.this) 4756 if converter: 4757 this = converter(t.cast(exp.DataType, this)) 4758 4759 return this 4760 4761 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4762 index = self._index 4763 4764 if ( 4765 self._curr 4766 and self._next 4767 and self._curr.token_type in self.TYPE_TOKENS 4768 and self._next.token_type in self.TYPE_TOKENS 4769 ): 4770 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4771 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4772 this = self._parse_id_var() 4773 else: 4774 this = ( 4775 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4776 or self._parse_id_var() 4777 ) 4778 4779 self._match(TokenType.COLON) 4780 4781 if ( 4782 type_required 4783 and not isinstance(this, exp.DataType) 4784 and not self._match_set(self.TYPE_TOKENS, advance=False) 4785 ): 4786 self._retreat(index) 4787 return self._parse_types() 4788 4789 return self._parse_column_def(this) 4790 4791 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4792 if not self._match_text_seq("AT", "TIME", "ZONE"): 4793 return this 4794 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4795 4796 def _parse_column(self) -> t.Optional[exp.Expression]: 4797 this = self._parse_column_reference() 4798 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4799 4800 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4801 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4802 4803 return column 4804 4805 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4806 this = self._parse_field() 4807 if ( 4808 not this 4809 and self._match(TokenType.VALUES, advance=False) 4810 and self.VALUES_FOLLOWED_BY_PAREN 4811 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4812 ): 4813 this = self._parse_id_var() 4814 4815 if isinstance(this, exp.Identifier): 4816 # We bubble up comments from the Identifier to the Column 4817 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4818 4819 return this 4820 4821 def _parse_colon_as_variant_extract( 4822 self, this: t.Optional[exp.Expression] 4823 ) -> t.Optional[exp.Expression]: 4824 casts = [] 4825 json_path = [] 4826 4827 while self._match(TokenType.COLON): 4828 start_index = self._index 4829 4830 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4831 path = self._parse_column_ops( 4832 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4833 ) 4834 4835 # The cast :: operator has a lower precedence than the extraction operator :, so 4836 # we rearrange the AST appropriately to avoid casting the JSON path 4837 while isinstance(path, exp.Cast): 4838 casts.append(path.to) 4839 path = path.this 4840 4841 if casts: 4842 dcolon_offset = next( 4843 i 4844 for i, t in enumerate(self._tokens[start_index:]) 4845 if t.token_type == TokenType.DCOLON 4846 ) 4847 end_token = self._tokens[start_index + dcolon_offset - 1] 4848 else: 4849 end_token = self._prev 4850 4851 if path: 4852 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4853 4854 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4855 # Databricks transforms it back to the colon/dot notation 4856 if json_path: 4857 this = self.expression( 4858 exp.JSONExtract, 4859 this=this, 4860 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4861 variant_extract=True, 4862 ) 4863 4864 while casts: 4865 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4866 4867 return this 4868 4869 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4870 return self._parse_types() 4871 4872 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4873 this = self._parse_bracket(this) 4874 4875 while 
self._match_set(self.COLUMN_OPERATORS): 4876 op_token = self._prev.token_type 4877 op = self.COLUMN_OPERATORS.get(op_token) 4878 4879 if op_token == TokenType.DCOLON: 4880 field = self._parse_dcolon() 4881 if not field: 4882 self.raise_error("Expected type") 4883 elif op and self._curr: 4884 field = self._parse_column_reference() 4885 else: 4886 field = self._parse_field(any_token=True, anonymous_func=True) 4887 4888 if isinstance(field, exp.Func) and this: 4889 # bigquery allows function calls like x.y.count(...) 4890 # SAFE.SUBSTR(...) 4891 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4892 this = exp.replace_tree( 4893 this, 4894 lambda n: ( 4895 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4896 if n.table 4897 else n.this 4898 ) 4899 if isinstance(n, exp.Column) 4900 else n, 4901 ) 4902 4903 if op: 4904 this = op(self, this, field) 4905 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4906 this = self.expression( 4907 exp.Column, 4908 this=field, 4909 table=this.this, 4910 db=this.args.get("table"), 4911 catalog=this.args.get("db"), 4912 ) 4913 else: 4914 this = self.expression(exp.Dot, this=this, expression=field) 4915 4916 this = self._parse_bracket(this) 4917 4918 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4919 4920 def _parse_primary(self) -> t.Optional[exp.Expression]: 4921 if self._match_set(self.PRIMARY_PARSERS): 4922 token_type = self._prev.token_type 4923 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4924 4925 if token_type == TokenType.STRING: 4926 expressions = [primary] 4927 while self._match(TokenType.STRING): 4928 expressions.append(exp.Literal.string(self._prev.text)) 4929 4930 if len(expressions) > 1: 4931 return self.expression(exp.Concat, expressions=expressions) 4932 4933 return primary 4934 4935 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4936 return exp.Literal.number(f"0.{self._prev.text}") 4937 4938 if self._match(TokenType.L_PAREN): 4939 comments = self._prev_comments 4940 query = self._parse_select() 4941 4942 if query: 4943 expressions = [query] 4944 else: 4945 expressions = self._parse_expressions() 4946 4947 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4948 4949 if not this and self._match(TokenType.R_PAREN, advance=False): 4950 this = self.expression(exp.Tuple) 4951 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4952 this = self._parse_subquery(this=this, parse_alias=False) 4953 elif isinstance(this, exp.Subquery): 4954 this = self._parse_subquery( 4955 this=self._parse_set_operations(this), parse_alias=False 4956 ) 4957 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4958 this = self.expression(exp.Tuple, expressions=expressions) 4959 else: 4960 this = self.expression(exp.Paren, this=this) 4961 4962 if this: 4963 this.add_comments(comments) 4964 4965 self._match_r_paren(expression=this) 4966 return this 4967 4968 return None 4969 4970 def _parse_field( 4971 self, 4972 any_token: bool = False, 4973 tokens: t.Optional[t.Collection[TokenType]] = None, 4974 anonymous_func: bool = False, 4975 ) -> t.Optional[exp.Expression]: 4976 if anonymous_func: 4977 field = ( 4978 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4979 or self._parse_primary() 4980 ) 4981 else: 4982 field = self._parse_primary() or self._parse_function( 4983 anonymous=anonymous_func, any_token=any_token 4984 ) 4985 return field or 
self._parse_id_var(any_token=any_token, tokens=tokens) 4986 4987 def _parse_function( 4988 self, 4989 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4990 anonymous: bool = False, 4991 optional_parens: bool = True, 4992 any_token: bool = False, 4993 ) -> t.Optional[exp.Expression]: 4994 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4995 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4996 fn_syntax = False 4997 if ( 4998 self._match(TokenType.L_BRACE, advance=False) 4999 and self._next 5000 and self._next.text.upper() == "FN" 5001 ): 5002 self._advance(2) 5003 fn_syntax = True 5004 5005 func = self._parse_function_call( 5006 functions=functions, 5007 anonymous=anonymous, 5008 optional_parens=optional_parens, 5009 any_token=any_token, 5010 ) 5011 5012 if fn_syntax: 5013 self._match(TokenType.R_BRACE) 5014 5015 return func 5016 5017 def _parse_function_call( 5018 self, 5019 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5020 anonymous: bool = False, 5021 optional_parens: bool = True, 5022 any_token: bool = False, 5023 ) -> t.Optional[exp.Expression]: 5024 if not self._curr: 5025 return None 5026 5027 comments = self._curr.comments 5028 token_type = self._curr.token_type 5029 this = self._curr.text 5030 upper = this.upper() 5031 5032 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5033 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5034 self._advance() 5035 return self._parse_window(parser(self)) 5036 5037 if not self._next or self._next.token_type != TokenType.L_PAREN: 5038 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5039 self._advance() 5040 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5041 5042 return None 5043 5044 if any_token: 5045 if token_type in self.RESERVED_TOKENS: 5046 return None 5047 elif token_type not in self.FUNC_TOKENS: 5048 return None 5049 5050 self._advance(2) 5051 5052 parser = self.FUNCTION_PARSERS.get(upper) 5053 if parser and not anonymous: 5054 this = parser(self) 5055 else: 5056 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5057 5058 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5059 this = self.expression(subquery_predicate, this=self._parse_select()) 5060 self._match_r_paren() 5061 return this 5062 5063 if functions is None: 5064 functions = self.FUNCTIONS 5065 5066 function = functions.get(upper) 5067 5068 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5069 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5070 5071 if alias: 5072 args = self._kv_to_prop_eq(args) 5073 5074 if function and not anonymous: 5075 if "dialect" in function.__code__.co_varnames: 5076 func = function(args, dialect=self.dialect) 5077 else: 5078 func = function(args) 5079 5080 func = self.validate_expression(func, args) 5081 if not self.dialect.NORMALIZE_FUNCTIONS: 5082 func.meta["name"] = this 5083 5084 this = func 5085 else: 5086 if token_type == TokenType.IDENTIFIER: 5087 this = exp.Identifier(this=this, quoted=True) 5088 this = self.expression(exp.Anonymous, this=this, expressions=args) 5089 5090 if isinstance(this, exp.Expression): 5091 this.add_comments(comments) 5092 5093 self._match_r_paren(this) 5094 return self._parse_window(this) 5095 5096 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5097 transformed = [] 5098 5099 for e in expressions: 5100 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5101 if isinstance(e, 
exp.Alias): 5102 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5103 5104 if not isinstance(e, exp.PropertyEQ): 5105 e = self.expression( 5106 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5107 ) 5108 5109 if isinstance(e.this, exp.Column): 5110 e.this.replace(e.this.this) 5111 5112 transformed.append(e) 5113 5114 return transformed 5115 5116 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5117 return self._parse_column_def(self._parse_id_var()) 5118 5119 def _parse_user_defined_function( 5120 self, kind: t.Optional[TokenType] = None 5121 ) -> t.Optional[exp.Expression]: 5122 this = self._parse_id_var() 5123 5124 while self._match(TokenType.DOT): 5125 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5126 5127 if not self._match(TokenType.L_PAREN): 5128 return this 5129 5130 expressions = self._parse_csv(self._parse_function_parameter) 5131 self._match_r_paren() 5132 return self.expression( 5133 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5134 ) 5135 5136 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5137 literal = self._parse_primary() 5138 if literal: 5139 return self.expression(exp.Introducer, this=token.text, expression=literal) 5140 5141 return self.expression(exp.Identifier, this=token.text) 5142 5143 def _parse_session_parameter(self) -> exp.SessionParameter: 5144 kind = None 5145 this = self._parse_id_var() or self._parse_primary() 5146 5147 if this and self._match(TokenType.DOT): 5148 kind = this.name 5149 this = self._parse_var() or self._parse_primary() 5150 5151 return self.expression(exp.SessionParameter, this=this, kind=kind) 5152 5153 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5154 return self._parse_id_var() 5155 5156 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5157 index = self._index 5158 5159 if self._match(TokenType.L_PAREN): 5160 expressions = t.cast( 5161 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5162 ) 5163 5164 if not self._match(TokenType.R_PAREN): 5165 self._retreat(index) 5166 else: 5167 expressions = [self._parse_lambda_arg()] 5168 5169 if self._match_set(self.LAMBDAS): 5170 return self.LAMBDAS[self._prev.token_type](self, expressions) 5171 5172 self._retreat(index) 5173 5174 this: t.Optional[exp.Expression] 5175 5176 if self._match(TokenType.DISTINCT): 5177 this = self.expression( 5178 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5179 ) 5180 else: 5181 this = self._parse_select_or_expression(alias=alias) 5182 5183 return self._parse_limit( 5184 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5185 ) 5186 5187 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5188 index = self._index 5189 if not self._match(TokenType.L_PAREN): 5190 return this 5191 5192 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5193 # expr can be of both types 5194 if self._match_set(self.SELECT_START_TOKENS): 5195 self._retreat(index) 5196 return this 5197 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5198 self._match_r_paren() 5199 return self.expression(exp.Schema, this=this, expressions=args) 5200 5201 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5202 return self._parse_column_def(self._parse_field(any_token=True)) 5203 5204 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5205 # column defs are not really columns, they're identifiers 5206 if isinstance(this, exp.Column): 5207 this = this.this 5208 5209 kind = self._parse_types(schema=True) 5210 5211 if self._match_text_seq("FOR", "ORDINALITY"): 5212 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5213 5214 constraints: t.List[exp.Expression] = [] 5215 5216 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5217 ("ALIAS", "MATERIALIZED") 5218 ): 5219 persisted = self._prev.text.upper() == "MATERIALIZED" 5220 constraints.append( 5221 self.expression( 5222 exp.ComputedColumnConstraint, 5223 this=self._parse_assignment(), 5224 persisted=persisted or self._match_text_seq("PERSISTED"), 5225 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5226 ) 5227 ) 5228 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5229 self._match(TokenType.ALIAS) 5230 constraints.append( 5231 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5232 ) 5233 5234 while True: 5235 constraint = self._parse_column_constraint() 5236 if not constraint: 5237 break 5238 constraints.append(constraint) 5239 5240 if not kind and not constraints: 5241 return this 5242 5243 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5244 5245 def _parse_auto_increment( 5246 self, 5247 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5248 start = None 5249 increment = None 5250 5251 if self._match(TokenType.L_PAREN, advance=False): 5252 args = self._parse_wrapped_csv(self._parse_bitwise) 5253 start = seq_get(args, 0) 5254 increment = seq_get(args, 1) 5255 elif self._match_text_seq("START"): 5256 start = self._parse_bitwise() 5257 self._match_text_seq("INCREMENT") 5258 increment = self._parse_bitwise() 5259 5260 if start and increment: 5261 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5262 5263 return exp.AutoIncrementColumnConstraint() 5264 5265 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5266 if not self._match_text_seq("REFRESH"): 5267 self._retreat(self._index - 1) 5268 return None 5269 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5270 5271 def _parse_compress(self) -> exp.CompressColumnConstraint: 5272 if self._match(TokenType.L_PAREN, advance=False): 5273 return self.expression( 5274 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5275 ) 5276 5277 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5278 5279 def _parse_generated_as_identity( 5280 self, 5281 ) -> ( 5282 exp.GeneratedAsIdentityColumnConstraint 5283 | exp.ComputedColumnConstraint 5284 | exp.GeneratedAsRowColumnConstraint 5285 ): 5286 if self._match_text_seq("BY", "DEFAULT"): 5287 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5288 this = self.expression( 5289 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5290 ) 5291 else: 5292 self._match_text_seq("ALWAYS") 5293 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5294 5295 self._match(TokenType.ALIAS) 5296 5297 if self._match_text_seq("ROW"): 5298 start = self._match_text_seq("START") 5299 if not start: 5300 self._match(TokenType.END) 5301 hidden = self._match_text_seq("HIDDEN") 5302 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5303 5304 identity = self._match_text_seq("IDENTITY") 5305 5306 if self._match(TokenType.L_PAREN): 5307 if self._match(TokenType.START_WITH): 5308 this.set("start", self._parse_bitwise()) 5309 if self._match_text_seq("INCREMENT", "BY"): 5310 this.set("increment", self._parse_bitwise()) 5311 if self._match_text_seq("MINVALUE"): 5312 this.set("minvalue", self._parse_bitwise()) 5313 if self._match_text_seq("MAXVALUE"): 5314 this.set("maxvalue", self._parse_bitwise()) 5315 5316 if self._match_text_seq("CYCLE"): 5317 this.set("cycle", True) 5318 elif self._match_text_seq("NO", "CYCLE"): 5319 this.set("cycle", False) 5320 5321 if not identity: 5322 this.set("expression", self._parse_range()) 5323 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5324 args = self._parse_csv(self._parse_bitwise) 5325 this.set("start", seq_get(args, 0)) 5326 this.set("increment", seq_get(args, 1)) 5327 5328 self._match_r_paren() 5329 5330 return this 5331 5332 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5333 self._match_text_seq("LENGTH") 5334 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5335 5336 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5337 if self._match_text_seq("NULL"): 5338 return self.expression(exp.NotNullColumnConstraint) 5339 if self._match_text_seq("CASESPECIFIC"): 5340 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5341 if self._match_text_seq("FOR", "REPLICATION"): 5342 return self.expression(exp.NotForReplicationColumnConstraint) 5343 return None 5344 5345 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5346 if self._match(TokenType.CONSTRAINT): 5347 this = self._parse_id_var() 5348 else: 5349 this = None 5350 5351 if self._match_texts(self.CONSTRAINT_PARSERS): 5352 return self.expression( 5353 exp.ColumnConstraint, 5354 this=this, 5355 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5356 ) 5357 5358 return this 5359 5360 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5361 if not self._match(TokenType.CONSTRAINT): 5362 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5363 5364 return self.expression( 5365 exp.Constraint, 5366 this=self._parse_id_var(), 5367 expressions=self._parse_unnamed_constraints(), 5368 ) 5369 5370 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5371 constraints = [] 5372 while True: 5373 constraint = self._parse_unnamed_constraint() or self._parse_function() 5374 if not constraint: 5375 break 5376 constraints.append(constraint) 5377 5378 return constraints 5379 5380 def _parse_unnamed_constraint( 5381 self, constraints: t.Optional[t.Collection[str]] = None 5382 ) -> t.Optional[exp.Expression]: 5383 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5384 constraints or self.CONSTRAINT_PARSERS 5385 ): 5386 return None 5387 5388 constraint = self._prev.text.upper() 5389 if constraint not in self.CONSTRAINT_PARSERS: 5390 
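# At this point the keyword matched _match_texts above against a caller-supplied
# collection (e.g. SCHEMA_UNNAMED_CONSTRAINTS), but it has no entry in
# CONSTRAINT_PARSERS, so we fail loudly instead of silently mis-parsing it: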
self.raise_error(f"No parser found for schema constraint {constraint}.") 5391 5392 return self.CONSTRAINT_PARSERS[constraint](self) 5393 5394 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5395 return self._parse_id_var(any_token=False) 5396 5397 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5398 self._match_text_seq("KEY") 5399 return self.expression( 5400 exp.UniqueColumnConstraint, 5401 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5402 this=self._parse_schema(self._parse_unique_key()), 5403 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5404 on_conflict=self._parse_on_conflict(), 5405 ) 5406 5407 def _parse_key_constraint_options(self) -> t.List[str]: 5408 options = [] 5409 while True: 5410 if not self._curr: 5411 break 5412 5413 if self._match(TokenType.ON): 5414 action = None 5415 on = self._advance_any() and self._prev.text 5416 5417 if self._match_text_seq("NO", "ACTION"): 5418 action = "NO ACTION" 5419 elif self._match_text_seq("CASCADE"): 5420 action = "CASCADE" 5421 elif self._match_text_seq("RESTRICT"): 5422 action = "RESTRICT" 5423 elif self._match_pair(TokenType.SET, TokenType.NULL): 5424 action = "SET NULL" 5425 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5426 action = "SET DEFAULT" 5427 else: 5428 self.raise_error("Invalid key constraint") 5429 5430 options.append(f"ON {on} {action}") 5431 else: 5432 var = self._parse_var_from_options( 5433 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5434 ) 5435 if not var: 5436 break 5437 options.append(var.name) 5438 5439 return options 5440 5441 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5442 if match and not self._match(TokenType.REFERENCES): 5443 return None 5444 5445 expressions = None 5446 this = self._parse_table(schema=True) 5447 options = self._parse_key_constraint_options() 5448 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5449 5450 def _parse_foreign_key(self) -> exp.ForeignKey: 5451 expressions = self._parse_wrapped_id_vars() 5452 reference = self._parse_references() 5453 options = {} 5454 5455 while self._match(TokenType.ON): 5456 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5457 self.raise_error("Expected DELETE or UPDATE") 5458 5459 kind = self._prev.text.lower() 5460 5461 if self._match_text_seq("NO", "ACTION"): 5462 action = "NO ACTION" 5463 elif self._match(TokenType.SET): 5464 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5465 action = "SET " + self._prev.text.upper() 5466 else: 5467 self._advance() 5468 action = self._prev.text.upper() 5469 5470 options[kind] = action 5471 5472 return self.expression( 5473 exp.ForeignKey, 5474 expressions=expressions, 5475 reference=reference, 5476 **options, # type: ignore 5477 ) 5478 5479 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5480 return self._parse_field() 5481 5482 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5483 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5484 self._retreat(self._index - 1) 5485 return None 5486 5487 id_vars = self._parse_wrapped_id_vars() 5488 return self.expression( 5489 exp.PeriodForSystemTimeConstraint, 5490 this=seq_get(id_vars, 0), 5491 expression=seq_get(id_vars, 1), 5492 ) 5493 5494 def _parse_primary_key( 5495 self, wrapped_optional: bool = False, in_props: bool = False 5496 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5497 desc = ( 5498 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5499 and self._prev.token_type == TokenType.DESC 5500 ) 5501 5502 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5503 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5504 5505 expressions = self._parse_wrapped_csv( 5506 self._parse_primary_key_part, optional=wrapped_optional 5507 ) 5508 options = self._parse_key_constraint_options() 5509 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5510 5511 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5512 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5513 5514 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5515 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5516 return this 5517 5518 bracket_kind = self._prev.token_type 5519 expressions = self._parse_csv( 5520 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5521 ) 5522 5523 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5524 self.raise_error("Expected ]") 5525 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5526 self.raise_error("Expected }") 5527 5528 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5529 if bracket_kind == TokenType.L_BRACE: 5530 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5531 elif not this: 5532 this = build_array_constructor( 5533 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5534 ) 5535 else: 5536 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5537 if constructor_type: 5538 return build_array_constructor( 5539 constructor_type, 5540 args=expressions, 5541 bracket_kind=bracket_kind, 5542 dialect=self.dialect, 5543 ) 5544 5545 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5546 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5547 5548 self._add_comments(this) 5549 return self._parse_bracket(this) 5550 5551 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5552 if self._match(TokenType.COLON): 5553 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5554 return this 5555 5556 def _parse_case(self) -> t.Optional[exp.Expression]: 5557 ifs = [] 5558 default = None 5559 5560 comments = self._prev_comments 5561 expression = self._parse_assignment() 5562 5563 while self._match(TokenType.WHEN): 5564 this = self._parse_assignment() 5565 self._match(TokenType.THEN) 5566 then = self._parse_assignment() 5567 ifs.append(self.expression(exp.If, this=this, true=then)) 5568 5569 if self._match(TokenType.ELSE): 5570 default = self._parse_assignment() 5571 5572 if not self._match(TokenType.END): 5573 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5574 default = exp.column("interval") 5575 else: 5576 self.raise_error("Expected END after CASE", self._prev) 5577 5578 return self.expression( 5579 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5580 ) 5581 5582 def _parse_if(self) -> t.Optional[exp.Expression]: 5583 if self._match(TokenType.L_PAREN): 5584 args = self._parse_csv(self._parse_assignment) 5585 this = self.validate_expression(exp.If.from_arg_list(args), args) 5586 self._match_r_paren() 5587 else: 5588 index = self._index - 1 5589 5590 if self.NO_PAREN_IF_COMMANDS and index == 
0: 5591 return self._parse_as_command(self._prev) 5592 5593 condition = self._parse_assignment() 5594 5595 if not condition: 5596 self._retreat(index) 5597 return None 5598 5599 self._match(TokenType.THEN) 5600 true = self._parse_assignment() 5601 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5602 self._match(TokenType.END) 5603 this = self.expression(exp.If, this=condition, true=true, false=false) 5604 5605 return this 5606 5607 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5608 if not self._match_text_seq("VALUE", "FOR"): 5609 self._retreat(self._index - 1) 5610 return None 5611 5612 return self.expression( 5613 exp.NextValueFor, 5614 this=self._parse_column(), 5615 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5616 ) 5617 5618 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5619 this = self._parse_function() or self._parse_var_or_string(upper=True) 5620 5621 if self._match(TokenType.FROM): 5622 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5623 5624 if not self._match(TokenType.COMMA): 5625 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5626 5627 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5628 5629 def _parse_gap_fill(self) -> exp.GapFill: 5630 self._match(TokenType.TABLE) 5631 this = self._parse_table() 5632 5633 self._match(TokenType.COMMA) 5634 args = [this, *self._parse_csv(self._parse_lambda)] 5635 5636 gap_fill = exp.GapFill.from_arg_list(args) 5637 return self.validate_expression(gap_fill, args) 5638 5639 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5640 this = self._parse_assignment() 5641 5642 if not self._match(TokenType.ALIAS): 5643 if self._match(TokenType.COMMA): 5644 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5645 5646 self.raise_error("Expected AS after CAST") 5647 5648 fmt = None 5649 to = self._parse_types() 5650 5651 if self._match(TokenType.FORMAT): 5652 fmt_string = self._parse_string() 5653 fmt = self._parse_at_time_zone(fmt_string) 5654 5655 if not to: 5656 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5657 if to.this in exp.DataType.TEMPORAL_TYPES: 5658 this = self.expression( 5659 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5660 this=this, 5661 format=exp.Literal.string( 5662 format_time( 5663 fmt_string.this if fmt_string else "", 5664 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5665 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5666 ) 5667 ), 5668 safe=safe, 5669 ) 5670 5671 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5672 this.set("zone", fmt.args["zone"]) 5673 return this 5674 elif not to: 5675 self.raise_error("Expected TYPE after CAST") 5676 elif isinstance(to, exp.Identifier): 5677 to = exp.DataType.build(to.name, udt=True) 5678 elif to.this == exp.DataType.Type.CHAR: 5679 if self._match(TokenType.CHARACTER_SET): 5680 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5681 5682 return self.expression( 5683 exp.Cast if strict else exp.TryCast, 5684 this=this, 5685 to=to, 5686 format=fmt, 5687 safe=safe, 5688 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5689 ) 5690 5691 def _parse_string_agg(self) -> exp.Expression: 5692 if self._match(TokenType.DISTINCT): 5693 args: t.List[t.Optional[exp.Expression]] = [ 5694 self.expression(exp.Distinct, 
expressions=[self._parse_assignment()]) 5695 ] 5696 if self._match(TokenType.COMMA): 5697 args.extend(self._parse_csv(self._parse_assignment)) 5698 else: 5699 args = self._parse_csv(self._parse_assignment) # type: ignore 5700 5701 index = self._index 5702 if not self._match(TokenType.R_PAREN) and args: 5703 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5704 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5705 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5706 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5707 5708 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5709 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5710 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5711 if not self._match_text_seq("WITHIN", "GROUP"): 5712 self._retreat(index) 5713 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5714 5715 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5716 order = self._parse_order(this=seq_get(args, 0)) 5717 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5718 5719 def _parse_convert( 5720 self, strict: bool, safe: t.Optional[bool] = None 5721 ) -> t.Optional[exp.Expression]: 5722 this = self._parse_bitwise() 5723 5724 if self._match(TokenType.USING): 5725 to: t.Optional[exp.Expression] = self.expression( 5726 exp.CharacterSet, this=self._parse_var() 5727 ) 5728 elif self._match(TokenType.COMMA): 5729 to = self._parse_types() 5730 else: 5731 to = None 5732 5733 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5734 5735 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5736 """ 5737 There are generally two variants of the DECODE function: 5738 5739 - DECODE(bin, charset) 5740 - DECODE(expression, search, result [, search, result] ... [, default]) 5741 5742 The second variant will always be parsed into a CASE expression. Note that NULL 5743 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5744 instead of relying on pattern matching. 
5745 """ 5746 args = self._parse_csv(self._parse_assignment) 5747 5748 if len(args) < 3: 5749 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5750 5751 expression, *expressions = args 5752 if not expression: 5753 return None 5754 5755 ifs = [] 5756 for search, result in zip(expressions[::2], expressions[1::2]): 5757 if not search or not result: 5758 return None 5759 5760 if isinstance(search, exp.Literal): 5761 ifs.append( 5762 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5763 ) 5764 elif isinstance(search, exp.Null): 5765 ifs.append( 5766 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5767 ) 5768 else: 5769 cond = exp.or_( 5770 exp.EQ(this=expression.copy(), expression=search), 5771 exp.and_( 5772 exp.Is(this=expression.copy(), expression=exp.Null()), 5773 exp.Is(this=search.copy(), expression=exp.Null()), 5774 copy=False, 5775 ), 5776 copy=False, 5777 ) 5778 ifs.append(exp.If(this=cond, true=result)) 5779 5780 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5781 5782 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5783 self._match_text_seq("KEY") 5784 key = self._parse_column() 5785 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5786 self._match_text_seq("VALUE") 5787 value = self._parse_bitwise() 5788 5789 if not key and not value: 5790 return None 5791 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5792 5793 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5794 if not this or not self._match_text_seq("FORMAT", "JSON"): 5795 return this 5796 5797 return self.expression(exp.FormatJson, this=this) 5798 5799 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5800 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5801 for value in values: 5802 if self._match_text_seq(value, "ON", on): 5803 return f"{value} ON {on}" 5804 5805 return None 5806 5807 @t.overload 5808 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5809 5810 @t.overload 5811 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
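# Illustrative note (not part of the source): the implementation below covers both
# the scalar and the aggregate form. Assuming a recent sqlglot, the clause parsing
# can be sanity-checked through the public API:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> node = sqlglot.parse_one("SELECT JSON_OBJECT('a': 1 ABSENT ON NULL)")
#   >>> node.find(exp.JSONObject).args.get("null_handling")
#   'ABSENT ON NULL'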
5812 5813 def _parse_json_object(self, agg=False): 5814 star = self._parse_star() 5815 expressions = ( 5816 [star] 5817 if star 5818 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5819 ) 5820 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5821 5822 unique_keys = None 5823 if self._match_text_seq("WITH", "UNIQUE"): 5824 unique_keys = True 5825 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5826 unique_keys = False 5827 5828 self._match_text_seq("KEYS") 5829 5830 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5831 self._parse_type() 5832 ) 5833 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5834 5835 return self.expression( 5836 exp.JSONObjectAgg if agg else exp.JSONObject, 5837 expressions=expressions, 5838 null_handling=null_handling, 5839 unique_keys=unique_keys, 5840 return_type=return_type, 5841 encoding=encoding, 5842 ) 5843 5844 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5845 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5846 if not self._match_text_seq("NESTED"): 5847 this = self._parse_id_var() 5848 kind = self._parse_types(allow_identifiers=False) 5849 nested = None 5850 else: 5851 this = None 5852 kind = None 5853 nested = True 5854 5855 path = self._match_text_seq("PATH") and self._parse_string() 5856 nested_schema = nested and self._parse_json_schema() 5857 5858 return self.expression( 5859 exp.JSONColumnDef, 5860 this=this, 5861 kind=kind, 5862 path=path, 5863 nested_schema=nested_schema, 5864 ) 5865 5866 def _parse_json_schema(self) -> exp.JSONSchema: 5867 self._match_text_seq("COLUMNS") 5868 return self.expression( 5869 exp.JSONSchema, 5870 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5871 ) 5872 5873 def _parse_json_table(self) -> exp.JSONTable: 5874 this = self._parse_format_json(self._parse_bitwise()) 5875 path = self._match(TokenType.COMMA) and self._parse_string() 5876 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5877 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5878 schema = self._parse_json_schema() 5879 5880 return exp.JSONTable( 5881 this=this, 5882 schema=schema, 5883 path=path, 5884 error_handling=error_handling, 5885 empty_handling=empty_handling, 5886 ) 5887 5888 def _parse_match_against(self) -> exp.MatchAgainst: 5889 expressions = self._parse_csv(self._parse_column) 5890 5891 self._match_text_seq(")", "AGAINST", "(") 5892 5893 this = self._parse_string() 5894 5895 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5896 modifier = "IN NATURAL LANGUAGE MODE" 5897 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5898 modifier = f"{modifier} WITH QUERY EXPANSION" 5899 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5900 modifier = "IN BOOLEAN MODE" 5901 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5902 modifier = "WITH QUERY EXPANSION" 5903 else: 5904 modifier = None 5905 5906 return self.expression( 5907 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5908 ) 5909 5910 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5911 def _parse_open_json(self) -> exp.OpenJSON: 5912 this = self._parse_bitwise() 5913 path = self._match(TokenType.COMMA) and self._parse_string() 5914 5915 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5916 this = self._parse_field(any_token=True) 5917 kind = self._parse_types() 5918 path = 
self._parse_string() 5919 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5920 5921 return self.expression( 5922 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5923 ) 5924 5925 expressions = None 5926 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5927 self._match_l_paren() 5928 expressions = self._parse_csv(_parse_open_json_column_def) 5929 5930 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5931 5932 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5933 args = self._parse_csv(self._parse_bitwise) 5934 5935 if self._match(TokenType.IN): 5936 return self.expression( 5937 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5938 ) 5939 5940 if haystack_first: 5941 haystack = seq_get(args, 0) 5942 needle = seq_get(args, 1) 5943 else: 5944 needle = seq_get(args, 0) 5945 haystack = seq_get(args, 1) 5946 5947 return self.expression( 5948 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5949 ) 5950 5951 def _parse_predict(self) -> exp.Predict: 5952 self._match_text_seq("MODEL") 5953 this = self._parse_table() 5954 5955 self._match(TokenType.COMMA) 5956 self._match_text_seq("TABLE") 5957 5958 return self.expression( 5959 exp.Predict, 5960 this=this, 5961 expression=self._parse_table(), 5962 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5963 ) 5964 5965 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5966 args = self._parse_csv(self._parse_table) 5967 return exp.JoinHint(this=func_name.upper(), expressions=args) 5968 5969 def _parse_substring(self) -> exp.Substring: 5970 # Postgres supports the form: substring(string [from int] [for int]) 5971 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5972 5973 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5974 5975 if self._match(TokenType.FROM): 5976 args.append(self._parse_bitwise()) 5977 if self._match(TokenType.FOR): 5978 if len(args) == 1: 5979 args.append(exp.Literal.number(1)) 5980 args.append(self._parse_bitwise()) 5981 5982 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5983 5984 def _parse_trim(self) -> exp.Trim: 5985 # https://www.w3resource.com/sql/character-functions/trim.php 5986 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5987 5988 position = None 5989 collation = None 5990 expression = None 5991 5992 if self._match_texts(self.TRIM_TYPES): 5993 position = self._prev.text.upper() 5994 5995 this = self._parse_bitwise() 5996 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5997 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5998 expression = self._parse_bitwise() 5999 6000 if invert_order: 6001 this, expression = expression, this 6002 6003 if self._match(TokenType.COLLATE): 6004 collation = self._parse_bitwise() 6005 6006 return self.expression( 6007 exp.Trim, this=this, position=position, expression=expression, collation=collation 6008 ) 6009 6010 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6011 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6012 6013 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6014 return self._parse_window(self._parse_id_var(), alias=True) 6015 6016 def _parse_respect_or_ignore_nulls( 6017 self, this: t.Optional[exp.Expression] 6018 ) -> t.Optional[exp.Expression]: 6019 if self._match_text_seq("IGNORE", "NULLS"): 
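# Wrap the expression parsed so far, e.g. FIRST_VALUE(x) IGNORE NULLS becomes
# IgnoreNulls(this=FirstValue(this=x)); _parse_window relies on this wrapper to
# normalize both placements of the clause: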
6020 return self.expression(exp.IgnoreNulls, this=this) 6021 if self._match_text_seq("RESPECT", "NULLS"): 6022 return self.expression(exp.RespectNulls, this=this) 6023 return this 6024 6025 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6026 if self._match(TokenType.HAVING): 6027 self._match_texts(("MAX", "MIN")) 6028 max = self._prev.text.upper() != "MIN" 6029 return self.expression( 6030 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6031 ) 6032 6033 return this 6034 6035 def _parse_window( 6036 self, this: t.Optional[exp.Expression], alias: bool = False 6037 ) -> t.Optional[exp.Expression]: 6038 func = this 6039 comments = func.comments if isinstance(func, exp.Expression) else None 6040 6041 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6042 self._match(TokenType.WHERE) 6043 this = self.expression( 6044 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6045 ) 6046 self._match_r_paren() 6047 6048 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6049 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6050 if self._match_text_seq("WITHIN", "GROUP"): 6051 order = self._parse_wrapped(self._parse_order) 6052 this = self.expression(exp.WithinGroup, this=this, expression=order) 6053 6054 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6055 # Some dialects choose to implement and some do not. 6056 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6057 6058 # There is some code above in _parse_lambda that handles 6059 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6060 6061 # The below changes handle 6062 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6063 6064 # Oracle allows both formats 6065 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6066 # and Snowflake chose to do the same for familiarity 6067 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6068 if isinstance(this, exp.AggFunc): 6069 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6070 6071 if ignore_respect and ignore_respect is not this: 6072 ignore_respect.replace(ignore_respect.this) 6073 this = self.expression(ignore_respect.__class__, this=this) 6074 6075 this = self._parse_respect_or_ignore_nulls(this) 6076 6077 # bigquery select from window x AS (partition by ...) 
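# For instance, both placements normalize to the same tree shape. An illustrative
# check through the public API, assuming a recent sqlglot:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> q1 = "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t"
#   >>> q2 = "SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t"
#   >>> w1 = sqlglot.parse_one(q1).find(exp.Window)
#   >>> w2 = sqlglot.parse_one(q2).find(exp.Window)
#   >>> isinstance(w1.this, exp.IgnoreNulls) and isinstance(w2.this, exp.IgnoreNulls)
#   True
#
# As for the BigQuery named-window syntax mentioned above, e.g.
# SELECT FIRST_VALUE(x) OVER w1 FROM t WINDOW w1 AS (PARTITION BY y),
# _parse_named_window calls this method with alias=True, so no OVER keyword is
# expected before the parenthesized spec: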
6078 if alias: 6079 over = None 6080 self._match(TokenType.ALIAS) 6081 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6082 return this 6083 else: 6084 over = self._prev.text.upper() 6085 6086 if comments and isinstance(func, exp.Expression): 6087 func.pop_comments() 6088 6089 if not self._match(TokenType.L_PAREN): 6090 return self.expression( 6091 exp.Window, 6092 comments=comments, 6093 this=this, 6094 alias=self._parse_id_var(False), 6095 over=over, 6096 ) 6097 6098 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6099 6100 first = self._match(TokenType.FIRST) 6101 if self._match_text_seq("LAST"): 6102 first = False 6103 6104 partition, order = self._parse_partition_and_order() 6105 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6106 6107 if kind: 6108 self._match(TokenType.BETWEEN) 6109 start = self._parse_window_spec() 6110 self._match(TokenType.AND) 6111 end = self._parse_window_spec() 6112 6113 spec = self.expression( 6114 exp.WindowSpec, 6115 kind=kind, 6116 start=start["value"], 6117 start_side=start["side"], 6118 end=end["value"], 6119 end_side=end["side"], 6120 ) 6121 else: 6122 spec = None 6123 6124 self._match_r_paren() 6125 6126 window = self.expression( 6127 exp.Window, 6128 comments=comments, 6129 this=this, 6130 partition_by=partition, 6131 order=order, 6132 spec=spec, 6133 alias=window_alias, 6134 over=over, 6135 first=first, 6136 ) 6137 6138 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6139 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6140 return self._parse_window(window, alias=alias) 6141 6142 return window 6143 6144 def _parse_partition_and_order( 6145 self, 6146 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6147 return self._parse_partition_by(), self._parse_order() 6148 6149 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6150 self._match(TokenType.BETWEEN) 6151 6152 return { 6153 "value": ( 6154 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6155 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6156 or self._parse_bitwise() 6157 ), 6158 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6159 } 6160 6161 def _parse_alias( 6162 self, this: t.Optional[exp.Expression], explicit: bool = False 6163 ) -> t.Optional[exp.Expression]: 6164 any_token = self._match(TokenType.ALIAS) 6165 comments = self._prev_comments or [] 6166 6167 if explicit and not any_token: 6168 return this 6169 6170 if self._match(TokenType.L_PAREN): 6171 aliases = self.expression( 6172 exp.Aliases, 6173 comments=comments, 6174 this=this, 6175 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6176 ) 6177 self._match_r_paren(aliases) 6178 return aliases 6179 6180 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6181 self.STRING_ALIASES and self._parse_string_as_identifier() 6182 ) 6183 6184 if alias: 6185 comments.extend(alias.pop_comments()) 6186 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6187 column = this.this 6188 6189 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6190 if not this.comments and column and column.comments: 6191 this.comments = column.pop_comments() 6192 6193 return this 6194 6195 def _parse_id_var( 6196 self, 6197 any_token: bool = True, 6198 tokens: t.Optional[t.Collection[TokenType]] = None, 6199 ) -> t.Optional[exp.Expression]: 6200 expression = self._parse_identifier() 6201 if 
not expression and ( 6202 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6203 ): 6204 quoted = self._prev.token_type == TokenType.STRING 6205 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6206 6207 return expression 6208 6209 def _parse_string(self) -> t.Optional[exp.Expression]: 6210 if self._match_set(self.STRING_PARSERS): 6211 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6212 return self._parse_placeholder() 6213 6214 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6215 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6216 6217 def _parse_number(self) -> t.Optional[exp.Expression]: 6218 if self._match_set(self.NUMERIC_PARSERS): 6219 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6220 return self._parse_placeholder() 6221 6222 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6223 if self._match(TokenType.IDENTIFIER): 6224 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6225 return self._parse_placeholder() 6226 6227 def _parse_var( 6228 self, 6229 any_token: bool = False, 6230 tokens: t.Optional[t.Collection[TokenType]] = None, 6231 upper: bool = False, 6232 ) -> t.Optional[exp.Expression]: 6233 if ( 6234 (any_token and self._advance_any()) 6235 or self._match(TokenType.VAR) 6236 or (self._match_set(tokens) if tokens else False) 6237 ): 6238 return self.expression( 6239 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6240 ) 6241 return self._parse_placeholder() 6242 6243 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6244 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6245 self._advance() 6246 return self._prev 6247 return None 6248 6249 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6250 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6251 6252 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6253 return self._parse_primary() or self._parse_var(any_token=True) 6254 6255 def _parse_null(self) -> t.Optional[exp.Expression]: 6256 if self._match_set(self.NULL_TOKENS): 6257 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6258 return self._parse_placeholder() 6259 6260 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6261 if self._match(TokenType.TRUE): 6262 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6263 if self._match(TokenType.FALSE): 6264 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6265 return self._parse_placeholder() 6266 6267 def _parse_star(self) -> t.Optional[exp.Expression]: 6268 if self._match(TokenType.STAR): 6269 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6270 return self._parse_placeholder() 6271 6272 def _parse_parameter(self) -> exp.Parameter: 6273 this = self._parse_identifier() or self._parse_primary_or_var() 6274 return self.expression(exp.Parameter, this=this) 6275 6276 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6277 if self._match_set(self.PLACEHOLDER_PARSERS): 6278 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6279 if placeholder: 6280 return placeholder 6281 self._advance(-1) 6282 return None 6283 6284 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6285 if not self._match_texts(keywords): 6286 return None 6287 if self._match(TokenType.L_PAREN, 
advance=False): 6288 return self._parse_wrapped_csv(self._parse_expression) 6289 6290 expression = self._parse_expression() 6291 return [expression] if expression else None 6292 6293 def _parse_csv( 6294 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6295 ) -> t.List[exp.Expression]: 6296 parse_result = parse_method() 6297 items = [parse_result] if parse_result is not None else [] 6298 6299 while self._match(sep): 6300 self._add_comments(parse_result) 6301 parse_result = parse_method() 6302 if parse_result is not None: 6303 items.append(parse_result) 6304 6305 return items 6306 6307 def _parse_tokens( 6308 self, parse_method: t.Callable, expressions: t.Dict 6309 ) -> t.Optional[exp.Expression]: 6310 this = parse_method() 6311 6312 while self._match_set(expressions): 6313 this = self.expression( 6314 expressions[self._prev.token_type], 6315 this=this, 6316 comments=self._prev_comments, 6317 expression=parse_method(), 6318 ) 6319 6320 return this 6321 6322 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6323 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6324 6325 def _parse_wrapped_csv( 6326 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6327 ) -> t.List[exp.Expression]: 6328 return self._parse_wrapped( 6329 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6330 ) 6331 6332 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6333 wrapped = self._match(TokenType.L_PAREN) 6334 if not wrapped and not optional: 6335 self.raise_error("Expecting (") 6336 parse_result = parse_method() 6337 if wrapped: 6338 self._match_r_paren() 6339 return parse_result 6340 6341 def _parse_expressions(self) -> t.List[exp.Expression]: 6342 return self._parse_csv(self._parse_expression) 6343 6344 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6345 return self._parse_select() or self._parse_set_operations( 6346 self._parse_expression() if alias else self._parse_assignment() 6347 ) 6348 6349 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6350 return self._parse_query_modifiers( 6351 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6352 ) 6353 6354 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6355 this = None 6356 if self._match_texts(self.TRANSACTION_KIND): 6357 this = self._prev.text 6358 6359 self._match_texts(("TRANSACTION", "WORK")) 6360 6361 modes = [] 6362 while True: 6363 mode = [] 6364 while self._match(TokenType.VAR): 6365 mode.append(self._prev.text) 6366 6367 if mode: 6368 modes.append(" ".join(mode)) 6369 if not self._match(TokenType.COMMA): 6370 break 6371 6372 return self.expression(exp.Transaction, this=this, modes=modes) 6373 6374 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6375 chain = None 6376 savepoint = None 6377 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6378 6379 self._match_texts(("TRANSACTION", "WORK")) 6380 6381 if self._match_text_seq("TO"): 6382 self._match_text_seq("SAVEPOINT") 6383 savepoint = self._parse_id_var() 6384 6385 if self._match(TokenType.AND): 6386 chain = not self._match_text_seq("NO") 6387 self._match_text_seq("CHAIN") 6388 6389 if is_rollback: 6390 return self.expression(exp.Rollback, savepoint=savepoint) 6391 6392 return self.expression(exp.Commit, chain=chain) 6393 6394 def _parse_refresh(self) -> exp.Refresh: 6395 self._match(TokenType.TABLE) 6396 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6397 6398 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6399 if not self._match_text_seq("ADD"): 6400 return None 6401 6402 self._match(TokenType.COLUMN) 6403 exists_column = self._parse_exists(not_=True) 6404 expression = self._parse_field_def() 6405 6406 if expression: 6407 expression.set("exists", exists_column) 6408 6409 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6410 if self._match_texts(("FIRST", "AFTER")): 6411 position = self._prev.text 6412 column_position = self.expression( 6413 exp.ColumnPosition, this=self._parse_column(), position=position 6414 ) 6415 expression.set("position", column_position) 6416 6417 return expression 6418 6419 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6420 drop = self._match(TokenType.DROP) and self._parse_drop() 6421 if drop and not isinstance(drop, exp.Command): 6422 drop.set("kind", drop.args.get("kind", "COLUMN")) 6423 return drop 6424 6425 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6426 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6427 return self.expression( 6428 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6429 ) 6430 6431 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6432 index = self._index - 1 6433 6434 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6435 return self._parse_csv( 6436 lambda: self.expression( 6437 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6438 ) 6439 ) 6440 6441 self._retreat(index) 6442 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6443 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6444 6445 if self._match_text_seq("ADD", "COLUMNS"): 6446 schema = self._parse_schema() 6447 if schema: 6448 return [schema] 6449 return [] 6450 6451 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6452 6453 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6454 if self._match_texts(self.ALTER_ALTER_PARSERS): 6455 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6456 6457 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6458 # keyword after ALTER we default to parsing this statement 6459 self._match(TokenType.COLUMN) 6460 column = self._parse_field(any_token=True) 6461 6462 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6463 return self.expression(exp.AlterColumn, this=column, drop=True) 6464 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6465 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6466 if self._match(TokenType.COMMENT): 6467 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6468 if self._match_text_seq("DROP", "NOT", "NULL"): 6469 return self.expression( 6470 exp.AlterColumn, 6471 this=column, 6472 drop=True, 6473 allow_null=True, 6474 ) 6475 if self._match_text_seq("SET", "NOT", "NULL"): 6476 return self.expression( 6477 exp.AlterColumn, 6478 this=column, 6479 allow_null=False, 6480 ) 6481 self._match_text_seq("SET", "DATA") 6482 self._match_text_seq("TYPE") 6483 return self.expression( 6484 exp.AlterColumn, 6485 this=column, 6486 dtype=self._parse_types(), 6487 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6488 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6489 ) 6490 6491 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6492 if self._match_texts(("ALL", "EVEN", "AUTO")): 6493 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6494 6495 self._match_text_seq("KEY", "DISTKEY") 6496 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6497 6498 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6499 if compound: 6500 self._match_text_seq("SORTKEY") 6501 6502 if self._match(TokenType.L_PAREN, advance=False): 6503 return self.expression( 6504 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6505 ) 6506 6507 self._match_texts(("AUTO", "NONE")) 6508 return self.expression( 6509 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6510 ) 6511 6512 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6513 index = self._index - 1 6514 6515 partition_exists = self._parse_exists() 6516 if self._match(TokenType.PARTITION, advance=False): 6517 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6518 6519 self._retreat(index) 6520 return self._parse_csv(self._parse_drop_column) 6521 6522 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6523 if self._match(TokenType.COLUMN): 6524 exists = self._parse_exists() 6525 old_column = self._parse_column() 6526 to = self._match_text_seq("TO") 6527 new_column = self._parse_column() 6528 6529 if old_column is None or to is None or new_column is None: 6530 return None 6531 6532 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6533 6534 self._match_text_seq("TO") 6535 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6536 6537 def _parse_alter_table_set(self) -> exp.AlterSet: 6538 alter_set = self.expression(exp.AlterSet) 6539 6540 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6541 "TABLE", "PROPERTIES" 6542 ): 6543 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6544 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6545 alter_set.set("expressions", [self._parse_assignment()]) 6546 elif self._match_texts(("LOGGED", "UNLOGGED")): 6547 alter_set.set("option", exp.var(self._prev.text.upper())) 6548 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6549 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6550 elif self._match_text_seq("LOCATION"): 6551 alter_set.set("location", self._parse_field()) 6552 elif self._match_text_seq("ACCESS", "METHOD"): 6553 alter_set.set("access_method", self._parse_field()) 6554 elif self._match_text_seq("TABLESPACE"): 6555 alter_set.set("tablespace", self._parse_field()) 6556 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6557 alter_set.set("file_format", [self._parse_field()]) 6558 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6559 alter_set.set("file_format", self._parse_wrapped_options()) 6560 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6561 alter_set.set("copy_options", self._parse_wrapped_options()) 6562 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6563 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6564 else: 6565 if self._match_text_seq("SERDE"): 6566 alter_set.set("serde", self._parse_field()) 6567 6568 alter_set.set("expressions", [self._parse_properties()]) 6569 6570 return 
alter_set 6571 6572 def _parse_alter(self) -> exp.Alter | exp.Command: 6573 start = self._prev 6574 6575 alter_token = self._match_set(self.ALTERABLES) and self._prev 6576 if not alter_token: 6577 return self._parse_as_command(start) 6578 6579 exists = self._parse_exists() 6580 only = self._match_text_seq("ONLY") 6581 this = self._parse_table(schema=True) 6582 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6583 6584 if self._next: 6585 self._advance() 6586 6587 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6588 if parser: 6589 actions = ensure_list(parser(self)) 6590 options = self._parse_csv(self._parse_property) 6591 6592 if not self._curr and actions: 6593 return self.expression( 6594 exp.Alter, 6595 this=this, 6596 kind=alter_token.text.upper(), 6597 exists=exists, 6598 actions=actions, 6599 only=only, 6600 options=options, 6601 cluster=cluster, 6602 ) 6603 6604 return self._parse_as_command(start) 6605 6606 def _parse_merge(self) -> exp.Merge: 6607 self._match(TokenType.INTO) 6608 target = self._parse_table() 6609 6610 if target and self._match(TokenType.ALIAS, advance=False): 6611 target.set("alias", self._parse_table_alias()) 6612 6613 self._match(TokenType.USING) 6614 using = self._parse_table() 6615 6616 self._match(TokenType.ON) 6617 on = self._parse_assignment() 6618 6619 return self.expression( 6620 exp.Merge, 6621 this=target, 6622 using=using, 6623 on=on, 6624 expressions=self._parse_when_matched(), 6625 ) 6626 6627 def _parse_when_matched(self) -> t.List[exp.When]: 6628 whens = [] 6629 6630 while self._match(TokenType.WHEN): 6631 matched = not self._match(TokenType.NOT) 6632 self._match_text_seq("MATCHED") 6633 source = ( 6634 False 6635 if self._match_text_seq("BY", "TARGET") 6636 else self._match_text_seq("BY", "SOURCE") 6637 ) 6638 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6639 6640 self._match(TokenType.THEN) 6641 6642 if self._match(TokenType.INSERT): 6643 _this = self._parse_star() 6644 if _this: 6645 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6646 else: 6647 then = self.expression( 6648 exp.Insert, 6649 this=self._parse_value(), 6650 expression=self._match_text_seq("VALUES") and self._parse_value(), 6651 ) 6652 elif self._match(TokenType.UPDATE): 6653 expressions = self._parse_star() 6654 if expressions: 6655 then = self.expression(exp.Update, expressions=expressions) 6656 else: 6657 then = self.expression( 6658 exp.Update, 6659 expressions=self._match(TokenType.SET) 6660 and self._parse_csv(self._parse_equality), 6661 ) 6662 elif self._match(TokenType.DELETE): 6663 then = self.expression(exp.Var, this=self._prev.text) 6664 else: 6665 then = None 6666 6667 whens.append( 6668 self.expression( 6669 exp.When, 6670 matched=matched, 6671 source=source, 6672 condition=condition, 6673 then=then, 6674 ) 6675 ) 6676 return whens 6677 6678 def _parse_show(self) -> t.Optional[exp.Expression]: 6679 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6680 if parser: 6681 return parser(self) 6682 return self._parse_as_command(self._prev) 6683 6684 def _parse_set_item_assignment( 6685 self, kind: t.Optional[str] = None 6686 ) -> t.Optional[exp.Expression]: 6687 index = self._index 6688 6689 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6690 return self._parse_set_transaction(global_=kind == "GLOBAL") 6691 6692 left = self._parse_primary() or self._parse_column() 6693 assignment_delimiter = self._match_texts(("=", 
"TO")) 6694 6695 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6696 self._retreat(index) 6697 return None 6698 6699 right = self._parse_statement() or self._parse_id_var() 6700 if isinstance(right, (exp.Column, exp.Identifier)): 6701 right = exp.var(right.name) 6702 6703 this = self.expression(exp.EQ, this=left, expression=right) 6704 return self.expression(exp.SetItem, this=this, kind=kind) 6705 6706 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6707 self._match_text_seq("TRANSACTION") 6708 characteristics = self._parse_csv( 6709 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6710 ) 6711 return self.expression( 6712 exp.SetItem, 6713 expressions=characteristics, 6714 kind="TRANSACTION", 6715 **{"global": global_}, # type: ignore 6716 ) 6717 6718 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6719 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6720 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6721 6722 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6723 index = self._index 6724 set_ = self.expression( 6725 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6726 ) 6727 6728 if self._curr: 6729 self._retreat(index) 6730 return self._parse_as_command(self._prev) 6731 6732 return set_ 6733 6734 def _parse_var_from_options( 6735 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6736 ) -> t.Optional[exp.Var]: 6737 start = self._curr 6738 if not start: 6739 return None 6740 6741 option = start.text.upper() 6742 continuations = options.get(option) 6743 6744 index = self._index 6745 self._advance() 6746 for keywords in continuations or []: 6747 if isinstance(keywords, str): 6748 keywords = (keywords,) 6749 6750 if self._match_text_seq(*keywords): 6751 option = f"{option} {' '.join(keywords)}" 6752 break 6753 else: 6754 if continuations or continuations is None: 6755 if raise_unmatched: 6756 self.raise_error(f"Unknown option {option}") 6757 6758 self._retreat(index) 6759 return None 6760 6761 return exp.var(option) 6762 6763 def _parse_as_command(self, start: Token) -> exp.Command: 6764 while self._curr: 6765 self._advance() 6766 text = self._find_sql(start, self._prev) 6767 size = len(start.text) 6768 self._warn_unsupported() 6769 return exp.Command(this=text[:size], expression=text[size:]) 6770 6771 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6772 settings = [] 6773 6774 self._match_l_paren() 6775 kind = self._parse_id_var() 6776 6777 if self._match(TokenType.L_PAREN): 6778 while True: 6779 key = self._parse_id_var() 6780 value = self._parse_primary() 6781 6782 if not key and value is None: 6783 break 6784 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6785 self._match(TokenType.R_PAREN) 6786 6787 self._match_r_paren() 6788 6789 return self.expression( 6790 exp.DictProperty, 6791 this=this, 6792 kind=kind.this if kind else None, 6793 settings=settings, 6794 ) 6795 6796 def _parse_dict_range(self, this: str) -> exp.DictRange: 6797 self._match_l_paren() 6798 has_min = self._match_text_seq("MIN") 6799 if has_min: 6800 min = self._parse_var() or self._parse_primary() 6801 self._match_text_seq("MAX") 6802 max = self._parse_var() or self._parse_primary() 6803 else: 6804 max = self._parse_var() or self._parse_primary() 6805 min = exp.Literal.number(0) 6806 self._match_r_paren() 6807 return self.expression(exp.DictRange, this=this, 
min=min, max=max) 6808 6809 def _parse_comprehension( 6810 self, this: t.Optional[exp.Expression] 6811 ) -> t.Optional[exp.Comprehension]: 6812 index = self._index 6813 expression = self._parse_column() 6814 if not self._match(TokenType.IN): 6815 self._retreat(index - 1) 6816 return None 6817 iterator = self._parse_column() 6818 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6819 return self.expression( 6820 exp.Comprehension, 6821 this=this, 6822 expression=expression, 6823 iterator=iterator, 6824 condition=condition, 6825 ) 6826 6827 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6828 if self._match(TokenType.HEREDOC_STRING): 6829 return self.expression(exp.Heredoc, this=self._prev.text) 6830 6831 if not self._match_text_seq("$"): 6832 return None 6833 6834 tags = ["$"] 6835 tag_text = None 6836 6837 if self._is_connected(): 6838 self._advance() 6839 tags.append(self._prev.text.upper()) 6840 else: 6841 self.raise_error("No closing $ found") 6842 6843 if tags[-1] != "$": 6844 if self._is_connected() and self._match_text_seq("$"): 6845 tag_text = tags[-1] 6846 tags.append("$") 6847 else: 6848 self.raise_error("No closing $ found") 6849 6850 heredoc_start = self._curr 6851 6852 while self._curr: 6853 if self._match_text_seq(*tags, advance=False): 6854 this = self._find_sql(heredoc_start, self._prev) 6855 self._advance(len(tags)) 6856 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6857 6858 self._advance() 6859 6860 self.raise_error(f"No closing {''.join(tags)} found") 6861 return None 6862 6863 def _find_parser( 6864 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6865 ) -> t.Optional[t.Callable]: 6866 if not self._curr: 6867 return None 6868 6869 index = self._index 6870 this = [] 6871 while True: 6872 # The current token might be multiple words 6873 curr = self._curr.text.upper() 6874 key = curr.split(" ") 6875 this.append(curr) 6876 6877 self._advance() 6878 result, trie = in_trie(trie, key) 6879 if result == TrieResult.FAILED: 6880 break 6881 6882 if result == TrieResult.EXISTS: 6883 subparser = parsers[" ".join(this)] 6884 return subparser 6885 6886 self._retreat(index) 6887 return None 6888 6889 def _match(self, token_type, advance=True, expression=None): 6890 if not self._curr: 6891 return None 6892 6893 if self._curr.token_type == token_type: 6894 if advance: 6895 self._advance() 6896 self._add_comments(expression) 6897 return True 6898 6899 return None 6900 6901 def _match_set(self, types, advance=True): 6902 if not self._curr: 6903 return None 6904 6905 if self._curr.token_type in types: 6906 if advance: 6907 self._advance() 6908 return True 6909 6910 return None 6911 6912 def _match_pair(self, token_type_a, token_type_b, advance=True): 6913 if not self._curr or not self._next: 6914 return None 6915 6916 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6917 if advance: 6918 self._advance(2) 6919 return True 6920 6921 return None 6922 6923 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6924 if not self._match(TokenType.L_PAREN, expression=expression): 6925 self.raise_error("Expecting (") 6926 6927 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6928 if not self._match(TokenType.R_PAREN, expression=expression): 6929 self.raise_error("Expecting )") 6930 6931 def _match_texts(self, texts, advance=True): 6932 if self._curr and self._curr.text.upper() in texts: 6933 if advance: 6934 self._advance() 6935 return True 6936 return None 
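# Note on the matcher helpers: _match, _match_set, _match_pair and _match_texts
# advance the token stream only on a successful (and complete) match, while
# _match_text_seq below either consumes the whole keyword sequence or retreats to
# its starting index, so callers can probe multi-word keywords such as
# ("WITHIN", "GROUP") without corrupting parser state.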
6937 6938 def _match_text_seq(self, *texts, advance=True): 6939 index = self._index 6940 for text in texts: 6941 if self._curr and self._curr.text.upper() == text: 6942 self._advance() 6943 else: 6944 self._retreat(index) 6945 return None 6946 6947 if not advance: 6948 self._retreat(index) 6949 6950 return True 6951 6952 def _replace_lambda( 6953 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6954 ) -> t.Optional[exp.Expression]: 6955 if not node: 6956 return node 6957 6958 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6959 6960 for column in node.find_all(exp.Column): 6961 typ = lambda_types.get(column.parts[0].name) 6962 if typ is not None: 6963 dot_or_id = column.to_dot() if column.table else column.this 6964 6965 if typ: 6966 dot_or_id = self.expression( 6967 exp.Cast, 6968 this=dot_or_id, 6969 to=typ, 6970 ) 6971 6972 parent = column.parent 6973 6974 while isinstance(parent, exp.Dot): 6975 if not isinstance(parent.parent, exp.Dot): 6976 parent.replace(dot_or_id) 6977 break 6978 parent = parent.parent 6979 else: 6980 if column is node: 6981 node = dot_or_id 6982 else: 6983 column.replace(dot_or_id) 6984 return node 6985 6986 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6987 start = self._prev 6988 6989 # Not to be confused with TRUNCATE(number, decimals) function call 6990 if self._match(TokenType.L_PAREN): 6991 self._retreat(self._index - 2) 6992 return self._parse_function() 6993 6994 # Clickhouse supports TRUNCATE DATABASE as well 6995 is_database = self._match(TokenType.DATABASE) 6996 6997 self._match(TokenType.TABLE) 6998 6999 exists = self._parse_exists(not_=False) 7000 7001 expressions = self._parse_csv( 7002 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7003 ) 7004 7005 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7006 7007 if self._match_text_seq("RESTART", "IDENTITY"): 7008 identity = "RESTART" 7009 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7010 identity = "CONTINUE" 7011 else: 7012 identity = None 7013 7014 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7015 option = self._prev.text 7016 else: 7017 option = None 7018 7019 partition = self._parse_partition() 7020 7021 # Fallback case 7022 if self._curr: 7023 return self._parse_as_command(start) 7024 7025 return self.expression( 7026 exp.TruncateTable, 7027 expressions=expressions, 7028 is_database=is_database, 7029 exists=exists, 7030 cluster=cluster, 7031 identity=identity, 7032 option=option, 7033 partition=partition, 7034 ) 7035 7036 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7037 this = self._parse_ordered(self._parse_opclass) 7038 7039 if not self._match(TokenType.WITH): 7040 return this 7041 7042 op = self._parse_var(any_token=True) 7043 7044 return self.expression(exp.WithOperator, this=this, op=op) 7045 7046 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7047 self._match(TokenType.EQ) 7048 self._match(TokenType.L_PAREN) 7049 7050 opts: t.List[t.Optional[exp.Expression]] = [] 7051 while self._curr and not self._match(TokenType.R_PAREN): 7052 if self._match_text_seq("FORMAT_NAME", "="): 7053 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7054 # so we parse it separately to use _parse_field() 7055 prop = self.expression( 7056 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7057 ) 7058 opts.append(prop) 7059 else: 7060 opts.append(self._parse_property()) 7061 
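                # [Editor's annotation] The separator match below is optional:
                # some dialects whitespace-separate wrapped options, and a
                # failed _match simply leaves the cursor in place.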
7062 self._match(TokenType.COMMA) 7063 7064 return opts 7065 7066 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7067 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7068 7069 options = [] 7070 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7071 option = self._parse_var(any_token=True) 7072 prev = self._prev.text.upper() 7073 7074 # Different dialects might separate options and values by white space, "=" and "AS" 7075 self._match(TokenType.EQ) 7076 self._match(TokenType.ALIAS) 7077 7078 param = self.expression(exp.CopyParameter, this=option) 7079 7080 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7081 TokenType.L_PAREN, advance=False 7082 ): 7083 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7084 param.set("expressions", self._parse_wrapped_options()) 7085 elif prev == "FILE_FORMAT": 7086 # T-SQL's external file format case 7087 param.set("expression", self._parse_field()) 7088 else: 7089 param.set("expression", self._parse_unquoted_field()) 7090 7091 options.append(param) 7092 self._match(sep) 7093 7094 return options 7095 7096 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7097 expr = self.expression(exp.Credentials) 7098 7099 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7100 expr.set("storage", self._parse_field()) 7101 if self._match_text_seq("CREDENTIALS"): 7102 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7103 creds = ( 7104 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7105 ) 7106 expr.set("credentials", creds) 7107 if self._match_text_seq("ENCRYPTION"): 7108 expr.set("encryption", self._parse_wrapped_options()) 7109 if self._match_text_seq("IAM_ROLE"): 7110 expr.set("iam_role", self._parse_field()) 7111 if self._match_text_seq("REGION"): 7112 expr.set("region", self._parse_field()) 7113 7114 return expr 7115 7116 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7117 return self._parse_field() 7118 7119 def _parse_copy(self) -> exp.Copy | exp.Command: 7120 start = self._prev 7121 7122 self._match(TokenType.INTO) 7123 7124 this = ( 7125 self._parse_select(nested=True, parse_subquery_alias=False) 7126 if self._match(TokenType.L_PAREN, advance=False) 7127 else self._parse_table(schema=True) 7128 ) 7129 7130 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7131 7132 files = self._parse_csv(self._parse_file_location) 7133 credentials = self._parse_credentials() 7134 7135 self._match_text_seq("WITH") 7136 7137 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7138 7139 # Fallback case 7140 if self._curr: 7141 return self._parse_as_command(start) 7142 7143 return self.expression( 7144 exp.Copy, 7145 this=this, 7146 kind=kind, 7147 credentials=credentials, 7148 files=files, 7149 params=params, 7150 )
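As a quick sanity check, the COPY machinery above is reachable through the top-level API. This is a sketch only: the simplified statement below and the default dialect's acceptance of it are assumptions.

import sqlglot
from sqlglot import exp

# COPY INTO <table> FROM <file> takes the _parse_copy path above.
ast = sqlglot.parse_one("COPY INTO t FROM 'data.csv'")
assert isinstance(ast, exp.Copy)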
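Similarly, the builder helpers earlier in the module can be exercised directly. build_mod, for instance, parenthesizes binary operands per its source comment (MOD(a + 1, 7) -> (a + 1) % 7). A minimal round-trip sketch, treating the default dialect's % rendering as an assumption:

import sqlglot

# build_mod wraps the binary left operand in exp.Paren before emitting %.
print(sqlglot.parse_one("MOD(a + 1, 7)").sql())  # expected: (a + 1) % 7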
162class Parser(metaclass=_Parser): 163 """ 164 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 165 166 Args: 167 error_level: The desired error level. 168 Default: ErrorLevel.IMMEDIATE 169 error_message_context: The amount of context to capture from a query string when displaying 170 the error message (in number of characters). 171 Default: 100 172 max_errors: Maximum number of error messages to include in a raised ParseError. 173 This is only relevant if error_level is ErrorLevel.RAISE. 174 Default: 3 175 """ 176 177 FUNCTIONS: t.Dict[str, t.Callable] = { 178 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 179 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 180 "CONCAT": lambda args, dialect: exp.Concat( 181 expressions=args, 182 safe=not dialect.STRICT_STRING_CONCAT, 183 coalesce=dialect.CONCAT_COALESCE, 184 ), 185 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 186 expressions=args, 187 safe=not dialect.STRICT_STRING_CONCAT, 188 coalesce=dialect.CONCAT_COALESCE, 189 ), 190 "CONVERT_TIMEZONE": build_convert_timezone, 191 "DATE_TO_DATE_STR": lambda args: exp.Cast( 192 this=seq_get(args, 0), 193 to=exp.DataType(this=exp.DataType.Type.TEXT), 194 ), 195 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 196 start=seq_get(args, 0), 197 end=seq_get(args, 1), 198 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 199 ), 200 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 201 "HEX": build_hex, 202 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 203 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 204 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 205 "LIKE": build_like, 206 "LOG": build_logarithm, 207 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 208 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 209 "LOWER": build_lower, 210 "LPAD": lambda args: build_pad(args), 211 "LEFTPAD": lambda args: build_pad(args), 212 "LTRIM": lambda args: build_trim(args), 213 "MOD": build_mod, 214 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 215 "RPAD": lambda args: build_pad(args, is_left=False), 216 "RTRIM": lambda args: build_trim(args, is_left=False), 217 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 218 if len(args) != 2 219 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 220 "TIME_TO_TIME_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "TO_HEX": build_hex, 225 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 226 this=exp.Cast( 227 this=seq_get(args, 0), 228 to=exp.DataType(this=exp.DataType.Type.TEXT), 229 ), 230 start=exp.Literal.number(1), 231 length=exp.Literal.number(10), 232 ), 233 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 234 "UPPER": build_upper, 235 "VAR_MAP": build_var_map, 236 "COALESCE": lambda args: exp.Coalesce(this=seq_get(args, 0), expressions=args[1:]), 237 } 238 239 NO_PAREN_FUNCTIONS = { 240 TokenType.CURRENT_DATE: exp.CurrentDate, 241 TokenType.CURRENT_DATETIME: exp.CurrentDate, 242 TokenType.CURRENT_TIME: exp.CurrentTime, 243 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 244 TokenType.CURRENT_USER: exp.CurrentUser, 245 } 246 247 STRUCT_TYPE_TOKENS = { 248 TokenType.NESTED, 249 
TokenType.OBJECT, 250 TokenType.STRUCT, 251 } 252 253 NESTED_TYPE_TOKENS = { 254 TokenType.ARRAY, 255 TokenType.LIST, 256 TokenType.LOWCARDINALITY, 257 TokenType.MAP, 258 TokenType.NULLABLE, 259 *STRUCT_TYPE_TOKENS, 260 } 261 262 ENUM_TYPE_TOKENS = { 263 TokenType.ENUM, 264 TokenType.ENUM8, 265 TokenType.ENUM16, 266 } 267 268 AGGREGATE_TYPE_TOKENS = { 269 TokenType.AGGREGATEFUNCTION, 270 TokenType.SIMPLEAGGREGATEFUNCTION, 271 } 272 273 TYPE_TOKENS = { 274 TokenType.BIT, 275 TokenType.BOOLEAN, 276 TokenType.TINYINT, 277 TokenType.UTINYINT, 278 TokenType.SMALLINT, 279 TokenType.USMALLINT, 280 TokenType.INT, 281 TokenType.UINT, 282 TokenType.BIGINT, 283 TokenType.UBIGINT, 284 TokenType.INT128, 285 TokenType.UINT128, 286 TokenType.INT256, 287 TokenType.UINT256, 288 TokenType.MEDIUMINT, 289 TokenType.UMEDIUMINT, 290 TokenType.FIXEDSTRING, 291 TokenType.FLOAT, 292 TokenType.DOUBLE, 293 TokenType.CHAR, 294 TokenType.NCHAR, 295 TokenType.VARCHAR, 296 TokenType.NVARCHAR, 297 TokenType.BPCHAR, 298 TokenType.TEXT, 299 TokenType.MEDIUMTEXT, 300 TokenType.LONGTEXT, 301 TokenType.MEDIUMBLOB, 302 TokenType.LONGBLOB, 303 TokenType.BINARY, 304 TokenType.VARBINARY, 305 TokenType.JSON, 306 TokenType.JSONB, 307 TokenType.INTERVAL, 308 TokenType.TINYBLOB, 309 TokenType.TINYTEXT, 310 TokenType.TIME, 311 TokenType.TIMETZ, 312 TokenType.TIMESTAMP, 313 TokenType.TIMESTAMP_S, 314 TokenType.TIMESTAMP_MS, 315 TokenType.TIMESTAMP_NS, 316 TokenType.TIMESTAMPTZ, 317 TokenType.TIMESTAMPLTZ, 318 TokenType.TIMESTAMPNTZ, 319 TokenType.DATETIME, 320 TokenType.DATETIME64, 321 TokenType.DATE, 322 TokenType.DATE32, 323 TokenType.INT4RANGE, 324 TokenType.INT4MULTIRANGE, 325 TokenType.INT8RANGE, 326 TokenType.INT8MULTIRANGE, 327 TokenType.NUMRANGE, 328 TokenType.NUMMULTIRANGE, 329 TokenType.TSRANGE, 330 TokenType.TSMULTIRANGE, 331 TokenType.TSTZRANGE, 332 TokenType.TSTZMULTIRANGE, 333 TokenType.DATERANGE, 334 TokenType.DATEMULTIRANGE, 335 TokenType.DECIMAL, 336 TokenType.UDECIMAL, 337 TokenType.BIGDECIMAL, 338 TokenType.UUID, 339 TokenType.GEOGRAPHY, 340 TokenType.GEOMETRY, 341 TokenType.HLLSKETCH, 342 TokenType.HSTORE, 343 TokenType.PSEUDO_TYPE, 344 TokenType.SUPER, 345 TokenType.SERIAL, 346 TokenType.SMALLSERIAL, 347 TokenType.BIGSERIAL, 348 TokenType.XML, 349 TokenType.YEAR, 350 TokenType.UNIQUEIDENTIFIER, 351 TokenType.USERDEFINED, 352 TokenType.MONEY, 353 TokenType.SMALLMONEY, 354 TokenType.ROWVERSION, 355 TokenType.IMAGE, 356 TokenType.VARIANT, 357 TokenType.VECTOR, 358 TokenType.OBJECT, 359 TokenType.OBJECT_IDENTIFIER, 360 TokenType.INET, 361 TokenType.IPADDRESS, 362 TokenType.IPPREFIX, 363 TokenType.IPV4, 364 TokenType.IPV6, 365 TokenType.UNKNOWN, 366 TokenType.NULL, 367 TokenType.NAME, 368 TokenType.TDIGEST, 369 *ENUM_TYPE_TOKENS, 370 *NESTED_TYPE_TOKENS, 371 *AGGREGATE_TYPE_TOKENS, 372 } 373 374 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 375 TokenType.BIGINT: TokenType.UBIGINT, 376 TokenType.INT: TokenType.UINT, 377 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 378 TokenType.SMALLINT: TokenType.USMALLINT, 379 TokenType.TINYINT: TokenType.UTINYINT, 380 TokenType.DECIMAL: TokenType.UDECIMAL, 381 } 382 383 SUBQUERY_PREDICATES = { 384 TokenType.ANY: exp.Any, 385 TokenType.ALL: exp.All, 386 TokenType.EXISTS: exp.Exists, 387 TokenType.SOME: exp.Any, 388 } 389 390 RESERVED_TOKENS = { 391 *Tokenizer.SINGLE_TOKENS.values(), 392 TokenType.SELECT, 393 } - {TokenType.IDENTIFIER} 394 395 DB_CREATABLES = { 396 TokenType.DATABASE, 397 TokenType.DICTIONARY, 398 TokenType.MODEL, 399 TokenType.SCHEMA, 400 TokenType.SEQUENCE, 401 
TokenType.STORAGE_INTEGRATION, 402 TokenType.TABLE, 403 TokenType.TAG, 404 TokenType.VIEW, 405 TokenType.WAREHOUSE, 406 TokenType.STREAMLIT, 407 } 408 409 CREATABLES = { 410 TokenType.COLUMN, 411 TokenType.CONSTRAINT, 412 TokenType.FOREIGN_KEY, 413 TokenType.FUNCTION, 414 TokenType.INDEX, 415 TokenType.PROCEDURE, 416 *DB_CREATABLES, 417 } 418 419 ALTERABLES = { 420 TokenType.TABLE, 421 TokenType.VIEW, 422 } 423 424 # Tokens that can represent identifiers 425 ID_VAR_TOKENS = { 426 TokenType.ALL, 427 TokenType.VAR, 428 TokenType.ANTI, 429 TokenType.APPLY, 430 TokenType.ASC, 431 TokenType.ASOF, 432 TokenType.AUTO_INCREMENT, 433 TokenType.BEGIN, 434 TokenType.BPCHAR, 435 TokenType.CACHE, 436 TokenType.CASE, 437 TokenType.COLLATE, 438 TokenType.COMMAND, 439 TokenType.COMMENT, 440 TokenType.COMMIT, 441 TokenType.CONSTRAINT, 442 TokenType.COPY, 443 TokenType.CUBE, 444 TokenType.DEFAULT, 445 TokenType.DELETE, 446 TokenType.DESC, 447 TokenType.DESCRIBE, 448 TokenType.DICTIONARY, 449 TokenType.DIV, 450 TokenType.END, 451 TokenType.EXECUTE, 452 TokenType.ESCAPE, 453 TokenType.FALSE, 454 TokenType.FIRST, 455 TokenType.FILTER, 456 TokenType.FINAL, 457 TokenType.FORMAT, 458 TokenType.FULL, 459 TokenType.IDENTIFIER, 460 TokenType.IS, 461 TokenType.ISNULL, 462 TokenType.INTERVAL, 463 TokenType.KEEP, 464 TokenType.KILL, 465 TokenType.LEFT, 466 TokenType.LOAD, 467 TokenType.MERGE, 468 TokenType.NATURAL, 469 TokenType.NEXT, 470 TokenType.OFFSET, 471 TokenType.OPERATOR, 472 TokenType.ORDINALITY, 473 TokenType.OVERLAPS, 474 TokenType.OVERWRITE, 475 TokenType.PARTITION, 476 TokenType.PERCENT, 477 TokenType.PIVOT, 478 TokenType.PRAGMA, 479 TokenType.RANGE, 480 TokenType.RECURSIVE, 481 TokenType.REFERENCES, 482 TokenType.REFRESH, 483 TokenType.RENAME, 484 TokenType.REPLACE, 485 TokenType.RIGHT, 486 TokenType.ROLLUP, 487 TokenType.ROW, 488 TokenType.ROWS, 489 TokenType.SEMI, 490 TokenType.SET, 491 TokenType.SETTINGS, 492 TokenType.SHOW, 493 TokenType.TEMPORARY, 494 TokenType.TOP, 495 TokenType.TRUE, 496 TokenType.TRUNCATE, 497 TokenType.UNIQUE, 498 TokenType.UNNEST, 499 TokenType.UNPIVOT, 500 TokenType.UPDATE, 501 TokenType.USE, 502 TokenType.VOLATILE, 503 TokenType.WINDOW, 504 *CREATABLES, 505 *SUBQUERY_PREDICATES, 506 *TYPE_TOKENS, 507 *NO_PAREN_FUNCTIONS, 508 } 509 510 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 511 512 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 513 TokenType.ANTI, 514 TokenType.APPLY, 515 TokenType.ASOF, 516 TokenType.FULL, 517 TokenType.LEFT, 518 TokenType.LOCK, 519 TokenType.NATURAL, 520 TokenType.OFFSET, 521 TokenType.RIGHT, 522 TokenType.SEMI, 523 TokenType.WINDOW, 524 } 525 526 ALIAS_TOKENS = ID_VAR_TOKENS 527 528 ARRAY_CONSTRUCTORS = { 529 "ARRAY": exp.Array, 530 "LIST": exp.List, 531 } 532 533 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 534 535 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 536 537 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 538 539 FUNC_TOKENS = { 540 TokenType.COLLATE, 541 TokenType.COMMAND, 542 TokenType.CURRENT_DATE, 543 TokenType.CURRENT_DATETIME, 544 TokenType.CURRENT_TIMESTAMP, 545 TokenType.CURRENT_TIME, 546 TokenType.CURRENT_USER, 547 TokenType.FILTER, 548 TokenType.FIRST, 549 TokenType.FORMAT, 550 TokenType.GLOB, 551 TokenType.IDENTIFIER, 552 TokenType.INDEX, 553 TokenType.ISNULL, 554 TokenType.ILIKE, 555 TokenType.INSERT, 556 TokenType.LIKE, 557 TokenType.MERGE, 558 TokenType.OFFSET, 559 TokenType.PRIMARY_KEY, 560 TokenType.RANGE, 561 TokenType.REPLACE, 562 TokenType.RLIKE, 563 TokenType.ROW, 564 TokenType.UNNEST, 565 
TokenType.VAR, 566 TokenType.LEFT, 567 TokenType.RIGHT, 568 TokenType.SEQUENCE, 569 TokenType.DATE, 570 TokenType.DATETIME, 571 TokenType.TABLE, 572 TokenType.TIMESTAMP, 573 TokenType.TIMESTAMPTZ, 574 TokenType.TRUNCATE, 575 TokenType.WINDOW, 576 TokenType.XOR, 577 *TYPE_TOKENS, 578 *SUBQUERY_PREDICATES, 579 } 580 581 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 582 TokenType.AND: exp.And, 583 } 584 585 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 586 TokenType.COLON_EQ: exp.PropertyEQ, 587 } 588 589 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 590 TokenType.OR: exp.Or, 591 } 592 593 EQUALITY = { 594 TokenType.EQ: exp.EQ, 595 TokenType.NEQ: exp.NEQ, 596 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 597 } 598 599 COMPARISON = { 600 TokenType.GT: exp.GT, 601 TokenType.GTE: exp.GTE, 602 TokenType.LT: exp.LT, 603 TokenType.LTE: exp.LTE, 604 } 605 606 BITWISE = { 607 TokenType.AMP: exp.BitwiseAnd, 608 TokenType.CARET: exp.BitwiseXor, 609 TokenType.PIPE: exp.BitwiseOr, 610 } 611 612 TERM = { 613 TokenType.DASH: exp.Sub, 614 TokenType.PLUS: exp.Add, 615 TokenType.MOD: exp.Mod, 616 TokenType.COLLATE: exp.Collate, 617 } 618 619 FACTOR = { 620 TokenType.DIV: exp.IntDiv, 621 TokenType.LR_ARROW: exp.Distance, 622 TokenType.SLASH: exp.Div, 623 TokenType.STAR: exp.Mul, 624 } 625 626 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 627 628 TIMES = { 629 TokenType.TIME, 630 TokenType.TIMETZ, 631 } 632 633 TIMESTAMPS = { 634 TokenType.TIMESTAMP, 635 TokenType.TIMESTAMPTZ, 636 TokenType.TIMESTAMPLTZ, 637 *TIMES, 638 } 639 640 SET_OPERATIONS = { 641 TokenType.UNION, 642 TokenType.INTERSECT, 643 TokenType.EXCEPT, 644 } 645 646 JOIN_METHODS = { 647 TokenType.ASOF, 648 TokenType.NATURAL, 649 TokenType.POSITIONAL, 650 } 651 652 JOIN_SIDES = { 653 TokenType.LEFT, 654 TokenType.RIGHT, 655 TokenType.FULL, 656 } 657 658 JOIN_KINDS = { 659 TokenType.ANTI, 660 TokenType.CROSS, 661 TokenType.INNER, 662 TokenType.OUTER, 663 TokenType.SEMI, 664 TokenType.STRAIGHT_JOIN, 665 } 666 667 JOIN_HINTS: t.Set[str] = set() 668 669 LAMBDAS = { 670 TokenType.ARROW: lambda self, expressions: self.expression( 671 exp.Lambda, 672 this=self._replace_lambda( 673 self._parse_assignment(), 674 expressions, 675 ), 676 expressions=expressions, 677 ), 678 TokenType.FARROW: lambda self, expressions: self.expression( 679 exp.Kwarg, 680 this=exp.var(expressions[0].name), 681 expression=self._parse_assignment(), 682 ), 683 } 684 685 COLUMN_OPERATORS = { 686 TokenType.DOT: None, 687 TokenType.DCOLON: lambda self, this, to: self.expression( 688 exp.Cast if self.STRICT_CAST else exp.TryCast, 689 this=this, 690 to=to, 691 ), 692 TokenType.ARROW: lambda self, this, path: self.expression( 693 exp.JSONExtract, 694 this=this, 695 expression=self.dialect.to_json_path(path), 696 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 697 ), 698 TokenType.DARROW: lambda self, this, path: self.expression( 699 exp.JSONExtractScalar, 700 this=this, 701 expression=self.dialect.to_json_path(path), 702 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 703 ), 704 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 705 exp.JSONBExtract, 706 this=this, 707 expression=path, 708 ), 709 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 710 exp.JSONBExtractScalar, 711 this=this, 712 expression=path, 713 ), 714 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 715 exp.JSONBContains, 716 this=this, 717 expression=key, 718 ), 719 } 720 721 EXPRESSION_PARSERS = { 722 exp.Cluster: lambda self: 
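        # [Editor's annotation] This table backs Parser.parse_into below: each
        # target Expression type maps to the method that parses it.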
self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 723 exp.Column: lambda self: self._parse_column(), 724 exp.Condition: lambda self: self._parse_assignment(), 725 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 726 exp.Expression: lambda self: self._parse_expression(), 727 exp.From: lambda self: self._parse_from(joins=True), 728 exp.Group: lambda self: self._parse_group(), 729 exp.Having: lambda self: self._parse_having(), 730 exp.Identifier: lambda self: self._parse_id_var(), 731 exp.Join: lambda self: self._parse_join(), 732 exp.Lambda: lambda self: self._parse_lambda(), 733 exp.Lateral: lambda self: self._parse_lateral(), 734 exp.Limit: lambda self: self._parse_limit(), 735 exp.Offset: lambda self: self._parse_offset(), 736 exp.Order: lambda self: self._parse_order(), 737 exp.Ordered: lambda self: self._parse_ordered(), 738 exp.Properties: lambda self: self._parse_properties(), 739 exp.Qualify: lambda self: self._parse_qualify(), 740 exp.Returning: lambda self: self._parse_returning(), 741 exp.Select: lambda self: self._parse_select(), 742 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 743 exp.Table: lambda self: self._parse_table_parts(), 744 exp.TableAlias: lambda self: self._parse_table_alias(), 745 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 746 exp.Where: lambda self: self._parse_where(), 747 exp.Window: lambda self: self._parse_named_window(), 748 exp.With: lambda self: self._parse_with(), 749 "JOIN_TYPE": lambda self: self._parse_join_parts(), 750 } 751 752 STATEMENT_PARSERS = { 753 TokenType.ALTER: lambda self: self._parse_alter(), 754 TokenType.BEGIN: lambda self: self._parse_transaction(), 755 TokenType.CACHE: lambda self: self._parse_cache(), 756 TokenType.COMMENT: lambda self: self._parse_comment(), 757 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 758 TokenType.COPY: lambda self: self._parse_copy(), 759 TokenType.CREATE: lambda self: self._parse_create(), 760 TokenType.DELETE: lambda self: self._parse_delete(), 761 TokenType.DESC: lambda self: self._parse_describe(), 762 TokenType.DESCRIBE: lambda self: self._parse_describe(), 763 TokenType.DROP: lambda self: self._parse_drop(), 764 TokenType.INSERT: lambda self: self._parse_insert(), 765 TokenType.KILL: lambda self: self._parse_kill(), 766 TokenType.LOAD: lambda self: self._parse_load(), 767 TokenType.MERGE: lambda self: self._parse_merge(), 768 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 769 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 770 TokenType.REFRESH: lambda self: self._parse_refresh(), 771 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 772 TokenType.SET: lambda self: self._parse_set(), 773 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 774 TokenType.UNCACHE: lambda self: self._parse_uncache(), 775 TokenType.UPDATE: lambda self: self._parse_update(), 776 TokenType.USE: lambda self: self.expression( 777 exp.Use, 778 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 779 this=self._parse_table(schema=False), 780 ), 781 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 782 } 783 784 UNARY_PARSERS = { 785 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 786 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 787 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 788 TokenType.DASH: 
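        # [Editor's annotation] Unary minus (exp.Neg); the binary DASH is
        # subtraction (exp.Sub), handled separately in TERM above.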
lambda self: self.expression(exp.Neg, this=self._parse_unary()), 789 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 790 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 791 } 792 793 STRING_PARSERS = { 794 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 795 exp.RawString, this=token.text 796 ), 797 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 798 exp.National, this=token.text 799 ), 800 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 801 TokenType.STRING: lambda self, token: self.expression( 802 exp.Literal, this=token.text, is_string=True 803 ), 804 TokenType.UNICODE_STRING: lambda self, token: self.expression( 805 exp.UnicodeString, 806 this=token.text, 807 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 808 ), 809 } 810 811 NUMERIC_PARSERS = { 812 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 813 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 814 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 815 TokenType.NUMBER: lambda self, token: self.expression( 816 exp.Literal, this=token.text, is_string=False 817 ), 818 } 819 820 PRIMARY_PARSERS = { 821 **STRING_PARSERS, 822 **NUMERIC_PARSERS, 823 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 824 TokenType.NULL: lambda self, _: self.expression(exp.Null), 825 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 826 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 827 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 828 TokenType.STAR: lambda self, _: self.expression( 829 exp.Star, 830 **{ 831 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 832 "replace": self._parse_star_op("REPLACE"), 833 "rename": self._parse_star_op("RENAME"), 834 }, 835 ), 836 } 837 838 PLACEHOLDER_PARSERS = { 839 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 840 TokenType.PARAMETER: lambda self: self._parse_parameter(), 841 TokenType.COLON: lambda self: ( 842 self.expression(exp.Placeholder, this=self._prev.text) 843 if self._match_set(self.ID_VAR_TOKENS) 844 else None 845 ), 846 } 847 848 RANGE_PARSERS = { 849 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 850 TokenType.GLOB: binary_range_parser(exp.Glob), 851 TokenType.ILIKE: binary_range_parser(exp.ILike), 852 TokenType.IN: lambda self, this: self._parse_in(this), 853 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 854 TokenType.IS: lambda self, this: self._parse_is(this), 855 TokenType.LIKE: binary_range_parser(exp.Like), 856 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 857 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 858 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 859 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 860 } 861 862 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 863 "ALLOWED_VALUES": lambda self: self.expression( 864 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 865 ), 866 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 867 "AUTO": lambda self: self._parse_auto_property(), 868 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 869 "BACKUP": lambda self: self.expression( 870 exp.BackupProperty, 
this=self._parse_var(any_token=True) 871 ), 872 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 873 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 874 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 875 "CHECKSUM": lambda self: self._parse_checksum(), 876 "CLUSTER BY": lambda self: self._parse_cluster(), 877 "CLUSTERED": lambda self: self._parse_clustered_by(), 878 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 879 exp.CollateProperty, **kwargs 880 ), 881 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 882 "CONTAINS": lambda self: self._parse_contains_property(), 883 "COPY": lambda self: self._parse_copy_property(), 884 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 885 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 886 "DEFINER": lambda self: self._parse_definer(), 887 "DETERMINISTIC": lambda self: self.expression( 888 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 889 ), 890 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 891 "DISTKEY": lambda self: self._parse_distkey(), 892 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 893 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 894 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 895 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 896 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 897 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 898 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 899 "FREESPACE": lambda self: self._parse_freespace(), 900 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 901 "HEAP": lambda self: self.expression(exp.HeapProperty), 902 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 903 "IMMUTABLE": lambda self: self.expression( 904 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 905 ), 906 "INHERITS": lambda self: self.expression( 907 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 908 ), 909 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 910 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 911 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 912 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 913 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 914 "LIKE": lambda self: self._parse_create_like(), 915 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 916 "LOCK": lambda self: self._parse_locking(), 917 "LOCKING": lambda self: self._parse_locking(), 918 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 919 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 920 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 921 "MODIFIES": lambda self: self._parse_modifies_property(), 922 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 923 "NO": lambda self: self._parse_no_property(), 924 "ON": lambda self: self._parse_on_property(), 925 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 926 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 927 "PARTITION": lambda self: self._parse_partitioned_of(), 928 "PARTITION BY": 
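        # [Editor's annotation] PARTITION BY, PARTITIONED BY and PARTITIONED_BY
        # below are alternate spellings that all parse to the same property.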
lambda self: self._parse_partitioned_by(), 929 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 930 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 931 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 932 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 933 "READS": lambda self: self._parse_reads_property(), 934 "REMOTE": lambda self: self._parse_remote_with_connection(), 935 "RETURNS": lambda self: self._parse_returns(), 936 "STRICT": lambda self: self.expression(exp.StrictProperty), 937 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 938 "ROW": lambda self: self._parse_row(), 939 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 940 "SAMPLE": lambda self: self.expression( 941 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 942 ), 943 "SECURE": lambda self: self.expression(exp.SecureProperty), 944 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 945 "SETTINGS": lambda self: self._parse_settings_property(), 946 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 947 "SORTKEY": lambda self: self._parse_sortkey(), 948 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 949 "STABLE": lambda self: self.expression( 950 exp.StabilityProperty, this=exp.Literal.string("STABLE") 951 ), 952 "STORED": lambda self: self._parse_stored(), 953 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 954 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 955 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 956 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 957 "TO": lambda self: self._parse_to_table(), 958 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 959 "TRANSFORM": lambda self: self.expression( 960 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 961 ), 962 "TTL": lambda self: self._parse_ttl(), 963 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 964 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 965 "VOLATILE": lambda self: self._parse_volatile_property(), 966 "WITH": lambda self: self._parse_with_property(), 967 } 968 969 CONSTRAINT_PARSERS = { 970 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 971 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 972 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 973 "CHARACTER SET": lambda self: self.expression( 974 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 975 ), 976 "CHECK": lambda self: self.expression( 977 exp.CheckColumnConstraint, 978 this=self._parse_wrapped(self._parse_assignment), 979 enforced=self._match_text_seq("ENFORCED"), 980 ), 981 "COLLATE": lambda self: self.expression( 982 exp.CollateColumnConstraint, 983 this=self._parse_identifier() or self._parse_column(), 984 ), 985 "COMMENT": lambda self: self.expression( 986 exp.CommentColumnConstraint, this=self._parse_string() 987 ), 988 "COMPRESS": lambda self: self._parse_compress(), 989 "CLUSTERED": lambda self: self.expression( 990 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 991 ), 992 "NONCLUSTERED": lambda self: self.expression( 993 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 994 ), 995 "DEFAULT": lambda self: self.expression( 996 exp.DefaultColumnConstraint, 
this=self._parse_bitwise() 997 ), 998 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 999 "EPHEMERAL": lambda self: self.expression( 1000 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1001 ), 1002 "EXCLUDE": lambda self: self.expression( 1003 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1004 ), 1005 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1006 "FORMAT": lambda self: self.expression( 1007 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1008 ), 1009 "GENERATED": lambda self: self._parse_generated_as_identity(), 1010 "IDENTITY": lambda self: self._parse_auto_increment(), 1011 "INLINE": lambda self: self._parse_inline(), 1012 "LIKE": lambda self: self._parse_create_like(), 1013 "NOT": lambda self: self._parse_not_constraint(), 1014 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1015 "ON": lambda self: ( 1016 self._match(TokenType.UPDATE) 1017 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1018 ) 1019 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1020 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1021 "PERIOD": lambda self: self._parse_period_for_system_time(), 1022 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1023 "REFERENCES": lambda self: self._parse_references(match=False), 1024 "TITLE": lambda self: self.expression( 1025 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1026 ), 1027 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1028 "UNIQUE": lambda self: self._parse_unique(), 1029 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1030 "WITH": lambda self: self.expression( 1031 exp.Properties, expressions=self._parse_wrapped_properties() 1032 ), 1033 } 1034 1035 ALTER_PARSERS = { 1036 "ADD": lambda self: self._parse_alter_table_add(), 1037 "ALTER": lambda self: self._parse_alter_table_alter(), 1038 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1039 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1040 "DROP": lambda self: self._parse_alter_table_drop(), 1041 "RENAME": lambda self: self._parse_alter_table_rename(), 1042 "SET": lambda self: self._parse_alter_table_set(), 1043 "AS": lambda self: self._parse_select(), 1044 } 1045 1046 ALTER_ALTER_PARSERS = { 1047 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1048 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1049 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1050 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1051 } 1052 1053 SCHEMA_UNNAMED_CONSTRAINTS = { 1054 "CHECK", 1055 "EXCLUDE", 1056 "FOREIGN KEY", 1057 "LIKE", 1058 "PERIOD", 1059 "PRIMARY KEY", 1060 "UNIQUE", 1061 } 1062 1063 NO_PAREN_FUNCTION_PARSERS = { 1064 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1065 "CASE": lambda self: self._parse_case(), 1066 "CONNECT_BY_ROOT": lambda self: self.expression( 1067 exp.ConnectByRoot, this=self._parse_column() 1068 ), 1069 "IF": lambda self: self._parse_if(), 1070 "NEXT": lambda self: self._parse_next_value_for(), 1071 } 1072 1073 INVALID_FUNC_NAME_TOKENS = { 1074 TokenType.IDENTIFIER, 1075 TokenType.STRING, 1076 } 1077 1078 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1079 1080 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1081 1082 FUNCTION_PARSERS = { 1083 "CAST": lambda self: 
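        # [Editor's annotation] CAST honors the dialect-level STRICT_CAST flag;
        # TRY_CAST and SAFE_CAST below always parse leniently (safe=True).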
self._parse_cast(self.STRICT_CAST), 1084 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1085 "DECODE": lambda self: self._parse_decode(), 1086 "EXTRACT": lambda self: self._parse_extract(), 1087 "GAP_FILL": lambda self: self._parse_gap_fill(), 1088 "JSON_OBJECT": lambda self: self._parse_json_object(), 1089 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1090 "JSON_TABLE": lambda self: self._parse_json_table(), 1091 "MATCH": lambda self: self._parse_match_against(), 1092 "OPENJSON": lambda self: self._parse_open_json(), 1093 "POSITION": lambda self: self._parse_position(), 1094 "PREDICT": lambda self: self._parse_predict(), 1095 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1096 "STRING_AGG": lambda self: self._parse_string_agg(), 1097 "SUBSTRING": lambda self: self._parse_substring(), 1098 "TRIM": lambda self: self._parse_trim(), 1099 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1100 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1101 } 1102 1103 QUERY_MODIFIER_PARSERS = { 1104 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1105 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1106 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1107 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1108 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1109 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1110 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1111 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1112 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1113 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1114 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1115 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1116 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1117 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1118 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1119 TokenType.CLUSTER_BY: lambda self: ( 1120 "cluster", 1121 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1122 ), 1123 TokenType.DISTRIBUTE_BY: lambda self: ( 1124 "distribute", 1125 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1126 ), 1127 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1128 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1129 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1130 } 1131 1132 SET_PARSERS = { 1133 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1134 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1135 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1136 "TRANSACTION": lambda self: self._parse_set_transaction(), 1137 } 1138 1139 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1140 1141 TYPE_LITERAL_PARSERS = { 1142 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1143 } 1144 1145 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1146 1147 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1148 1149 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1150 1151 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", 
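    # [Editor's annotation] e.g. SQLite's BEGIN DEFERRED/IMMEDIATE/EXCLUSIVE.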
"EXCLUSIVE"} 1152 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1153 "ISOLATION": ( 1154 ("LEVEL", "REPEATABLE", "READ"), 1155 ("LEVEL", "READ", "COMMITTED"), 1156 ("LEVEL", "READ", "UNCOMITTED"), 1157 ("LEVEL", "SERIALIZABLE"), 1158 ), 1159 "READ": ("WRITE", "ONLY"), 1160 } 1161 1162 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1163 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1164 ) 1165 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1166 1167 CREATE_SEQUENCE: OPTIONS_TYPE = { 1168 "SCALE": ("EXTEND", "NOEXTEND"), 1169 "SHARD": ("EXTEND", "NOEXTEND"), 1170 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1171 **dict.fromkeys( 1172 ( 1173 "SESSION", 1174 "GLOBAL", 1175 "KEEP", 1176 "NOKEEP", 1177 "ORDER", 1178 "NOORDER", 1179 "NOCACHE", 1180 "CYCLE", 1181 "NOCYCLE", 1182 "NOMINVALUE", 1183 "NOMAXVALUE", 1184 "NOSCALE", 1185 "NOSHARD", 1186 ), 1187 tuple(), 1188 ), 1189 } 1190 1191 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1192 1193 USABLES: OPTIONS_TYPE = dict.fromkeys( 1194 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1195 ) 1196 1197 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1198 1199 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1200 "TYPE": ("EVOLUTION",), 1201 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1202 } 1203 1204 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1205 "NOT": ("ENFORCED",), 1206 "MATCH": ( 1207 "FULL", 1208 "PARTIAL", 1209 "SIMPLE", 1210 ), 1211 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1212 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1213 } 1214 1215 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1216 1217 CLONE_KEYWORDS = {"CLONE", "COPY"} 1218 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1219 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1220 1221 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1222 1223 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1224 1225 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1226 1227 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1228 1229 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1230 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1231 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1232 1233 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1234 1235 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1236 1237 ADD_CONSTRAINT_TOKENS = { 1238 TokenType.CONSTRAINT, 1239 TokenType.FOREIGN_KEY, 1240 TokenType.INDEX, 1241 TokenType.KEY, 1242 TokenType.PRIMARY_KEY, 1243 TokenType.UNIQUE, 1244 } 1245 1246 DISTINCT_TOKENS = {TokenType.DISTINCT} 1247 1248 NULL_TOKENS = {TokenType.NULL} 1249 1250 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1251 1252 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1253 1254 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1255 1256 STRICT_CAST = True 1257 1258 PREFIXED_PIVOT_COLUMNS = False 1259 IDENTIFY_PIVOT_STRINGS = False 1260 1261 LOG_DEFAULTS_TO_LN = False 1262 1263 # Whether ADD is present for each column added by ALTER TABLE 1264 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1265 1266 # Whether the table sample clause expects CSV syntax 1267 TABLESAMPLE_CSV = False 1268 1269 # The default method used for table sampling 1270 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1271 1272 # 
Whether the SET command needs a delimiter (e.g. "=") for assignments 1273 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1274 1275 # Whether the TRIM function expects the characters to trim as its first argument 1276 TRIM_PATTERN_FIRST = False 1277 1278 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1279 STRING_ALIASES = False 1280 1281 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1282 MODIFIERS_ATTACHED_TO_SET_OP = True 1283 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1284 1285 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1286 NO_PAREN_IF_COMMANDS = True 1287 1288 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1289 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1290 1291 # Whether the `:` operator is used to extract a value from a VARIANT column 1292 COLON_IS_VARIANT_EXTRACT = False 1293 1294 # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1295 # If this is True and '(' is not found, the keyword will be treated as an identifier 1296 VALUES_FOLLOWED_BY_PAREN = True 1297 1298 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1299 SUPPORTS_IMPLICIT_UNNEST = False 1300 1301 # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH 1302 INTERVAL_SPANS = True 1303 1304 # Whether a PARTITION clause can follow a table reference 1305 SUPPORTS_PARTITION_SELECTION = False 1306 1307 __slots__ = ( 1308 "error_level", 1309 "error_message_context", 1310 "max_errors", 1311 "dialect", 1312 "sql", 1313 "errors", 1314 "_tokens", 1315 "_index", 1316 "_curr", 1317 "_next", 1318 "_prev", 1319 "_prev_comments", 1320 ) 1321 1322 # Autofilled 1323 SHOW_TRIE: t.Dict = {} 1324 SET_TRIE: t.Dict = {} 1325 1326 def __init__( 1327 self, 1328 error_level: t.Optional[ErrorLevel] = None, 1329 error_message_context: int = 100, 1330 max_errors: int = 3, 1331 dialect: DialectType = None, 1332 ): 1333 from sqlglot.dialects import Dialect 1334 1335 self.error_level = error_level or ErrorLevel.IMMEDIATE 1336 self.error_message_context = error_message_context 1337 self.max_errors = max_errors 1338 self.dialect = Dialect.get_or_raise(dialect) 1339 self.reset() 1340 1341 def reset(self): 1342 self.sql = "" 1343 self.errors = [] 1344 self._tokens = [] 1345 self._index = 0 1346 self._curr = None 1347 self._next = None 1348 self._prev = None 1349 self._prev_comments = None 1350 1351 def parse( 1352 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1353 ) -> t.List[t.Optional[exp.Expression]]: 1354 """ 1355 Parses a list of tokens and returns a list of syntax trees, one tree 1356 per parsed SQL statement. 1357 1358 Args: 1359 raw_tokens: The list of tokens. 1360 sql: The original SQL string, used to produce helpful debug messages. 1361 1362 Returns: 1363 The list of the produced syntax trees. 1364 """ 1365 return self._parse( 1366 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1367 ) 1368 1369 def parse_into( 1370 self, 1371 expression_types: exp.IntoType, 1372 raw_tokens: t.List[Token], 1373 sql: t.Optional[str] = None, 1374 ) -> t.List[t.Optional[exp.Expression]]: 1375 """ 1376 Parses a list of tokens into a given Expression type. If a collection of Expression 1377 types is given instead, this method will try to parse the token list into each one 1378 of them, stopping at the first for which the parsing succeeds. 
1379 1380 Args: 1381 expression_types: The expression type(s) to try and parse the token list into. 1382 raw_tokens: The list of tokens. 1383 sql: The original SQL string, used to produce helpful debug messages. 1384 1385 Returns: 1386 The list of the produced syntax trees, parsed into the target Expression type. 1387 """ 1388 errors = [] 1389 for expression_type in ensure_list(expression_types): 1390 parser = self.EXPRESSION_PARSERS.get(expression_type) 1391 if not parser: 1392 raise TypeError(f"No parser registered for {expression_type}") 1393 1394 try: 1395 return self._parse(parser, raw_tokens, sql) 1396 except ParseError as e: 1397 e.errors[0]["into_expression"] = expression_type 1398 errors.append(e) 1399 1400 raise ParseError( 1401 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1402 errors=merge_errors(errors), 1403 ) from errors[-1] 1404 1405 def _parse( 1406 self, 1407 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1408 raw_tokens: t.List[Token], 1409 sql: t.Optional[str] = None, 1410 ) -> t.List[t.Optional[exp.Expression]]: 1411 self.reset() 1412 self.sql = sql or "" 1413 1414 total = len(raw_tokens) 1415 chunks: t.List[t.List[Token]] = [[]] 1416 1417 for i, token in enumerate(raw_tokens): 1418 if token.token_type == TokenType.SEMICOLON: 1419 if token.comments: 1420 chunks.append([token]) 1421 1422 if i < total - 1: 1423 chunks.append([]) 1424 else: 1425 chunks[-1].append(token) 1426 1427 expressions = [] 1428 1429 for tokens in chunks: 1430 self._index = -1 1431 self._tokens = tokens 1432 self._advance() 1433 1434 expressions.append(parse_method(self)) 1435 1436 if self._index < len(self._tokens): 1437 self.raise_error("Invalid expression / Unexpected token") 1438 1439 self.check_errors() 1440 1441 return expressions 1442 1443 def check_errors(self) -> None: 1444 """Logs or raises any found errors, depending on the chosen error level setting.""" 1445 if self.error_level == ErrorLevel.WARN: 1446 for error in self.errors: 1447 logger.error(str(error)) 1448 elif self.error_level == ErrorLevel.RAISE and self.errors: 1449 raise ParseError( 1450 concat_messages(self.errors, self.max_errors), 1451 errors=merge_errors(self.errors), 1452 ) 1453 1454 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1455 """ 1456 Appends an error to the list of recorded errors or raises it, depending on the chosen 1457 error level setting. 1458 """ 1459 token = token or self._curr or self._prev or Token.string("") 1460 start = token.start 1461 end = token.end + 1 1462 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1463 highlight = self.sql[start:end] 1464 end_context = self.sql[end : end + self.error_message_context] 1465 1466 error = ParseError.new( 1467 f"{message}. Line {token.line}, Col: {token.col}.\n" 1468 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1469 description=message, 1470 line=token.line, 1471 col=token.col, 1472 start_context=start_context, 1473 highlight=highlight, 1474 end_context=end_context, 1475 ) 1476 1477 if self.error_level == ErrorLevel.IMMEDIATE: 1478 raise error 1479 1480 self.errors.append(error) 1481 1482 def expression( 1483 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1484 ) -> E: 1485 """ 1486 Creates a new, validated Expression. 1487 1488 Args: 1489 exp_class: The expression class to instantiate. 1490 comments: An optional list of comments to attach to the expression. 1491 kwargs: The arguments to set for the expression along with their respective values. 
1492 1493 Returns: 1494 The target expression. 1495 """ 1496 instance = exp_class(**kwargs) 1497 instance.add_comments(comments) if comments else self._add_comments(instance) 1498 return self.validate_expression(instance) 1499 1500 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1501 if expression and self._prev_comments: 1502 expression.add_comments(self._prev_comments) 1503 self._prev_comments = None 1504 1505 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1506 """ 1507 Validates an Expression, making sure that all its mandatory arguments are set. 1508 1509 Args: 1510 expression: The expression to validate. 1511 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1512 1513 Returns: 1514 The validated expression. 1515 """ 1516 if self.error_level != ErrorLevel.IGNORE: 1517 for error_message in expression.error_messages(args): 1518 self.raise_error(error_message) 1519 1520 return expression 1521 1522 def _find_sql(self, start: Token, end: Token) -> str: 1523 return self.sql[start.start : end.end + 1] 1524 1525 def _is_connected(self) -> bool: 1526 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1527 1528 def _advance(self, times: int = 1) -> None: 1529 self._index += times 1530 self._curr = seq_get(self._tokens, self._index) 1531 self._next = seq_get(self._tokens, self._index + 1) 1532 1533 if self._index > 0: 1534 self._prev = self._tokens[self._index - 1] 1535 self._prev_comments = self._prev.comments 1536 else: 1537 self._prev = None 1538 self._prev_comments = None 1539 1540 def _retreat(self, index: int) -> None: 1541 if index != self._index: 1542 self._advance(index - self._index) 1543 1544 def _warn_unsupported(self) -> None: 1545 if len(self._tokens) <= 1: 1546 return 1547 1548 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1549 # interested in emitting a warning for the one being currently processed. 1550 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1551 1552 logger.warning( 1553 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1554 ) 1555 1556 def _parse_command(self) -> exp.Command: 1557 self._warn_unsupported() 1558 return self.expression( 1559 exp.Command, 1560 comments=self._prev_comments, 1561 this=self._prev.text.upper(), 1562 expression=self._parse_string(), 1563 ) 1564 1565 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1566 """ 1567 Attempts to backtrack if a parse function that contains a try/except internally raises an error. 
1568 This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims to 1569 solve this by setting & resetting the parser state accordingly. 1570 """ 1571 index = self._index 1572 error_level = self.error_level 1573 1574 self.error_level = ErrorLevel.IMMEDIATE 1575 try: 1576 this = parse_method() 1577 except ParseError: 1578 this = None 1579 finally: 1580 if not this or retreat: 1581 self._retreat(index) 1582 self.error_level = error_level 1583 1584 return this 1585 1586 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1587 start = self._prev 1588 exists = self._parse_exists() if allow_exists else None 1589 1590 self._match(TokenType.ON) 1591 1592 materialized = self._match_text_seq("MATERIALIZED") 1593 kind = self._match_set(self.CREATABLES) and self._prev 1594 if not kind: 1595 return self._parse_as_command(start) 1596 1597 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1598 this = self._parse_user_defined_function(kind=kind.token_type) 1599 elif kind.token_type == TokenType.TABLE: 1600 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1601 elif kind.token_type == TokenType.COLUMN: 1602 this = self._parse_column() 1603 else: 1604 this = self._parse_id_var() 1605 1606 self._match(TokenType.IS) 1607 1608 return self.expression( 1609 exp.Comment, 1610 this=this, 1611 kind=kind.text, 1612 expression=self._parse_string(), 1613 exists=exists, 1614 materialized=materialized, 1615 ) 1616 1617 def _parse_to_table( 1618 self, 1619 ) -> exp.ToTableProperty: 1620 table = self._parse_table_parts(schema=True) 1621 return self.expression(exp.ToTableProperty, this=table) 1622 1623 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1624 def _parse_ttl(self) -> exp.Expression: 1625 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1626 this = self._parse_bitwise() 1627 1628 if self._match_text_seq("DELETE"): 1629 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1630 if self._match_text_seq("RECOMPRESS"): 1631 return self.expression( 1632 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1633 ) 1634 if self._match_text_seq("TO", "DISK"): 1635 return self.expression( 1636 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1637 ) 1638 if self._match_text_seq("TO", "VOLUME"): 1639 return self.expression( 1640 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1641 ) 1642 1643 return this 1644 1645 expressions = self._parse_csv(_parse_ttl_action) 1646 where = self._parse_where() 1647 group = self._parse_group() 1648 1649 aggregates = None 1650 if group and self._match(TokenType.SET): 1651 aggregates = self._parse_csv(self._parse_set_item) 1652 1653 return self.expression( 1654 exp.MergeTreeTTL, 1655 expressions=expressions, 1656 where=where, 1657 group=group, 1658 aggregates=aggregates, 1659 ) 1660 1661 def _parse_statement(self) -> t.Optional[exp.Expression]: 1662 if self._curr is None: 1663 return None 1664 1665 if self._match_set(self.STATEMENT_PARSERS): 1666 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1667 1668 if self._match_set(self.dialect.tokenizer.COMMANDS): 1669 return self._parse_command() 1670 1671 expression = self._parse_expression() 1672 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1673 return self._parse_query_modifiers(expression) 1674 1675 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1676 start =
self._prev 1677 temporary = self._match(TokenType.TEMPORARY) 1678 materialized = self._match_text_seq("MATERIALIZED") 1679 1680 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1681 if not kind: 1682 return self._parse_as_command(start) 1683 1684 if_exists = exists or self._parse_exists() 1685 table = self._parse_table_parts( 1686 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1687 ) 1688 1689 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1690 1691 if self._match(TokenType.L_PAREN, advance=False): 1692 expressions = self._parse_wrapped_csv(self._parse_types) 1693 else: 1694 expressions = None 1695 1696 return self.expression( 1697 exp.Drop, 1698 comments=start.comments, 1699 exists=if_exists, 1700 this=table, 1701 expressions=expressions, 1702 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1703 temporary=temporary, 1704 materialized=materialized, 1705 cascade=self._match_text_seq("CASCADE"), 1706 constraints=self._match_text_seq("CONSTRAINTS"), 1707 purge=self._match_text_seq("PURGE"), 1708 cluster=cluster, 1709 ) 1710 1711 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1712 return ( 1713 self._match_text_seq("IF") 1714 and (not not_ or self._match(TokenType.NOT)) 1715 and self._match(TokenType.EXISTS) 1716 ) 1717 1718 def _parse_create(self) -> exp.Create | exp.Command: 1719 # Note: this can't be None because we've matched a statement parser 1720 start = self._prev 1721 comments = self._prev_comments 1722 1723 replace = ( 1724 start.token_type == TokenType.REPLACE 1725 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1726 or self._match_pair(TokenType.OR, TokenType.ALTER) 1727 ) 1728 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1729 1730 unique = self._match(TokenType.UNIQUE) 1731 1732 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1733 clustered = True 1734 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1735 "COLUMNSTORE" 1736 ): 1737 clustered = False 1738 else: 1739 clustered = None 1740 1741 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1742 self._advance() 1743 1744 properties = None 1745 create_token = self._match_set(self.CREATABLES) and self._prev 1746 1747 if not create_token: 1748 # exp.Properties.Location.POST_CREATE 1749 properties = self._parse_properties() 1750 create_token = self._match_set(self.CREATABLES) and self._prev 1751 1752 if not properties or not create_token: 1753 return self._parse_as_command(start) 1754 1755 concurrently = self._match_text_seq("CONCURRENTLY") 1756 exists = self._parse_exists(not_=True) 1757 this = None 1758 expression: t.Optional[exp.Expression] = None 1759 indexes = None 1760 no_schema_binding = None 1761 begin = None 1762 end = None 1763 clone = None 1764 1765 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1766 nonlocal properties 1767 if properties and temp_props: 1768 properties.expressions.extend(temp_props.expressions) 1769 elif temp_props: 1770 properties = temp_props 1771 1772 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1773 this = self._parse_user_defined_function(kind=create_token.token_type) 1774 1775 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1776 extend_props(self._parse_properties()) 1777 1778 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1779 extend_props(self._parse_properties()) 1780 1781 if not expression: 1782 if 
self._match(TokenType.COMMAND): 1783 expression = self._parse_as_command(self._prev) 1784 else: 1785 begin = self._match(TokenType.BEGIN) 1786 return_ = self._match_text_seq("RETURN") 1787 1788 if self._match(TokenType.STRING, advance=False): 1789 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1790 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1791 expression = self._parse_string() 1792 extend_props(self._parse_properties()) 1793 else: 1794 expression = self._parse_statement() 1795 1796 end = self._match_text_seq("END") 1797 1798 if return_: 1799 expression = self.expression(exp.Return, this=expression) 1800 elif create_token.token_type == TokenType.INDEX: 1801 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1802 if not self._match(TokenType.ON): 1803 index = self._parse_id_var() 1804 anonymous = False 1805 else: 1806 index = None 1807 anonymous = True 1808 1809 this = self._parse_index(index=index, anonymous=anonymous) 1810 elif create_token.token_type in self.DB_CREATABLES: 1811 table_parts = self._parse_table_parts( 1812 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1813 ) 1814 1815 # exp.Properties.Location.POST_NAME 1816 self._match(TokenType.COMMA) 1817 extend_props(self._parse_properties(before=True)) 1818 1819 this = self._parse_schema(this=table_parts) 1820 1821 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1822 extend_props(self._parse_properties()) 1823 1824 self._match(TokenType.ALIAS) 1825 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1826 # exp.Properties.Location.POST_ALIAS 1827 extend_props(self._parse_properties()) 1828 1829 if create_token.token_type == TokenType.SEQUENCE: 1830 expression = self._parse_types() 1831 extend_props(self._parse_properties()) 1832 else: 1833 expression = self._parse_ddl_select() 1834 1835 if create_token.token_type == TokenType.TABLE: 1836 # exp.Properties.Location.POST_EXPRESSION 1837 extend_props(self._parse_properties()) 1838 1839 indexes = [] 1840 while True: 1841 index = self._parse_index() 1842 1843 # exp.Properties.Location.POST_INDEX 1844 extend_props(self._parse_properties()) 1845 if not index: 1846 break 1847 else: 1848 self._match(TokenType.COMMA) 1849 indexes.append(index) 1850 elif create_token.token_type == TokenType.VIEW: 1851 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1852 no_schema_binding = True 1853 1854 shallow = self._match_text_seq("SHALLOW") 1855 1856 if self._match_texts(self.CLONE_KEYWORDS): 1857 copy = self._prev.text.lower() == "copy" 1858 clone = self.expression( 1859 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1860 ) 1861 1862 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1863 return self._parse_as_command(start) 1864 1865 create_kind_text = create_token.text.upper() 1866 return self.expression( 1867 exp.Create, 1868 comments=comments, 1869 this=this, 1870 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1871 replace=replace, 1872 refresh=refresh, 1873 unique=unique, 1874 expression=expression, 1875 exists=exists, 1876 properties=properties, 1877 indexes=indexes, 1878 no_schema_binding=no_schema_binding, 1879 begin=begin, 1880 end=end, 1881 clone=clone, 1882 concurrently=concurrently, 1883 clustered=clustered, 1884 ) 1885 1886 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
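        # (Editorial note, not part of the module) A minimal sketch of where this helper's
        # output ends up: the SequenceProperties node produced below is nested under the
        # Create expression's properties. parse_one, find and exp are public sqlglot APIs;
        # the statement is only an illustration.
        # >>> import sqlglot
        # >>> from sqlglot import exp
        # >>> create = sqlglot.parse_one("CREATE SEQUENCE s START WITH 10 INCREMENT BY 5")
        # >>> props = create.find(exp.SequenceProperties)
        # >>> props.args["start"].this, props.args["increment"].this
        # expected: ('10', '5')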
1887 seq = exp.SequenceProperties() 1888 1889 options = [] 1890 index = self._index 1891 1892 while self._curr: 1893 self._match(TokenType.COMMA) 1894 if self._match_text_seq("INCREMENT"): 1895 self._match_text_seq("BY") 1896 self._match_text_seq("=") 1897 seq.set("increment", self._parse_term()) 1898 elif self._match_text_seq("MINVALUE"): 1899 seq.set("minvalue", self._parse_term()) 1900 elif self._match_text_seq("MAXVALUE"): 1901 seq.set("maxvalue", self._parse_term()) 1902 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1903 self._match_text_seq("=") 1904 seq.set("start", self._parse_term()) 1905 elif self._match_text_seq("CACHE"): 1906 # T-SQL allows empty CACHE which is initialized dynamically 1907 seq.set("cache", self._parse_number() or True) 1908 elif self._match_text_seq("OWNED", "BY"): 1909 # "OWNED BY NONE" is the default 1910 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1911 else: 1912 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1913 if opt: 1914 options.append(opt) 1915 else: 1916 break 1917 1918 seq.set("options", options if options else None) 1919 return None if self._index == index else seq 1920 1921 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1922 # only used for teradata currently 1923 self._match(TokenType.COMMA) 1924 1925 kwargs = { 1926 "no": self._match_text_seq("NO"), 1927 "dual": self._match_text_seq("DUAL"), 1928 "before": self._match_text_seq("BEFORE"), 1929 "default": self._match_text_seq("DEFAULT"), 1930 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1931 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1932 "after": self._match_text_seq("AFTER"), 1933 "minimum": self._match_texts(("MIN", "MINIMUM")), 1934 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1935 } 1936 1937 if self._match_texts(self.PROPERTY_PARSERS): 1938 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1939 try: 1940 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1941 except TypeError: 1942 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1943 1944 return None 1945 1946 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1947 return self._parse_wrapped_csv(self._parse_property) 1948 1949 def _parse_property(self) -> t.Optional[exp.Expression]: 1950 if self._match_texts(self.PROPERTY_PARSERS): 1951 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1952 1953 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1954 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1955 1956 if self._match_text_seq("COMPOUND", "SORTKEY"): 1957 return self._parse_sortkey(compound=True) 1958 1959 if self._match_text_seq("SQL", "SECURITY"): 1960 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1961 1962 index = self._index 1963 key = self._parse_column() 1964 1965 if not self._match(TokenType.EQ): 1966 self._retreat(index) 1967 return self._parse_sequence_properties() 1968 1969 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1970 if isinstance(key, exp.Column): 1971 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1972 1973 value = self._parse_bitwise() or self._parse_var(any_token=True) 1974 1975 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1976 if isinstance(value, exp.Column): 1977 value = exp.var(value.name) 1978 1979 return 
self.expression(exp.Property, this=key, value=value) 1980 1981 def _parse_stored(self) -> exp.FileFormatProperty: 1982 self._match(TokenType.ALIAS) 1983 1984 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1985 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1986 1987 return self.expression( 1988 exp.FileFormatProperty, 1989 this=( 1990 self.expression( 1991 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1992 ) 1993 if input_format or output_format 1994 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1995 ), 1996 ) 1997 1998 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1999 field = self._parse_field() 2000 if isinstance(field, exp.Identifier) and not field.quoted: 2001 field = exp.var(field) 2002 2003 return field 2004 2005 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2006 self._match(TokenType.EQ) 2007 self._match(TokenType.ALIAS) 2008 2009 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2010 2011 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2012 properties = [] 2013 while True: 2014 if before: 2015 prop = self._parse_property_before() 2016 else: 2017 prop = self._parse_property() 2018 if not prop: 2019 break 2020 for p in ensure_list(prop): 2021 properties.append(p) 2022 2023 if properties: 2024 return self.expression(exp.Properties, expressions=properties) 2025 2026 return None 2027 2028 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2029 return self.expression( 2030 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2031 ) 2032 2033 def _parse_settings_property(self) -> exp.SettingsProperty: 2034 return self.expression( 2035 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2036 ) 2037 2038 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2039 if self._index >= 2: 2040 pre_volatile_token = self._tokens[self._index - 2] 2041 else: 2042 pre_volatile_token = None 2043 2044 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2045 return exp.VolatileProperty() 2046 2047 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2048 2049 def _parse_retention_period(self) -> exp.Var: 2050 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2051 number = self._parse_number() 2052 number_str = f"{number} " if number else "" 2053 unit = self._parse_var(any_token=True) 2054 return exp.var(f"{number_str}{unit}") 2055 2056 def _parse_system_versioning_property( 2057 self, with_: bool = False 2058 ) -> exp.WithSystemVersioningProperty: 2059 self._match(TokenType.EQ) 2060 prop = self.expression( 2061 exp.WithSystemVersioningProperty, 2062 **{ # type: ignore 2063 "on": True, 2064 "with": with_, 2065 }, 2066 ) 2067 2068 if self._match_text_seq("OFF"): 2069 prop.set("on", False) 2070 return prop 2071 2072 self._match(TokenType.ON) 2073 if self._match(TokenType.L_PAREN): 2074 while self._curr and not self._match(TokenType.R_PAREN): 2075 if self._match_text_seq("HISTORY_TABLE", "="): 2076 prop.set("this", self._parse_table_parts()) 2077 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2078 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2079 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2080 
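                    # (Editorial) T-SQL shape this branch targets, with hypothetical names:
                    # WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.History,
                    # HISTORY_RETENTION_PERIOD = 3 MONTHS)); _parse_retention_period keeps
                    # the period as a single exp.Var such as '3 MONTHS'.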
prop.set("retention_period", self._parse_retention_period()) 2081 2082 self._match(TokenType.COMMA) 2083 2084 return prop 2085 2086 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2087 self._match(TokenType.EQ) 2088 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2089 prop = self.expression(exp.DataDeletionProperty, on=on) 2090 2091 if self._match(TokenType.L_PAREN): 2092 while self._curr and not self._match(TokenType.R_PAREN): 2093 if self._match_text_seq("FILTER_COLUMN", "="): 2094 prop.set("filter_column", self._parse_column()) 2095 elif self._match_text_seq("RETENTION_PERIOD", "="): 2096 prop.set("retention_period", self._parse_retention_period()) 2097 2098 self._match(TokenType.COMMA) 2099 2100 return prop 2101 2102 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2103 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2104 prop = self._parse_system_versioning_property(with_=True) 2105 self._match_r_paren() 2106 return prop 2107 2108 if self._match(TokenType.L_PAREN, advance=False): 2109 return self._parse_wrapped_properties() 2110 2111 if self._match_text_seq("JOURNAL"): 2112 return self._parse_withjournaltable() 2113 2114 if self._match_texts(self.VIEW_ATTRIBUTES): 2115 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2116 2117 if self._match_text_seq("DATA"): 2118 return self._parse_withdata(no=False) 2119 elif self._match_text_seq("NO", "DATA"): 2120 return self._parse_withdata(no=True) 2121 2122 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2123 return self._parse_serde_properties(with_=True) 2124 2125 if self._match(TokenType.SCHEMA): 2126 return self.expression( 2127 exp.WithSchemaBindingProperty, 2128 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2129 ) 2130 2131 if not self._next: 2132 return None 2133 2134 return self._parse_withisolatedloading() 2135 2136 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2137 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2138 self._match(TokenType.EQ) 2139 2140 user = self._parse_id_var() 2141 self._match(TokenType.PARAMETER) 2142 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2143 2144 if not user or not host: 2145 return None 2146 2147 return exp.DefinerProperty(this=f"{user}@{host}") 2148 2149 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2150 self._match(TokenType.TABLE) 2151 self._match(TokenType.EQ) 2152 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2153 2154 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2155 return self.expression(exp.LogProperty, no=no) 2156 2157 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2158 return self.expression(exp.JournalProperty, **kwargs) 2159 2160 def _parse_checksum(self) -> exp.ChecksumProperty: 2161 self._match(TokenType.EQ) 2162 2163 on = None 2164 if self._match(TokenType.ON): 2165 on = True 2166 elif self._match_text_seq("OFF"): 2167 on = False 2168 2169 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2170 2171 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2172 return self.expression( 2173 exp.Cluster, 2174 expressions=( 2175 self._parse_wrapped_csv(self._parse_ordered) 2176 if wrapped 2177 else self._parse_csv(self._parse_ordered) 2178 ), 2179 ) 2180 2181 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2182 self._match_text_seq("BY") 2183 2184 
self._match_l_paren() 2185 expressions = self._parse_csv(self._parse_column) 2186 self._match_r_paren() 2187 2188 if self._match_text_seq("SORTED", "BY"): 2189 self._match_l_paren() 2190 sorted_by = self._parse_csv(self._parse_ordered) 2191 self._match_r_paren() 2192 else: 2193 sorted_by = None 2194 2195 self._match(TokenType.INTO) 2196 buckets = self._parse_number() 2197 self._match_text_seq("BUCKETS") 2198 2199 return self.expression( 2200 exp.ClusteredByProperty, 2201 expressions=expressions, 2202 sorted_by=sorted_by, 2203 buckets=buckets, 2204 ) 2205 2206 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2207 if not self._match_text_seq("GRANTS"): 2208 self._retreat(self._index - 1) 2209 return None 2210 2211 return self.expression(exp.CopyGrantsProperty) 2212 2213 def _parse_freespace(self) -> exp.FreespaceProperty: 2214 self._match(TokenType.EQ) 2215 return self.expression( 2216 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2217 ) 2218 2219 def _parse_mergeblockratio( 2220 self, no: bool = False, default: bool = False 2221 ) -> exp.MergeBlockRatioProperty: 2222 if self._match(TokenType.EQ): 2223 return self.expression( 2224 exp.MergeBlockRatioProperty, 2225 this=self._parse_number(), 2226 percent=self._match(TokenType.PERCENT), 2227 ) 2228 2229 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2230 2231 def _parse_datablocksize( 2232 self, 2233 default: t.Optional[bool] = None, 2234 minimum: t.Optional[bool] = None, 2235 maximum: t.Optional[bool] = None, 2236 ) -> exp.DataBlocksizeProperty: 2237 self._match(TokenType.EQ) 2238 size = self._parse_number() 2239 2240 units = None 2241 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2242 units = self._prev.text 2243 2244 return self.expression( 2245 exp.DataBlocksizeProperty, 2246 size=size, 2247 units=units, 2248 default=default, 2249 minimum=minimum, 2250 maximum=maximum, 2251 ) 2252 2253 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2254 self._match(TokenType.EQ) 2255 always = self._match_text_seq("ALWAYS") 2256 manual = self._match_text_seq("MANUAL") 2257 never = self._match_text_seq("NEVER") 2258 default = self._match_text_seq("DEFAULT") 2259 2260 autotemp = None 2261 if self._match_text_seq("AUTOTEMP"): 2262 autotemp = self._parse_schema() 2263 2264 return self.expression( 2265 exp.BlockCompressionProperty, 2266 always=always, 2267 manual=manual, 2268 never=never, 2269 default=default, 2270 autotemp=autotemp, 2271 ) 2272 2273 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2274 index = self._index 2275 no = self._match_text_seq("NO") 2276 concurrent = self._match_text_seq("CONCURRENT") 2277 2278 if not self._match_text_seq("ISOLATED", "LOADING"): 2279 self._retreat(index) 2280 return None 2281 2282 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2283 return self.expression( 2284 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2285 ) 2286 2287 def _parse_locking(self) -> exp.LockingProperty: 2288 if self._match(TokenType.TABLE): 2289 kind = "TABLE" 2290 elif self._match(TokenType.VIEW): 2291 kind = "VIEW" 2292 elif self._match(TokenType.ROW): 2293 kind = "ROW" 2294 elif self._match_text_seq("DATABASE"): 2295 kind = "DATABASE" 2296 else: 2297 kind = None 2298 2299 if kind in ("DATABASE", "TABLE", "VIEW"): 2300 this = self._parse_table_parts() 2301 else: 2302 this = None 2303 2304 if self._match(TokenType.FOR): 2305 
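            # (Editorial) Teradata locking request modifier, e.g. LOCKING TABLE t FOR ACCESS;
            # the FOR/IN keyword and the lock type are captured below.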
for_or_in = "FOR" 2306 elif self._match(TokenType.IN): 2307 for_or_in = "IN" 2308 else: 2309 for_or_in = None 2310 2311 if self._match_text_seq("ACCESS"): 2312 lock_type = "ACCESS" 2313 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2314 lock_type = "EXCLUSIVE" 2315 elif self._match_text_seq("SHARE"): 2316 lock_type = "SHARE" 2317 elif self._match_text_seq("READ"): 2318 lock_type = "READ" 2319 elif self._match_text_seq("WRITE"): 2320 lock_type = "WRITE" 2321 elif self._match_text_seq("CHECKSUM"): 2322 lock_type = "CHECKSUM" 2323 else: 2324 lock_type = None 2325 2326 override = self._match_text_seq("OVERRIDE") 2327 2328 return self.expression( 2329 exp.LockingProperty, 2330 this=this, 2331 kind=kind, 2332 for_or_in=for_or_in, 2333 lock_type=lock_type, 2334 override=override, 2335 ) 2336 2337 def _parse_partition_by(self) -> t.List[exp.Expression]: 2338 if self._match(TokenType.PARTITION_BY): 2339 return self._parse_csv(self._parse_assignment) 2340 return [] 2341 2342 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2343 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2344 if self._match_text_seq("MINVALUE"): 2345 return exp.var("MINVALUE") 2346 if self._match_text_seq("MAXVALUE"): 2347 return exp.var("MAXVALUE") 2348 return self._parse_bitwise() 2349 2350 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2351 expression = None 2352 from_expressions = None 2353 to_expressions = None 2354 2355 if self._match(TokenType.IN): 2356 this = self._parse_wrapped_csv(self._parse_bitwise) 2357 elif self._match(TokenType.FROM): 2358 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2359 self._match_text_seq("TO") 2360 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2361 elif self._match_text_seq("WITH", "(", "MODULUS"): 2362 this = self._parse_number() 2363 self._match_text_seq(",", "REMAINDER") 2364 expression = self._parse_number() 2365 self._match_r_paren() 2366 else: 2367 self.raise_error("Failed to parse partition bound spec.") 2368 2369 return self.expression( 2370 exp.PartitionBoundSpec, 2371 this=this, 2372 expression=expression, 2373 from_expressions=from_expressions, 2374 to_expressions=to_expressions, 2375 ) 2376 2377 # https://www.postgresql.org/docs/current/sql-createtable.html 2378 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2379 if not self._match_text_seq("OF"): 2380 self._retreat(self._index - 1) 2381 return None 2382 2383 this = self._parse_table(schema=True) 2384 2385 if self._match(TokenType.DEFAULT): 2386 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2387 elif self._match_text_seq("FOR", "VALUES"): 2388 expression = self._parse_partition_bound_spec() 2389 else: 2390 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2391 2392 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2393 2394 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2395 self._match(TokenType.EQ) 2396 return self.expression( 2397 exp.PartitionedByProperty, 2398 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2399 ) 2400 2401 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2402 if self._match_text_seq("AND", "STATISTICS"): 2403 statistics = True 2404 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2405 statistics = False 2406 else: 2407 statistics = None 2408 2409 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2410 2411 def 
_parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2412 if self._match_text_seq("SQL"): 2413 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2414 return None 2415 2416 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2417 if self._match_text_seq("SQL", "DATA"): 2418 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2419 return None 2420 2421 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2422 if self._match_text_seq("PRIMARY", "INDEX"): 2423 return exp.NoPrimaryIndexProperty() 2424 if self._match_text_seq("SQL"): 2425 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2426 return None 2427 2428 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2429 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2430 return exp.OnCommitProperty() 2431 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2432 return exp.OnCommitProperty(delete=True) 2433 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2434 2435 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2436 if self._match_text_seq("SQL", "DATA"): 2437 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2438 return None 2439 2440 def _parse_distkey(self) -> exp.DistKeyProperty: 2441 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2442 2443 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2444 table = self._parse_table(schema=True) 2445 2446 options = [] 2447 while self._match_texts(("INCLUDING", "EXCLUDING")): 2448 this = self._prev.text.upper() 2449 2450 id_var = self._parse_id_var() 2451 if not id_var: 2452 return None 2453 2454 options.append( 2455 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2456 ) 2457 2458 return self.expression(exp.LikeProperty, this=table, expressions=options) 2459 2460 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2461 return self.expression( 2462 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2463 ) 2464 2465 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2466 self._match(TokenType.EQ) 2467 return self.expression( 2468 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2469 ) 2470 2471 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2472 self._match_text_seq("WITH", "CONNECTION") 2473 return self.expression( 2474 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2475 ) 2476 2477 def _parse_returns(self) -> exp.ReturnsProperty: 2478 value: t.Optional[exp.Expression] 2479 null = None 2480 is_table = self._match(TokenType.TABLE) 2481 2482 if is_table: 2483 if self._match(TokenType.LT): 2484 value = self.expression( 2485 exp.Schema, 2486 this="TABLE", 2487 expressions=self._parse_csv(self._parse_struct_types), 2488 ) 2489 if not self._match(TokenType.GT): 2490 self.raise_error("Expecting >") 2491 else: 2492 value = self._parse_schema(exp.var("TABLE")) 2493 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2494 null = True 2495 value = None 2496 else: 2497 value = self._parse_types() 2498 2499 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2500 2501 def _parse_describe(self) -> exp.Describe: 2502 kind = self._match_set(self.CREATABLES) and self._prev.text 2503 style = self._match_texts(("EXTENDED", "FORMATTED", 
"HISTORY")) and self._prev.text.upper() 2504 if self._match(TokenType.DOT): 2505 style = None 2506 self._retreat(self._index - 2) 2507 this = self._parse_table(schema=True) 2508 properties = self._parse_properties() 2509 expressions = properties.expressions if properties else None 2510 partition = self._parse_partition() 2511 return self.expression( 2512 exp.Describe, 2513 this=this, 2514 style=style, 2515 kind=kind, 2516 expressions=expressions, 2517 partition=partition, 2518 ) 2519 2520 def _parse_insert(self) -> exp.Insert: 2521 comments = ensure_list(self._prev_comments) 2522 hint = self._parse_hint() 2523 overwrite = self._match(TokenType.OVERWRITE) 2524 ignore = self._match(TokenType.IGNORE) 2525 local = self._match_text_seq("LOCAL") 2526 alternative = None 2527 is_function = None 2528 2529 if self._match_text_seq("DIRECTORY"): 2530 this: t.Optional[exp.Expression] = self.expression( 2531 exp.Directory, 2532 this=self._parse_var_or_string(), 2533 local=local, 2534 row_format=self._parse_row_format(match_row=True), 2535 ) 2536 else: 2537 if self._match(TokenType.OR): 2538 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2539 2540 self._match(TokenType.INTO) 2541 comments += ensure_list(self._prev_comments) 2542 self._match(TokenType.TABLE) 2543 is_function = self._match(TokenType.FUNCTION) 2544 2545 this = ( 2546 self._parse_table(schema=True, parse_partition=True) 2547 if not is_function 2548 else self._parse_function() 2549 ) 2550 2551 returning = self._parse_returning() 2552 2553 return self.expression( 2554 exp.Insert, 2555 comments=comments, 2556 hint=hint, 2557 is_function=is_function, 2558 this=this, 2559 stored=self._match_text_seq("STORED") and self._parse_stored(), 2560 by_name=self._match_text_seq("BY", "NAME"), 2561 exists=self._parse_exists(), 2562 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2563 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2564 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2565 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2566 conflict=self._parse_on_conflict(), 2567 returning=returning or self._parse_returning(), 2568 overwrite=overwrite, 2569 alternative=alternative, 2570 ignore=ignore, 2571 ) 2572 2573 def _parse_kill(self) -> exp.Kill: 2574 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2575 2576 return self.expression( 2577 exp.Kill, 2578 this=self._parse_primary(), 2579 kind=kind, 2580 ) 2581 2582 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2583 conflict = self._match_text_seq("ON", "CONFLICT") 2584 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2585 2586 if not conflict and not duplicate: 2587 return None 2588 2589 conflict_keys = None 2590 constraint = None 2591 2592 if conflict: 2593 if self._match_text_seq("ON", "CONSTRAINT"): 2594 constraint = self._parse_id_var() 2595 elif self._match(TokenType.L_PAREN): 2596 conflict_keys = self._parse_csv(self._parse_id_var) 2597 self._match_r_paren() 2598 2599 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2600 if self._prev.token_type == TokenType.UPDATE: 2601 self._match(TokenType.SET) 2602 expressions = self._parse_csv(self._parse_equality) 2603 else: 2604 expressions = None 2605 2606 return self.expression( 2607 exp.OnConflict, 2608 duplicate=duplicate, 2609 expressions=expressions, 2610 action=action, 2611 conflict_keys=conflict_keys, 2612 
constraint=constraint, 2613 ) 2614 2615 def _parse_returning(self) -> t.Optional[exp.Returning]: 2616 if not self._match(TokenType.RETURNING): 2617 return None 2618 return self.expression( 2619 exp.Returning, 2620 expressions=self._parse_csv(self._parse_expression), 2621 into=self._match(TokenType.INTO) and self._parse_table_part(), 2622 ) 2623 2624 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2625 if not self._match(TokenType.FORMAT): 2626 return None 2627 return self._parse_row_format() 2628 2629 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2630 index = self._index 2631 with_ = with_ or self._match_text_seq("WITH") 2632 2633 if not self._match(TokenType.SERDE_PROPERTIES): 2634 self._retreat(index) 2635 return None 2636 return self.expression( 2637 exp.SerdeProperties, 2638 **{ # type: ignore 2639 "expressions": self._parse_wrapped_properties(), 2640 "with": with_, 2641 }, 2642 ) 2643 2644 def _parse_row_format( 2645 self, match_row: bool = False 2646 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2647 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2648 return None 2649 2650 if self._match_text_seq("SERDE"): 2651 this = self._parse_string() 2652 2653 serde_properties = self._parse_serde_properties() 2654 2655 return self.expression( 2656 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2657 ) 2658 2659 self._match_text_seq("DELIMITED") 2660 2661 kwargs = {} 2662 2663 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2664 kwargs["fields"] = self._parse_string() 2665 if self._match_text_seq("ESCAPED", "BY"): 2666 kwargs["escaped"] = self._parse_string() 2667 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2668 kwargs["collection_items"] = self._parse_string() 2669 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2670 kwargs["map_keys"] = self._parse_string() 2671 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2672 kwargs["lines"] = self._parse_string() 2673 if self._match_text_seq("NULL", "DEFINED", "AS"): 2674 kwargs["null"] = self._parse_string() 2675 2676 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2677 2678 def _parse_load(self) -> exp.LoadData | exp.Command: 2679 if self._match_text_seq("DATA"): 2680 local = self._match_text_seq("LOCAL") 2681 self._match_text_seq("INPATH") 2682 inpath = self._parse_string() 2683 overwrite = self._match(TokenType.OVERWRITE) 2684 self._match_pair(TokenType.INTO, TokenType.TABLE) 2685 2686 return self.expression( 2687 exp.LoadData, 2688 this=self._parse_table(schema=True), 2689 local=local, 2690 overwrite=overwrite, 2691 inpath=inpath, 2692 partition=self._parse_partition(), 2693 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2694 serde=self._match_text_seq("SERDE") and self._parse_string(), 2695 ) 2696 return self._parse_as_command(self._prev) 2697 2698 def _parse_delete(self) -> exp.Delete: 2699 # This handles MySQL's "Multiple-Table Syntax" 2700 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2701 tables = None 2702 comments = self._prev_comments 2703 if not self._match(TokenType.FROM, advance=False): 2704 tables = self._parse_csv(self._parse_table) or None 2705 2706 returning = self._parse_returning() 2707 2708 return self.expression( 2709 exp.Delete, 2710 comments=comments, 2711 tables=tables, 2712 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 
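            # (Editorial) "using" captures Postgres' DELETE FROM ... USING <tables> form.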
2713 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2714 where=self._parse_where(), 2715 returning=returning or self._parse_returning(), 2716 limit=self._parse_limit(), 2717 ) 2718 2719 def _parse_update(self) -> exp.Update: 2720 comments = self._prev_comments 2721 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2722 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2723 returning = self._parse_returning() 2724 return self.expression( 2725 exp.Update, 2726 comments=comments, 2727 **{ # type: ignore 2728 "this": this, 2729 "expressions": expressions, 2730 "from": self._parse_from(joins=True), 2731 "where": self._parse_where(), 2732 "returning": returning or self._parse_returning(), 2733 "order": self._parse_order(), 2734 "limit": self._parse_limit(), 2735 }, 2736 ) 2737 2738 def _parse_uncache(self) -> exp.Uncache: 2739 if not self._match(TokenType.TABLE): 2740 self.raise_error("Expecting TABLE after UNCACHE") 2741 2742 return self.expression( 2743 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2744 ) 2745 2746 def _parse_cache(self) -> exp.Cache: 2747 lazy = self._match_text_seq("LAZY") 2748 self._match(TokenType.TABLE) 2749 table = self._parse_table(schema=True) 2750 2751 options = [] 2752 if self._match_text_seq("OPTIONS"): 2753 self._match_l_paren() 2754 k = self._parse_string() 2755 self._match(TokenType.EQ) 2756 v = self._parse_string() 2757 options = [k, v] 2758 self._match_r_paren() 2759 2760 self._match(TokenType.ALIAS) 2761 return self.expression( 2762 exp.Cache, 2763 this=table, 2764 lazy=lazy, 2765 options=options, 2766 expression=self._parse_select(nested=True), 2767 ) 2768 2769 def _parse_partition(self) -> t.Optional[exp.Partition]: 2770 if not self._match(TokenType.PARTITION): 2771 return None 2772 2773 return self.expression( 2774 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2775 ) 2776 2777 def _parse_value(self) -> t.Optional[exp.Tuple]: 2778 if self._match(TokenType.L_PAREN): 2779 expressions = self._parse_csv(self._parse_expression) 2780 self._match_r_paren() 2781 return self.expression(exp.Tuple, expressions=expressions) 2782 2783 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
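        # (Editorial) e.g. VALUES 1, 2 with no parentheses: each bare scalar falls through
        # to the branch below and becomes its own single-column exp.Tuple row.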
2784 expression = self._parse_expression() 2785 if expression: 2786 return self.expression(exp.Tuple, expressions=[expression]) 2787 return None 2788 2789 def _parse_projections(self) -> t.List[exp.Expression]: 2790 return self._parse_expressions() 2791 2792 def _parse_select( 2793 self, 2794 nested: bool = False, 2795 table: bool = False, 2796 parse_subquery_alias: bool = True, 2797 parse_set_operation: bool = True, 2798 ) -> t.Optional[exp.Expression]: 2799 cte = self._parse_with() 2800 2801 if cte: 2802 this = self._parse_statement() 2803 2804 if not this: 2805 self.raise_error("Failed to parse any statement following CTE") 2806 return cte 2807 2808 if "with" in this.arg_types: 2809 this.set("with", cte) 2810 else: 2811 self.raise_error(f"{this.key} does not support CTE") 2812 this = cte 2813 2814 return this 2815 2816 # duckdb supports leading with FROM x 2817 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2818 2819 if self._match(TokenType.SELECT): 2820 comments = self._prev_comments 2821 2822 hint = self._parse_hint() 2823 2824 if self._next and not self._next.token_type == TokenType.DOT: 2825 all_ = self._match(TokenType.ALL) 2826 distinct = self._match_set(self.DISTINCT_TOKENS) 2827 else: 2828 all_, distinct = None, None 2829 2830 kind = ( 2831 self._match(TokenType.ALIAS) 2832 and self._match_texts(("STRUCT", "VALUE")) 2833 and self._prev.text.upper() 2834 ) 2835 2836 if distinct: 2837 distinct = self.expression( 2838 exp.Distinct, 2839 on=self._parse_value() if self._match(TokenType.ON) else None, 2840 ) 2841 2842 if all_ and distinct: 2843 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2844 2845 limit = self._parse_limit(top=True) 2846 projections = self._parse_projections() 2847 2848 this = self.expression( 2849 exp.Select, 2850 kind=kind, 2851 hint=hint, 2852 distinct=distinct, 2853 expressions=projections, 2854 limit=limit, 2855 ) 2856 this.comments = comments 2857 2858 into = self._parse_into() 2859 if into: 2860 this.set("into", into) 2861 2862 if not from_: 2863 from_ = self._parse_from() 2864 2865 if from_: 2866 this.set("from", from_) 2867 2868 this = self._parse_query_modifiers(this) 2869 elif (table or nested) and self._match(TokenType.L_PAREN): 2870 if self._match(TokenType.PIVOT): 2871 this = self._parse_simplified_pivot() 2872 elif self._match(TokenType.FROM): 2873 this = exp.select("*").from_( 2874 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2875 ) 2876 else: 2877 this = ( 2878 self._parse_table() 2879 if table 2880 else self._parse_select(nested=True, parse_set_operation=False) 2881 ) 2882 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2883 2884 self._match_r_paren() 2885 2886 # We return early here so that the UNION isn't attached to the subquery by the 2887 # following call to _parse_set_operations, but instead becomes the parent node 2888 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2889 elif self._match(TokenType.VALUES, advance=False): 2890 this = self._parse_derived_table_values() 2891 elif from_: 2892 this = exp.select("*").from_(from_.this, copy=False) 2893 elif self._match(TokenType.SUMMARIZE): 2894 table = self._match(TokenType.TABLE) 2895 this = self._parse_select() or self._parse_string() or self._parse_table() 2896 return self.expression(exp.Summarize, this=this, table=table) 2897 elif self._match(TokenType.DESCRIBE): 2898 this = self._parse_describe() 2899 elif self._match_text_seq("STREAM"): 2900 this = self.expression(exp.Stream, 
this=self._parse_function()) 2901 else: 2902 this = None 2903 2904 return self._parse_set_operations(this) if parse_set_operation else this 2905 2906 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2907 if not skip_with_token and not self._match(TokenType.WITH): 2908 return None 2909 2910 comments = self._prev_comments 2911 recursive = self._match(TokenType.RECURSIVE) 2912 2913 expressions = [] 2914 while True: 2915 expressions.append(self._parse_cte()) 2916 2917 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2918 break 2919 else: 2920 self._match(TokenType.WITH) 2921 2922 return self.expression( 2923 exp.With, comments=comments, expressions=expressions, recursive=recursive 2924 ) 2925 2926 def _parse_cte(self) -> exp.CTE: 2927 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2928 if not alias or not alias.this: 2929 self.raise_error("Expected CTE to have alias") 2930 2931 self._match(TokenType.ALIAS) 2932 comments = self._prev_comments 2933 2934 if self._match_text_seq("NOT", "MATERIALIZED"): 2935 materialized = False 2936 elif self._match_text_seq("MATERIALIZED"): 2937 materialized = True 2938 else: 2939 materialized = None 2940 2941 return self.expression( 2942 exp.CTE, 2943 this=self._parse_wrapped(self._parse_statement), 2944 alias=alias, 2945 materialized=materialized, 2946 comments=comments, 2947 ) 2948 2949 def _parse_table_alias( 2950 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2951 ) -> t.Optional[exp.TableAlias]: 2952 any_token = self._match(TokenType.ALIAS) 2953 alias = ( 2954 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2955 or self._parse_string_as_identifier() 2956 ) 2957 2958 index = self._index 2959 if self._match(TokenType.L_PAREN): 2960 columns = self._parse_csv(self._parse_function_parameter) 2961 self._match_r_paren() if columns else self._retreat(index) 2962 else: 2963 columns = None 2964 2965 if not alias and not columns: 2966 return None 2967 2968 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2969 2970 # We bubble up comments from the Identifier to the TableAlias 2971 if isinstance(alias, exp.Identifier): 2972 table_alias.add_comments(alias.pop_comments()) 2973 2974 return table_alias 2975 2976 def _parse_subquery( 2977 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2978 ) -> t.Optional[exp.Subquery]: 2979 if not this: 2980 return None 2981 2982 return self.expression( 2983 exp.Subquery, 2984 this=this, 2985 pivots=self._parse_pivots(), 2986 alias=self._parse_table_alias() if parse_alias else None, 2987 ) 2988 2989 def _implicit_unnests_to_explicit(self, this: E) -> E: 2990 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2991 2992 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2993 for i, join in enumerate(this.args.get("joins") or []): 2994 table = join.this 2995 normalized_table = table.copy() 2996 normalized_table.meta["maybe_column"] = True 2997 normalized_table = _norm(normalized_table, dialect=self.dialect) 2998 2999 if isinstance(table, exp.Table) and not join.args.get("on"): 3000 if normalized_table.parts[0].name in refs: 3001 table_as_column = table.to_column() 3002 unnest = exp.Unnest(expressions=[table_as_column]) 3003 3004 # Table.to_column creates a parent Alias node that we want to convert to 3005 # a TableAlias and attach to the Unnest, so it matches the parser's output 3006 if isinstance(table.args.get("alias"), 
exp.TableAlias): 3007 table_as_column.replace(table_as_column.this) 3008 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3009 3010 table.replace(unnest) 3011 3012 refs.add(normalized_table.alias_or_name) 3013 3014 return this 3015 3016 def _parse_query_modifiers( 3017 self, this: t.Optional[exp.Expression] 3018 ) -> t.Optional[exp.Expression]: 3019 if isinstance(this, (exp.Query, exp.Table)): 3020 for join in self._parse_joins(): 3021 this.append("joins", join) 3022 for lateral in iter(self._parse_lateral, None): 3023 this.append("laterals", lateral) 3024 3025 while True: 3026 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3027 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3028 key, expression = parser(self) 3029 3030 if expression: 3031 this.set(key, expression) 3032 if key == "limit": 3033 offset = expression.args.pop("offset", None) 3034 3035 if offset: 3036 offset = exp.Offset(expression=offset) 3037 this.set("offset", offset) 3038 3039 limit_by_expressions = expression.expressions 3040 expression.set("expressions", None) 3041 offset.set("expressions", limit_by_expressions) 3042 continue 3043 break 3044 3045 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3046 this = self._implicit_unnests_to_explicit(this) 3047 3048 return this 3049 3050 def _parse_hint(self) -> t.Optional[exp.Hint]: 3051 if self._match(TokenType.HINT): 3052 hints = [] 3053 for hint in iter( 3054 lambda: self._parse_csv( 3055 lambda: self._parse_function() or self._parse_var(upper=True) 3056 ), 3057 [], 3058 ): 3059 hints.extend(hint) 3060 3061 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3062 self.raise_error("Expected */ after HINT") 3063 3064 return self.expression(exp.Hint, expressions=hints) 3065 3066 return None 3067 3068 def _parse_into(self) -> t.Optional[exp.Into]: 3069 if not self._match(TokenType.INTO): 3070 return None 3071 3072 temp = self._match(TokenType.TEMPORARY) 3073 unlogged = self._match_text_seq("UNLOGGED") 3074 self._match(TokenType.TABLE) 3075 3076 return self.expression( 3077 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3078 ) 3079 3080 def _parse_from( 3081 self, joins: bool = False, skip_from_token: bool = False 3082 ) -> t.Optional[exp.From]: 3083 if not skip_from_token and not self._match(TokenType.FROM): 3084 return None 3085 3086 return self.expression( 3087 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3088 ) 3089 3090 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3091 return self.expression( 3092 exp.MatchRecognizeMeasure, 3093 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3094 this=self._parse_expression(), 3095 ) 3096 3097 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3098 if not self._match(TokenType.MATCH_RECOGNIZE): 3099 return None 3100 3101 self._match_l_paren() 3102 3103 partition = self._parse_partition_by() 3104 order = self._parse_order() 3105 3106 measures = ( 3107 self._parse_csv(self._parse_match_recognize_measure) 3108 if self._match_text_seq("MEASURES") 3109 else None 3110 ) 3111 3112 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3113 rows = exp.var("ONE ROW PER MATCH") 3114 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3115 text = "ALL ROWS PER MATCH" 3116 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3117 text += " SHOW EMPTY MATCHES" 3118 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3119 text += " 
OMIT EMPTY MATCHES" 3120 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3121 text += " WITH UNMATCHED ROWS" 3122 rows = exp.var(text) 3123 else: 3124 rows = None 3125 3126 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3127 text = "AFTER MATCH SKIP" 3128 if self._match_text_seq("PAST", "LAST", "ROW"): 3129 text += " PAST LAST ROW" 3130 elif self._match_text_seq("TO", "NEXT", "ROW"): 3131 text += " TO NEXT ROW" 3132 elif self._match_text_seq("TO", "FIRST"): 3133 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3134 elif self._match_text_seq("TO", "LAST"): 3135 text += f" TO LAST {self._advance_any().text}" # type: ignore 3136 after = exp.var(text) 3137 else: 3138 after = None 3139 3140 if self._match_text_seq("PATTERN"): 3141 self._match_l_paren() 3142 3143 if not self._curr: 3144 self.raise_error("Expecting )", self._curr) 3145 3146 paren = 1 3147 start = self._curr 3148 3149 while self._curr and paren > 0: 3150 if self._curr.token_type == TokenType.L_PAREN: 3151 paren += 1 3152 if self._curr.token_type == TokenType.R_PAREN: 3153 paren -= 1 3154 3155 end = self._prev 3156 self._advance() 3157 3158 if paren > 0: 3159 self.raise_error("Expecting )", self._curr) 3160 3161 pattern = exp.var(self._find_sql(start, end)) 3162 else: 3163 pattern = None 3164 3165 define = ( 3166 self._parse_csv(self._parse_name_as_expression) 3167 if self._match_text_seq("DEFINE") 3168 else None 3169 ) 3170 3171 self._match_r_paren() 3172 3173 return self.expression( 3174 exp.MatchRecognize, 3175 partition_by=partition, 3176 order=order, 3177 measures=measures, 3178 rows=rows, 3179 after=after, 3180 pattern=pattern, 3181 define=define, 3182 alias=self._parse_table_alias(), 3183 ) 3184 3185 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3186 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3187 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3188 cross_apply = False 3189 3190 if cross_apply is not None: 3191 this = self._parse_select(table=True) 3192 view = None 3193 outer = None 3194 elif self._match(TokenType.LATERAL): 3195 this = self._parse_select(table=True) 3196 view = self._match(TokenType.VIEW) 3197 outer = self._match(TokenType.OUTER) 3198 else: 3199 return None 3200 3201 if not this: 3202 this = ( 3203 self._parse_unnest() 3204 or self._parse_function() 3205 or self._parse_id_var(any_token=False) 3206 ) 3207 3208 while self._match(TokenType.DOT): 3209 this = exp.Dot( 3210 this=this, 3211 expression=self._parse_function() or self._parse_id_var(any_token=False), 3212 ) 3213 3214 if view: 3215 table = self._parse_id_var(any_token=False) 3216 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3217 table_alias: t.Optional[exp.TableAlias] = self.expression( 3218 exp.TableAlias, this=table, columns=columns 3219 ) 3220 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3221 # We move the alias from the lateral's child node to the lateral itself 3222 table_alias = this.args["alias"].pop() 3223 else: 3224 table_alias = self._parse_table_alias() 3225 3226 return self.expression( 3227 exp.Lateral, 3228 this=this, 3229 view=view, 3230 outer=outer, 3231 alias=table_alias, 3232 cross_apply=cross_apply, 3233 ) 3234 3235 def _parse_join_parts( 3236 self, 3237 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3238 return ( 3239 self._match_set(self.JOIN_METHODS) and self._prev, 3240 self._match_set(self.JOIN_SIDES) and self._prev, 3241 self._match_set(self.JOIN_KINDS) and 
self._prev, 3242 ) 3243 3244 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3245 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3246 this = self._parse_column() 3247 if isinstance(this, exp.Column): 3248 return this.this 3249 return this 3250 3251 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3252 3253 def _parse_join( 3254 self, skip_join_token: bool = False, parse_bracket: bool = False 3255 ) -> t.Optional[exp.Join]: 3256 if self._match(TokenType.COMMA): 3257 return self.expression(exp.Join, this=self._parse_table()) 3258 3259 index = self._index 3260 method, side, kind = self._parse_join_parts() 3261 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3262 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3263 3264 if not skip_join_token and not join: 3265 self._retreat(index) 3266 kind = None 3267 method = None 3268 side = None 3269 3270 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3271 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3272 3273 if not skip_join_token and not join and not outer_apply and not cross_apply: 3274 return None 3275 3276 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3277 3278 if method: 3279 kwargs["method"] = method.text 3280 if side: 3281 kwargs["side"] = side.text 3282 if kind: 3283 kwargs["kind"] = kind.text 3284 if hint: 3285 kwargs["hint"] = hint 3286 3287 if self._match(TokenType.MATCH_CONDITION): 3288 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3289 3290 if self._match(TokenType.ON): 3291 kwargs["on"] = self._parse_assignment() 3292 elif self._match(TokenType.USING): 3293 kwargs["using"] = self._parse_using_identifiers() 3294 elif ( 3295 not (outer_apply or cross_apply) 3296 and not isinstance(kwargs["this"], exp.Unnest) 3297 and not (kind and kind.token_type == TokenType.CROSS) 3298 ): 3299 index = self._index 3300 joins: t.Optional[list] = list(self._parse_joins()) 3301 3302 if joins and self._match(TokenType.ON): 3303 kwargs["on"] = self._parse_assignment() 3304 elif joins and self._match(TokenType.USING): 3305 kwargs["using"] = self._parse_using_identifiers() 3306 else: 3307 joins = None 3308 self._retreat(index) 3309 3310 kwargs["this"].set("joins", joins if joins else None) 3311 3312 comments = [c for token in (method, side, kind) if token for c in token.comments] 3313 return self.expression(exp.Join, comments=comments, **kwargs) 3314 3315 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3316 this = self._parse_assignment() 3317 3318 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3319 return this 3320 3321 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3322 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3323 3324 return this 3325 3326 def _parse_index_params(self) -> exp.IndexParameters: 3327 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3328 3329 if self._match(TokenType.L_PAREN, advance=False): 3330 columns = self._parse_wrapped_csv(self._parse_with_operator) 3331 else: 3332 columns = None 3333 3334 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3335 partition_by = self._parse_partition_by() 3336 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3337 tablespace = ( 3338 self._parse_var(any_token=True) 3339 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3340 else None 3341 ) 3342 where = self._parse_where() 3343 3344 on = self._parse_field() if self._match(TokenType.ON) else None 3345 3346 return self.expression( 3347 exp.IndexParameters, 3348 using=using, 3349 columns=columns, 3350 include=include, 3351 partition_by=partition_by, 3352 where=where, 3353 with_storage=with_storage, 3354 tablespace=tablespace, 3355 on=on, 3356 ) 3357 3358 def _parse_index( 3359 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3360 ) -> t.Optional[exp.Index]: 3361 if index or anonymous: 3362 unique = None 3363 primary = None 3364 amp = None 3365 3366 self._match(TokenType.ON) 3367 self._match(TokenType.TABLE) # hive 3368 table = self._parse_table_parts(schema=True) 3369 else: 3370 unique = self._match(TokenType.UNIQUE) 3371 primary = self._match_text_seq("PRIMARY") 3372 amp = self._match_text_seq("AMP") 3373 3374 if not self._match(TokenType.INDEX): 3375 return None 3376 3377 index = self._parse_id_var() 3378 table = None 3379 3380 params = self._parse_index_params() 3381 3382 return self.expression( 3383 exp.Index, 3384 this=index, 3385 table=table, 3386 unique=unique, 3387 primary=primary, 3388 amp=amp, 3389 params=params, 3390 ) 3391 3392 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3393 hints: t.List[exp.Expression] = [] 3394 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3395 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3396 hints.append( 3397 self.expression( 3398 exp.WithTableHint, 3399 expressions=self._parse_csv( 3400 lambda: self._parse_function() or self._parse_var(any_token=True) 3401 ), 3402 ) 3403 ) 3404 self._match_r_paren() 3405 else: 3406 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3407 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3408 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3409 3410 self._match_set((TokenType.INDEX, TokenType.KEY)) 3411 if self._match(TokenType.FOR): 3412 hint.set("target", self._advance_any() and self._prev.text.upper()) 3413 3414 hint.set("expressions", self._parse_wrapped_id_vars()) 3415 hints.append(hint) 3416 3417 return hints or None 3418 3419 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3420 return ( 3421 (not schema and self._parse_function(optional_parens=False)) 3422 or self._parse_id_var(any_token=False) 3423 or self._parse_string_as_identifier() 3424 or self._parse_placeholder() 3425 ) 3426 3427 def _parse_table_parts( 3428 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3429 ) -> exp.Table: 3430 catalog = None 3431 db = None 3432 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3433 3434 while self._match(TokenType.DOT): 3435 if catalog: 3436 # This allows nesting the table in arbitrarily many dot expressions if needed 3437 table = self.expression( 3438 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3439 ) 3440 else: 3441 catalog = db 3442 db = table 3443 # "" used for tsql FROM a..b case 3444 table = self._parse_table_part(schema=schema) or "" 3445 3446 if ( 3447 wildcard 3448 and self._is_connected() 3449 and (isinstance(table, exp.Identifier) or not table) 3450 and self._match(TokenType.STAR) 3451 ): 3452 if isinstance(table, exp.Identifier): 3453 table.args["this"] += "*" 3454 else: 3455 table = exp.Identifier(this="*") 3456 3457 # We bubble up comments from the Identifier to the Table 
3458 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3459 3460 if is_db_reference: 3461 catalog = db 3462 db = table 3463 table = None 3464 3465 if not table and not is_db_reference: 3466 self.raise_error(f"Expected table name but got {self._curr}") 3467 if not db and is_db_reference: 3468 self.raise_error(f"Expected database name but got {self._curr}") 3469 3470 table = self.expression( 3471 exp.Table, 3472 comments=comments, 3473 this=table, 3474 db=db, 3475 catalog=catalog, 3476 ) 3477 3478 changes = self._parse_changes() 3479 if changes: 3480 table.set("changes", changes) 3481 3482 at_before = self._parse_historical_data() 3483 if at_before: 3484 table.set("when", at_before) 3485 3486 pivots = self._parse_pivots() 3487 if pivots: 3488 table.set("pivots", pivots) 3489 3490 return table 3491 3492 def _parse_table( 3493 self, 3494 schema: bool = False, 3495 joins: bool = False, 3496 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3497 parse_bracket: bool = False, 3498 is_db_reference: bool = False, 3499 parse_partition: bool = False, 3500 ) -> t.Optional[exp.Expression]: 3501 lateral = self._parse_lateral() 3502 if lateral: 3503 return lateral 3504 3505 unnest = self._parse_unnest() 3506 if unnest: 3507 return unnest 3508 3509 values = self._parse_derived_table_values() 3510 if values: 3511 return values 3512 3513 subquery = self._parse_select(table=True) 3514 if subquery: 3515 if not subquery.args.get("pivots"): 3516 subquery.set("pivots", self._parse_pivots()) 3517 return subquery 3518 3519 bracket = parse_bracket and self._parse_bracket(None) 3520 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3521 3522 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3523 self._parse_table 3524 ) 3525 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3526 3527 only = self._match(TokenType.ONLY) 3528 3529 this = t.cast( 3530 exp.Expression, 3531 bracket 3532 or rows_from 3533 or self._parse_bracket( 3534 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3535 ), 3536 ) 3537 3538 if only: 3539 this.set("only", only) 3540 3541 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3542 self._match_text_seq("*") 3543 3544 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3545 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3546 this.set("partition", self._parse_partition()) 3547 3548 if schema: 3549 return self._parse_schema(this=this) 3550 3551 version = self._parse_version() 3552 3553 if version: 3554 this.set("version", version) 3555 3556 if self.dialect.ALIAS_POST_TABLESAMPLE: 3557 this.set("sample", self._parse_table_sample()) 3558 3559 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3560 if alias: 3561 this.set("alias", alias) 3562 3563 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3564 return self.expression( 3565 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3566 ) 3567 3568 this.set("hints", self._parse_table_hints()) 3569 3570 if not this.args.get("pivots"): 3571 this.set("pivots", self._parse_pivots()) 3572 3573 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3574 this.set("sample", self._parse_table_sample()) 3575 3576 if joins: 3577 for join in self._parse_joins(): 3578 this.append("joins", join) 3579 3580 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3581 
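# Illustrative example (sketch; output indicative): the dot loop in
# _parse_table_parts fills the name parts from right to left, so a
# three-part name populates table, db and catalog on the resulting exp.Table:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> tbl = sqlglot.parse_one("SELECT * FROM c.d.tbl").find(exp.Table)
#     >>> tbl.catalog, tbl.db, tbl.name
#     ('c', 'd', 'tbl')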
this.set("ordinality", True) 3582 this.set("alias", self._parse_table_alias()) 3583 3584 return this 3585 3586 def _parse_version(self) -> t.Optional[exp.Version]: 3587 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3588 this = "TIMESTAMP" 3589 elif self._match(TokenType.VERSION_SNAPSHOT): 3590 this = "VERSION" 3591 else: 3592 return None 3593 3594 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3595 kind = self._prev.text.upper() 3596 start = self._parse_bitwise() 3597 self._match_texts(("TO", "AND")) 3598 end = self._parse_bitwise() 3599 expression: t.Optional[exp.Expression] = self.expression( 3600 exp.Tuple, expressions=[start, end] 3601 ) 3602 elif self._match_text_seq("CONTAINED", "IN"): 3603 kind = "CONTAINED IN" 3604 expression = self.expression( 3605 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3606 ) 3607 elif self._match(TokenType.ALL): 3608 kind = "ALL" 3609 expression = None 3610 else: 3611 self._match_text_seq("AS", "OF") 3612 kind = "AS OF" 3613 expression = self._parse_type() 3614 3615 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3616 3617 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3618 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3619 index = self._index 3620 historical_data = None 3621 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3622 this = self._prev.text.upper() 3623 kind = ( 3624 self._match(TokenType.L_PAREN) 3625 and self._match_texts(self.HISTORICAL_DATA_KIND) 3626 and self._prev.text.upper() 3627 ) 3628 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3629 3630 if expression: 3631 self._match_r_paren() 3632 historical_data = self.expression( 3633 exp.HistoricalData, this=this, kind=kind, expression=expression 3634 ) 3635 else: 3636 self._retreat(index) 3637 3638 return historical_data 3639 3640 def _parse_changes(self) -> t.Optional[exp.Changes]: 3641 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3642 return None 3643 3644 information = self._parse_var(any_token=True) 3645 self._match_r_paren() 3646 3647 return self.expression( 3648 exp.Changes, 3649 information=information, 3650 at_before=self._parse_historical_data(), 3651 end=self._parse_historical_data(), 3652 ) 3653 3654 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3655 if not self._match(TokenType.UNNEST): 3656 return None 3657 3658 expressions = self._parse_wrapped_csv(self._parse_equality) 3659 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3660 3661 alias = self._parse_table_alias() if with_alias else None 3662 3663 if alias: 3664 if self.dialect.UNNEST_COLUMN_ONLY: 3665 if alias.args.get("columns"): 3666 self.raise_error("Unexpected extra column alias in unnest.") 3667 3668 alias.set("columns", [alias.this]) 3669 alias.set("this", None) 3670 3671 columns = alias.args.get("columns") or [] 3672 if offset and len(expressions) < len(columns): 3673 offset = columns.pop() 3674 3675 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3676 self._match(TokenType.ALIAS) 3677 offset = self._parse_id_var( 3678 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3679 ) or exp.to_identifier("offset") 3680 3681 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3682 3683 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3684 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3685 if not is_derived and not ( 3686 # ClickHouse's 
`FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In | exp.PivotAny:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
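# Illustrative example (sketch; output indicative): _parse_table_sample above
# stores whichever flavor it saw in the percent/size/bucket args:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sample = sqlglot.parse_one(
#     ...     "SELECT * FROM t TABLESAMPLE (10 PERCENT)"
#     ... ).find(exp.TableSample)
#     >>> sample.args["percent"].sql()
#     '10'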
alias = self._parse_bitwise() 3794 if alias: 3795 if isinstance(alias, exp.Column) and not alias.db: 3796 alias = alias.this 3797 return self.expression(exp.PivotAlias, this=this, alias=alias) 3798 3799 return this 3800 3801 value = self._parse_column() 3802 3803 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3804 self.raise_error("Expecting IN (") 3805 3806 if self._match(TokenType.ANY): 3807 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3808 else: 3809 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3810 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3811 3812 self._match_r_paren() 3813 return expr 3814 3815 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3816 index = self._index 3817 include_nulls = None 3818 3819 if self._match(TokenType.PIVOT): 3820 unpivot = False 3821 elif self._match(TokenType.UNPIVOT): 3822 unpivot = True 3823 3824 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3825 if self._match_text_seq("INCLUDE", "NULLS"): 3826 include_nulls = True 3827 elif self._match_text_seq("EXCLUDE", "NULLS"): 3828 include_nulls = False 3829 else: 3830 return None 3831 3832 expressions = [] 3833 3834 if not self._match(TokenType.L_PAREN): 3835 self._retreat(index) 3836 return None 3837 3838 if unpivot: 3839 expressions = self._parse_csv(self._parse_column) 3840 else: 3841 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3842 3843 if not expressions: 3844 self.raise_error("Failed to parse PIVOT's aggregation list") 3845 3846 if not self._match(TokenType.FOR): 3847 self.raise_error("Expecting FOR") 3848 3849 field = self._parse_pivot_in() 3850 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3851 self._parse_bitwise 3852 ) 3853 3854 self._match_r_paren() 3855 3856 pivot = self.expression( 3857 exp.Pivot, 3858 expressions=expressions, 3859 field=field, 3860 unpivot=unpivot, 3861 include_nulls=include_nulls, 3862 default_on_null=default_on_null, 3863 ) 3864 3865 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3866 pivot.set("alias", self._parse_table_alias()) 3867 3868 if not unpivot: 3869 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3870 3871 columns: t.List[exp.Expression] = [] 3872 for fld in pivot.args["field"].expressions: 3873 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3874 for name in names: 3875 if self.PREFIXED_PIVOT_COLUMNS: 3876 name = f"{name}_{field_name}" if name else field_name 3877 else: 3878 name = f"{field_name}_{name}" if name else field_name 3879 3880 columns.append(exp.to_identifier(name)) 3881 3882 pivot.set("columns", columns) 3883 3884 return pivot 3885 3886 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3887 return [agg.alias for agg in aggregations] 3888 3889 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3890 if not skip_where_token and not self._match(TokenType.PREWHERE): 3891 return None 3892 3893 return self.expression( 3894 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3895 ) 3896 3897 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3898 if not skip_where_token and not self._match(TokenType.WHERE): 3899 return None 3900 3901 return self.expression( 3902 exp.Where, comments=self._prev_comments, 
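# Illustrative example (sketch; output indicative): the IN list parsed by
# _parse_pivot_in becomes the pivot's `field` arg, an exp.In whose
# expressions are the pivoted values:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> pivot = sqlglot.parse_one(
#     ...     "SELECT * FROM t PIVOT (SUM(x) FOR y IN ('a', 'b'))", read="snowflake"
#     ... ).find(exp.Pivot)
#     >>> [e.sql() for e in pivot.args["field"].expressions]
#     ["'a'", "'b'"]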
this=self._parse_assignment() 3903 ) 3904 3905 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3906 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3907 return None 3908 3909 elements: t.Dict[str, t.Any] = defaultdict(list) 3910 3911 if self._match(TokenType.ALL): 3912 elements["all"] = True 3913 elif self._match(TokenType.DISTINCT): 3914 elements["all"] = False 3915 3916 while True: 3917 expressions = self._parse_csv( 3918 lambda: None 3919 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3920 else self._parse_assignment() 3921 ) 3922 if expressions: 3923 elements["expressions"].extend(expressions) 3924 3925 grouping_sets = self._parse_grouping_sets() 3926 if grouping_sets: 3927 elements["grouping_sets"].extend(grouping_sets) 3928 3929 rollup = None 3930 cube = None 3931 totals = None 3932 3933 index = self._index 3934 with_ = self._match(TokenType.WITH) 3935 if self._match(TokenType.ROLLUP): 3936 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3937 elements["rollup"].extend(ensure_list(rollup)) 3938 3939 if self._match(TokenType.CUBE): 3940 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3941 elements["cube"].extend(ensure_list(cube)) 3942 3943 if self._match_text_seq("TOTALS"): 3944 totals = True 3945 elements["totals"] = True # type: ignore 3946 3947 if not (grouping_sets or rollup or cube or totals): 3948 if with_: 3949 self._retreat(index) 3950 break 3951 3952 return self.expression(exp.Group, **elements) # type: ignore 3953 3954 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3955 if not self._match(TokenType.GROUPING_SETS): 3956 return None 3957 3958 return self._parse_wrapped_csv(self._parse_grouping_set) 3959 3960 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3961 if self._match(TokenType.L_PAREN): 3962 grouping_set = self._parse_csv(self._parse_column) 3963 self._match_r_paren() 3964 return self.expression(exp.Tuple, expressions=grouping_set) 3965 3966 return self._parse_column() 3967 3968 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3969 if not skip_having_token and not self._match(TokenType.HAVING): 3970 return None 3971 return self.expression(exp.Having, this=self._parse_assignment()) 3972 3973 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3974 if not self._match(TokenType.QUALIFY): 3975 return None 3976 return self.expression(exp.Qualify, this=self._parse_assignment()) 3977 3978 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3979 if skip_start_token: 3980 start = None 3981 elif self._match(TokenType.START_WITH): 3982 start = self._parse_assignment() 3983 else: 3984 return None 3985 3986 self._match(TokenType.CONNECT_BY) 3987 nocycle = self._match_text_seq("NOCYCLE") 3988 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3989 exp.Prior, this=self._parse_bitwise() 3990 ) 3991 connect = self._parse_assignment() 3992 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3993 3994 if not start and self._match(TokenType.START_WITH): 3995 start = self._parse_assignment() 3996 3997 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3998 3999 def _parse_name_as_expression(self) -> exp.Alias: 4000 return self.expression( 4001 exp.Alias, 4002 alias=self._parse_id_var(any_token=True), 4003 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4004 ) 4005 4006 def _parse_interpolate(self) -> 
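# Illustrative example (sketch; output indicative): because _parse_group
# accumulates into a defaultdict of elements, plain expressions, GROUPING
# SETS, ROLLUP and CUBE can coexist on a single exp.Group node:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> group = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)").find(exp.Group)
#     >>> bool(group.args.get("rollup"))
#     True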
t.Optional[t.List[exp.Expression]]: 4007 if self._match_text_seq("INTERPOLATE"): 4008 return self._parse_wrapped_csv(self._parse_name_as_expression) 4009 return None 4010 4011 def _parse_order( 4012 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4013 ) -> t.Optional[exp.Expression]: 4014 siblings = None 4015 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4016 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4017 return this 4018 4019 siblings = True 4020 4021 return self.expression( 4022 exp.Order, 4023 this=this, 4024 expressions=self._parse_csv(self._parse_ordered), 4025 siblings=siblings, 4026 ) 4027 4028 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4029 if not self._match(token): 4030 return None 4031 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4032 4033 def _parse_ordered( 4034 self, parse_method: t.Optional[t.Callable] = None 4035 ) -> t.Optional[exp.Ordered]: 4036 this = parse_method() if parse_method else self._parse_assignment() 4037 if not this: 4038 return None 4039 4040 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4041 this = exp.var("ALL") 4042 4043 asc = self._match(TokenType.ASC) 4044 desc = self._match(TokenType.DESC) or (asc and False) 4045 4046 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4047 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4048 4049 nulls_first = is_nulls_first or False 4050 explicitly_null_ordered = is_nulls_first or is_nulls_last 4051 4052 if ( 4053 not explicitly_null_ordered 4054 and ( 4055 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4056 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4057 ) 4058 and self.dialect.NULL_ORDERING != "nulls_are_last" 4059 ): 4060 nulls_first = True 4061 4062 if self._match_text_seq("WITH", "FILL"): 4063 with_fill = self.expression( 4064 exp.WithFill, 4065 **{ # type: ignore 4066 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4067 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4068 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4069 "interpolate": self._parse_interpolate(), 4070 }, 4071 ) 4072 else: 4073 with_fill = None 4074 4075 return self.expression( 4076 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4077 ) 4078 4079 def _parse_limit( 4080 self, 4081 this: t.Optional[exp.Expression] = None, 4082 top: bool = False, 4083 skip_limit_token: bool = False, 4084 ) -> t.Optional[exp.Expression]: 4085 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4086 comments = self._prev_comments 4087 if top: 4088 limit_paren = self._match(TokenType.L_PAREN) 4089 expression = self._parse_term() if limit_paren else self._parse_number() 4090 4091 if limit_paren: 4092 self._match_r_paren() 4093 else: 4094 expression = self._parse_term() 4095 4096 if self._match(TokenType.COMMA): 4097 offset = expression 4098 expression = self._parse_term() 4099 else: 4100 offset = None 4101 4102 limit_exp = self.expression( 4103 exp.Limit, 4104 this=this, 4105 expression=expression, 4106 offset=offset, 4107 comments=comments, 4108 expressions=self._parse_limit_by(), 4109 ) 4110 4111 return limit_exp 4112 4113 if self._match(TokenType.FETCH): 4114 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4115 direction = self._prev.text.upper() if direction else "FIRST" 4116 4117 count = self._parse_field(tokens=self.FETCH_TOKENS) 4118 percent = 
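# Illustrative example (sketch; output indicative, since it depends on each
# dialect's NULL_ORDERING default): _parse_ordered infers an explicit
# nulls_first flag from the source dialect's implicit ordering, which lets
# the generator spell it out for a target dialect with a different default:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT x FROM t ORDER BY x DESC", read="mysql", write="postgres")[0]
#     'SELECT x FROM t ORDER BY x DESC NULLS LAST'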
self._match(TokenType.PERCENT) 4119 4120 self._match_set((TokenType.ROW, TokenType.ROWS)) 4121 4122 only = self._match_text_seq("ONLY") 4123 with_ties = self._match_text_seq("WITH", "TIES") 4124 4125 if only and with_ties: 4126 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4127 4128 return self.expression( 4129 exp.Fetch, 4130 direction=direction, 4131 count=count, 4132 percent=percent, 4133 with_ties=with_ties, 4134 ) 4135 4136 return this 4137 4138 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4139 if not self._match(TokenType.OFFSET): 4140 return this 4141 4142 count = self._parse_term() 4143 self._match_set((TokenType.ROW, TokenType.ROWS)) 4144 4145 return self.expression( 4146 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4147 ) 4148 4149 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4150 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4151 4152 def _parse_locks(self) -> t.List[exp.Lock]: 4153 locks = [] 4154 while True: 4155 if self._match_text_seq("FOR", "UPDATE"): 4156 update = True 4157 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4158 "LOCK", "IN", "SHARE", "MODE" 4159 ): 4160 update = False 4161 else: 4162 break 4163 4164 expressions = None 4165 if self._match_text_seq("OF"): 4166 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4167 4168 wait: t.Optional[bool | exp.Expression] = None 4169 if self._match_text_seq("NOWAIT"): 4170 wait = True 4171 elif self._match_text_seq("WAIT"): 4172 wait = self._parse_primary() 4173 elif self._match_text_seq("SKIP", "LOCKED"): 4174 wait = False 4175 4176 locks.append( 4177 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4178 ) 4179 4180 return locks 4181 4182 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4183 while this and self._match_set(self.SET_OPERATIONS): 4184 token_type = self._prev.token_type 4185 4186 if token_type == TokenType.UNION: 4187 operation: t.Type[exp.SetOperation] = exp.Union 4188 elif token_type == TokenType.EXCEPT: 4189 operation = exp.Except 4190 else: 4191 operation = exp.Intersect 4192 4193 comments = self._prev.comments 4194 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4195 by_name = self._match_text_seq("BY", "NAME") 4196 expression = self._parse_select(nested=True, parse_set_operation=False) 4197 4198 this = self.expression( 4199 operation, 4200 comments=comments, 4201 this=this, 4202 distinct=distinct, 4203 by_name=by_name, 4204 expression=expression, 4205 ) 4206 4207 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4208 expression = this.expression 4209 4210 if expression: 4211 for arg in self.SET_OP_MODIFIERS: 4212 expr = expression.args.get(arg) 4213 if expr: 4214 this.set(arg, expr.pop()) 4215 4216 return this 4217 4218 def _parse_expression(self) -> t.Optional[exp.Expression]: 4219 return self._parse_alias(self._parse_assignment()) 4220 4221 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4222 this = self._parse_disjunction() 4223 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4224 # This allows us to parse <non-identifier token> := <expr> 4225 this = exp.column( 4226 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4227 ) 4228 4229 while self._match_set(self.ASSIGNMENT): 4230 this = self.expression( 4231 
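# Illustrative example (sketch; output indicative): per _parse_set_operations
# above, a bare UNION is treated as DISTINCT unless ALL is spelled out:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> union = sqlglot.parse_one("SELECT a FROM x UNION SELECT a FROM y")
#     >>> isinstance(union, exp.Union), union.args["distinct"]
#     (True, True)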
self.ASSIGNMENT[self._prev.token_type], 4232 this=this, 4233 comments=self._prev_comments, 4234 expression=self._parse_assignment(), 4235 ) 4236 4237 return this 4238 4239 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4240 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4241 4242 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4243 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4244 4245 def _parse_equality(self) -> t.Optional[exp.Expression]: 4246 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4247 4248 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4249 return self._parse_tokens(self._parse_range, self.COMPARISON) 4250 4251 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4252 this = this or self._parse_bitwise() 4253 negate = self._match(TokenType.NOT) 4254 4255 if self._match_set(self.RANGE_PARSERS): 4256 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4257 if not expression: 4258 return this 4259 4260 this = expression 4261 elif self._match(TokenType.ISNULL): 4262 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4263 4264 # Postgres supports ISNULL and NOTNULL for conditions. 4265 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4266 if self._match(TokenType.NOTNULL): 4267 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4268 this = self.expression(exp.Not, this=this) 4269 4270 if negate: 4271 this = self._negate_range(this) 4272 4273 if self._match(TokenType.IS): 4274 this = self._parse_is(this) 4275 4276 return this 4277 4278 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4279 if not this: 4280 return this 4281 4282 return self.expression(exp.Not, this=this) 4283 4284 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4285 index = self._index - 1 4286 negate = self._match(TokenType.NOT) 4287 4288 if self._match_text_seq("DISTINCT", "FROM"): 4289 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4290 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4291 4292 expression = self._parse_null() or self._parse_boolean() 4293 if not expression: 4294 self._retreat(index) 4295 return None 4296 4297 this = self.expression(exp.Is, this=this, expression=expression) 4298 return self.expression(exp.Not, this=this) if negate else this 4299 4300 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4301 unnest = self._parse_unnest(with_alias=False) 4302 if unnest: 4303 this = self.expression(exp.In, this=this, unnest=unnest) 4304 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4305 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4306 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4307 4308 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4309 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4310 else: 4311 this = self.expression(exp.In, this=this, expressions=expressions) 4312 4313 if matched_l_paren: 4314 self._match_r_paren(this) 4315 elif not self._match(TokenType.R_BRACKET, expression=this): 4316 self.raise_error("Expecting ]") 4317 else: 4318 this = self.expression(exp.In, this=this, field=self._parse_field()) 4319 4320 return this 4321 4322 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4323 low = 
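# Illustrative example (sketch; output indicative): the chain
# _parse_assignment -> _parse_disjunction -> _parse_conjunction ->
# _parse_equality -> _parse_comparison -> _parse_range encodes operator
# precedence, each level folding its own operators left-associatively over
# the tighter-binding level below it, so AND binds tighter than OR:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> cond = sqlglot.parse_one("SELECT a OR b AND c").expressions[0]
#     >>> isinstance(cond, exp.Or), isinstance(cond.expression, exp.And)
#     (True, True)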
self._parse_bitwise() 4324 self._match(TokenType.AND) 4325 high = self._parse_bitwise() 4326 return self.expression(exp.Between, this=this, low=low, high=high) 4327 4328 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4329 if not self._match(TokenType.ESCAPE): 4330 return this 4331 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4332 4333 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4334 index = self._index 4335 4336 if not self._match(TokenType.INTERVAL) and match_interval: 4337 return None 4338 4339 if self._match(TokenType.STRING, advance=False): 4340 this = self._parse_primary() 4341 else: 4342 this = self._parse_term() 4343 4344 if not this or ( 4345 isinstance(this, exp.Column) 4346 and not this.table 4347 and not this.this.quoted 4348 and this.name.upper() == "IS" 4349 ): 4350 self._retreat(index) 4351 return None 4352 4353 unit = self._parse_function() or ( 4354 not self._match(TokenType.ALIAS, advance=False) 4355 and self._parse_var(any_token=True, upper=True) 4356 ) 4357 4358 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4359 # each INTERVAL expression into this canonical form so it's easy to transpile 4360 if this and this.is_number: 4361 this = exp.Literal.string(this.to_py()) 4362 elif this and this.is_string: 4363 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4364 if len(parts) == 1: 4365 if unit: 4366 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4367 self._retreat(self._index - 1) 4368 4369 this = exp.Literal.string(parts[0][0]) 4370 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4371 4372 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4373 unit = self.expression( 4374 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4375 ) 4376 4377 interval = self.expression(exp.Interval, this=this, unit=unit) 4378 4379 index = self._index 4380 self._match(TokenType.PLUS) 4381 4382 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4383 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4384 return self.expression( 4385 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4386 ) 4387 4388 self._retreat(index) 4389 return interval 4390 4391 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4392 this = self._parse_term() 4393 4394 while True: 4395 if self._match_set(self.BITWISE): 4396 this = self.expression( 4397 self.BITWISE[self._prev.token_type], 4398 this=this, 4399 expression=self._parse_term(), 4400 ) 4401 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4402 this = self.expression( 4403 exp.DPipe, 4404 this=this, 4405 expression=self._parse_term(), 4406 safe=not self.dialect.STRICT_STRING_CONCAT, 4407 ) 4408 elif self._match(TokenType.DQMARK): 4409 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4410 elif self._match_pair(TokenType.LT, TokenType.LT): 4411 this = self.expression( 4412 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4413 ) 4414 elif self._match_pair(TokenType.GT, TokenType.GT): 4415 this = self.expression( 4416 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4417 ) 4418 else: 4419 break 4420 4421 return this 4422 4423 def _parse_term(self) -> t.Optional[exp.Expression]: 4424 this = self._parse_factor() 4425 4426 while self._match_set(self.TERM): 4427 klass = self.TERM[self._prev.token_type] 4428 comments = self._prev_comments 4429 expression = self._parse_factor() 4430 4431 this = self.expression(klass, this=this, comments=comments, expression=expression) 4432 4433 if isinstance(this, exp.Collate): 4434 expr = this.expression 4435 4436 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4437 # fallback to Identifier / Var 4438 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4439 ident = expr.this 4440 if isinstance(ident, exp.Identifier): 4441 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4442 4443 return this 4444 4445 def _parse_factor(self) -> t.Optional[exp.Expression]: 4446 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4447 this = parse_method() 4448 4449 while self._match_set(self.FACTOR): 4450 klass = self.FACTOR[self._prev.token_type] 4451 comments = self._prev_comments 4452 expression = parse_method() 4453 4454 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4455 self._retreat(self._index - 1) 4456 return this 4457 4458 this = self.expression(klass, this=this, comments=comments, expression=expression) 4459 4460 if isinstance(this, exp.Div): 4461 this.args["typed"] = self.dialect.TYPED_DIVISION 4462 this.args["safe"] = self.dialect.SAFE_DIVISION 4463 4464 return this 4465 4466 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4467 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4468 4469 def _parse_unary(self) -> t.Optional[exp.Expression]: 4470 if self._match_set(self.UNARY_PARSERS): 4471 return self.UNARY_PARSERS[self._prev.token_type](self) 4472 return self._parse_at_time_zone(self._parse_type()) 4473 4474 def _parse_type( 4475 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4476 ) -> t.Optional[exp.Expression]: 4477 interval = parse_interval and self._parse_interval() 4478 if interval: 4479 return interval 4480 4481 index = self._index 4482 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4483 4484 # parse_types() 
returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    this = exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return
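# Illustrative example (sketch; output indicative): a parenthesized type such
# as DECIMAL(38, 0) parses into a parameterized exp.DataType, per the
# expressions handling described above:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sqlglot.parse_one("CREATE TABLE t (x DECIMAL(38, 0))").find(exp.DataType).sql()
#     'DECIMAL(38, 0)'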
self.expression(exp.PseudoType, this=self._prev.text.upper()) 4576 4577 if type_token == TokenType.OBJECT_IDENTIFIER: 4578 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4579 4580 # https://materialize.com/docs/sql/types/map/ 4581 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4582 key_type = self._parse_types( 4583 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4584 ) 4585 if not self._match(TokenType.FARROW): 4586 self._retreat(index) 4587 return None 4588 4589 value_type = self._parse_types( 4590 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4591 ) 4592 if not self._match(TokenType.R_BRACKET): 4593 self._retreat(index) 4594 return None 4595 4596 return exp.DataType( 4597 this=exp.DataType.Type.MAP, 4598 expressions=[key_type, value_type], 4599 nested=True, 4600 prefix=prefix, 4601 ) 4602 4603 nested = type_token in self.NESTED_TYPE_TOKENS 4604 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4605 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4606 expressions = None 4607 maybe_func = False 4608 4609 if self._match(TokenType.L_PAREN): 4610 if is_struct: 4611 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4612 elif nested: 4613 expressions = self._parse_csv( 4614 lambda: self._parse_types( 4615 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4616 ) 4617 ) 4618 elif type_token in self.ENUM_TYPE_TOKENS: 4619 expressions = self._parse_csv(self._parse_equality) 4620 elif is_aggregate: 4621 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4622 any_token=False, tokens=(TokenType.VAR,) 4623 ) 4624 if not func_or_ident or not self._match(TokenType.COMMA): 4625 return None 4626 expressions = self._parse_csv( 4627 lambda: self._parse_types( 4628 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4629 ) 4630 ) 4631 expressions.insert(0, func_or_ident) 4632 else: 4633 expressions = self._parse_csv(self._parse_type_size) 4634 4635 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4636 if type_token == TokenType.VECTOR and len(expressions) == 2: 4637 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4638 4639 if not expressions or not self._match(TokenType.R_PAREN): 4640 self._retreat(index) 4641 return None 4642 4643 maybe_func = True 4644 4645 values: t.Optional[t.List[exp.Expression]] = None 4646 4647 if nested and self._match(TokenType.LT): 4648 if is_struct: 4649 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4650 else: 4651 expressions = self._parse_csv( 4652 lambda: self._parse_types( 4653 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4654 ) 4655 ) 4656 4657 if not self._match(TokenType.GT): 4658 self.raise_error("Expecting >") 4659 4660 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4661 values = self._parse_csv(self._parse_assignment) 4662 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4663 4664 if type_token in self.TIMESTAMPS: 4665 if self._match_text_seq("WITH", "TIME", "ZONE"): 4666 maybe_func = False 4667 tz_type = ( 4668 exp.DataType.Type.TIMETZ 4669 if type_token in self.TIMES 4670 else exp.DataType.Type.TIMESTAMPTZ 4671 ) 4672 this = exp.DataType(this=tz_type, expressions=expressions) 4673 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4674 maybe_func = False 4675 this = 
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4676 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4677 maybe_func = False 4678 elif type_token == TokenType.INTERVAL: 4679 unit = self._parse_var(upper=True) 4680 if unit: 4681 if self._match_text_seq("TO"): 4682 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4683 4684 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4685 else: 4686 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4687 4688 if maybe_func and check_func: 4689 index2 = self._index 4690 peek = self._parse_string() 4691 4692 if not peek: 4693 self._retreat(index) 4694 return None 4695 4696 self._retreat(index2) 4697 4698 if not this: 4699 if self._match_text_seq("UNSIGNED"): 4700 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4701 if not unsigned_type_token: 4702 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4703 4704 type_token = unsigned_type_token or type_token 4705 4706 this = exp.DataType( 4707 this=exp.DataType.Type[type_token.value], 4708 expressions=expressions, 4709 nested=nested, 4710 prefix=prefix, 4711 ) 4712 4713 # Empty arrays/structs are allowed 4714 if values is not None: 4715 cls = exp.Struct if is_struct else exp.Array 4716 this = exp.cast(cls(expressions=values), this, copy=False) 4717 4718 elif expressions: 4719 this.set("expressions", expressions) 4720 4721 # https://materialize.com/docs/sql/types/list/#type-name 4722 while self._match(TokenType.LIST): 4723 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4724 4725 index = self._index 4726 4727 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4728 matched_array = self._match(TokenType.ARRAY) 4729 4730 while self._curr: 4731 datatype_token = self._prev.token_type 4732 matched_l_bracket = self._match(TokenType.L_BRACKET) 4733 if not matched_l_bracket and not matched_array: 4734 break 4735 4736 matched_array = False 4737 values = self._parse_csv(self._parse_assignment) or None 4738 if ( 4739 values 4740 and not schema 4741 and ( 4742 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4743 ) 4744 ): 4745 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4746 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4747 self._retreat(index) 4748 break 4749 4750 this = exp.DataType( 4751 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4752 ) 4753 self._match(TokenType.R_BRACKET) 4754 4755 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4756 converter = self.TYPE_CONVERTERS.get(this.this) 4757 if converter: 4758 this = converter(t.cast(exp.DataType, this)) 4759 4760 return this 4761 4762 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4763 index = self._index 4764 4765 if ( 4766 self._curr 4767 and self._next 4768 and self._curr.token_type in self.TYPE_TOKENS 4769 and self._next.token_type in self.TYPE_TOKENS 4770 ): 4771 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4772 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4773 this = self._parse_id_var() 4774 else: 4775 this = ( 4776 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4777 or self._parse_id_var() 4778 ) 4779 4780 self._match(TokenType.COLON) 4781 4782 if ( 4783 type_required 4784 and not isinstance(this, exp.DataType) 4785 and not self._match_set(self.TYPE_TOKENS, advance=False) 4786 ): 4787 self._retreat(index) 4788 return self._parse_types() 4789 4790 return self._parse_column_def(this) 4791 4792 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4793 if not self._match_text_seq("AT", "TIME", "ZONE"): 4794 return this 4795 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4796 4797 def _parse_column(self) -> t.Optional[exp.Expression]: 4798 this = self._parse_column_reference() 4799 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4800 4801 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4802 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4803 4804 return column 4805 4806 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4807 this = self._parse_field() 4808 if ( 4809 not this 4810 and self._match(TokenType.VALUES, advance=False) 4811 and self.VALUES_FOLLOWED_BY_PAREN 4812 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4813 ): 4814 this = self._parse_id_var() 4815 4816 if isinstance(this, exp.Identifier): 4817 # We bubble up comments from the Identifier to the Column 4818 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4819 4820 return this 4821 4822 def _parse_colon_as_variant_extract( 4823 self, this: t.Optional[exp.Expression] 4824 ) -> t.Optional[exp.Expression]: 4825 casts = [] 4826 json_path = [] 4827 4828 while self._match(TokenType.COLON): 4829 start_index = self._index 4830 4831 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4832 path = self._parse_column_ops( 4833 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4834 ) 4835 4836 # The cast :: operator has a lower precedence than the extraction operator :, so 4837 # we rearrange the AST appropriately to avoid casting the JSON path 4838 while isinstance(path, exp.Cast): 4839 casts.append(path.to) 4840 path = path.this 4841 4842 if casts: 4843 dcolon_offset = next( 4844 i 4845 for i, t in enumerate(self._tokens[start_index:]) 4846 if t.token_type == TokenType.DCOLON 4847 ) 4848 end_token = self._tokens[start_index + dcolon_offset - 1] 4849 else: 4850 end_token = self._prev 4851 4852 if path: 4853 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4854 4855 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4856 # Databricks transforms it back to the colon/dot notation 4857 if json_path: 4858 this = self.expression( 4859 exp.JSONExtract, 4860 this=this, 4861 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4862 variant_extract=True, 4863 ) 4864 4865 while casts: 4866 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4867 4868 return this 4869 4870 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4871 return self._parse_types() 4872 4873 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4874 this = self._parse_bracket(this) 4875 4876 while 
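# Illustrative example (sketch; output indicative): the variant-extract path
# above folds a chain of Snowflake colon accesses into a single JSONExtract,
# which Snowflake then renders via GET_PATH:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT col:a.b FROM t", read="snowflake", write="snowflake")[0]
#     "SELECT GET_PATH(col, 'a.b') FROM t"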
self._match_set(self.COLUMN_OPERATORS): 4877 op_token = self._prev.token_type 4878 op = self.COLUMN_OPERATORS.get(op_token) 4879 4880 if op_token == TokenType.DCOLON: 4881 field = self._parse_dcolon() 4882 if not field: 4883 self.raise_error("Expected type") 4884 elif op and self._curr: 4885 field = self._parse_column_reference() 4886 else: 4887 field = self._parse_field(any_token=True, anonymous_func=True) 4888 4889 if isinstance(field, exp.Func) and this: 4890 # bigquery allows function calls like x.y.count(...) 4891 # SAFE.SUBSTR(...) 4892 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4893 this = exp.replace_tree( 4894 this, 4895 lambda n: ( 4896 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4897 if n.table 4898 else n.this 4899 ) 4900 if isinstance(n, exp.Column) 4901 else n, 4902 ) 4903 4904 if op: 4905 this = op(self, this, field) 4906 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4907 this = self.expression( 4908 exp.Column, 4909 this=field, 4910 table=this.this, 4911 db=this.args.get("table"), 4912 catalog=this.args.get("db"), 4913 ) 4914 else: 4915 this = self.expression(exp.Dot, this=this, expression=field) 4916 4917 this = self._parse_bracket(this) 4918 4919 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4920 4921 def _parse_primary(self) -> t.Optional[exp.Expression]: 4922 if self._match_set(self.PRIMARY_PARSERS): 4923 token_type = self._prev.token_type 4924 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4925 4926 if token_type == TokenType.STRING: 4927 expressions = [primary] 4928 while self._match(TokenType.STRING): 4929 expressions.append(exp.Literal.string(self._prev.text)) 4930 4931 if len(expressions) > 1: 4932 return self.expression(exp.Concat, expressions=expressions) 4933 4934 return primary 4935 4936 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4937 return exp.Literal.number(f"0.{self._prev.text}") 4938 4939 if self._match(TokenType.L_PAREN): 4940 comments = self._prev_comments 4941 query = self._parse_select() 4942 4943 if query: 4944 expressions = [query] 4945 else: 4946 expressions = self._parse_expressions() 4947 4948 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4949 4950 if not this and self._match(TokenType.R_PAREN, advance=False): 4951 this = self.expression(exp.Tuple) 4952 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4953 this = self._parse_subquery(this=this, parse_alias=False) 4954 elif isinstance(this, exp.Subquery): 4955 this = self._parse_subquery( 4956 this=self._parse_set_operations(this), parse_alias=False 4957 ) 4958 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4959 this = self.expression(exp.Tuple, expressions=expressions) 4960 else: 4961 this = self.expression(exp.Paren, this=this) 4962 4963 if this: 4964 this.add_comments(comments) 4965 4966 self._match_r_paren(expression=this) 4967 return this 4968 4969 return None 4970 4971 def _parse_field( 4972 self, 4973 any_token: bool = False, 4974 tokens: t.Optional[t.Collection[TokenType]] = None, 4975 anonymous_func: bool = False, 4976 ) -> t.Optional[exp.Expression]: 4977 if anonymous_func: 4978 field = ( 4979 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4980 or self._parse_primary() 4981 ) 4982 else: 4983 field = self._parse_primary() or self._parse_function( 4984 anonymous=anonymous_func, any_token=any_token 4985 ) 4986 return field or 
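# Illustrative example (sketch; output indicative): per _parse_primary above,
# adjacent string literals are folded into a single exp.Concat:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> isinstance(sqlglot.parse_one("SELECT 'a' 'b'").expressions[0], exp.Concat)
#     True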
self._parse_id_var(any_token=any_token, tokens=tokens) 4987 4988 def _parse_function( 4989 self, 4990 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4991 anonymous: bool = False, 4992 optional_parens: bool = True, 4993 any_token: bool = False, 4994 ) -> t.Optional[exp.Expression]: 4995 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4996 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4997 fn_syntax = False 4998 if ( 4999 self._match(TokenType.L_BRACE, advance=False) 5000 and self._next 5001 and self._next.text.upper() == "FN" 5002 ): 5003 self._advance(2) 5004 fn_syntax = True 5005 5006 func = self._parse_function_call( 5007 functions=functions, 5008 anonymous=anonymous, 5009 optional_parens=optional_parens, 5010 any_token=any_token, 5011 ) 5012 5013 if fn_syntax: 5014 self._match(TokenType.R_BRACE) 5015 5016 return func 5017 5018 def _parse_function_call( 5019 self, 5020 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5021 anonymous: bool = False, 5022 optional_parens: bool = True, 5023 any_token: bool = False, 5024 ) -> t.Optional[exp.Expression]: 5025 if not self._curr: 5026 return None 5027 5028 comments = self._curr.comments 5029 token_type = self._curr.token_type 5030 this = self._curr.text 5031 upper = this.upper() 5032 5033 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5034 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5035 self._advance() 5036 return self._parse_window(parser(self)) 5037 5038 if not self._next or self._next.token_type != TokenType.L_PAREN: 5039 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5040 self._advance() 5041 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5042 5043 return None 5044 5045 if any_token: 5046 if token_type in self.RESERVED_TOKENS: 5047 return None 5048 elif token_type not in self.FUNC_TOKENS: 5049 return None 5050 5051 self._advance(2) 5052 5053 parser = self.FUNCTION_PARSERS.get(upper) 5054 if parser and not anonymous: 5055 this = parser(self) 5056 else: 5057 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5058 5059 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5060 this = self.expression(subquery_predicate, this=self._parse_select()) 5061 self._match_r_paren() 5062 return this 5063 5064 if functions is None: 5065 functions = self.FUNCTIONS 5066 5067 function = functions.get(upper) 5068 5069 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5070 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5071 5072 if alias: 5073 args = self._kv_to_prop_eq(args) 5074 5075 if function and not anonymous: 5076 if "dialect" in function.__code__.co_varnames: 5077 func = function(args, dialect=self.dialect) 5078 else: 5079 func = function(args) 5080 5081 func = self.validate_expression(func, args) 5082 if not self.dialect.NORMALIZE_FUNCTIONS: 5083 func.meta["name"] = this 5084 5085 this = func 5086 else: 5087 if token_type == TokenType.IDENTIFIER: 5088 this = exp.Identifier(this=this, quoted=True) 5089 this = self.expression(exp.Anonymous, this=this, expressions=args) 5090 5091 if isinstance(this, exp.Expression): 5092 this.add_comments(comments) 5093 5094 self._match_r_paren(this) 5095 return self._parse_window(this) 5096 5097 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5098 transformed = [] 5099 5100 for e in expressions: 5101 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5102 if isinstance(e, 
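# Illustrative example (sketch; output indicative): the {fn ...} ODBC/JDBC
# escape handled by _parse_function above is transparent to callers; the
# braces are consumed and the inner function is parsed normally:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql").sql()
#     "SELECT CONCAT('a', 'b')"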
exp.Alias): 5103 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5104 5105 if not isinstance(e, exp.PropertyEQ): 5106 e = self.expression( 5107 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5108 ) 5109 5110 if isinstance(e.this, exp.Column): 5111 e.this.replace(e.this.this) 5112 5113 transformed.append(e) 5114 5115 return transformed 5116 5117 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5118 return self._parse_column_def(self._parse_id_var()) 5119 5120 def _parse_user_defined_function( 5121 self, kind: t.Optional[TokenType] = None 5122 ) -> t.Optional[exp.Expression]: 5123 this = self._parse_id_var() 5124 5125 while self._match(TokenType.DOT): 5126 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5127 5128 if not self._match(TokenType.L_PAREN): 5129 return this 5130 5131 expressions = self._parse_csv(self._parse_function_parameter) 5132 self._match_r_paren() 5133 return self.expression( 5134 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5135 ) 5136 5137 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5138 literal = self._parse_primary() 5139 if literal: 5140 return self.expression(exp.Introducer, this=token.text, expression=literal) 5141 5142 return self.expression(exp.Identifier, this=token.text) 5143 5144 def _parse_session_parameter(self) -> exp.SessionParameter: 5145 kind = None 5146 this = self._parse_id_var() or self._parse_primary() 5147 5148 if this and self._match(TokenType.DOT): 5149 kind = this.name 5150 this = self._parse_var() or self._parse_primary() 5151 5152 return self.expression(exp.SessionParameter, this=this, kind=kind) 5153 5154 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5155 return self._parse_id_var() 5156 5157 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5158 index = self._index 5159 5160 if self._match(TokenType.L_PAREN): 5161 expressions = t.cast( 5162 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5163 ) 5164 5165 if not self._match(TokenType.R_PAREN): 5166 self._retreat(index) 5167 else: 5168 expressions = [self._parse_lambda_arg()] 5169 5170 if self._match_set(self.LAMBDAS): 5171 return self.LAMBDAS[self._prev.token_type](self, expressions) 5172 5173 self._retreat(index) 5174 5175 this: t.Optional[exp.Expression] 5176 5177 if self._match(TokenType.DISTINCT): 5178 this = self.expression( 5179 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5180 ) 5181 else: 5182 this = self._parse_select_or_expression(alias=alias) 5183 5184 return self._parse_limit( 5185 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5186 ) 5187 5188 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5189 index = self._index 5190 if not self._match(TokenType.L_PAREN): 5191 return this 5192 5193 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5194 # expr can be of both types 5195 if self._match_set(self.SELECT_START_TOKENS): 5196 self._retreat(index) 5197 return this 5198 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5199 self._match_r_paren() 5200 return self.expression(exp.Schema, this=this, expressions=args) 5201 5202 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5203 return self._parse_column_def(self._parse_field(any_token=True)) 5204 5205 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5206 # column defs are not really columns, they're identifiers 5207 if isinstance(this, exp.Column): 5208 this = this.this 5209 5210 kind = self._parse_types(schema=True) 5211 5212 if self._match_text_seq("FOR", "ORDINALITY"): 5213 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5214 5215 constraints: t.List[exp.Expression] = [] 5216 5217 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5218 ("ALIAS", "MATERIALIZED") 5219 ): 5220 persisted = self._prev.text.upper() == "MATERIALIZED" 5221 constraints.append( 5222 self.expression( 5223 exp.ComputedColumnConstraint, 5224 this=self._parse_assignment(), 5225 persisted=persisted or self._match_text_seq("PERSISTED"), 5226 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5227 ) 5228 ) 5229 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5230 self._match(TokenType.ALIAS) 5231 constraints.append( 5232 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 5233 ) 5234 5235 while True: 5236 constraint = self._parse_column_constraint() 5237 if not constraint: 5238 break 5239 constraints.append(constraint) 5240 5241 if not kind and not constraints: 5242 return this 5243 5244 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5245 5246 def _parse_auto_increment( 5247 self, 5248 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5249 start = None 5250 increment = None 5251 5252 if self._match(TokenType.L_PAREN, advance=False): 5253 args = self._parse_wrapped_csv(self._parse_bitwise) 5254 start = seq_get(args, 0) 5255 increment = seq_get(args, 1) 5256 elif self._match_text_seq("START"): 5257 start = self._parse_bitwise() 5258 self._match_text_seq("INCREMENT") 5259 increment = self._parse_bitwise() 5260 5261 if start and increment: 5262 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5263 5264 return exp.AutoIncrementColumnConstraint() 5265 5266 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5267 if not self._match_text_seq("REFRESH"): 5268 self._retreat(self._index - 1) 5269 return None 5270 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5271 5272 def _parse_compress(self) -> exp.CompressColumnConstraint: 5273 if self._match(TokenType.L_PAREN, advance=False): 5274 return self.expression( 5275 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5276 ) 5277 5278 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5279 5280 def _parse_generated_as_identity( 5281 self, 5282 ) -> ( 5283 exp.GeneratedAsIdentityColumnConstraint 5284 | exp.ComputedColumnConstraint 5285 | exp.GeneratedAsRowColumnConstraint 5286 ): 5287 if self._match_text_seq("BY", "DEFAULT"): 5288 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5289 this = self.expression( 5290 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5291 ) 5292 else: 5293 self._match_text_seq("ALWAYS") 5294 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5295 5296 self._match(TokenType.ALIAS) 5297 5298 if self._match_text_seq("ROW"): 5299 start = self._match_text_seq("START") 5300 if not start: 5301 self._match(TokenType.END) 5302 hidden = self._match_text_seq("HIDDEN") 5303 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5304 5305 identity = self._match_text_seq("IDENTITY") 5306 5307 if self._match(TokenType.L_PAREN): 5308 if self._match(TokenType.START_WITH): 5309 this.set("start", self._parse_bitwise()) 5310 if self._match_text_seq("INCREMENT", "BY"): 5311 this.set("increment", self._parse_bitwise()) 5312 if self._match_text_seq("MINVALUE"): 5313 this.set("minvalue", self._parse_bitwise()) 5314 if self._match_text_seq("MAXVALUE"): 5315 this.set("maxvalue", self._parse_bitwise()) 5316 5317 if self._match_text_seq("CYCLE"): 5318 this.set("cycle", True) 5319 elif self._match_text_seq("NO", "CYCLE"): 5320 this.set("cycle", False) 5321 5322 if not identity: 5323 this.set("expression", self._parse_range()) 5324 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5325 args = self._parse_csv(self._parse_bitwise) 5326 this.set("start", seq_get(args, 0)) 5327 this.set("increment", seq_get(args, 1)) 5328 5329 self._match_r_paren() 5330 5331 return this 5332 5333 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5334 self._match_text_seq("LENGTH") 5335 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5336 5337 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5338 if self._match_text_seq("NULL"): 5339 return self.expression(exp.NotNullColumnConstraint) 5340 if self._match_text_seq("CASESPECIFIC"): 5341 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5342 if self._match_text_seq("FOR", "REPLICATION"): 5343 return self.expression(exp.NotForReplicationColumnConstraint) 5344 return None 5345 5346 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5347 if self._match(TokenType.CONSTRAINT): 5348 this = self._parse_id_var() 5349 else: 5350 this = None 5351 5352 if self._match_texts(self.CONSTRAINT_PARSERS): 5353 return self.expression( 5354 exp.ColumnConstraint, 5355 this=this, 5356 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5357 ) 5358 5359 return this 5360 5361 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5362 if not self._match(TokenType.CONSTRAINT): 5363 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5364 5365 return self.expression( 5366 exp.Constraint, 5367 this=self._parse_id_var(), 5368 expressions=self._parse_unnamed_constraints(), 5369 ) 5370 5371 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5372 constraints = [] 5373 while True: 5374 constraint = self._parse_unnamed_constraint() or self._parse_function() 5375 if not constraint: 5376 break 5377 constraints.append(constraint) 5378 5379 return constraints 5380 5381 def _parse_unnamed_constraint( 5382 self, constraints: t.Optional[t.Collection[str]] = None 5383 ) -> t.Optional[exp.Expression]: 5384 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5385 constraints or self.CONSTRAINT_PARSERS 5386 ): 5387 return None 5388 5389 constraint = self._prev.text.upper() 5390 if constraint not in self.CONSTRAINT_PARSERS: 5391 
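# The keyword matched one of the permitted unnamed-constraint names but has no
# entry in CONSTRAINT_PARSERS, so fail loudly instead of silently mis-parsing: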
self.raise_error(f"No parser found for schema constraint {constraint}.") 5392 5393 return self.CONSTRAINT_PARSERS[constraint](self) 5394 5395 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5396 return self._parse_id_var(any_token=False) 5397 5398 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5399 self._match_text_seq("KEY") 5400 return self.expression( 5401 exp.UniqueColumnConstraint, 5402 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5403 this=self._parse_schema(self._parse_unique_key()), 5404 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5405 on_conflict=self._parse_on_conflict(), 5406 ) 5407 5408 def _parse_key_constraint_options(self) -> t.List[str]: 5409 options = [] 5410 while True: 5411 if not self._curr: 5412 break 5413 5414 if self._match(TokenType.ON): 5415 action = None 5416 on = self._advance_any() and self._prev.text 5417 5418 if self._match_text_seq("NO", "ACTION"): 5419 action = "NO ACTION" 5420 elif self._match_text_seq("CASCADE"): 5421 action = "CASCADE" 5422 elif self._match_text_seq("RESTRICT"): 5423 action = "RESTRICT" 5424 elif self._match_pair(TokenType.SET, TokenType.NULL): 5425 action = "SET NULL" 5426 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5427 action = "SET DEFAULT" 5428 else: 5429 self.raise_error("Invalid key constraint") 5430 5431 options.append(f"ON {on} {action}") 5432 else: 5433 var = self._parse_var_from_options( 5434 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5435 ) 5436 if not var: 5437 break 5438 options.append(var.name) 5439 5440 return options 5441 5442 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5443 if match and not self._match(TokenType.REFERENCES): 5444 return None 5445 5446 expressions = None 5447 this = self._parse_table(schema=True) 5448 options = self._parse_key_constraint_options() 5449 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5450 5451 def _parse_foreign_key(self) -> exp.ForeignKey: 5452 expressions = self._parse_wrapped_id_vars() 5453 reference = self._parse_references() 5454 options = {} 5455 5456 while self._match(TokenType.ON): 5457 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5458 self.raise_error("Expected DELETE or UPDATE") 5459 5460 kind = self._prev.text.lower() 5461 5462 if self._match_text_seq("NO", "ACTION"): 5463 action = "NO ACTION" 5464 elif self._match(TokenType.SET): 5465 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5466 action = "SET " + self._prev.text.upper() 5467 else: 5468 self._advance() 5469 action = self._prev.text.upper() 5470 5471 options[kind] = action 5472 5473 return self.expression( 5474 exp.ForeignKey, 5475 expressions=expressions, 5476 reference=reference, 5477 **options, # type: ignore 5478 ) 5479 5480 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5481 return self._parse_field() 5482 5483 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5484 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5485 self._retreat(self._index - 1) 5486 return None 5487 5488 id_vars = self._parse_wrapped_id_vars() 5489 return self.expression( 5490 exp.PeriodForSystemTimeConstraint, 5491 this=seq_get(id_vars, 0), 5492 expression=seq_get(id_vars, 1), 5493 ) 5494 5495 def _parse_primary_key( 5496 self, wrapped_optional: bool = False, in_props: bool = False 5497 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5498 desc = ( 5499 self._match_set((TokenType.ASC, 
TokenType.DESC)) 5500 and self._prev.token_type == TokenType.DESC 5501 ) 5502 5503 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5504 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5505 5506 expressions = self._parse_wrapped_csv( 5507 self._parse_primary_key_part, optional=wrapped_optional 5508 ) 5509 options = self._parse_key_constraint_options() 5510 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5511 5512 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5513 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5514 5515 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5516 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5517 return this 5518 5519 bracket_kind = self._prev.token_type 5520 expressions = self._parse_csv( 5521 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5522 ) 5523 5524 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5525 self.raise_error("Expected ]") 5526 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5527 self.raise_error("Expected }") 5528 5529 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5530 if bracket_kind == TokenType.L_BRACE: 5531 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5532 elif not this: 5533 this = build_array_constructor( 5534 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5535 ) 5536 else: 5537 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5538 if constructor_type: 5539 return build_array_constructor( 5540 constructor_type, 5541 args=expressions, 5542 bracket_kind=bracket_kind, 5543 dialect=self.dialect, 5544 ) 5545 5546 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5547 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5548 5549 self._add_comments(this) 5550 return self._parse_bracket(this) 5551 5552 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5553 if self._match(TokenType.COLON): 5554 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5555 return this 5556 5557 def _parse_case(self) -> t.Optional[exp.Expression]: 5558 ifs = [] 5559 default = None 5560 5561 comments = self._prev_comments 5562 expression = self._parse_assignment() 5563 5564 while self._match(TokenType.WHEN): 5565 this = self._parse_assignment() 5566 self._match(TokenType.THEN) 5567 then = self._parse_assignment() 5568 ifs.append(self.expression(exp.If, this=this, true=then)) 5569 5570 if self._match(TokenType.ELSE): 5571 default = self._parse_assignment() 5572 5573 if not self._match(TokenType.END): 5574 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5575 default = exp.column("interval") 5576 else: 5577 self.raise_error("Expected END after CASE", self._prev) 5578 5579 return self.expression( 5580 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5581 ) 5582 5583 def _parse_if(self) -> t.Optional[exp.Expression]: 5584 if self._match(TokenType.L_PAREN): 5585 args = self._parse_csv(self._parse_assignment) 5586 this = self.validate_expression(exp.If.from_arg_list(args), args) 5587 self._match_r_paren() 5588 else: 5589 index = self._index - 1 5590 5591 if self.NO_PAREN_IF_COMMANDS and index == 
0: 5592 return self._parse_as_command(self._prev) 5593 5594 condition = self._parse_assignment() 5595 5596 if not condition: 5597 self._retreat(index) 5598 return None 5599 5600 self._match(TokenType.THEN) 5601 true = self._parse_assignment() 5602 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5603 self._match(TokenType.END) 5604 this = self.expression(exp.If, this=condition, true=true, false=false) 5605 5606 return this 5607 5608 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5609 if not self._match_text_seq("VALUE", "FOR"): 5610 self._retreat(self._index - 1) 5611 return None 5612 5613 return self.expression( 5614 exp.NextValueFor, 5615 this=self._parse_column(), 5616 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5617 ) 5618 5619 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5620 this = self._parse_function() or self._parse_var_or_string(upper=True) 5621 5622 if self._match(TokenType.FROM): 5623 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5624 5625 if not self._match(TokenType.COMMA): 5626 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5627 5628 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5629 5630 def _parse_gap_fill(self) -> exp.GapFill: 5631 self._match(TokenType.TABLE) 5632 this = self._parse_table() 5633 5634 self._match(TokenType.COMMA) 5635 args = [this, *self._parse_csv(self._parse_lambda)] 5636 5637 gap_fill = exp.GapFill.from_arg_list(args) 5638 return self.validate_expression(gap_fill, args) 5639 5640 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5641 this = self._parse_assignment() 5642 5643 if not self._match(TokenType.ALIAS): 5644 if self._match(TokenType.COMMA): 5645 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5646 5647 self.raise_error("Expected AS after CAST") 5648 5649 fmt = None 5650 to = self._parse_types() 5651 5652 if self._match(TokenType.FORMAT): 5653 fmt_string = self._parse_string() 5654 fmt = self._parse_at_time_zone(fmt_string) 5655 5656 if not to: 5657 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5658 if to.this in exp.DataType.TEMPORAL_TYPES: 5659 this = self.expression( 5660 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5661 this=this, 5662 format=exp.Literal.string( 5663 format_time( 5664 fmt_string.this if fmt_string else "", 5665 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5666 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5667 ) 5668 ), 5669 safe=safe, 5670 ) 5671 5672 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5673 this.set("zone", fmt.args["zone"]) 5674 return this 5675 elif not to: 5676 self.raise_error("Expected TYPE after CAST") 5677 elif isinstance(to, exp.Identifier): 5678 to = exp.DataType.build(to.name, udt=True) 5679 elif to.this == exp.DataType.Type.CHAR: 5680 if self._match(TokenType.CHARACTER_SET): 5681 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5682 5683 return self.expression( 5684 exp.Cast if strict else exp.TryCast, 5685 this=this, 5686 to=to, 5687 format=fmt, 5688 safe=safe, 5689 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5690 ) 5691 5692 def _parse_string_agg(self) -> exp.Expression: 5693 if self._match(TokenType.DISTINCT): 5694 args: t.List[t.Optional[exp.Expression]] = [ 5695 self.expression(exp.Distinct, 
expressions=[self._parse_assignment()]) 5696 ] 5697 if self._match(TokenType.COMMA): 5698 args.extend(self._parse_csv(self._parse_assignment)) 5699 else: 5700 args = self._parse_csv(self._parse_assignment) # type: ignore 5701 5702 index = self._index 5703 if not self._match(TokenType.R_PAREN) and args: 5704 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5705 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5706 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5707 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5708 5709 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5710 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5711 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5712 if not self._match_text_seq("WITHIN", "GROUP"): 5713 self._retreat(index) 5714 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5715 5716 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5717 order = self._parse_order(this=seq_get(args, 0)) 5718 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5719 5720 def _parse_convert( 5721 self, strict: bool, safe: t.Optional[bool] = None 5722 ) -> t.Optional[exp.Expression]: 5723 this = self._parse_bitwise() 5724 5725 if self._match(TokenType.USING): 5726 to: t.Optional[exp.Expression] = self.expression( 5727 exp.CharacterSet, this=self._parse_var() 5728 ) 5729 elif self._match(TokenType.COMMA): 5730 to = self._parse_types() 5731 else: 5732 to = None 5733 5734 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5735 5736 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5737 """ 5738 There are generally two variants of the DECODE function: 5739 5740 - DECODE(bin, charset) 5741 - DECODE(expression, search, result [, search, result] ... [, default]) 5742 5743 The second variant will always be parsed into a CASE expression. Note that NULL 5744 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5745 instead of relying on pattern matching. 
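        For example, DECODE(x, 1, 'one', NULL, 'none', 'other') is parsed as the
        equivalent of CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none'
        ELSE 'other' END.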
5746 """ 5747 args = self._parse_csv(self._parse_assignment) 5748 5749 if len(args) < 3: 5750 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5751 5752 expression, *expressions = args 5753 if not expression: 5754 return None 5755 5756 ifs = [] 5757 for search, result in zip(expressions[::2], expressions[1::2]): 5758 if not search or not result: 5759 return None 5760 5761 if isinstance(search, exp.Literal): 5762 ifs.append( 5763 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5764 ) 5765 elif isinstance(search, exp.Null): 5766 ifs.append( 5767 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5768 ) 5769 else: 5770 cond = exp.or_( 5771 exp.EQ(this=expression.copy(), expression=search), 5772 exp.and_( 5773 exp.Is(this=expression.copy(), expression=exp.Null()), 5774 exp.Is(this=search.copy(), expression=exp.Null()), 5775 copy=False, 5776 ), 5777 copy=False, 5778 ) 5779 ifs.append(exp.If(this=cond, true=result)) 5780 5781 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5782 5783 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5784 self._match_text_seq("KEY") 5785 key = self._parse_column() 5786 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5787 self._match_text_seq("VALUE") 5788 value = self._parse_bitwise() 5789 5790 if not key and not value: 5791 return None 5792 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5793 5794 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5795 if not this or not self._match_text_seq("FORMAT", "JSON"): 5796 return this 5797 5798 return self.expression(exp.FormatJson, this=this) 5799 5800 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5801 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5802 for value in values: 5803 if self._match_text_seq(value, "ON", on): 5804 return f"{value} ON {on}" 5805 5806 return None 5807 5808 @t.overload 5809 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5810 5811 @t.overload 5812 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5813 5814 def _parse_json_object(self, agg=False): 5815 star = self._parse_star() 5816 expressions = ( 5817 [star] 5818 if star 5819 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5820 ) 5821 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5822 5823 unique_keys = None 5824 if self._match_text_seq("WITH", "UNIQUE"): 5825 unique_keys = True 5826 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5827 unique_keys = False 5828 5829 self._match_text_seq("KEYS") 5830 5831 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5832 self._parse_type() 5833 ) 5834 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5835 5836 return self.expression( 5837 exp.JSONObjectAgg if agg else exp.JSONObject, 5838 expressions=expressions, 5839 null_handling=null_handling, 5840 unique_keys=unique_keys, 5841 return_type=return_type, 5842 encoding=encoding, 5843 ) 5844 5845 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5846 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5847 if not self._match_text_seq("NESTED"): 5848 this = self._parse_id_var() 5849 kind = self._parse_types(allow_identifiers=False) 5850 nested = None 5851 else: 5852 this = None 5853 kind = None 5854 nested = True 5855 5856 path = self._match_text_seq("PATH") and self._parse_string() 5857 nested_schema = nested and self._parse_json_schema() 5858 5859 return self.expression( 5860 exp.JSONColumnDef, 5861 this=this, 5862 kind=kind, 5863 path=path, 5864 nested_schema=nested_schema, 5865 ) 5866 5867 def _parse_json_schema(self) -> exp.JSONSchema: 5868 self._match_text_seq("COLUMNS") 5869 return self.expression( 5870 exp.JSONSchema, 5871 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5872 ) 5873 5874 def _parse_json_table(self) -> exp.JSONTable: 5875 this = self._parse_format_json(self._parse_bitwise()) 5876 path = self._match(TokenType.COMMA) and self._parse_string() 5877 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5878 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5879 schema = self._parse_json_schema() 5880 5881 return exp.JSONTable( 5882 this=this, 5883 schema=schema, 5884 path=path, 5885 error_handling=error_handling, 5886 empty_handling=empty_handling, 5887 ) 5888 5889 def _parse_match_against(self) -> exp.MatchAgainst: 5890 expressions = self._parse_csv(self._parse_column) 5891 5892 self._match_text_seq(")", "AGAINST", "(") 5893 5894 this = self._parse_string() 5895 5896 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5897 modifier = "IN NATURAL LANGUAGE MODE" 5898 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5899 modifier = f"{modifier} WITH QUERY EXPANSION" 5900 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5901 modifier = "IN BOOLEAN MODE" 5902 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5903 modifier = "WITH QUERY EXPANSION" 5904 else: 5905 modifier = None 5906 5907 return self.expression( 5908 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5909 ) 5910 5911 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5912 def _parse_open_json(self) -> exp.OpenJSON: 5913 this = self._parse_bitwise() 5914 path = self._match(TokenType.COMMA) and self._parse_string() 5915 5916 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5917 this = self._parse_field(any_token=True) 5918 kind = self._parse_types() 5919 path = 
self._parse_string() 5920 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5921 5922 return self.expression( 5923 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5924 ) 5925 5926 expressions = None 5927 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5928 self._match_l_paren() 5929 expressions = self._parse_csv(_parse_open_json_column_def) 5930 5931 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5932 5933 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5934 args = self._parse_csv(self._parse_bitwise) 5935 5936 if self._match(TokenType.IN): 5937 return self.expression( 5938 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5939 ) 5940 5941 if haystack_first: 5942 haystack = seq_get(args, 0) 5943 needle = seq_get(args, 1) 5944 else: 5945 needle = seq_get(args, 0) 5946 haystack = seq_get(args, 1) 5947 5948 return self.expression( 5949 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5950 ) 5951 5952 def _parse_predict(self) -> exp.Predict: 5953 self._match_text_seq("MODEL") 5954 this = self._parse_table() 5955 5956 self._match(TokenType.COMMA) 5957 self._match_text_seq("TABLE") 5958 5959 return self.expression( 5960 exp.Predict, 5961 this=this, 5962 expression=self._parse_table(), 5963 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5964 ) 5965 5966 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5967 args = self._parse_csv(self._parse_table) 5968 return exp.JoinHint(this=func_name.upper(), expressions=args) 5969 5970 def _parse_substring(self) -> exp.Substring: 5971 # Postgres supports the form: substring(string [from int] [for int]) 5972 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5973 5974 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5975 5976 if self._match(TokenType.FROM): 5977 args.append(self._parse_bitwise()) 5978 if self._match(TokenType.FOR): 5979 if len(args) == 1: 5980 args.append(exp.Literal.number(1)) 5981 args.append(self._parse_bitwise()) 5982 5983 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5984 5985 def _parse_trim(self) -> exp.Trim: 5986 # https://www.w3resource.com/sql/character-functions/trim.php 5987 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5988 5989 position = None 5990 collation = None 5991 expression = None 5992 5993 if self._match_texts(self.TRIM_TYPES): 5994 position = self._prev.text.upper() 5995 5996 this = self._parse_bitwise() 5997 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5998 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5999 expression = self._parse_bitwise() 6000 6001 if invert_order: 6002 this, expression = expression, this 6003 6004 if self._match(TokenType.COLLATE): 6005 collation = self._parse_bitwise() 6006 6007 return self.expression( 6008 exp.Trim, this=this, position=position, expression=expression, collation=collation 6009 ) 6010 6011 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6012 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6013 6014 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6015 return self._parse_window(self._parse_id_var(), alias=True) 6016 6017 def _parse_respect_or_ignore_nulls( 6018 self, this: t.Optional[exp.Expression] 6019 ) -> t.Optional[exp.Expression]: 6020 if self._match_text_seq("IGNORE", "NULLS"): 
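    # Both branches wrap `this` in a dedicated unary node (IgnoreNulls /
    # RespectNulls) so generators can re-emit the clause in the right position: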
6021 return self.expression(exp.IgnoreNulls, this=this) 6022 if self._match_text_seq("RESPECT", "NULLS"): 6023 return self.expression(exp.RespectNulls, this=this) 6024 return this 6025 6026 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6027 if self._match(TokenType.HAVING): 6028 self._match_texts(("MAX", "MIN")) 6029 max = self._prev.text.upper() != "MIN" 6030 return self.expression( 6031 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6032 ) 6033 6034 return this 6035 6036 def _parse_window( 6037 self, this: t.Optional[exp.Expression], alias: bool = False 6038 ) -> t.Optional[exp.Expression]: 6039 func = this 6040 comments = func.comments if isinstance(func, exp.Expression) else None 6041 6042 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6043 self._match(TokenType.WHERE) 6044 this = self.expression( 6045 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6046 ) 6047 self._match_r_paren() 6048 6049 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6050 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6051 if self._match_text_seq("WITHIN", "GROUP"): 6052 order = self._parse_wrapped(self._parse_order) 6053 this = self.expression(exp.WithinGroup, this=this, expression=order) 6054 6055 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6056 # Some dialects choose to implement and some do not. 6057 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6058 6059 # There is some code above in _parse_lambda that handles 6060 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6061 6062 # The below changes handle 6063 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6064 6065 # Oracle allows both formats 6066 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6067 # and Snowflake chose to do the same for familiarity 6068 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6069 if isinstance(this, exp.AggFunc): 6070 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6071 6072 if ignore_respect and ignore_respect is not this: 6073 ignore_respect.replace(ignore_respect.this) 6074 this = self.expression(ignore_respect.__class__, this=this) 6075 6076 this = self._parse_respect_or_ignore_nulls(this) 6077 6078 # bigquery select from window x AS (partition by ...) 
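    # e.g. (illustrative) the named window in
    #   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)
    # reaches this point with alias=True, so no OVER keyword is expected below.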
6079 if alias: 6080 over = None 6081 self._match(TokenType.ALIAS) 6082 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6083 return this 6084 else: 6085 over = self._prev.text.upper() 6086 6087 if comments and isinstance(func, exp.Expression): 6088 func.pop_comments() 6089 6090 if not self._match(TokenType.L_PAREN): 6091 return self.expression( 6092 exp.Window, 6093 comments=comments, 6094 this=this, 6095 alias=self._parse_id_var(False), 6096 over=over, 6097 ) 6098 6099 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6100 6101 first = self._match(TokenType.FIRST) 6102 if self._match_text_seq("LAST"): 6103 first = False 6104 6105 partition, order = self._parse_partition_and_order() 6106 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6107 6108 if kind: 6109 self._match(TokenType.BETWEEN) 6110 start = self._parse_window_spec() 6111 self._match(TokenType.AND) 6112 end = self._parse_window_spec() 6113 6114 spec = self.expression( 6115 exp.WindowSpec, 6116 kind=kind, 6117 start=start["value"], 6118 start_side=start["side"], 6119 end=end["value"], 6120 end_side=end["side"], 6121 ) 6122 else: 6123 spec = None 6124 6125 self._match_r_paren() 6126 6127 window = self.expression( 6128 exp.Window, 6129 comments=comments, 6130 this=this, 6131 partition_by=partition, 6132 order=order, 6133 spec=spec, 6134 alias=window_alias, 6135 over=over, 6136 first=first, 6137 ) 6138 6139 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6140 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6141 return self._parse_window(window, alias=alias) 6142 6143 return window 6144 6145 def _parse_partition_and_order( 6146 self, 6147 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6148 return self._parse_partition_by(), self._parse_order() 6149 6150 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6151 self._match(TokenType.BETWEEN) 6152 6153 return { 6154 "value": ( 6155 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6156 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6157 or self._parse_bitwise() 6158 ), 6159 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6160 } 6161 6162 def _parse_alias( 6163 self, this: t.Optional[exp.Expression], explicit: bool = False 6164 ) -> t.Optional[exp.Expression]: 6165 any_token = self._match(TokenType.ALIAS) 6166 comments = self._prev_comments or [] 6167 6168 if explicit and not any_token: 6169 return this 6170 6171 if self._match(TokenType.L_PAREN): 6172 aliases = self.expression( 6173 exp.Aliases, 6174 comments=comments, 6175 this=this, 6176 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6177 ) 6178 self._match_r_paren(aliases) 6179 return aliases 6180 6181 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6182 self.STRING_ALIASES and self._parse_string_as_identifier() 6183 ) 6184 6185 if alias: 6186 comments.extend(alias.pop_comments()) 6187 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6188 column = this.this 6189 6190 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6191 if not this.comments and column and column.comments: 6192 this.comments = column.pop_comments() 6193 6194 return this 6195 6196 def _parse_id_var( 6197 self, 6198 any_token: bool = True, 6199 tokens: t.Optional[t.Collection[TokenType]] = None, 6200 ) -> t.Optional[exp.Expression]: 6201 expression = self._parse_identifier() 6202 if 
not expression and ( 6203 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6204 ): 6205 quoted = self._prev.token_type == TokenType.STRING 6206 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6207 6208 return expression 6209 6210 def _parse_string(self) -> t.Optional[exp.Expression]: 6211 if self._match_set(self.STRING_PARSERS): 6212 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6213 return self._parse_placeholder() 6214 6215 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6216 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6217 6218 def _parse_number(self) -> t.Optional[exp.Expression]: 6219 if self._match_set(self.NUMERIC_PARSERS): 6220 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6221 return self._parse_placeholder() 6222 6223 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6224 if self._match(TokenType.IDENTIFIER): 6225 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6226 return self._parse_placeholder() 6227 6228 def _parse_var( 6229 self, 6230 any_token: bool = False, 6231 tokens: t.Optional[t.Collection[TokenType]] = None, 6232 upper: bool = False, 6233 ) -> t.Optional[exp.Expression]: 6234 if ( 6235 (any_token and self._advance_any()) 6236 or self._match(TokenType.VAR) 6237 or (self._match_set(tokens) if tokens else False) 6238 ): 6239 return self.expression( 6240 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6241 ) 6242 return self._parse_placeholder() 6243 6244 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6245 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6246 self._advance() 6247 return self._prev 6248 return None 6249 6250 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6251 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6252 6253 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6254 return self._parse_primary() or self._parse_var(any_token=True) 6255 6256 def _parse_null(self) -> t.Optional[exp.Expression]: 6257 if self._match_set(self.NULL_TOKENS): 6258 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6259 return self._parse_placeholder() 6260 6261 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6262 if self._match(TokenType.TRUE): 6263 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6264 if self._match(TokenType.FALSE): 6265 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6266 return self._parse_placeholder() 6267 6268 def _parse_star(self) -> t.Optional[exp.Expression]: 6269 if self._match(TokenType.STAR): 6270 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6271 return self._parse_placeholder() 6272 6273 def _parse_parameter(self) -> exp.Parameter: 6274 this = self._parse_identifier() or self._parse_primary_or_var() 6275 return self.expression(exp.Parameter, this=this) 6276 6277 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6278 if self._match_set(self.PLACEHOLDER_PARSERS): 6279 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6280 if placeholder: 6281 return placeholder 6282 self._advance(-1) 6283 return None 6284 6285 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6286 if not self._match_texts(keywords): 6287 return None 6288 if self._match(TokenType.L_PAREN, 
advance=False): 6289 return self._parse_wrapped_csv(self._parse_expression) 6290 6291 expression = self._parse_expression() 6292 return [expression] if expression else None 6293 6294 def _parse_csv( 6295 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6296 ) -> t.List[exp.Expression]: 6297 parse_result = parse_method() 6298 items = [parse_result] if parse_result is not None else [] 6299 6300 while self._match(sep): 6301 self._add_comments(parse_result) 6302 parse_result = parse_method() 6303 if parse_result is not None: 6304 items.append(parse_result) 6305 6306 return items 6307 6308 def _parse_tokens( 6309 self, parse_method: t.Callable, expressions: t.Dict 6310 ) -> t.Optional[exp.Expression]: 6311 this = parse_method() 6312 6313 while self._match_set(expressions): 6314 this = self.expression( 6315 expressions[self._prev.token_type], 6316 this=this, 6317 comments=self._prev_comments, 6318 expression=parse_method(), 6319 ) 6320 6321 return this 6322 6323 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6324 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6325 6326 def _parse_wrapped_csv( 6327 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6328 ) -> t.List[exp.Expression]: 6329 return self._parse_wrapped( 6330 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6331 ) 6332 6333 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6334 wrapped = self._match(TokenType.L_PAREN) 6335 if not wrapped and not optional: 6336 self.raise_error("Expecting (") 6337 parse_result = parse_method() 6338 if wrapped: 6339 self._match_r_paren() 6340 return parse_result 6341 6342 def _parse_expressions(self) -> t.List[exp.Expression]: 6343 return self._parse_csv(self._parse_expression) 6344 6345 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6346 return self._parse_select() or self._parse_set_operations( 6347 self._parse_expression() if alias else self._parse_assignment() 6348 ) 6349 6350 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6351 return self._parse_query_modifiers( 6352 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6353 ) 6354 6355 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6356 this = None 6357 if self._match_texts(self.TRANSACTION_KIND): 6358 this = self._prev.text 6359 6360 self._match_texts(("TRANSACTION", "WORK")) 6361 6362 modes = [] 6363 while True: 6364 mode = [] 6365 while self._match(TokenType.VAR): 6366 mode.append(self._prev.text) 6367 6368 if mode: 6369 modes.append(" ".join(mode)) 6370 if not self._match(TokenType.COMMA): 6371 break 6372 6373 return self.expression(exp.Transaction, this=this, modes=modes) 6374 6375 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6376 chain = None 6377 savepoint = None 6378 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6379 6380 self._match_texts(("TRANSACTION", "WORK")) 6381 6382 if self._match_text_seq("TO"): 6383 self._match_text_seq("SAVEPOINT") 6384 savepoint = self._parse_id_var() 6385 6386 if self._match(TokenType.AND): 6387 chain = not self._match_text_seq("NO") 6388 self._match_text_seq("CHAIN") 6389 6390 if is_rollback: 6391 return self.expression(exp.Rollback, savepoint=savepoint) 6392 6393 return self.expression(exp.Commit, chain=chain) 6394 6395 def _parse_refresh(self) -> exp.Refresh: 6396 self._match(TokenType.TABLE) 6397 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6398 6399 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6400 if not self._match_text_seq("ADD"): 6401 return None 6402 6403 self._match(TokenType.COLUMN) 6404 exists_column = self._parse_exists(not_=True) 6405 expression = self._parse_field_def() 6406 6407 if expression: 6408 expression.set("exists", exists_column) 6409 6410 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6411 if self._match_texts(("FIRST", "AFTER")): 6412 position = self._prev.text 6413 column_position = self.expression( 6414 exp.ColumnPosition, this=self._parse_column(), position=position 6415 ) 6416 expression.set("position", column_position) 6417 6418 return expression 6419 6420 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6421 drop = self._match(TokenType.DROP) and self._parse_drop() 6422 if drop and not isinstance(drop, exp.Command): 6423 drop.set("kind", drop.args.get("kind", "COLUMN")) 6424 return drop 6425 6426 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6427 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6428 return self.expression( 6429 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6430 ) 6431 6432 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6433 index = self._index - 1 6434 6435 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6436 return self._parse_csv( 6437 lambda: self.expression( 6438 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6439 ) 6440 ) 6441 6442 self._retreat(index) 6443 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6444 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6445 6446 if self._match_text_seq("ADD", "COLUMNS"): 6447 schema = self._parse_schema() 6448 if schema: 6449 return [schema] 6450 return [] 6451 6452 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6453 6454 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6455 if self._match_texts(self.ALTER_ALTER_PARSERS): 6456 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6457 6458 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6459 # keyword after ALTER we default to parsing this statement 6460 self._match(TokenType.COLUMN) 6461 column = self._parse_field(any_token=True) 6462 6463 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6464 return self.expression(exp.AlterColumn, this=column, drop=True) 6465 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6466 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6467 if self._match(TokenType.COMMENT): 6468 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6469 if self._match_text_seq("DROP", "NOT", "NULL"): 6470 return self.expression( 6471 exp.AlterColumn, 6472 this=column, 6473 drop=True, 6474 allow_null=True, 6475 ) 6476 if self._match_text_seq("SET", "NOT", "NULL"): 6477 return self.expression( 6478 exp.AlterColumn, 6479 this=column, 6480 allow_null=False, 6481 ) 6482 self._match_text_seq("SET", "DATA") 6483 self._match_text_seq("TYPE") 6484 return self.expression( 6485 exp.AlterColumn, 6486 this=column, 6487 dtype=self._parse_types(), 6488 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6489 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6490 ) 6491 6492 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6493 if self._match_texts(("ALL", "EVEN", "AUTO")): 6494 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6495 6496 self._match_text_seq("KEY", "DISTKEY") 6497 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6498 6499 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6500 if compound: 6501 self._match_text_seq("SORTKEY") 6502 6503 if self._match(TokenType.L_PAREN, advance=False): 6504 return self.expression( 6505 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6506 ) 6507 6508 self._match_texts(("AUTO", "NONE")) 6509 return self.expression( 6510 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6511 ) 6512 6513 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6514 index = self._index - 1 6515 6516 partition_exists = self._parse_exists() 6517 if self._match(TokenType.PARTITION, advance=False): 6518 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6519 6520 self._retreat(index) 6521 return self._parse_csv(self._parse_drop_column) 6522 6523 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6524 if self._match(TokenType.COLUMN): 6525 exists = self._parse_exists() 6526 old_column = self._parse_column() 6527 to = self._match_text_seq("TO") 6528 new_column = self._parse_column() 6529 6530 if old_column is None or to is None or new_column is None: 6531 return None 6532 6533 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6534 6535 self._match_text_seq("TO") 6536 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6537 6538 def _parse_alter_table_set(self) -> exp.AlterSet: 6539 alter_set = self.expression(exp.AlterSet) 6540 6541 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6542 "TABLE", "PROPERTIES" 6543 ): 6544 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6545 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6546 alter_set.set("expressions", [self._parse_assignment()]) 6547 elif self._match_texts(("LOGGED", "UNLOGGED")): 6548 alter_set.set("option", exp.var(self._prev.text.upper())) 6549 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6550 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6551 elif self._match_text_seq("LOCATION"): 6552 alter_set.set("location", self._parse_field()) 6553 elif self._match_text_seq("ACCESS", "METHOD"): 6554 alter_set.set("access_method", self._parse_field()) 6555 elif self._match_text_seq("TABLESPACE"): 6556 alter_set.set("tablespace", self._parse_field()) 6557 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6558 alter_set.set("file_format", [self._parse_field()]) 6559 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6560 alter_set.set("file_format", self._parse_wrapped_options()) 6561 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6562 alter_set.set("copy_options", self._parse_wrapped_options()) 6563 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6564 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6565 else: 6566 if self._match_text_seq("SERDE"): 6567 alter_set.set("serde", self._parse_field()) 6568 6569 alter_set.set("expressions", [self._parse_properties()]) 6570 6571 return 
    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()

        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
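These internal _parse_* methods are exercised through the public parsing entry points. As a quick illustration (a minimal sketch; the table names t and s are made up, and sqlglot.parse_one is the top-level helper that dispatches MERGE statements to _parse_merge):

import sqlglot
from sqlglot import exp

# Hypothetical tables t and s, used only for demonstration.
sql = (
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)

merge = sqlglot.parse_one(sql)
assert isinstance(merge, exp.Merge)

# _parse_when_matched produced one exp.When node per WHEN branch.
print([when.args["matched"] for when in merge.expressions])  # [True, False]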
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
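For example, a parser can be configured to accumulate several errors and raise them together instead of failing on the first one (a minimal sketch; the dialect name is arbitrary):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to 5 errors before raising, showing 50 characters of
# context around each offending token.
parser = Parser(
    error_level=ErrorLevel.RAISE,
    error_message_context=50,
    max_errors=5,
    dialect="duckdb",  # any registered dialect name or Dialect instance
)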
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
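A typical round trip pairs the Tokenizer with parse() (a minimal sketch; the input is split on semicolons, yielding one syntax tree per statement):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

expressions = Parser().parse(tokens, sql=sql)
print(len(expressions))      # 2
print(expressions[0].sql())  # SELECT a FROM t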
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
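For instance, constraining the result to a SELECT (a sketch; a non-SELECT input would raise a ParseError whose errors are annotated with the attempted expression type):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1"
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(select, exp.Select)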
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
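check_errors() is invoked internally at the end of each parse, so the error level chosen at construction time decides its behavior (a sketch; the deliberately invalid input below is arbitrary):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = ")"  # not a valid statement
parser = Parser(error_level=ErrorLevel.WARN)

# With WARN the problem is logged and recorded rather than raised;
# with RAISE, check_errors() would throw a combined ParseError instead.
parser.parse(Tokenizer().tokenize(sql), sql=sql)
print(parser.errors)  # the accumulated ParseError instances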
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
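Dialect subclasses call raise_error() from their own parsing hooks; with ErrorLevel.IMMEDIATE it raises at once, otherwise it appends to self.errors (a contrived sketch that populates the parser state by hand):

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.IMMEDIATE)
parser.sql = "SELECT oops"  # normally populated by _parse()

try:
    parser.raise_error("Unexpected token")
except ParseError as e:
    print(e.errors[0]["description"])  # Unexpected token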
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
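Inside parsing code, expression() is the standard way to build nodes so that validation and comment propagation happen uniformly (a minimal sketch using the expression helpers from sqlglot.exp):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
print(eq.sql())  # a = 1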
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
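Validation reports any missing mandatory arguments through raise_error(), so with the default ErrorLevel.IMMEDIATE an incomplete node fails fast (a sketch; exp.Like is chosen arbitrarily as an expression type with required arguments):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # error_level defaults to ErrorLevel.IMMEDIATE

try:
    parser.validate_expression(exp.Like())  # missing `this` and `expression`
except ParseError as e:
    print(e)  # Required keyword: 'this' missing for ...Like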