sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    tz = expression.args.get("zone")
    datatype = exp.DataType.build(exp.DataType.Type.TIMESTAMP)
    ts = expression.this
    if tz:
        # build a datatype that encodes the timezone as a type parameter,
        # e.g. DateTime('America/Los_Angeles')
        datatype = exp.DataType.build(
            exp.DataType.Type.TIMESTAMPTZ,  # Type.TIMESTAMPTZ maps to DateTime
            expressions=[exp.DataTypeParam(this=tz)],
        )

        if isinstance(ts, exp.Literal):
            # strip the timezone out of the literal, e.g. turn '2020-01-01 12:13:14-08:00'
            # into '2020-01-01 12:13:14'. This is because ClickHouse encodes the timezone
            # as a data type parameter and throws an error if it's part of the timestamp string
            ts_without_tz = (
                datetime.datetime.fromisoformat(ts.name).replace(tzinfo=None).isoformat(sep=" ")
            )
            ts = exp.Literal.string(ts_without_tz)

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))
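

# A rough, unverified sketch of how the helpers above surface through sqlglot's
# public API (doctest-style calls for illustration only; outputs are not shown
# because they depend on the installed sqlglot version):
#
#   >>> import sqlglot
#   >>> # _quantile_sql renders exp.Quantile as quantile(q)(x), or quantiles(...)(x)
#   >>> # when q is an array of levels
#   >>> sqlglot.transpile("SELECT quantile(0.5)(x) FROM t", read="clickhouse")
#   >>> # _datetime_delta_sql keeps ClickHouse's unit-first argument order, e.g.
#   >>> # DATE_ADD(DAY, 1, d), and falls back to a plain rename when no unit is given
#   >>> sqlglot.transpile("SELECT DATE_ADD(DAY, 1, d) FROM t", read="clickhouse")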

class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect ClickHouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType):
                # Mark every type as non-nullable, which is ClickHouse's default. This marker
                # helps us transpile types from other dialects to ClickHouse, so that we can
                # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there
                # is a `NULL` value in `x`, the former would fail in ClickHouse without the
                # `Nullable` type constructor
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()
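
    # Two Parser behaviors above that are easy to miss (a rough, unverified sketch
    # using sqlglot's public parse_one API):
    #
    #   >>> import sqlglot
    #   >>> # {abc: UInt32} is a ClickHouse query parameter, handled by
    #   >>> # _parse_query_parameter and represented as an exp.Placeholder with a kind
    #   >>> sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse")
    #   >>> # ClickHouse's scalar CTE form, WITH <expression> AS <identifier>, is
    #   >>> # handled by the _parse_cte fallback and tagged with scalar=True
    #   >>> sqlglot.parse_one("WITH 1 AS x SELECT x", read="clickhouse")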

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: trim_sql,
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # There's no list in docs, but it can be found in ClickHouse code,
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.NULLABLE,
            exp.DataType.Type.STRUCT,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (e.g. postgres), so
                # this branch aims to improve the transpilation to ClickHouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            if (
                expression.args.get("nullable") is not False
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
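

# A minimal end-to-end sketch of the dialect in use, via sqlglot's public API
# (doctest-style, not executed here; the Nullable(...) result is the behavior the
# comment in Parser._parse_types describes, and the SETTINGS/FORMAT round-trip
# exercises QUERY_MODIFIER_PARSERS together with after_limit_modifiers):
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")
#   ['CAST(x AS Nullable(String))']
#   >>> sqlglot.parse_one(
#   ...     "SELECT * FROM t LIMIT 1 SETTINGS max_threads = 4 FORMAT JSON", read="clickhouse"
#   ... ).sql(dialect="clickhouse")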
This is because ClickHouse enforces the following 988 # constraint: "Type of Map key must be a type, that can be represented by integer or 989 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 990 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 991 parent = expression.parent 992 if ( 993 expression.args.get("nullable") is not False 994 and not ( 995 isinstance(parent, exp.DataType) 996 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 997 and expression.index in (None, 0) 998 ) 999 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1000 ): 1001 dtype = f"Nullable({dtype})" 1002 1003 return dtype 1004 1005 def cte_sql(self, expression: exp.CTE) -> str: 1006 if expression.args.get("scalar"): 1007 this = self.sql(expression, "this") 1008 alias = self.sql(expression, "alias") 1009 return f"{this} AS {alias}" 1010 1011 return super().cte_sql(expression) 1012 1013 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1014 return super().after_limit_modifiers(expression) + [ 1015 ( 1016 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1017 if expression.args.get("settings") 1018 else "" 1019 ), 1020 ( 1021 self.seg("FORMAT ") + self.sql(expression, "format") 1022 if expression.args.get("format") 1023 else "" 1024 ), 1025 ] 1026 1027 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1028 params = self.expressions(expression, key="params", flat=True) 1029 return self.func(expression.name, *expression.expressions) + f"({params})" 1030 1031 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1032 return self.func(expression.name, *expression.expressions) 1033 1034 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1035 return self.anonymousaggfunc_sql(expression) 1036 1037 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1038 return self.parameterizedagg_sql(expression) 1039 1040 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1041 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1042 1043 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1044 return f"ON CLUSTER {self.sql(expression, 'this')}" 1045 1046 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1047 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1048 exp.Properties.Location.POST_NAME 1049 ): 1050 this_name = self.sql( 1051 expression.this if isinstance(expression.this, exp.Schema) else expression, 1052 "this", 1053 ) 1054 this_properties = " ".join( 1055 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1056 ) 1057 this_schema = self.schema_columns_sql(expression.this) 1058 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1059 1060 return super().createable_sql(expression, locations) 1061 1062 def create_sql(self, expression: exp.Create) -> str: 1063 # The comment property comes last in CTAS statements, i.e. 
after the query 1064 query = expression.expression 1065 if isinstance(query, exp.Query): 1066 comment_prop = expression.find(exp.SchemaCommentProperty) 1067 if comment_prop: 1068 comment_prop.pop() 1069 query.replace(exp.paren(query)) 1070 else: 1071 comment_prop = None 1072 1073 create_sql = super().create_sql(expression) 1074 1075 comment_sql = self.sql(comment_prop) 1076 comment_sql = f" {comment_sql}" if comment_sql else "" 1077 1078 return f"{create_sql}{comment_sql}" 1079 1080 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1081 this = self.indent(self.sql(expression, "this")) 1082 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1083 1084 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1085 this = self.sql(expression, "this") 1086 this = f" {this}" if this else "" 1087 expr = self.sql(expression, "expression") 1088 expr = f" {expr}" if expr else "" 1089 index_type = self.sql(expression, "index_type") 1090 index_type = f" TYPE {index_type}" if index_type else "" 1091 granularity = self.sql(expression, "granularity") 1092 granularity = f" GRANULARITY {granularity}" if granularity else "" 1093 1094 return f"INDEX{this}{expr}{index_type}{granularity}" 1095 1096 def partition_sql(self, expression: exp.Partition) -> str: 1097 return f"PARTITION {self.expressions(expression, flat=True)}" 1098 1099 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1100 return f"ID {self.sql(expression.this)}" 1101 1102 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1103 return ( 1104 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1105 ) 1106 1107 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1108 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
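The listing above completes the dialect's source. Before the attribute-level documentation below, here is a minimal, hedged sketch of driving the dialect through sqlglot's public API under the name "clickhouse" (the table and column names are illustrative; exact output formatting can differ between sqlglot versions):

import sqlglot

# Round-trip a query that uses the ClickHouse-specific FINAL table modifier,
# which _parse_table reads and the exp.Final transform writes back.
sql = "SELECT a FROM t FINAL WHERE a > 0"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])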
NORMALIZE_FUNCTIONS
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
NULL_ORDERING
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
LOG_BASE_FIRST
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
FORCE_EARLY_ALIAS_REF_EXPANSION
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects "my_id" would refer to "data.my_id" (which is resolved in _qualify_columns()) across the query, except:
- BigQuery, which will forward the alias to GROUP BY + HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
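A minimal sketch of this behavior via the optimizer's qualification pass. qualify and its dialect argument are part of sqlglot's public optimizer API, but the exact qualified SQL it prints depends on the installed version:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""

# With dialect="clickhouse", the alias my_id should be expanded to the
# underlying column id across WHERE / GROUP BY / HAVING, as described above.
print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))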
NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
UNESCAPED_SEQUENCES
Mapping of an escaped sequence ("\\n") to its unescaped version ("\n").
CREATABLE_KIND_MAPPING
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
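A hedged sketch of this equivalence; which side gets rewritten, and in which direction, depends on how the mapping is wired in the installed sqlglot version:

import sqlglot

# CREATE SCHEMA and CREATE DATABASE name the same creatable kind here.
print(sqlglot.transpile("CREATE SCHEMA foo", read="postgres", write="clickhouse")[0])
print(sqlglot.transpile("CREATE DATABASE foo", read="clickhouse", write="postgres")[0])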
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
150 class Tokenizer(tokens.Tokenizer): 151 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 152 IDENTIFIERS = ['"', "`"] 153 STRING_ESCAPES = ["'", "\\"] 154 BIT_STRINGS = [("0b", "")] 155 HEX_STRINGS = [("0x", ""), ("0X", "")] 156 HEREDOC_STRINGS = ["$"] 157 158 KEYWORDS = { 159 **tokens.Tokenizer.KEYWORDS, 160 "ATTACH": TokenType.COMMAND, 161 "DATE32": TokenType.DATE32, 162 "DATETIME64": TokenType.DATETIME64, 163 "DICTIONARY": TokenType.DICTIONARY, 164 "ENUM8": TokenType.ENUM8, 165 "ENUM16": TokenType.ENUM16, 166 "FINAL": TokenType.FINAL, 167 "FIXEDSTRING": TokenType.FIXEDSTRING, 168 "FLOAT32": TokenType.FLOAT, 169 "FLOAT64": TokenType.DOUBLE, 170 "GLOBAL": TokenType.GLOBAL, 171 "INT256": TokenType.INT256, 172 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 173 "MAP": TokenType.MAP, 174 "NESTED": TokenType.NESTED, 175 "SAMPLE": TokenType.TABLE_SAMPLE, 176 "TUPLE": TokenType.STRUCT, 177 "UINT128": TokenType.UINT128, 178 "UINT16": TokenType.USMALLINT, 179 "UINT256": TokenType.UINT256, 180 "UINT32": TokenType.UINT, 181 "UINT64": TokenType.UBIGINT, 182 "UINT8": TokenType.UTINYINT, 183 "IPV4": TokenType.IPV4, 184 "IPV6": TokenType.IPV6, 185 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 186 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 187 "SYSTEM": TokenType.COMMAND, 188 "PREWHERE": TokenType.PREWHERE, 189 } 190 KEYWORDS.pop("/*+") 191 192 SINGLE_TOKENS = { 193 **tokens.Tokenizer.SINGLE_TOKENS, 194 "$": TokenType.HEREDOC_STRING, 195 }
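A small sketch of the tokenizer settings above: '#' opens a line comment, backticks quote identifiers, and 0x... is scanned as a hex string (token type names may vary slightly between versions):

from sqlglot.dialects.clickhouse import ClickHouse

for token in ClickHouse().tokenize("SELECT 0xFF AS `byte` # trailing comment"):
    print(token.token_type, repr(token.text))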
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
197 class Parser(parser.Parser): 198 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 199 # * select x from t1 union all select x from t2 limit 1; 200 # * select x from t1 union all (select x from t2 limit 1); 201 MODIFIERS_ATTACHED_TO_SET_OP = False 202 INTERVAL_SPANS = False 203 204 FUNCTIONS = { 205 **parser.Parser.FUNCTIONS, 206 "ANY": exp.AnyValue.from_arg_list, 207 "ARRAYSUM": exp.ArraySum.from_arg_list, 208 "COUNTIF": _build_count_if, 209 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 210 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 211 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 212 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 213 "DATE_FORMAT": _build_date_format, 214 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 215 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 216 "FORMATDATETIME": _build_date_format, 217 "JSONEXTRACTSTRING": build_json_extract_path( 218 exp.JSONExtractScalar, zero_based_indexing=False 219 ), 220 "MAP": parser.build_var_map, 221 "MATCH": exp.RegexpLike.from_arg_list, 222 "RANDCANONICAL": exp.Rand.from_arg_list, 223 "STR_TO_DATE": _build_str_to_date, 224 "TUPLE": exp.Struct.from_arg_list, 225 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 226 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 227 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 228 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 229 "UNIQ": exp.ApproxDistinct.from_arg_list, 230 "XOR": lambda args: exp.Xor(expressions=args), 231 "MD5": exp.MD5Digest.from_arg_list, 232 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 233 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 234 } 235 236 AGG_FUNCTIONS = { 237 "count", 238 "min", 239 "max", 240 "sum", 241 "avg", 242 "any", 243 "stddevPop", 244 "stddevSamp", 245 "varPop", 246 "varSamp", 247 "corr", 248 "covarPop", 249 "covarSamp", 250 "entropy", 251 "exponentialMovingAverage", 252 "intervalLengthSum", 253 "kolmogorovSmirnovTest", 254 "mannWhitneyUTest", 255 "median", 256 "rankCorr", 257 "sumKahan", 258 "studentTTest", 259 "welchTTest", 260 "anyHeavy", 261 "anyLast", 262 "boundingRatio", 263 "first_value", 264 "last_value", 265 "argMin", 266 "argMax", 267 "avgWeighted", 268 "topK", 269 "topKWeighted", 270 "deltaSum", 271 "deltaSumTimestamp", 272 "groupArray", 273 "groupArrayLast", 274 "groupUniqArray", 275 "groupArrayInsertAt", 276 "groupArrayMovingAvg", 277 "groupArrayMovingSum", 278 "groupArraySample", 279 "groupBitAnd", 280 "groupBitOr", 281 "groupBitXor", 282 "groupBitmap", 283 "groupBitmapAnd", 284 "groupBitmapOr", 285 "groupBitmapXor", 286 "sumWithOverflow", 287 "sumMap", 288 "minMap", 289 "maxMap", 290 "skewSamp", 291 "skewPop", 292 "kurtSamp", 293 "kurtPop", 294 "uniq", 295 "uniqExact", 296 "uniqCombined", 297 "uniqCombined64", 298 "uniqHLL12", 299 "uniqTheta", 300 "quantile", 301 "quantiles", 302 "quantileExact", 303 "quantilesExact", 304 "quantileExactLow", 305 "quantilesExactLow", 306 "quantileExactHigh", 307 "quantilesExactHigh", 308 "quantileExactWeighted", 309 "quantilesExactWeighted", 310 "quantileTiming", 311 "quantilesTiming", 312 "quantileTimingWeighted", 313 "quantilesTimingWeighted", 314 "quantileDeterministic", 315 "quantilesDeterministic", 316 "quantileTDigest", 317 "quantilesTDigest", 318 "quantileTDigestWeighted", 319 
"quantilesTDigestWeighted", 320 "quantileBFloat16", 321 "quantilesBFloat16", 322 "quantileBFloat16Weighted", 323 "quantilesBFloat16Weighted", 324 "simpleLinearRegression", 325 "stochasticLinearRegression", 326 "stochasticLogisticRegression", 327 "categoricalInformationValue", 328 "contingency", 329 "cramersV", 330 "cramersVBiasCorrected", 331 "theilsU", 332 "maxIntersections", 333 "maxIntersectionsPosition", 334 "meanZTest", 335 "quantileInterpolatedWeighted", 336 "quantilesInterpolatedWeighted", 337 "quantileGK", 338 "quantilesGK", 339 "sparkBar", 340 "sumCount", 341 "largestTriangleThreeBuckets", 342 "histogram", 343 "sequenceMatch", 344 "sequenceCount", 345 "windowFunnel", 346 "retention", 347 "uniqUpTo", 348 "sequenceNextNode", 349 "exponentialTimeDecayedAvg", 350 } 351 352 AGG_FUNCTIONS_SUFFIXES = [ 353 "If", 354 "Array", 355 "ArrayIf", 356 "Map", 357 "SimpleState", 358 "State", 359 "Merge", 360 "MergeState", 361 "ForEach", 362 "Distinct", 363 "OrDefault", 364 "OrNull", 365 "Resample", 366 "ArgMin", 367 "ArgMax", 368 ] 369 370 FUNC_TOKENS = { 371 *parser.Parser.FUNC_TOKENS, 372 TokenType.SET, 373 } 374 375 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 376 377 ID_VAR_TOKENS = { 378 *parser.Parser.ID_VAR_TOKENS, 379 TokenType.LIKE, 380 } 381 382 AGG_FUNC_MAPPING = ( 383 lambda functions, suffixes: { 384 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 385 } 386 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 387 388 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 389 390 FUNCTION_PARSERS = { 391 **parser.Parser.FUNCTION_PARSERS, 392 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 393 "QUANTILE": lambda self: self._parse_quantile(), 394 } 395 396 FUNCTION_PARSERS.pop("MATCH") 397 398 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 399 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 400 401 RANGE_PARSERS = { 402 **parser.Parser.RANGE_PARSERS, 403 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 404 and self._parse_in(this, is_global=True), 405 } 406 407 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 408 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
409 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 410 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 411 412 JOIN_KINDS = { 413 *parser.Parser.JOIN_KINDS, 414 TokenType.ANY, 415 TokenType.ASOF, 416 TokenType.ARRAY, 417 } 418 419 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 420 TokenType.ANY, 421 TokenType.ARRAY, 422 TokenType.FINAL, 423 TokenType.FORMAT, 424 TokenType.SETTINGS, 425 } 426 427 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 428 TokenType.FORMAT, 429 } 430 431 LOG_DEFAULTS_TO_LN = True 432 433 QUERY_MODIFIER_PARSERS = { 434 **parser.Parser.QUERY_MODIFIER_PARSERS, 435 TokenType.SETTINGS: lambda self: ( 436 "settings", 437 self._advance() or self._parse_csv(self._parse_assignment), 438 ), 439 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 440 } 441 442 CONSTRAINT_PARSERS = { 443 **parser.Parser.CONSTRAINT_PARSERS, 444 "INDEX": lambda self: self._parse_index_constraint(), 445 "CODEC": lambda self: self._parse_compress(), 446 } 447 448 ALTER_PARSERS = { 449 **parser.Parser.ALTER_PARSERS, 450 "REPLACE": lambda self: self._parse_alter_table_replace(), 451 } 452 453 SCHEMA_UNNAMED_CONSTRAINTS = { 454 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 455 "INDEX", 456 } 457 458 PLACEHOLDER_PARSERS = { 459 **parser.Parser.PLACEHOLDER_PARSERS, 460 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 461 } 462 463 def _parse_types( 464 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 465 ) -> t.Optional[exp.Expression]: 466 dtype = super()._parse_types( 467 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 468 ) 469 if isinstance(dtype, exp.DataType): 470 # Mark every type as non-nullable which is ClickHouse's default. This marker 471 # helps us transpile types from other dialects to ClickHouse, so that we can 472 # e.g. produce `CAST(x AS Nullable(String))` from `CAST(x AS TEXT)`. If there 473 # is a `NULL` value in `x`, the former would fail in ClickHouse without the 474 # `Nullable` type constructor 475 dtype.set("nullable", False) 476 477 return dtype 478 479 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 480 index = self._index 481 this = self._parse_bitwise() 482 if self._match(TokenType.FROM): 483 self._retreat(index) 484 return super()._parse_extract() 485 486 # We return Anonymous here because extract and regexpExtract have different semantics, 487 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 488 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 489 # 490 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
491 self._match(TokenType.COMMA) 492 return self.expression( 493 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 494 ) 495 496 def _parse_assignment(self) -> t.Optional[exp.Expression]: 497 this = super()._parse_assignment() 498 499 if self._match(TokenType.PLACEHOLDER): 500 return self.expression( 501 exp.If, 502 this=this, 503 true=self._parse_assignment(), 504 false=self._match(TokenType.COLON) and self._parse_assignment(), 505 ) 506 507 return this 508 509 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 510 """ 511 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 512 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 513 """ 514 this = self._parse_id_var() 515 self._match(TokenType.COLON) 516 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 517 self._match_text_seq("IDENTIFIER") and "Identifier" 518 ) 519 520 if not kind: 521 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 522 elif not self._match(TokenType.R_BRACE): 523 self.raise_error("Expecting }") 524 525 return self.expression(exp.Placeholder, this=this, kind=kind) 526 527 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 528 this = super()._parse_in(this) 529 this.set("is_global", is_global) 530 return this 531 532 def _parse_table( 533 self, 534 schema: bool = False, 535 joins: bool = False, 536 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 537 parse_bracket: bool = False, 538 is_db_reference: bool = False, 539 parse_partition: bool = False, 540 ) -> t.Optional[exp.Expression]: 541 this = super()._parse_table( 542 schema=schema, 543 joins=joins, 544 alias_tokens=alias_tokens, 545 parse_bracket=parse_bracket, 546 is_db_reference=is_db_reference, 547 ) 548 549 if self._match(TokenType.FINAL): 550 this = self.expression(exp.Final, this=this) 551 552 return this 553 554 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 555 return super()._parse_position(haystack_first=True) 556 557 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 558 def _parse_cte(self) -> exp.CTE: 559 # WITH <identifier> AS <subquery expression> 560 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 561 562 if not cte: 563 # WITH <expression> AS <identifier> 564 cte = self.expression( 565 exp.CTE, 566 this=self._parse_assignment(), 567 alias=self._parse_table_alias(), 568 scalar=True, 569 ) 570 571 return cte 572 573 def _parse_join_parts( 574 self, 575 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 576 is_global = self._match(TokenType.GLOBAL) and self._prev 577 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 578 579 if kind_pre: 580 kind = self._match_set(self.JOIN_KINDS) and self._prev 581 side = self._match_set(self.JOIN_SIDES) and self._prev 582 return is_global, side, kind 583 584 return ( 585 is_global, 586 self._match_set(self.JOIN_SIDES) and self._prev, 587 self._match_set(self.JOIN_KINDS) and self._prev, 588 ) 589 590 def _parse_join( 591 self, skip_join_token: bool = False, parse_bracket: bool = False 592 ) -> t.Optional[exp.Join]: 593 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 594 if join: 595 join.set("global", join.args.pop("method", None)) 596 597 return join 598 599 def _parse_function( 600 self, 601 functions: t.Optional[t.Dict[str, t.Callable]] = None, 602 anonymous: bool = False, 603 
optional_parens: bool = True, 604 any_token: bool = False, 605 ) -> t.Optional[exp.Expression]: 606 expr = super()._parse_function( 607 functions=functions, 608 anonymous=anonymous, 609 optional_parens=optional_parens, 610 any_token=any_token, 611 ) 612 613 func = expr.this if isinstance(expr, exp.Window) else expr 614 615 # Aggregate functions can be split in 2 parts: <func_name><suffix> 616 parts = ( 617 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 618 ) 619 620 if parts: 621 params = self._parse_func_params(func) 622 623 kwargs = { 624 "this": func.this, 625 "expressions": func.expressions, 626 } 627 if parts[1]: 628 kwargs["parts"] = parts 629 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 630 else: 631 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 632 633 kwargs["exp_class"] = exp_class 634 if params: 635 kwargs["params"] = params 636 637 func = self.expression(**kwargs) 638 639 if isinstance(expr, exp.Window): 640 # The window's func was parsed as Anonymous in base parser, fix its 641 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 642 expr.set("this", func) 643 elif params: 644 # Params have blocked super()._parse_function() from parsing the following window 645 # (if that exists) as they're standing between the function call and the window spec 646 expr = self._parse_window(func) 647 else: 648 expr = func 649 650 return expr 651 652 def _parse_func_params( 653 self, this: t.Optional[exp.Func] = None 654 ) -> t.Optional[t.List[exp.Expression]]: 655 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 656 return self._parse_csv(self._parse_lambda) 657 658 if self._match(TokenType.L_PAREN): 659 params = self._parse_csv(self._parse_lambda) 660 self._match_r_paren(this) 661 return params 662 663 return None 664 665 def _parse_quantile(self) -> exp.Quantile: 666 this = self._parse_lambda() 667 params = self._parse_func_params() 668 if params: 669 return self.expression(exp.Quantile, this=params[0], quantile=this) 670 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 671 672 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 673 return super()._parse_wrapped_id_vars(optional=True) 674 675 def _parse_primary_key( 676 self, wrapped_optional: bool = False, in_props: bool = False 677 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 678 return super()._parse_primary_key( 679 wrapped_optional=wrapped_optional or in_props, in_props=in_props 680 ) 681 682 def _parse_on_property(self) -> t.Optional[exp.Expression]: 683 index = self._index 684 if self._match_text_seq("CLUSTER"): 685 this = self._parse_id_var() 686 if this: 687 return self.expression(exp.OnCluster, this=this) 688 else: 689 self._retreat(index) 690 return None 691 692 def _parse_index_constraint( 693 self, kind: t.Optional[str] = None 694 ) -> exp.IndexColumnConstraint: 695 # INDEX name1 expr TYPE type1(args) GRANULARITY value 696 this = self._parse_id_var() 697 expression = self._parse_assignment() 698 699 index_type = self._match_text_seq("TYPE") and ( 700 self._parse_function() or self._parse_var() 701 ) 702 703 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 704 705 return self.expression( 706 exp.IndexColumnConstraint, 707 this=this, 708 expression=expression, 709 index_type=index_type, 710 granularity=granularity, 711 ) 712 713 def _parse_partition(self) -> t.Optional[exp.Partition]: 714 # 
https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 715 if not self._match(TokenType.PARTITION): 716 return None 717 718 if self._match_text_seq("ID"): 719 # Corresponds to the PARTITION ID <string_value> syntax 720 expressions: t.List[exp.Expression] = [ 721 self.expression(exp.PartitionId, this=self._parse_string()) 722 ] 723 else: 724 expressions = self._parse_expressions() 725 726 return self.expression(exp.Partition, expressions=expressions) 727 728 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 729 partition = self._parse_partition() 730 731 if not partition or not self._match(TokenType.FROM): 732 return None 733 734 return self.expression( 735 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 736 ) 737 738 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 739 if not self._match_text_seq("PROJECTION"): 740 return None 741 742 return self.expression( 743 exp.ProjectionDef, 744 this=self._parse_id_var(), 745 expression=self._parse_wrapped(self._parse_statement), 746 ) 747 748 def _parse_constraint(self) -> t.Optional[exp.Expression]: 749 return super()._parse_constraint() or self._parse_projection_def()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
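A sketch of some parses that are specific to this subclass: combined aggregate names built from AGG_FUNC_MAPPING (e.g. sumIf), parameterized aggregates such as quantile(0.5)(x), the ternary operator, and {name: Type} query parameters. The table and column names are illustrative and the repr() shapes are version-dependent:

import sqlglot

print(repr(sqlglot.parse_one("SELECT quantile(0.5)(x), sumIf(y, y > 0) FROM t", read="clickhouse")))
print(repr(sqlglot.parse_one("SELECT x > 0 ? 'pos' : 'neg' FROM {tbl: Identifier}", read="clickhouse")))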
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
751 class Generator(generator.Generator): 752 QUERY_HINTS = False 753 STRUCT_DELIMITER = ("(", ")") 754 NVL2_SUPPORTED = False 755 TABLESAMPLE_REQUIRES_PARENS = False 756 TABLESAMPLE_SIZE_IS_ROWS = False 757 TABLESAMPLE_KEYWORDS = "SAMPLE" 758 LAST_DAY_SUPPORTS_DATE_PART = False 759 CAN_IMPLEMENT_ARRAY_ANY = True 760 SUPPORTS_TO_NUMBER = False 761 JOIN_HINTS = False 762 TABLE_HINTS = False 763 EXPLICIT_SET_OP = True 764 GROUPINGS_SEP = "" 765 SET_OP_MODIFIERS = False 766 SUPPORTS_TABLE_ALIAS_COLUMNS = False 767 VALUES_AS_TABLE = False 768 769 STRING_TYPE_MAPPING = { 770 exp.DataType.Type.CHAR: "String", 771 exp.DataType.Type.LONGBLOB: "String", 772 exp.DataType.Type.LONGTEXT: "String", 773 exp.DataType.Type.MEDIUMBLOB: "String", 774 exp.DataType.Type.MEDIUMTEXT: "String", 775 exp.DataType.Type.TINYBLOB: "String", 776 exp.DataType.Type.TINYTEXT: "String", 777 exp.DataType.Type.TEXT: "String", 778 exp.DataType.Type.VARBINARY: "String", 779 exp.DataType.Type.VARCHAR: "String", 780 } 781 782 SUPPORTED_JSON_PATH_PARTS = { 783 exp.JSONPathKey, 784 exp.JSONPathRoot, 785 exp.JSONPathSubscript, 786 } 787 788 TYPE_MAPPING = { 789 **generator.Generator.TYPE_MAPPING, 790 **STRING_TYPE_MAPPING, 791 exp.DataType.Type.ARRAY: "Array", 792 exp.DataType.Type.BIGINT: "Int64", 793 exp.DataType.Type.DATE32: "Date32", 794 exp.DataType.Type.DATETIME: "DateTime", 795 exp.DataType.Type.DATETIME64: "DateTime64", 796 exp.DataType.Type.TIMESTAMP: "DateTime", 797 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 798 exp.DataType.Type.DOUBLE: "Float64", 799 exp.DataType.Type.ENUM: "Enum", 800 exp.DataType.Type.ENUM8: "Enum8", 801 exp.DataType.Type.ENUM16: "Enum16", 802 exp.DataType.Type.FIXEDSTRING: "FixedString", 803 exp.DataType.Type.FLOAT: "Float32", 804 exp.DataType.Type.INT: "Int32", 805 exp.DataType.Type.MEDIUMINT: "Int32", 806 exp.DataType.Type.INT128: "Int128", 807 exp.DataType.Type.INT256: "Int256", 808 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 809 exp.DataType.Type.MAP: "Map", 810 exp.DataType.Type.NESTED: "Nested", 811 exp.DataType.Type.NULLABLE: "Nullable", 812 exp.DataType.Type.SMALLINT: "Int16", 813 exp.DataType.Type.STRUCT: "Tuple", 814 exp.DataType.Type.TINYINT: "Int8", 815 exp.DataType.Type.UBIGINT: "UInt64", 816 exp.DataType.Type.UINT: "UInt32", 817 exp.DataType.Type.UINT128: "UInt128", 818 exp.DataType.Type.UINT256: "UInt256", 819 exp.DataType.Type.USMALLINT: "UInt16", 820 exp.DataType.Type.UTINYINT: "UInt8", 821 exp.DataType.Type.IPV4: "IPv4", 822 exp.DataType.Type.IPV6: "IPv6", 823 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 824 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 825 } 826 827 TRANSFORMS = { 828 **generator.Generator.TRANSFORMS, 829 exp.AnyValue: rename_func("any"), 830 exp.ApproxDistinct: rename_func("uniq"), 831 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 832 exp.ArraySize: rename_func("LENGTH"), 833 exp.ArraySum: rename_func("arraySum"), 834 exp.ArgMax: arg_max_or_min_no_count("argMax"), 835 exp.ArgMin: arg_max_or_min_no_count("argMin"), 836 exp.Array: inline_array_sql, 837 exp.CastToStrType: rename_func("CAST"), 838 exp.CountIf: rename_func("countIf"), 839 exp.CompressColumnConstraint: lambda self, 840 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 841 exp.ComputedColumnConstraint: lambda self, 842 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 843 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 844 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 
845 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 846 exp.DateStrToDate: rename_func("toDate"), 847 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 848 exp.Explode: rename_func("arrayJoin"), 849 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 850 exp.IsNan: rename_func("isNaN"), 851 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 852 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 853 exp.JSONPathKey: json_path_key_only_name, 854 exp.JSONPathRoot: lambda *_: "", 855 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 856 exp.Nullif: rename_func("nullIf"), 857 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 858 exp.Pivot: no_pivot_sql, 859 exp.Quantile: _quantile_sql, 860 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 861 exp.Rand: rename_func("randCanonical"), 862 exp.StartsWith: rename_func("startsWith"), 863 exp.StrPosition: lambda self, e: self.func( 864 "position", e.this, e.args.get("substr"), e.args.get("position") 865 ), 866 exp.TimeToStr: lambda self, e: self.func( 867 "DATE_FORMAT", e.this, self.format_time(e), e.args.get("zone") 868 ), 869 exp.TimeStrToTime: _timestrtotime_sql, 870 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 871 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 872 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 873 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 874 exp.MD5Digest: rename_func("MD5"), 875 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 876 exp.SHA: rename_func("SHA1"), 877 exp.SHA2: sha256_sql, 878 exp.UnixToTime: _unix_to_time_sql, 879 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 880 exp.Trim: trim_sql, 881 exp.Variance: rename_func("varSamp"), 882 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 883 exp.Stddev: rename_func("stddevSamp"), 884 } 885 886 PROPERTIES_LOCATION = { 887 **generator.Generator.PROPERTIES_LOCATION, 888 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 889 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 890 exp.OnCluster: exp.Properties.Location.POST_NAME, 891 } 892 893 # There's no list in docs, but it can be found in Clickhouse code 894 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 895 ON_CLUSTER_TARGETS = { 896 "DATABASE", 897 "TABLE", 898 "VIEW", 899 "DICTIONARY", 900 "INDEX", 901 "FUNCTION", 902 "NAMED COLLECTION", 903 } 904 905 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 906 NON_NULLABLE_TYPES = { 907 exp.DataType.Type.ARRAY, 908 exp.DataType.Type.MAP, 909 exp.DataType.Type.NULLABLE, 910 exp.DataType.Type.STRUCT, 911 } 912 913 def strtodate_sql(self, expression: exp.StrToDate) -> str: 914 strtodate_sql = self.function_fallback_sql(expression) 915 916 if not isinstance(expression.parent, exp.Cast): 917 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 918 # this branch aims to improve the transpilation to clickhouse 919 return f"CAST({strtodate_sql} AS DATE)" 920 921 return strtodate_sql 922 923 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 924 this = expression.this 925 926 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 927 return self.sql(this) 928 929 return super().cast_sql(expression, safe_prefix=safe_prefix) 930 931 def trycast_sql(self, expression: exp.TryCast) -> str: 932 dtype = expression.to 933 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 934 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 935 dtype.set("nullable", True) 936 937 return super().cast_sql(expression) 938 939 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 940 this = self.json_path_part(expression.this) 941 return str(int(this) + 1) if is_int(this) else this 942 943 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 944 return f"AS {self.sql(expression, 'this')}" 945 946 def _any_to_has( 947 self, 948 expression: exp.EQ | exp.NEQ, 949 default: t.Callable[[t.Any], str], 950 prefix: str = "", 951 ) -> str: 952 if isinstance(expression.left, exp.Any): 953 arr = expression.left 954 this = expression.right 955 elif isinstance(expression.right, exp.Any): 956 arr = expression.right 957 this = expression.left 958 else: 959 return default(expression) 960 961 return prefix + self.func("has", arr.this.unnest(), this) 962 963 def eq_sql(self, expression: exp.EQ) -> str: 964 return self._any_to_has(expression, super().eq_sql) 965 966 def neq_sql(self, expression: exp.NEQ) -> str: 967 return self._any_to_has(expression, super().neq_sql, "NOT ") 968 969 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 970 # Manually add a flag to make the search case-insensitive 971 regex = self.func("CONCAT", "'(?i)'", expression.expression) 972 return self.func("match", expression.this, regex) 973 974 def datatype_sql(self, expression: exp.DataType) -> str: 975 # String is the standard ClickHouse type, every other variant is just an alias. 976 # Additionally, any supplied length parameter will be ignored. 977 # 978 # https://clickhouse.com/docs/en/sql-reference/data-types/string 979 if expression.this in self.STRING_TYPE_MAPPING: 980 dtype = "String" 981 else: 982 dtype = super().datatype_sql(expression) 983 984 # This section changes the type to `Nullable(...)` if the following conditions hold: 985 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 986 # and change their semantics 987 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 988 # constraint: "Type of Map key must be a type, that can be represented by integer or 989 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 990 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 991 parent = expression.parent 992 if ( 993 expression.args.get("nullable") is not False 994 and not ( 995 isinstance(parent, exp.DataType) 996 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 997 and expression.index in (None, 0) 998 ) 999 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1000 ): 1001 dtype = f"Nullable({dtype})" 1002 1003 return dtype 1004 1005 def cte_sql(self, expression: exp.CTE) -> str: 1006 if expression.args.get("scalar"): 1007 this = self.sql(expression, "this") 1008 alias = self.sql(expression, "alias") 1009 return f"{this} AS {alias}" 1010 1011 return super().cte_sql(expression) 1012 1013 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1014 return super().after_limit_modifiers(expression) + [ 1015 ( 1016 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1017 if expression.args.get("settings") 1018 else "" 1019 ), 1020 ( 1021 self.seg("FORMAT ") + self.sql(expression, "format") 1022 if expression.args.get("format") 1023 else "" 1024 ), 1025 ] 1026 1027 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1028 params = self.expressions(expression, key="params", flat=True) 1029 return self.func(expression.name, *expression.expressions) + f"({params})" 1030 1031 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1032 return self.func(expression.name, *expression.expressions) 1033 1034 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1035 return self.anonymousaggfunc_sql(expression) 1036 1037 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1038 return self.parameterizedagg_sql(expression) 1039 1040 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1041 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1042 1043 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1044 return f"ON CLUSTER {self.sql(expression, 'this')}" 1045 1046 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1047 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1048 exp.Properties.Location.POST_NAME 1049 ): 1050 this_name = self.sql( 1051 expression.this if isinstance(expression.this, exp.Schema) else expression, 1052 "this", 1053 ) 1054 this_properties = " ".join( 1055 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1056 ) 1057 this_schema = self.schema_columns_sql(expression.this) 1058 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1059 1060 return super().createable_sql(expression, locations) 1061 1062 def create_sql(self, expression: exp.Create) -> str: 1063 # The comment property comes last in CTAS statements, i.e. 
after the query 1064 query = expression.expression 1065 if isinstance(query, exp.Query): 1066 comment_prop = expression.find(exp.SchemaCommentProperty) 1067 if comment_prop: 1068 comment_prop.pop() 1069 query.replace(exp.paren(query)) 1070 else: 1071 comment_prop = None 1072 1073 create_sql = super().create_sql(expression) 1074 1075 comment_sql = self.sql(comment_prop) 1076 comment_sql = f" {comment_sql}" if comment_sql else "" 1077 1078 return f"{create_sql}{comment_sql}" 1079 1080 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1081 this = self.indent(self.sql(expression, "this")) 1082 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1083 1084 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1085 this = self.sql(expression, "this") 1086 this = f" {this}" if this else "" 1087 expr = self.sql(expression, "expression") 1088 expr = f" {expr}" if expr else "" 1089 index_type = self.sql(expression, "index_type") 1090 index_type = f" TYPE {index_type}" if index_type else "" 1091 granularity = self.sql(expression, "granularity") 1092 granularity = f" GRANULARITY {granularity}" if granularity else "" 1093 1094 return f"INDEX{this}{expr}{index_type}{granularity}" 1095 1096 def partition_sql(self, expression: exp.Partition) -> str: 1097 return f"PARTITION {self.expressions(expression, flat=True)}" 1098 1099 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1100 return f"ID {self.sql(expression.this)}" 1101 1102 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1103 return ( 1104 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1105 ) 1106 1107 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1108 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
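Two generation sketches grounded in the settings above (outputs hedged, since formatting can change between versions): types parsed from other dialects are not marked non-nullable and therefore come out wrapped in Nullable(...), and TRANSFORMS renames approximate-distinct aggregates to uniq:

import sqlglot

# Expected to resemble: SELECT CAST(x AS Nullable(String))
print(sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0])

# Expected to resemble: SELECT uniq(x) FROM t
print(sqlglot.transpile("SELECT APPROX_COUNT_DISTINCT(x) FROM t", read="duckdb", write="clickhouse")[0])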
913 def strtodate_sql(self, expression: exp.StrToDate) -> str: 914 strtodate_sql = self.function_fallback_sql(expression) 915 916 if not isinstance(expression.parent, exp.Cast): 917 # StrToDate returns DATEs in other dialects (eg. postgres), so 918 # this branch aims to improve the transpilation to clickhouse 919 return f"CAST({strtodate_sql} AS DATE)" 920 921 return strtodate_sql
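A sketch of the branch above: STR_TO_DATE read from MySQL parses into exp.StrToDate with no enclosing cast, so the ClickHouse output gains a CAST(... AS DATE) wrapper (the exact function and format rendering vary by version):

import sqlglot

print(sqlglot.transpile("SELECT STR_TO_DATE('2024-01-02', '%Y-%m-%d')", read="mysql", write="clickhouse")[0])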
923 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 924 this = expression.this 925 926 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 927 return self.sql(this) 928 929 return super().cast_sql(expression, safe_prefix=safe_prefix)
931 def trycast_sql(self, expression: exp.TryCast) -> str: 932 dtype = expression.to 933 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 934 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 935 dtype.set("nullable", True) 936 937 return super().cast_sql(expression)
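Since ClickHouse lacks TRY_CAST, the method above leans on the observation in its comment: casting to Nullable(T) behaves much like a safe cast. A sketch:

import sqlglot

# Expected to resemble: SELECT CAST(x AS Nullable(Int32))
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="clickhouse")[0])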
974 def datatype_sql(self, expression: exp.DataType) -> str: 975 # String is the standard ClickHouse type, every other variant is just an alias. 976 # Additionally, any supplied length parameter will be ignored. 977 # 978 # https://clickhouse.com/docs/en/sql-reference/data-types/string 979 if expression.this in self.STRING_TYPE_MAPPING: 980 dtype = "String" 981 else: 982 dtype = super().datatype_sql(expression) 983 984 # This section changes the type to `Nullable(...)` if the following conditions hold: 985 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 986 # and change their semantics 987 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 988 # constraint: "Type of Map key must be a type, that can be represented by integer or 989 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 990 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 991 parent = expression.parent 992 if ( 993 expression.args.get("nullable") is not False 994 and not ( 995 isinstance(parent, exp.DataType) 996 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 997 and expression.index in (None, 0) 998 ) 999 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1000 ): 1001 dtype = f"Nullable({dtype})" 1002 1003 return dtype
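A sketch of the Map-key exemption described in the comments above: the key position stays unwrapped while the value type may become Nullable:

import sqlglot

# Expected to resemble: SELECT CAST(m AS Map(String, Nullable(String)))
print(sqlglot.transpile("SELECT CAST(m AS MAP(TEXT, TEXT))", write="clickhouse")[0])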
1013 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1014 return super().after_limit_modifiers(expression) + [ 1015 ( 1016 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1017 if expression.args.get("settings") 1018 else "" 1019 ), 1020 ( 1021 self.seg("FORMAT ") + self.sql(expression, "format") 1022 if expression.args.get("format") 1023 else "" 1024 ), 1025 ]
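A round-trip sketch showing both modifiers emitted after LIMIT, matching ClickHouse's grammar:

import sqlglot

sql = "SELECT x FROM t LIMIT 10 SETTINGS max_threads = 8 FORMAT JSONEachRow"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])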
1046 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1047 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1048 exp.Properties.Location.POST_NAME 1049 ): 1050 this_name = self.sql( 1051 expression.this if isinstance(expression.this, exp.Schema) else expression, 1052 "this", 1053 ) 1054 this_properties = " ".join( 1055 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1056 ) 1057 this_schema = self.schema_columns_sql(expression.this) 1058 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 1059 1060 return super().createable_sql(expression, locations)
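A sketch: ON CLUSTER is a POST_NAME property, so this hook renders it between the table name and the column schema:

import sqlglot

sql = "CREATE TABLE t ON CLUSTER my_cluster (x Int32)"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])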
1062 def create_sql(self, expression: exp.Create) -> str: 1063 # The comment property comes last in CTAS statements, i.e. after the query 1064 query = expression.expression 1065 if isinstance(query, exp.Query): 1066 comment_prop = expression.find(exp.SchemaCommentProperty) 1067 if comment_prop: 1068 comment_prop.pop() 1069 query.replace(exp.paren(query)) 1070 else: 1071 comment_prop = None 1072 1073 create_sql = super().create_sql(expression) 1074 1075 comment_sql = self.sql(comment_prop) 1076 comment_sql = f" {comment_sql}" if comment_sql else "" 1077 1078 return f"{create_sql}{comment_sql}"
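A CTAS sketch (assuming the statement parses as written in the installed version): the COMMENT property is popped and re-attached after the parenthesized query:

import sqlglot

sql = "CREATE TABLE t COMMENT 'docs' AS SELECT 1 AS x"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])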
1084 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1085 this = self.sql(expression, "this") 1086 this = f" {this}" if this else "" 1087 expr = self.sql(expression, "expression") 1088 expr = f" {expr}" if expr else "" 1089 index_type = self.sql(expression, "index_type") 1090 index_type = f" TYPE {index_type}" if index_type else "" 1091 granularity = self.sql(expression, "granularity") 1092 granularity = f" GRANULARITY {granularity}" if granularity else "" 1093 1094 return f"INDEX{this}{expr}{index_type}{granularity}"
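A round-trip sketch of a data-skipping index constraint, which _parse_index_constraint reads and this method writes back:

import sqlglot

sql = "CREATE TABLE t (x Int32, INDEX idx x TYPE minmax GRANULARITY 1)"
print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])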
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_NULLABLE_TYPES
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql