diff --git a/pyproject.toml b/pyproject.toml index 715e0f3db..e643d54b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,9 +93,7 @@ dependencies = [ "slack_sdk>=3.19.0, <4", "sqlalchemy>=1.4, <2", "sqlalchemy-utils>=0.38.3, <0.39", - # known breaking changes in sqlglot 25.25.0 - #https://github.com/tobymao/sqlglot/blob/main/CHANGELOG.md#v25250---2024-10-14 - "sqlglot>=25.24.0,<25.25.0", + "sqlglot>=26.1.3, <27", "sqlparse>=0.5.0", "tabulate>=0.8.9, <0.9", "typing-extensions>=4, <5", diff --git a/requirements/base.txt b/requirements/base.txt index 554094259..7a30a505c 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -9,9 +9,7 @@ apispec==6.3.0 apsw==3.46.0.0 # via shillelagh async-timeout==4.0.3 - # via - # -r requirements/base.in - # redis + # via -r requirements/base.in attrs==24.2.0 # via # cattrs @@ -370,7 +368,7 @@ sqlalchemy-utils==0.38.3 # via # apache-superset (pyproject.toml) # flask-appbuilder -sqlglot==25.24.5 +sqlglot==26.1.3 # via apache-superset (pyproject.toml) sqlparse==0.5.2 # via apache-superset (pyproject.toml) @@ -388,7 +386,6 @@ typing-extensions==4.12.2 # via # apache-superset (pyproject.toml) # alembic - # cattrs # flask-limiter # kombu # limits diff --git a/requirements/development.txt b/requirements/development.txt index 234342873..a8082d907 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -800,7 +800,7 @@ sqlalchemy-utils==0.38.3 # -c requirements/base.txt # apache-superset # flask-appbuilder -sqlglot==25.24.5 +sqlglot==26.1.3 # via # -c requirements/base.txt # apache-superset diff --git a/superset/sql/dialects/__init__.py b/superset/sql/dialects/__init__.py new file mode 100644 index 000000000..13a83393a --- /dev/null +++ b/superset/sql/dialects/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/superset/sql/dialects/firebolt.py b/superset/sql/dialects/firebolt.py new file mode 100644 index 000000000..119ee3ba1 --- /dev/null +++ b/superset/sql/dialects/firebolt.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from sqlglot import exp, generator, parser +from sqlglot.dialects.dialect import Dialect +from sqlglot.tokens import TokenType + + +class Firebolt(Dialect): + """ + A sqlglot dialect for Firebolt. + """ + + class Parser(parser.Parser): + """ + Custom parser for Firebolt. + + In Firebolt `NOT` has higher precedence than `IN`, so we need to wrap the + expression in parentheses when we find a negated range. + """ + + UNARY_PARSERS = { + **parser.Parser.UNARY_PARSERS, + TokenType.NOT: lambda self: self.expression( + exp.Not, + this=self._parse_unary(), + ), + } + + def _negate_range( + self, + this: exp.Expression | None = None, + ) -> exp.Expression | None: + if not this: + return this + + return self.expression(exp.Not, this=self.expression(exp.Paren, this=this)) + + class Generator(generator.Generator): + """ + Custom generator for Firebolt. + """ + + TYPE_MAPPING = { + **generator.Generator.TYPE_MAPPING, + exp.DataType.Type.VARBINARY: "BYTEA", + } + + def not_sql(self, expression: exp.Not) -> str: + """ + Parenthesize negated expressions. + + Firebolt requires negated to be wrapped in parentheses, since NOT has higher + precedence than IN. + """ + if isinstance(expression.this, exp.In): + return f"NOT ({self.sql(expression, 'this')})" + + return super().not_sql(expression) diff --git a/superset/sql/parse.py b/superset/sql/parse.py index 34ec9299d..3fd13a800 100644 --- a/superset/sql/parse.py +++ b/superset/sql/parse.py @@ -36,9 +36,12 @@ from sqlglot.optimizer.pushdown_predicates import pushdown_predicates from sqlglot.optimizer.scope import Scope, ScopeType, traverse_scope from superset.exceptions import SupersetParseError +from superset.sql.dialects.firebolt import Firebolt logger = logging.getLogger(__name__) +# register 3rd party dialects +Dialect.classes["firebolt"] = Firebolt # mapping between DB engine specs and sqlglot dialects SQLGLOT_DIALECTS = { @@ -62,7 +65,7 @@ SQLGLOT_DIALECTS = { # "elasticsearch": ??? # "exa": ??? # "firebird": ??? - # "firebolt": ??? + "firebolt": "firebolt", "gsheets": Dialects.SQLITE, "hana": Dialects.POSTGRES, "hive": Dialects.HIVE, @@ -81,7 +84,7 @@ SQLGLOT_DIALECTS = { "presto": Dialects.PRESTO, "pydoris": Dialects.DORIS, "redshift": Dialects.REDSHIFT, - # "risingwave": ??? + "risingwave": Dialects.RISINGWAVE, # "rockset": ??? "shillelagh": Dialects.SQLITE, "snowflake": Dialects.SNOWFLAKE, diff --git a/tests/unit_tests/sql/parse_tests.py b/tests/unit_tests/sql/parse_tests.py index 1eabb78e0..a2aff686a 100644 --- a/tests/unit_tests/sql/parse_tests.py +++ b/tests/unit_tests/sql/parse_tests.py @@ -301,7 +301,7 @@ def test_format_no_dialect() -> None: Test format with an engine that has no corresponding dialect. """ assert ( - SQLScript("SELECT col FROM t WHERE col NOT IN (1, 2)", "firebolt").format() + SQLScript("SELECT col FROM t WHERE col NOT IN (1, 2)", "dremio").format() == "SELECT col\nFROM t\nWHERE col NOT IN (1,\n 2)" ) @@ -311,7 +311,7 @@ def test_split_no_dialect() -> None: Test the statement split when the engine has no corresponding dialect. """ sql = "SELECT col FROM t WHERE col NOT IN (1, 2); SELECT * FROM t; SELECT foo" - statements = SQLScript(sql, "firebolt").statements + statements = SQLScript(sql, "dremio").statements assert len(statements) == 3 assert statements[0]._sql == "SELECT col FROM t WHERE col NOT IN (1, 2)" assert statements[1]._sql == "SELECT * FROM t" @@ -1112,4 +1112,37 @@ WHERE anon_1.a > 1 AND anon_1.b = 2""" assert SQLStatement(sql, "sqlite").optimize().format() == optimized - assert SQLStatement(sql, "firebolt").optimize().format() == not_optimized + assert SQLStatement(sql, "dremio").optimize().format() == not_optimized + + +def test_firebolt() -> None: + """ + Test that Firebolt 3rd party dialect is registered correctly. + + We need a custom dialect for Firebolt because it parses `NOT col IN (1, 2)` as + `(NOT col) IN (1, 2)` instead of `NOT (col IN (1, 2))`, which will fail when `col` + is not a boolean. + + Note that `NOT col = 1` works as expected in Firebolt, parsing as `NOT (col = 1)`. + """ + sql = "SELECT col NOT IN (1, 2) FROM tbl" + assert ( + SQLStatement(sql, "firebolt").format() + == """ +SELECT + NOT ( + col IN (1, 2) + ) +FROM tbl + """.strip() + ) + + sql = "SELECT NOT col = 1 FROM tbl" + assert ( + SQLStatement(sql, "firebolt").format() + == """ +SELECT + NOT col = 1 +FROM tbl + """.strip() + )