feat: Firebolt sqlglot dialect (#31825)

This commit is contained in:
Beto Dealmeida 2025-01-14 09:36:25 -05:00 committed by GitHub
parent 274aa143d3
commit c2d7cf388d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 136 additions and 14 deletions

View File

@ -93,9 +93,7 @@ dependencies = [
"slack_sdk>=3.19.0, <4", "slack_sdk>=3.19.0, <4",
"sqlalchemy>=1.4, <2", "sqlalchemy>=1.4, <2",
"sqlalchemy-utils>=0.38.3, <0.39", "sqlalchemy-utils>=0.38.3, <0.39",
# known breaking changes in sqlglot 25.25.0 "sqlglot>=26.1.3, <27",
#https://github.com/tobymao/sqlglot/blob/main/CHANGELOG.md#v25250---2024-10-14
"sqlglot>=25.24.0,<25.25.0",
"sqlparse>=0.5.0", "sqlparse>=0.5.0",
"tabulate>=0.8.9, <0.9", "tabulate>=0.8.9, <0.9",
"typing-extensions>=4, <5", "typing-extensions>=4, <5",

View File

@ -9,9 +9,7 @@ apispec==6.3.0
apsw==3.46.0.0 apsw==3.46.0.0
# via shillelagh # via shillelagh
async-timeout==4.0.3 async-timeout==4.0.3
# via # via -r requirements/base.in
# -r requirements/base.in
# redis
attrs==24.2.0 attrs==24.2.0
# via # via
# cattrs # cattrs
@ -370,7 +368,7 @@ sqlalchemy-utils==0.38.3
# via # via
# apache-superset (pyproject.toml) # apache-superset (pyproject.toml)
# flask-appbuilder # flask-appbuilder
sqlglot==25.24.5 sqlglot==26.1.3
# via apache-superset (pyproject.toml) # via apache-superset (pyproject.toml)
sqlparse==0.5.2 sqlparse==0.5.2
# via apache-superset (pyproject.toml) # via apache-superset (pyproject.toml)
@ -388,7 +386,6 @@ typing-extensions==4.12.2
# via # via
# apache-superset (pyproject.toml) # apache-superset (pyproject.toml)
# alembic # alembic
# cattrs
# flask-limiter # flask-limiter
# kombu # kombu
# limits # limits

View File

@ -800,7 +800,7 @@ sqlalchemy-utils==0.38.3
# -c requirements/base.txt # -c requirements/base.txt
# apache-superset # apache-superset
# flask-appbuilder # flask-appbuilder
sqlglot==25.24.5 sqlglot==26.1.3
# via # via
# -c requirements/base.txt # -c requirements/base.txt
# apache-superset # apache-superset

View File

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

View File

@ -0,0 +1,75 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from sqlglot import exp, generator, parser
from sqlglot.dialects.dialect import Dialect
from sqlglot.tokens import TokenType
class Firebolt(Dialect):
"""
A sqlglot dialect for Firebolt.
"""
class Parser(parser.Parser):
"""
Custom parser for Firebolt.
In Firebolt `NOT` has higher precedence than `IN`, so we need to wrap the
expression in parentheses when we find a negated range.
"""
UNARY_PARSERS = {
**parser.Parser.UNARY_PARSERS,
TokenType.NOT: lambda self: self.expression(
exp.Not,
this=self._parse_unary(),
),
}
def _negate_range(
self,
this: exp.Expression | None = None,
) -> exp.Expression | None:
if not this:
return this
return self.expression(exp.Not, this=self.expression(exp.Paren, this=this))
class Generator(generator.Generator):
"""
Custom generator for Firebolt.
"""
TYPE_MAPPING = {
**generator.Generator.TYPE_MAPPING,
exp.DataType.Type.VARBINARY: "BYTEA",
}
def not_sql(self, expression: exp.Not) -> str:
"""
Parenthesize negated expressions.
Firebolt requires negated to be wrapped in parentheses, since NOT has higher
precedence than IN.
"""
if isinstance(expression.this, exp.In):
return f"NOT ({self.sql(expression, 'this')})"
return super().not_sql(expression)

View File

@ -36,9 +36,12 @@ from sqlglot.optimizer.pushdown_predicates import pushdown_predicates
from sqlglot.optimizer.scope import Scope, ScopeType, traverse_scope from sqlglot.optimizer.scope import Scope, ScopeType, traverse_scope
from superset.exceptions import SupersetParseError from superset.exceptions import SupersetParseError
from superset.sql.dialects.firebolt import Firebolt
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# register 3rd party dialects
Dialect.classes["firebolt"] = Firebolt
# mapping between DB engine specs and sqlglot dialects # mapping between DB engine specs and sqlglot dialects
SQLGLOT_DIALECTS = { SQLGLOT_DIALECTS = {
@ -62,7 +65,7 @@ SQLGLOT_DIALECTS = {
# "elasticsearch": ??? # "elasticsearch": ???
# "exa": ??? # "exa": ???
# "firebird": ??? # "firebird": ???
# "firebolt": ??? "firebolt": "firebolt",
"gsheets": Dialects.SQLITE, "gsheets": Dialects.SQLITE,
"hana": Dialects.POSTGRES, "hana": Dialects.POSTGRES,
"hive": Dialects.HIVE, "hive": Dialects.HIVE,
@ -81,7 +84,7 @@ SQLGLOT_DIALECTS = {
"presto": Dialects.PRESTO, "presto": Dialects.PRESTO,
"pydoris": Dialects.DORIS, "pydoris": Dialects.DORIS,
"redshift": Dialects.REDSHIFT, "redshift": Dialects.REDSHIFT,
# "risingwave": ??? "risingwave": Dialects.RISINGWAVE,
# "rockset": ??? # "rockset": ???
"shillelagh": Dialects.SQLITE, "shillelagh": Dialects.SQLITE,
"snowflake": Dialects.SNOWFLAKE, "snowflake": Dialects.SNOWFLAKE,

View File

@ -301,7 +301,7 @@ def test_format_no_dialect() -> None:
Test format with an engine that has no corresponding dialect. Test format with an engine that has no corresponding dialect.
""" """
assert ( assert (
SQLScript("SELECT col FROM t WHERE col NOT IN (1, 2)", "firebolt").format() SQLScript("SELECT col FROM t WHERE col NOT IN (1, 2)", "dremio").format()
== "SELECT col\nFROM t\nWHERE col NOT IN (1,\n 2)" == "SELECT col\nFROM t\nWHERE col NOT IN (1,\n 2)"
) )
@ -311,7 +311,7 @@ def test_split_no_dialect() -> None:
Test the statement split when the engine has no corresponding dialect. Test the statement split when the engine has no corresponding dialect.
""" """
sql = "SELECT col FROM t WHERE col NOT IN (1, 2); SELECT * FROM t; SELECT foo" sql = "SELECT col FROM t WHERE col NOT IN (1, 2); SELECT * FROM t; SELECT foo"
statements = SQLScript(sql, "firebolt").statements statements = SQLScript(sql, "dremio").statements
assert len(statements) == 3 assert len(statements) == 3
assert statements[0]._sql == "SELECT col FROM t WHERE col NOT IN (1, 2)" assert statements[0]._sql == "SELECT col FROM t WHERE col NOT IN (1, 2)"
assert statements[1]._sql == "SELECT * FROM t" assert statements[1]._sql == "SELECT * FROM t"
@ -1112,4 +1112,37 @@ WHERE anon_1.a > 1
AND anon_1.b = 2""" AND anon_1.b = 2"""
assert SQLStatement(sql, "sqlite").optimize().format() == optimized assert SQLStatement(sql, "sqlite").optimize().format() == optimized
assert SQLStatement(sql, "firebolt").optimize().format() == not_optimized assert SQLStatement(sql, "dremio").optimize().format() == not_optimized
def test_firebolt() -> None:
"""
Test that Firebolt 3rd party dialect is registered correctly.
We need a custom dialect for Firebolt because it parses `NOT col IN (1, 2)` as
`(NOT col) IN (1, 2)` instead of `NOT (col IN (1, 2))`, which will fail when `col`
is not a boolean.
Note that `NOT col = 1` works as expected in Firebolt, parsing as `NOT (col = 1)`.
"""
sql = "SELECT col NOT IN (1, 2) FROM tbl"
assert (
SQLStatement(sql, "firebolt").format()
== """
SELECT
NOT (
col IN (1, 2)
)
FROM tbl
""".strip()
)
sql = "SELECT NOT col = 1 FROM tbl"
assert (
SQLStatement(sql, "firebolt").format()
== """
SELECT
NOT col = 1
FROM tbl
""".strip()
)