feat: add YDB as a new database engine (#31141)

This commit is contained in:
Oleg Ovcharuk 2024-12-05 17:14:19 +03:00 committed by GitHub
parent 638f82b46d
commit cf5c770adc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 287 additions and 0 deletions

View File

@ -70,6 +70,7 @@ google-sheets.svg
ibm-db2.svg ibm-db2.svg
postgresql.svg postgresql.svg
snowflake.svg snowflake.svg
ydb.svg
# docs-related # docs-related
erd.puml erd.puml

View File

@ -136,6 +136,7 @@ Here are some of the major database solutions that are supported:
<img src="https://superset.apache.org/img/databases/oceanbase.svg" alt="oceanbase" border="0" width="220" /> <img src="https://superset.apache.org/img/databases/oceanbase.svg" alt="oceanbase" border="0" width="220" />
<img src="https://superset.apache.org/img/databases/sap-hana.png" alt="oceanbase" border="0" width="220" /> <img src="https://superset.apache.org/img/databases/sap-hana.png" alt="oceanbase" border="0" width="220" />
<img src="https://superset.apache.org/img/databases/denodo.png" alt="denodo" border="0" width="200" /> <img src="https://superset.apache.org/img/databases/denodo.png" alt="denodo" border="0" width="200" />
<img src="https://superset.apache.org/img/databases/ydb.svg" alt="ydb" border="0" width="200" />
</p> </p>
**A more comprehensive list of supported databases** along with the configuration instructions can be found [here](https://superset.apache.org/docs/configuration/databases). **A more comprehensive list of supported databases** along with the configuration instructions can be found [here](https://superset.apache.org/docs/configuration/databases).

View File

@ -81,6 +81,7 @@ are compatible with Superset.
| [TimescaleDB](/docs/configuration/databases#timescaledb) | `pip install psycopg2` | `postgresql://<UserName>:<DBPassword>@<Database Host>:<Port>/<Database Name>` | | [TimescaleDB](/docs/configuration/databases#timescaledb) | `pip install psycopg2` | `postgresql://<UserName>:<DBPassword>@<Database Host>:<Port>/<Database Name>` |
| [Trino](/docs/configuration/databases#trino) | `pip install trino` | `trino://{username}:{password}@{hostname}:{port}/{catalog}` | | [Trino](/docs/configuration/databases#trino) | `pip install trino` | `trino://{username}:{password}@{hostname}:{port}/{catalog}` |
| [Vertica](/docs/configuration/databases#vertica) | `pip install sqlalchemy-vertica-python` | `vertica+vertica_python://<UserName>:<DBPassword>@<Database Host>/<Database Name>` | | [Vertica](/docs/configuration/databases#vertica) | `pip install sqlalchemy-vertica-python` | `vertica+vertica_python://<UserName>:<DBPassword>@<Database Host>/<Database Name>` |
| [YDB](/docs/configuration/databases#ydb) | `pip install ydb-sqlalchemy` | `ydb://{host}:{port}/{database_name}` |
| [YugabyteDB](/docs/configuration/databases#yugabytedb) | `pip install psycopg2` | `postgresql://<UserName>:<DBPassword>@<Database Host>/<Database Name>` | | [YugabyteDB](/docs/configuration/databases#yugabytedb) | `pip install psycopg2` | `postgresql://<UserName>:<DBPassword>@<Database Host>/<Database Name>` |
--- ---
@ -1537,6 +1538,78 @@ Other parameters:
- Load Balancer - Backup Host - Load Balancer - Backup Host
#### YDB
The recommended connector library for [YDB](https://ydb.tech/) is
[ydb-sqlalchemy](https://pypi.org/project/ydb-sqlalchemy/).
##### Connection String
The connection string for YDB looks like this:
```
ydb://{host}:{port}/{database_name}
```
##### Protocol
You can specify `protocol` in the `Secure Extra` field at `Advanced / Security`:
```
{
"protocol": "grpcs"
}
```
Default is `grpc`.
##### Authentication Methods
###### Static Credentials
To use `Static Credentials` you should provide `username`/`password` in the `Secure Extra` field at `Advanced / Security`:
```
{
"credentials": {
"username": "...",
"password": "..."
}
}
```
###### Access Token Credentials
To use `Access Token Credentials` you should provide `token` in the `Secure Extra` field at `Advanced / Security`:
```
{
"credentials": {
"token": "...",
}
}
```
##### Service Account Credentials
To use Service Account Credentials, you should provide `service_account_json` in the `Secure Extra` field at `Advanced / Security`:
```
{
"credentials": {
"service_account_json": {
"id": "...",
"service_account_id": "...",
"created_at": "...",
"key_algorithm": "...",
"public_key": "...",
"private_key": "..."
}
}
}
```
#### YugabyteDB #### YugabyteDB
[YugabyteDB](https://www.yugabyte.com/) is a distributed SQL database built on top of PostgreSQL. [YugabyteDB](https://www.yugabyte.com/) is a distributed SQL database built on top of PostgreSQL.

20
docs/static/img/databases/ydb.svg vendored Normal file
View File

@ -0,0 +1,20 @@
<svg width="753" height="274" viewBox="0 0 753 274" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_28_1297)">
<path fill-rule="evenodd" clip-rule="evenodd" d="M5 53.8669C5 37.6466 29.6243 29 60 29C90.3757 29 115 37.6466 115 53.8669V138.133C115 154.353 90.3757 163 60 163C29.6243 163 5 154.353 5 138.133V53.8669Z" fill="#2399FF"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M175 53.8669C175 37.6466 199.624 29 230 29C260.376 29 285 37.6466 285 53.8669V138.133C285 154.353 260.376 163 230 163C199.624 163 175 154.353 175 138.133V53.8669Z" fill="#2399FF"/>
<path d="M177 85H113V103H177V85Z" fill="#2399FF"/>
<path d="M173 157H115L81 111H59L105 173H183L229 111H207L173 157Z" fill="white"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M89 145.867C89 129.647 113.624 121 144 121C174.376 121 199 129.647 199 145.867V230.133C199 246.353 174.376 255 144 255C113.624 255 89 246.353 89 230.133V145.867Z" fill="#2399FF"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M108.783 136.779C100.111 140.552 99 144.237 99 146C99 147.763 100.111 151.448 108.783 155.221C117.076 158.829 129.435 161 144 161C158.565 161 170.924 158.829 179.217 155.221C187.889 151.448 189 147.763 189 146C189 144.237 187.889 140.552 179.218 136.779C170.924 133.171 158.565 131 144 131C129.435 131 117.076 133.171 108.783 136.779Z" fill="white"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M24.7825 44.7792C16.1105 48.5515 15 52.2365 15 54C15 55.7635 16.1105 59.4485 24.7825 63.2208C33.0763 66.8287 45.4354 69 60 69C74.5646 69 86.9237 66.8287 95.2175 63.2208C103.889 59.4485 105 55.7635 105 54C105 52.2365 103.889 48.5515 95.2175 44.7792C86.9237 41.1713 74.5646 39 60 39C45.4354 39 33.0763 41.1713 24.7825 44.7792Z" fill="white"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M194.783 44.7792C186.111 48.5515 185 52.2365 185 54C185 55.7635 186.111 59.4485 194.783 63.2208C203.076 66.8287 215.435 69 230 69C244.565 69 256.924 66.8287 265.217 63.2208C273.889 59.4485 275 55.7635 275 54C275 52.2365 273.889 48.5515 265.218 44.7792C256.924 41.1713 244.565 39 230 39C215.435 39 203.076 41.1713 194.783 44.7792Z" fill="white"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M694.131 64H634.75V210H705.026C730.974 210 750.243 191.821 750.243 166.963C750.243 150.15 740.93 137.39 726.201 130.891C733.027 124.143 737.168 115.224 737.168 104.858C737.168 81.2033 718.875 64 694.131 64ZM660.899 85.791V123.925H691.951C702.482 123.925 711.019 115.389 711.019 104.858C711.019 94.3277 702.482 85.791 691.951 85.791H660.899ZM660.899 188.209V145.716H702.847C714.581 145.716 724.093 155.229 724.093 166.963C724.093 178.697 714.581 188.209 702.847 188.209H660.899Z" fill="black"/>
<path d="M352.716 64.0039H382.134L419.179 128.287L456.223 64.0039H485.641L432.308 155.472V210.004H406.049V155.472L352.716 64.0039Z" fill="black"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M496.008 64.0039H546.127C589.713 64.0039 619.127 92.3289 619.127 137.004C619.127 181.679 589.713 210.004 546.127 210.004H496.008V64.0039ZM522.157 188.213V85.7949H543.948C573.32 85.7949 592.978 104.364 592.978 137.004C592.978 169.644 573.32 188.213 543.948 188.213H522.157Z" fill="black"/>
</g>
<defs>
<clipPath id="clip0_28_1297">
<rect width="753" height="274" fill="white"/>
</clipPath>
</defs>
</svg>

After

Width:  |  Height:  |  Size: 3.2 KiB

View File

@ -179,6 +179,7 @@ netezza = ["nzalchemy>=11.0.2"]
starrocks = ["starrocks>=1.0.0"] starrocks = ["starrocks>=1.0.0"]
doris = ["pydoris>=1.0.0, <2.0.0"] doris = ["pydoris>=1.0.0, <2.0.0"]
oceanbase = ["oceanbase_py>=0.0.1"] oceanbase = ["oceanbase_py>=0.0.1"]
ydb = ["ydb-sqlalchemy>=0.1.2"]
development = [ development = [
"docker", "docker",
"flask-testing", "flask-testing",

108
superset/db_engine_specs/ydb.py Executable file
View File

@ -0,0 +1,108 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import logging
from datetime import datetime
from typing import Any, TYPE_CHECKING
from sqlalchemy import types
from superset.constants import TimeGrain
from superset.db_engine_specs.base import BaseEngineSpec
from superset.utils import json
if TYPE_CHECKING:
from superset.models.core import Database
logger = logging.getLogger(__name__)
class YDBEngineSpec(BaseEngineSpec):
engine = "yql"
engine_aliases = {"ydb", "yql+ydb"}
engine_name = "YDB"
default_driver = "ydb"
sqlalchemy_uri_placeholder = "ydb://{host}:{port}/{database_name}"
# pylint: disable=invalid-name
encrypted_extra_sensitive_fields = {"$.connect_args.credentials", "$.credentials"}
disable_ssh_tunneling = False
supports_file_upload = False
allows_alias_in_orderby = True
_time_grain_expressions = {
None: "{col}",
TimeGrain.SECOND: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT1S')))",
TimeGrain.THIRTY_SECONDS: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT30S')))",
TimeGrain.MINUTE: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT1M')))",
TimeGrain.FIVE_MINUTES: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT5M')))",
TimeGrain.TEN_MINUTES: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT10M')))",
TimeGrain.FIFTEEN_MINUTES: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT15M')))",
TimeGrain.THIRTY_MINUTES: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT30M')))",
TimeGrain.HOUR: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('PT1H')))",
TimeGrain.DAY: "DateTime::MakeDatetime(DateTime::StartOf({col}, Interval('P1D')))",
TimeGrain.WEEK: "DateTime::MakeDatetime(DateTime::StartOfWeek({col}))",
TimeGrain.MONTH: "DateTime::MakeDatetime(DateTime::StartOfMonth({col}))",
TimeGrain.QUARTER: "DateTime::MakeDatetime(DateTime::StartOfQuarter({col}))",
TimeGrain.YEAR: "DateTime::MakeDatetime(DateTime::StartOfYear({col}))",
}
@classmethod
def epoch_to_dttm(cls) -> str:
return "DateTime::MakeDatetime({col})"
@classmethod
def convert_dttm(
cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None
) -> str | None:
sqla_type = cls.get_sqla_column_type(target_type)
if isinstance(sqla_type, types.Date):
return f"DateTime::MakeDate(DateTime::ParseIso8601('{dttm.date().isoformat()}'))"
if isinstance(sqla_type, types.DateTime):
return f"""DateTime::MakeDatetime(DateTime::ParseIso8601('{dttm.isoformat(sep="T", timespec="seconds")}'))"""
return None
@staticmethod
def update_params_from_encrypted_extra(
database: Database,
params: dict[str, Any],
) -> None:
if not database.encrypted_extra:
return
try:
encrypted_extra = json.loads(database.encrypted_extra)
connect_args = params.setdefault("connect_args", {})
if "protocol" in encrypted_extra:
connect_args["protocol"] = encrypted_extra["protocol"]
if "credentials" in encrypted_extra:
credentials_info = encrypted_extra["credentials"]
connect_args["credentials"] = credentials_info
except json.JSONDecodeError as ex:
logger.error(ex, exc_info=True)
raise

View File

@ -0,0 +1,83 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, import-outside-toplevel, protected-access
from __future__ import annotations
from datetime import datetime
from typing import Any, Optional
from unittest.mock import Mock
import pytest
from superset.utils import json
from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm
from tests.unit_tests.fixtures.common import dttm # noqa: F401
def test_epoch_to_dttm() -> None:
from superset.db_engine_specs.ydb import YDBEngineSpec
assert YDBEngineSpec.epoch_to_dttm() == "DateTime::MakeDatetime({col})"
@pytest.mark.parametrize(
"target_type,expected_result",
[
("Date", "DateTime::MakeDate(DateTime::ParseIso8601('2019-01-02'))"),
(
"DateTime",
"DateTime::MakeDatetime(DateTime::ParseIso8601('2019-01-02T03:04:05'))",
),
("UnknownType", None),
],
)
def test_convert_dttm(
target_type: str,
expected_result: Optional[str],
dttm: datetime, # noqa: F811
) -> None:
from superset.db_engine_specs.ydb import YDBEngineSpec as spec
assert_convert_dttm(spec, target_type, expected_result, dttm)
def test_specify_protocol() -> None:
from superset.db_engine_specs.ydb import YDBEngineSpec
database = Mock()
extra = {"protocol": "grpcs"}
database.encrypted_extra = json.dumps(extra)
params: dict[str, Any] = {}
YDBEngineSpec.update_params_from_encrypted_extra(database, params)
connect_args = params.setdefault("connect_args", {})
assert connect_args.get("protocol") == "grpcs"
def test_specify_credentials() -> None:
from superset.db_engine_specs.ydb import YDBEngineSpec
database = Mock()
auth_params = {"username": "username", "password": "password"}
database.encrypted_extra = json.dumps({"credentials": auth_params})
params: dict[str, Any] = {}
YDBEngineSpec.update_params_from_encrypted_extra(database, params)
connect_args = params.setdefault("connect_args", {})
assert connect_args.get("credentials") == auth_params