diff --git a/pyproject.toml b/pyproject.toml index d401eee49..1ba425850 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,6 +61,7 @@ dependencies = [ "humanize", "importlib_metadata", "isodate", + "jsonpath-ng>=1.6.1, <2", "Mako>=1.2.2", "markdown>=3.0", "msgpack>=1.0.0, <1.1", diff --git a/requirements/base.txt b/requirements/base.txt index 1b19d3a92..31a8c8275 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -144,9 +144,7 @@ geopy==2.4.1 google-auth==2.29.0 # via shillelagh greenlet==3.0.3 - # via - # shillelagh - # sqlalchemy + # via shillelagh gunicorn==22.0.0 # via apache-superset hashids==1.3.1 @@ -173,6 +171,8 @@ jinja2==3.1.4 # via # flask # flask-babel +jsonpath-ng==1.6.1 + # via apache-superset jsonschema==4.17.3 # via flask-appbuilder kombu==5.3.7 @@ -249,6 +249,8 @@ pgsanity==0.2.9 # via apache-superset platformdirs==3.8.1 # via requests-cache +ply==3.11 + # via jsonpath-ng polyline==2.0.2 # via apache-superset prison==0.2.1 diff --git a/requirements/development.txt b/requirements/development.txt index 5b99fd81b..b2ed1b8b3 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -10,12 +10,12 @@ # via # -r requirements/base.in # -r requirements/development.in -appnope==0.1.4 - # via ipython astroid==3.1.0 # via pylint boto3==1.34.112 - # via dataflows-tabulator + # via + # apache-superset + # dataflows-tabulator botocore==1.34.112 # via # boto3 @@ -177,9 +177,7 @@ protobuf==4.23.0 psycopg2-binary==2.9.6 # via apache-superset pure-sasl==0.6.2 - # via - # pyhive - # thrift-sasl + # via thrift-sasl pydata-google-auth==1.7.0 # via pandas-gbq pydruid==0.6.9 @@ -232,18 +230,9 @@ tableschema==1.20.10 thrift==0.16.0 # via # apache-superset - # pyhive # thrift-sasl thrift-sasl==0.4.3 - # via - # build - # coverage - # pip-tools - # pylint - # pyproject-api - # pyproject-hooks - # pytest - # tox + # via apache-superset tomlkit==0.12.5 # via pylint toposort==1.10 @@ -254,9 +243,6 @@ tqdm==4.66.4 # via # cmdstanpy # 
prophet -traitlets==5.14.3 - # via - # matplotlib-inline trino==0.328.0 # via apache-superset tzlocal==5.2 diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 438154f5f..7f4d591a7 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -74,6 +74,7 @@ from superset.superset_typing import ( from superset.utils import core as utils, json from superset.utils.core import ColumnSpec, GenericDataType from superset.utils.hashing import md5_sha_from_str +from superset.utils.json import redact_sensitive, reveal_sensitive from superset.utils.network import is_hostname_valid, is_port_open from superset.utils.oauth2 import encode_oauth2_state @@ -398,6 +399,11 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]] ] = {} + # List of JSON path to fields in `encrypted_extra` that should be masked when the + # database is edited. By default everything is masked. + # pylint: disable=invalid-name + encrypted_extra_sensitive_fields: set[str] = {"$.*"} + # Whether the engine supports file uploads # if True, database will be listed as option in the upload file form supports_file_upload = True @@ -2163,26 +2169,54 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods @classmethod def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None: """ - Mask ``encrypted_extra``. + Mask `encrypted_extra`. - This is used to remove any sensitive data in ``encrypted_extra`` when presenting - it to the user. For example, a private key might be replaced with a masked value - "XXXXXXXXXX". If the masked value is changed the corresponding entry is updated, - otherwise the old value is used (see ``unmask_encrypted_extra`` below). + This is used to remove any sensitive data in `encrypted_extra` when presenting + it to the user when a database is edited. For example, a private key might be + replaced with a masked value "XXXXXXXXXX". 
If the masked value is changed the + corresponding entry is updated, otherwise the old value is used (see + `unmask_encrypted_extra` below). """ - return encrypted_extra + if encrypted_extra is None or not cls.encrypted_extra_sensitive_fields: + return encrypted_extra + + try: + config = json.loads(encrypted_extra) + except (TypeError, json.JSONDecodeError): + return encrypted_extra + + masked_encrypted_extra = redact_sensitive( + config, + cls.encrypted_extra_sensitive_fields, + ) + + return json.dumps(masked_encrypted_extra) - # pylint: disable=unused-argument @classmethod def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None: """ - Remove masks from ``encrypted_extra``. + Remove masks from `encrypted_extra`. This method allows reusing existing values from the current encrypted extra on updates. It's useful for reusing masked passwords, allowing keys to be updated without having to provide sensitive data to the client. """ - return new + if old is None or new is None: + return new + + try: + old_config = json.loads(old) + new_config = json.loads(new) + except (TypeError, json.JSONDecodeError): + return new + + new_config = reveal_sensitive( + old_config, + new_config, + cls.encrypted_extra_sensitive_fields, + ) + + return json.dumps(new_config) @classmethod def get_public_information(cls) -> dict[str, Any]: diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py index 7693e48da..380fa297a 100644 --- a/superset/db_engine_specs/bigquery.py +++ b/superset/db_engine_specs/bigquery.py @@ -17,7 +17,6 @@ from __future__ import annotations -import contextlib import re import urllib from datetime import datetime @@ -38,7 +37,7 @@ from sqlalchemy.engine.url import URL from sqlalchemy.sql import sqltypes from superset import sql_parse -from superset.constants import PASSWORD_MASK, TimeGrain +from superset.constants import TimeGrain from superset.databases.schemas import encrypted_field_properties, EncryptedString 
from superset.databases.utils import make_url_safe from superset.db_engine_specs.base import BaseEngineSpec, BasicPropertiesType @@ -129,6 +128,10 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met supports_catalog = supports_dynamic_catalog = True + # when editing the database, mask this field in `encrypted_extra` + # pylint: disable=invalid-name + encrypted_extra_sensitive_fields = {"$.credentials_info.private_key"} + """ https://www.python.org/dev/peps/pep-0249/#arraysize raw_connections bypass the sqlalchemy-bigquery query execution context and deal with @@ -594,47 +597,6 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met raise ValidationError("Invalid service credentials") - @classmethod - def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None: - if encrypted_extra is None: - return encrypted_extra - - try: - config = json.loads(encrypted_extra) - except (json.JSONDecodeError, TypeError): - return encrypted_extra - - with contextlib.suppress(KeyError): - config["credentials_info"]["private_key"] = PASSWORD_MASK - return json.dumps(config) - - @classmethod - def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None: - """ - Reuse ``private_key`` if available and unchanged. 
- """ - if old is None or new is None: - return new - - try: - old_config = json.loads(old) - new_config = json.loads(new) - except (TypeError, json.JSONDecodeError): - return new - - if "credentials_info" not in new_config: - return new - - if "private_key" not in new_config["credentials_info"]: - return new - - if new_config["credentials_info"]["private_key"] == PASSWORD_MASK: - new_config["credentials_info"]["private_key"] = old_config[ - "credentials_info" - ]["private_key"] - - return json.dumps(new_config) - @classmethod def get_dbapi_exception_mapping(cls) -> dict[type[Exception], type[Exception]]: # pylint: disable=import-outside-toplevel diff --git a/superset/db_engine_specs/gsheets.py b/superset/db_engine_specs/gsheets.py index fd5ec6722..070be5a92 100644 --- a/superset/db_engine_specs/gsheets.py +++ b/superset/db_engine_specs/gsheets.py @@ -17,7 +17,6 @@ from __future__ import annotations -import contextlib import logging import re from re import Pattern @@ -37,7 +36,6 @@ from sqlalchemy.engine import create_engine from sqlalchemy.engine.url import URL from superset import db, security_manager -from superset.constants import PASSWORD_MASK from superset.databases.schemas import encrypted_field_properties, EncryptedString from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec from superset.errors import ErrorLevel, SupersetError, SupersetErrorType @@ -93,6 +91,10 @@ class GSheetsEngineSpec(ShillelaghEngineSpec): default_driver = "apsw" sqlalchemy_uri_placeholder = "gsheets://" + # when editing the database, mask this field in `encrypted_extra` + # pylint: disable=invalid-name + encrypted_extra_sensitive_fields = {"$.service_account_info.private_key"} + custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = { SYNTAX_ERROR_REGEX: ( __( @@ -157,11 +159,11 @@ class GSheetsEngineSpec(ShillelaghEngineSpec): return {"metadata": metadata["extra"]} @classmethod + # pylint: disable=unused-argument def build_sqlalchemy_uri( 
cls, _: GSheetsParametersType, - encrypted_extra: None # pylint: disable=unused-argument - | (dict[str, Any]) = None, + encrypted_extra: None | (dict[str, Any]) = None, ) -> str: return "gsheets://" @@ -177,47 +179,6 @@ class GSheetsEngineSpec(ShillelaghEngineSpec): raise ValidationError("Invalid service credentials") - @classmethod - def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None: - if encrypted_extra is None: - return encrypted_extra - - try: - config = json.loads(encrypted_extra) - except (TypeError, json.JSONDecodeError): - return encrypted_extra - - with contextlib.suppress(KeyError): - config["service_account_info"]["private_key"] = PASSWORD_MASK - return json.dumps(config) - - @classmethod - def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None: - """ - Reuse ``private_key`` if available and unchanged. - """ - if old is None or new is None: - return new - - try: - old_config = json.loads(old) - new_config = json.loads(new) - except (TypeError, json.JSONDecodeError): - return new - - if "service_account_info" not in new_config: - return new - - if "private_key" not in new_config["service_account_info"]: - return new - - if new_config["service_account_info"]["private_key"] == PASSWORD_MASK: - new_config["service_account_info"]["private_key"] = old_config[ - "service_account_info" - ]["private_key"] - - return json.dumps(new_config) - @classmethod def parameters_json_schema(cls) -> Any: """ diff --git a/superset/db_engine_specs/snowflake.py b/superset/db_engine_specs/snowflake.py index 72116cfc3..9680628d0 100644 --- a/superset/db_engine_specs/snowflake.py +++ b/superset/db_engine_specs/snowflake.py @@ -87,6 +87,12 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec): supports_dynamic_schema = True supports_catalog = supports_dynamic_catalog = True + # pylint: disable=invalid-name + encrypted_extra_sensitive_fields = { + "$.auth_params.privatekey_body", + "$.auth_params.privatekey_pass", + } + 
_time_grain_expressions = { None: "{col}", TimeGrain.SECOND: "DATE_TRUNC('SECOND', {col})", diff --git a/superset/utils/json.py b/superset/utils/json.py index 1deea099c..cc2ab400c 100644 --- a/superset/utils/json.py +++ b/superset/utils/json.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import copy import decimal import logging import uuid @@ -24,8 +25,10 @@ import numpy as np import pandas as pd import simplejson from flask_babel.speaklater import LazyString +from jsonpath_ng import parse from simplejson import JSONDecodeError +from superset.constants import PASSWORD_MASK from superset.utils.dates import datetime_to_epoch, EPOCH logging.getLogger("MARKDOWN").setLevel(logging.INFO) @@ -243,3 +246,56 @@ def loads( except JSONDecodeError as ex: logger.error("JSON is not valid %s", str(ex), exc_info=True) raise + + +def redact_sensitive( + payload: dict[str, Any], + sensitive_fields: set[str], +) -> dict[str, Any]: + """ + Redacts sensitive fields from a payload. + + :param payload: The payload to redact + :param sensitive_fields: The set of fields to redact, as JSONPath expressions + :returns: The redacted payload + """ + redacted_payload = copy.deepcopy(payload) + + for json_path in sensitive_fields: + jsonpath_expr = parse(json_path) + for match in jsonpath_expr.find(redacted_payload): + match.context.value[match.path.fields[0]] = PASSWORD_MASK + + return redacted_payload + + +def reveal_sensitive( + old_payload: dict[str, Any], + new_payload: dict[str, Any], + sensitive_fields: set[str], +) -> dict[str, Any]: + """ + Reveals sensitive fields from a payload when not modified. + + This allows users to perform deep edits on a payload without having to provide + sensitive information. 
The old payload is sent to the user with any sensitive fields + masked, and when the user sends back a modified payload, any fields that were masked + are replaced with the original values from the old payload. + + For now this is only used to edit `encrypted_extra` fields in the database. + + :param old_payload: The old payload to reveal + :param new_payload: The new payload to reveal + :param sensitive_fields: The set of fields to reveal, as JSONPath expressions + :returns: The revealed payload + """ + revealed_payload = copy.deepcopy(new_payload) + + for json_path in sensitive_fields: + jsonpath_expr = parse(json_path) + for match in jsonpath_expr.find(revealed_payload): + if match.value == PASSWORD_MASK: + old_value = match.full_path.find(old_payload) + match.context.value[match.path.fields[0]] = old_value[0].value + + return revealed_payload diff --git a/tests/unit_tests/db_engine_specs/test_base.py b/tests/unit_tests/db_engine_specs/test_base.py index 9ec1ebaf0..a3af15581 100644 --- a/tests/unit_tests/db_engine_specs/test_base.py +++ b/tests/unit_tests/db_engine_specs/test_base.py @@ -19,6 +19,7 @@ from __future__ import annotations +import json from textwrap import dedent from typing import Any @@ -334,3 +335,60 @@ def test_quote_table() -> None: BaseEngineSpec.quote_table(Table("ta ble", "sche.ma", 'cata"log'), dialect) == '"cata""log"."sche.ma"."ta ble"' ) + + +def test_mask_encrypted_extra() -> None: + """ + Test that the private key is masked when the database is edited. 
+ """ + from superset.db_engine_specs.base import BaseEngineSpec + + config = json.dumps( + { + "foo": "bar", + "service_account_info": { + "project_id": "black-sanctum-314419", + "private_key": "SECRET", + }, + } + ) + + assert BaseEngineSpec.mask_encrypted_extra(config) == json.dumps( + { + "foo": "XXXXXXXXXX", + "service_account_info": "XXXXXXXXXX", + } + ) + + +def test_unmask_encrypted_extra() -> None: + """ + Test that the private key can be reused from the previous `encrypted_extra`. + """ + from superset.db_engine_specs.base import BaseEngineSpec + + old = json.dumps( + { + "foo": "bar", + "service_account_info": { + "project_id": "black-sanctum-314419", + "private_key": "SECRET", + }, + } + ) + new = json.dumps( + { + "foo": "XXXXXXXXXX", + "service_account_info": "XXXXXXXXXX", + } + ) + + assert BaseEngineSpec.unmask_encrypted_extra(old, new) == json.dumps( + { + "foo": "bar", + "service_account_info": { + "project_id": "black-sanctum-314419", + "private_key": "SECRET", + }, + } + ) diff --git a/tests/unit_tests/db_engine_specs/test_bigquery.py b/tests/unit_tests/db_engine_specs/test_bigquery.py index 1a6173057..9e3d98ff8 100644 --- a/tests/unit_tests/db_engine_specs/test_bigquery.py +++ b/tests/unit_tests/db_engine_specs/test_bigquery.py @@ -191,7 +191,7 @@ def test_get_parameters_from_uri_serializable() -> None: def test_unmask_encrypted_extra() -> None: """ - Test that the private key can be reused from the previous ``encrypted_extra``. + Test that the private key can be reused from the previous `encrypted_extra`. 
""" from superset.db_engine_specs.bigquery import BigQueryEngineSpec @@ -212,17 +212,52 @@ def test_unmask_encrypted_extra() -> None: } ) - assert json.loads(str(BigQueryEngineSpec.unmask_encrypted_extra(old, new))) == { - "credentials_info": { - "project_id": "yellow-unicorn-314419", - "private_key": "SECRET", - }, - } + assert BigQueryEngineSpec.unmask_encrypted_extra(old, new) == json.dumps( + { + "credentials_info": { + "project_id": "yellow-unicorn-314419", + "private_key": "SECRET", + }, + } + ) -def test_unmask_encrypted_extra_when_empty() -> None: +def test_unmask_encrypted_extra_field_changeed() -> None: """ - Test that a None value works for ``encrypted_extra``. + Test that the private key is not reused when the field has changed. + """ + from superset.db_engine_specs.bigquery import BigQueryEngineSpec + + old = json.dumps( + { + "credentials_info": { + "project_id": "black-sanctum-314419", + "private_key": "SECRET", + }, + } + ) + new = json.dumps( + { + "credentials_info": { + "project_id": "yellow-unicorn-314419", + "private_key": "NEW-SECRET", + }, + } + ) + + assert BigQueryEngineSpec.unmask_encrypted_extra(old, new) == json.dumps( + { + "credentials_info": { + "project_id": "yellow-unicorn-314419", + "private_key": "NEW-SECRET", + }, + } + ) + + +def test_unmask_encrypted_extra_when_old_is_none() -> None: + """ + Test that a `None` value for the old field works for `encrypted_extra`. 
""" from superset.db_engine_specs.bigquery import BigQueryEngineSpec @@ -236,17 +271,19 @@ def test_unmask_encrypted_extra_when_empty() -> None: } ) - assert json.loads(str(BigQueryEngineSpec.unmask_encrypted_extra(old, new))) == { - "credentials_info": { - "project_id": "yellow-unicorn-314419", - "private_key": "XXXXXXXXXX", - }, - } + assert BigQueryEngineSpec.unmask_encrypted_extra(old, new) == json.dumps( + { + "credentials_info": { + "project_id": "yellow-unicorn-314419", + "private_key": "XXXXXXXXXX", + }, + } + ) -def test_unmask_encrypted_extra_when_new_is_empty() -> None: +def test_unmask_encrypted_extra_when_new_is_none() -> None: """ - Test that a None value works for ``encrypted_extra``. + Test that a `None` value for the new field works for `encrypted_extra`. """ from superset.db_engine_specs.bigquery import BigQueryEngineSpec @@ -263,6 +300,31 @@ def test_unmask_encrypted_extra_when_new_is_empty() -> None: assert BigQueryEngineSpec.unmask_encrypted_extra(old, new) is None +def test_mask_encrypted_extra() -> None: + """ + Test that the private key is masked when the database is edited. + """ + from superset.db_engine_specs.bigquery import BigQueryEngineSpec + + config = json.dumps( + { + "credentials_info": { + "project_id": "black-sanctum-314419", + "private_key": "SECRET", + }, + } + ) + + assert BigQueryEngineSpec.mask_encrypted_extra(config) == json.dumps( + { + "credentials_info": { + "project_id": "black-sanctum-314419", + "private_key": "XXXXXXXXXX", + }, + } + ) + + def test_mask_encrypted_extra_when_empty() -> None: """ Test that the encrypted extra will return a none value if the field is empty. 
diff --git a/tests/unit_tests/db_engine_specs/test_gsheets.py b/tests/unit_tests/db_engine_specs/test_gsheets.py index fe8230ac9..5d2ddb807 100644 --- a/tests/unit_tests/db_engine_specs/test_gsheets.py +++ b/tests/unit_tests/db_engine_specs/test_gsheets.py @@ -247,9 +247,34 @@ def test_validate_parameters_catalog_and_credentials( ) +def test_mask_encrypted_extra() -> None: + """ + Test that the private key is masked when the database is edited. + """ + from superset.db_engine_specs.gsheets import GSheetsEngineSpec + + config = json.dumps( + { + "service_account_info": { + "project_id": "black-sanctum-314419", + "private_key": "SECRET", + }, + } + ) + + assert GSheetsEngineSpec.mask_encrypted_extra(config) == json.dumps( + { + "service_account_info": { + "project_id": "black-sanctum-314419", + "private_key": "XXXXXXXXXX", + }, + } + ) + + def test_unmask_encrypted_extra() -> None: """ - Test that the private key can be reused from the previous ``encrypted_extra``. + Test that the private key can be reused from the previous `encrypted_extra`. """ from superset.db_engine_specs.gsheets import GSheetsEngineSpec @@ -270,17 +295,52 @@ def test_unmask_encrypted_extra() -> None: } ) - assert json.loads(str(GSheetsEngineSpec.unmask_encrypted_extra(old, new))) == { - "service_account_info": { - "project_id": "yellow-unicorn-314419", - "private_key": "SECRET", - }, - } + assert GSheetsEngineSpec.unmask_encrypted_extra(old, new) == json.dumps( + { + "service_account_info": { + "project_id": "yellow-unicorn-314419", + "private_key": "SECRET", + }, + } + ) + + +def test_unmask_encrypted_extra_field_changed() -> None: + """ + Test that the private key is not reused when the field has changed. 
+ """ + from superset.db_engine_specs.gsheets import GSheetsEngineSpec + + old = json.dumps( + { + "service_account_info": { + "project_id": "black-sanctum-314419", + "private_key": "SECRET", + }, + } + ) + new = json.dumps( + { + "service_account_info": { + "project_id": "yellow-unicorn-314419", + "private_key": "NEW-SECRET", + }, + } + ) + + assert GSheetsEngineSpec.unmask_encrypted_extra(old, new) == json.dumps( + { + "service_account_info": { + "project_id": "yellow-unicorn-314419", + "private_key": "NEW-SECRET", + }, + } + ) def test_unmask_encrypted_extra_when_old_is_none() -> None: """ - Test that a None value works for ``encrypted_extra``. + Test that a `None` value for the old field works for `encrypted_extra`. """ from superset.db_engine_specs.gsheets import GSheetsEngineSpec @@ -294,17 +354,19 @@ def test_unmask_encrypted_extra_when_old_is_none() -> None: } ) - assert json.loads(str(GSheetsEngineSpec.unmask_encrypted_extra(old, new))) == { - "service_account_info": { - "project_id": "yellow-unicorn-314419", - "private_key": "XXXXXXXXXX", - }, - } + assert GSheetsEngineSpec.unmask_encrypted_extra(old, new) == json.dumps( + { + "service_account_info": { + "project_id": "yellow-unicorn-314419", + "private_key": "XXXXXXXXXX", + }, + } + ) def test_unmask_encrypted_extra_when_new_is_none() -> None: """ - Test that a None value works for ``encrypted_extra``. + Test that a `None` value for the new field works for `encrypted_extra`. 
""" from superset.db_engine_specs.gsheets import GSheetsEngineSpec diff --git a/tests/unit_tests/db_engine_specs/test_snowflake.py b/tests/unit_tests/db_engine_specs/test_snowflake.py index 67ac88f69..73b93b27e 100644 --- a/tests/unit_tests/db_engine_specs/test_snowflake.py +++ b/tests/unit_tests/db_engine_specs/test_snowflake.py @@ -291,3 +291,106 @@ def test_get_default_catalog() -> None: sqlalchemy_uri="snowflake://user:pass@account/database_name/default", ) assert SnowflakeEngineSpec.get_default_catalog(database) == "database_name" + + +def test_mask_encrypted_extra() -> None: + """ + Test that the private keys are masked when the database is edited. + """ + from superset.db_engine_specs.snowflake import SnowflakeEngineSpec + + config = json.dumps( + { + "auth_method": "keypair", + "auth_params": { + "privatekey_body": ( + "-----BEGIN ENCRYPTED PRIVATE KEY-----" + "..." + "-----END ENCRYPTED PRIVATE KEY-----" + ), + "privatekey_pass": "my_password", + }, + } + ) + + assert SnowflakeEngineSpec.mask_encrypted_extra(config) == json.dumps( + { + "auth_method": "keypair", + "auth_params": { + "privatekey_body": "XXXXXXXXXX", + "privatekey_pass": "XXXXXXXXXX", + }, + } + ) + + +def test_mask_encrypted_extra_no_fields() -> None: + """ + Test that the private key is masked when the database is edited. + """ + from superset.db_engine_specs.snowflake import SnowflakeEngineSpec + + config = json.dumps( + { + # this is a fake example and the fields are made up + "auth_method": "token", + "auth_params": { + "jwt": "SECRET", + }, + } + ) + + assert SnowflakeEngineSpec.mask_encrypted_extra(config) == json.dumps( + { + "auth_method": "token", + "auth_params": { + "jwt": "SECRET", + }, + } + ) + + +def test_unmask_encrypted_extra() -> None: + """ + Test that the private keys can be reused from the previous `encrypted_extra`. 
+ """ + from superset.db_engine_specs.snowflake import SnowflakeEngineSpec + + old = json.dumps( + { + "auth_method": "keypair", + "auth_params": { + "privatekey_body": ( + "-----BEGIN ENCRYPTED PRIVATE KEY-----" + "..." + "-----END ENCRYPTED PRIVATE KEY-----" + ), + "privatekey_pass": "my_password", + }, + } + ) + new = json.dumps( + { + "foo": "bar", + "auth_method": "keypair", + "auth_params": { + "privatekey_body": "XXXXXXXXXX", + "privatekey_pass": "XXXXXXXXXX", + }, + } + ) + + assert SnowflakeEngineSpec.unmask_encrypted_extra(old, new) == json.dumps( + { + "foo": "bar", + "auth_method": "keypair", + "auth_params": { + "privatekey_body": ( + "-----BEGIN ENCRYPTED PRIVATE KEY-----" + "..." + "-----END ENCRYPTED PRIVATE KEY-----" + ), + "privatekey_pass": "my_password", + }, + } + ) diff --git a/tests/unit_tests/utils/json_tests.py b/tests/unit_tests/utils/json_tests.py index 2eb7f7c2a..0a302dfb4 100644 --- a/tests/unit_tests/utils/json_tests.py +++ b/tests/unit_tests/utils/json_tests.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import copy import datetime import math from unittest.mock import MagicMock @@ -146,3 +147,48 @@ def test_validate_json(): str(excinfo.value) == "Unterminated string starting at: line 1 column 28 (char 27)" ) + + +def test_sensitive_fields() -> None: + """ + Test masking/unmasking of sensitive fields. 
+ """ + payload = { + "password": "SECRET", + "credentials": { + "user_id": "alice", + "user_token": "TOKEN", + }, + } + sensitive_fields = {"$.password", "$.credentials.user_token"} + + redacted_payload = json.redact_sensitive(payload, sensitive_fields) + assert redacted_payload == { + "password": "XXXXXXXXXX", + "credentials": { + "user_id": "alice", + "user_token": "XXXXXXXXXX", + }, + } + + new_payload = copy.deepcopy(redacted_payload) + new_payload["credentials"]["user_id"] = "bob" + + assert json.reveal_sensitive(payload, new_payload, sensitive_fields) == { + "password": "SECRET", + "credentials": { + "user_id": "bob", + "user_token": "TOKEN", + }, + } + + new_payload = copy.deepcopy(redacted_payload) + new_payload["credentials"]["user_token"] = "NEW_TOKEN" + + assert json.reveal_sensitive(payload, new_payload, sensitive_fields) == { + "password": "SECRET", + "credentials": { + "user_id": "alice", + "user_token": "NEW_TOKEN", + }, + }