From 3d23adec5eb94deb714fa892cd4b8349f5dae516 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Thu, 18 Feb 2021 09:48:18 -0800 Subject: [PATCH] chore: use shillelagh instead of gsheetsdb (#13185) * chore: use shillelagh instead of gsheetsdb * Fix tests * Clean up code and remove duplication * Fix test * Tighten dep --- UPDATING.md | 3 ++ setup.py | 2 +- .../src/components/ErrorMessage/types.ts | 1 + .../databases/commands/test_connection.py | 4 +- superset/databases/schemas.py | 21 ++++------- superset/errors.py | 1 + superset/security/analytics_db_safety.py | 37 ++++++++++++++----- superset/views/core.py | 9 ++--- superset/views/database/mixins.py | 3 +- tests/core_tests.py | 2 +- tests/databases/api_tests.py | 4 +- tests/security/analytics_db_safety_tests.py | 26 +++++++++---- 12 files changed, 70 insertions(+), 43 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 1576ce31b..18f6e8081 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -26,6 +26,9 @@ assists people when migrating to a new version. ### Breaking Changes ### Potential Downtime ### Deprecations +### Other + +[shillelagh](https://github.com/betodealmeida/shillelagh/) is now the recommended module to connect Superset to Google Spreadsheets, since it's more robust and has extensive test coverage. You should uninstall the `gsheetsdb` module and install the `shillelagh` module in its place. Shillelagh is a drop-in replacement, so no modifications are needed to be done on existing queries, datasets or charts. ## 1.0.0 diff --git a/setup.py b/setup.py index e39284f14..83fa1444d 100644 --- a/setup.py +++ b/setup.py @@ -129,7 +129,7 @@ setup( "elasticsearch": ["elasticsearch-dbapi>=0.2.0, <0.3.0"], "exasol": ["sqlalchemy-exasol>=2.1.0, <2.2"], "excel": ["xlrd>=1.2.0, <1.3"], - "gsheets": ["gsheetsdb>=0.1.9"], + "gsheets": ["shillelagh>=0.2, <0.3"], "hana": ["hdbcli==2.4.162", "sqlalchemy_hana==0.4.0"], "hive": ["pyhive[hive]>=0.6.1", "tableschema", "thrift>=0.11.0, <1.0.0"], "impala": ["impyla>0.16.2, <0.17"], diff --git a/superset-frontend/src/components/ErrorMessage/types.ts b/superset-frontend/src/components/ErrorMessage/types.ts index 7843ff394..b64aaf163 100644 --- a/superset-frontend/src/components/ErrorMessage/types.ts +++ b/superset-frontend/src/components/ErrorMessage/types.ts @@ -38,6 +38,7 @@ export const ErrorTypeEnum = { // Security access errors TABLE_SECURITY_ACCESS_ERROR: 'TABLE_SECURITY_ACCESS_ERROR', DATASOURCE_SECURITY_ACCESS_ERROR: 'DATASOURCE_SECURITY_ACCESS_ERROR', + DATABASE_SECURITY_ACCESS_ERROR: 'DATABASE_SECURITY_ACCESS_ERROR', MISSING_OWNERSHIP_ERROR: 'MISSING_OWNERSHIP_ERROR', // Other errors diff --git a/superset/databases/commands/test_connection.py b/superset/databases/commands/test_connection.py index 3ea28c5f0..d35d07aaa 100644 --- a/superset/databases/commands/test_connection.py +++ b/superset/databases/commands/test_connection.py @@ -31,8 +31,8 @@ from superset.databases.commands.exceptions import ( DatabaseTestConnectionUnexpectedError, ) from superset.databases.dao import DatabaseDAO +from superset.exceptions import SupersetSecurityException from superset.models.core import Database -from superset.security.analytics_db_safety import DBSecurityException logger = logging.getLogger(__name__) @@ -70,7 +70,7 @@ class TestConnectionDatabaseCommand(BaseCommand): ) except DBAPIError: raise DatabaseTestConnectionFailedError() - except DBSecurityException as ex: + except SupersetSecurityException as ex: raise DatabaseSecurityUnsafeError(message=str(ex)) except Exception: raise DatabaseTestConnectionUnexpectedError() diff --git a/superset/databases/schemas.py b/superset/databases/schemas.py index 0fbcf1dcb..6c6acc73f 100644 --- a/superset/databases/schemas.py +++ b/superset/databases/schemas.py @@ -27,8 +27,9 @@ from sqlalchemy import MetaData from sqlalchemy.engine.url import make_url from sqlalchemy.exc import ArgumentError -from superset.exceptions import CertificateException +from superset.exceptions import CertificateException, SupersetSecurityException from superset.models.core import PASSWORD_MASK +from superset.security.analytics_db_safety import check_sqlalchemy_uri from superset.utils.core import markdown, parse_ssl_cert database_schemas_query_schema = { @@ -133,7 +134,7 @@ def sqlalchemy_uri_validator(value: str) -> str: Validate if it's a valid SQLAlchemy URI and refuse SQLLite by default """ try: - make_url(value.strip()) + uri = make_url(value.strip()) except (ArgumentError, AttributeError, ValueError): raise ValidationError( [ @@ -143,17 +144,11 @@ def sqlalchemy_uri_validator(value: str) -> str: ) ] ) - if current_app.config.get("PREVENT_UNSAFE_DB_CONNECTIONS", True) and value: - if value.startswith("sqlite"): - raise ValidationError( - [ - _( - "SQLite database cannot be used as a data source for " - "security reasons." - ) - ] - ) - + if current_app.config.get("PREVENT_UNSAFE_DB_CONNECTIONS", True): + try: + check_sqlalchemy_uri(uri) + except SupersetSecurityException as ex: + raise ValidationError([str(ex)]) return value diff --git a/superset/errors.py b/superset/errors.py index 8b0416d5b..24745e65e 100644 --- a/superset/errors.py +++ b/superset/errors.py @@ -48,6 +48,7 @@ class SupersetErrorType(str, Enum): # Security access errors TABLE_SECURITY_ACCESS_ERROR = "TABLE_SECURITY_ACCESS_ERROR" DATASOURCE_SECURITY_ACCESS_ERROR = "DATASOURCE_SECURITY_ACCESS_ERROR" + DATABASE_SECURITY_ACCESS_ERROR = "DATABASE_SECURITY_ACCESS_ERROR" MISSING_OWNERSHIP_ERROR = "MISSING_OWNERSHIP_ERROR" # Other errors diff --git a/superset/security/analytics_db_safety.py b/superset/security/analytics_db_safety.py index ee3376b06..bfa93613e 100644 --- a/superset/security/analytics_db_safety.py +++ b/superset/security/analytics_db_safety.py @@ -14,20 +14,37 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from flask_babel import lazy_gettext as _ from sqlalchemy.engine.url import URL +from sqlalchemy.exc import NoSuchModuleError -from superset.exceptions import SupersetException +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from superset.exceptions import SupersetSecurityException - -class DBSecurityException(SupersetException): - """ Exception to prevent a security issue with connecting to a DB """ - - status = 400 +# list of unsafe SQLAlchemy dialects +BLOCKLIST = { + # sqlite creates a local DB, which allows mapping server's filesystem + "sqlite", + # shillelagh allows opening local files (eg, 'SELECT * FROM "csv:///etc/passwd"') + "shillelagh", + "shillelagh+apsw", +} def check_sqlalchemy_uri(uri: URL) -> None: - if uri.startswith("sqlite"): - # sqlite creates a local DB, which allows mapping server's filesystem - raise DBSecurityException( - "SQLite database cannot be used as a data source for security reasons." + if uri.drivername in BLOCKLIST: + try: + dialect = uri.get_dialect().__name__ + except NoSuchModuleError: + dialect = uri.drivername + + raise SupersetSecurityException( + SupersetError( + error_type=SupersetErrorType.DATABASE_SECURITY_ACCESS_ERROR, + message=_( + "%(dialect)s cannot be used as a data source for security reasons.", + dialect=dialect, + ), + level=ErrorLevel.ERROR, + ) ) diff --git a/superset/views/core.py b/superset/views/core.py index 8d4ecefbb..58786db86 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -92,10 +92,7 @@ from superset.models.slice import Slice from superset.models.sql_lab import Query, TabState from superset.models.user_attributes import UserAttribute from superset.queries.dao import QueryDAO -from superset.security.analytics_db_safety import ( - check_sqlalchemy_uri, - DBSecurityException, -) +from superset.security.analytics_db_safety import check_sqlalchemy_uri from superset.sql_parse import CtasMethod, ParsedQuery, Table from superset.sql_validators import get_validator_by_name from superset.tasks.async_queries import load_explore_json_into_cache @@ -1234,7 +1231,7 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods uri = request.json.get("uri") try: if app.config["PREVENT_UNSAFE_DB_CONNECTIONS"]: - check_sqlalchemy_uri(uri) + check_sqlalchemy_uri(make_url(uri)) # if the database already exists in the database, only its safe # (password-masked) URI would be shown in the UI and would be passed in the # form data so if the database already exists and the form was submitted @@ -1294,7 +1291,7 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods return json_error_response( _("Connection failed, please check your connection settings"), 400 ) - except DBSecurityException as ex: + except SupersetSecurityException as ex: logger.warning("Stopped an unsafe database connection") return json_error_response(_(str(ex)), 400) except Exception as ex: # pylint: disable=broad-except diff --git a/superset/views/database/mixins.py b/superset/views/database/mixins.py index d5d561247..c415d68f3 100644 --- a/superset/views/database/mixins.py +++ b/superset/views/database/mixins.py @@ -19,6 +19,7 @@ import inspect from flask import Markup from flask_babel import lazy_gettext as _ from sqlalchemy import MetaData +from sqlalchemy.engine.url import make_url from superset import app, security_manager from superset.databases.filters import DatabaseFilter @@ -205,7 +206,7 @@ class DatabaseMixin: def _pre_add_update(self, database: Database) -> None: if app.config["PREVENT_UNSAFE_DB_CONNECTIONS"]: - check_sqlalchemy_uri(database.sqlalchemy_uri) + check_sqlalchemy_uri(make_url(database.sqlalchemy_uri)) self.check_extra(database) self.check_encrypted_extra(database) if database.server_cert: diff --git a/tests/core_tests.py b/tests/core_tests.py index 819a9c98f..912b23bde 100644 --- a/tests/core_tests.py +++ b/tests/core_tests.py @@ -545,7 +545,7 @@ class TestCore(SupersetTestCase): self.assertEqual(400, response.status_code) response_body = json.loads(response.data.decode("utf-8")) expected_body = { - "error": "SQLite database cannot be used as a data source for security reasons." + "error": "SQLiteDialect_pysqlite cannot be used as a data source for security reasons." } self.assertEqual(expected_body, response_body) diff --git a/tests/databases/api_tests.py b/tests/databases/api_tests.py index b1af44efa..825ea8543 100644 --- a/tests/databases/api_tests.py +++ b/tests/databases/api_tests.py @@ -387,7 +387,7 @@ class TestDatabaseApi(SupersetTestCase): expected_response = { "message": { "sqlalchemy_uri": [ - "SQLite database cannot be used as a data source " + "SQLiteDialect_pysqlite cannot be used as a data source " "for security reasons." ] } @@ -858,7 +858,7 @@ class TestDatabaseApi(SupersetTestCase): expected_response = { "message": { "sqlalchemy_uri": [ - "SQLite database cannot be used as a data source for security reasons." + "SQLiteDialect_pysqlite cannot be used as a data source for security reasons." ] } } diff --git a/tests/security/analytics_db_safety_tests.py b/tests/security/analytics_db_safety_tests.py index 942320865..ebab520f7 100644 --- a/tests/security/analytics_db_safety_tests.py +++ b/tests/security/analytics_db_safety_tests.py @@ -15,17 +15,29 @@ # specific language governing permissions and limitations # under the License. -from superset.security.analytics_db_safety import ( - check_sqlalchemy_uri, - DBSecurityException, -) +import pytest +from sqlalchemy.engine.url import make_url + +from superset.exceptions import SupersetSecurityException +from superset.security.analytics_db_safety import check_sqlalchemy_uri from tests.base_tests import SupersetTestCase class TestDBConnections(SupersetTestCase): def test_check_sqlalchemy_uri_ok(self): - check_sqlalchemy_uri("postgres://user:password@test.com") + check_sqlalchemy_uri(make_url("postgres://user:password@test.com")) def test_check_sqlalchemy_url_sqlite(self): - with self.assertRaises(DBSecurityException): - check_sqlalchemy_uri("sqlite:///home/superset/bad.db") + with pytest.raises(SupersetSecurityException) as excinfo: + check_sqlalchemy_uri(make_url("sqlite:///home/superset/bad.db")) + assert ( + str(excinfo.value) + == "SQLiteDialect_pysqlite cannot be used as a data source for security reasons." + ) + + with pytest.raises(SupersetSecurityException) as excinfo: + check_sqlalchemy_uri(make_url("shillelagh:///home/superset/bad.db")) + assert ( + str(excinfo.value) + == "shillelagh cannot be used as a data source for security reasons." + )