chore: use shillelagh instead of gsheetsdb (#13185)

* chore: use shillelagh instead of gsheetsdb

* Fix tests

* Clean up code and remove duplication

* Fix test

* Tighten dep
This commit is contained in:
Beto Dealmeida 2021-02-18 09:48:18 -08:00 committed by GitHub
parent 3c58fc5ef5
commit 3d23adec5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 70 additions and 43 deletions

View File

@ -26,6 +26,9 @@ assists people when migrating to a new version.
### Breaking Changes ### Breaking Changes
### Potential Downtime ### Potential Downtime
### Deprecations ### Deprecations
### Other
[shillelagh](https://github.com/betodealmeida/shillelagh/) is now the recommended module to connect Superset to Google Spreadsheets, since it's more robust and has extensive test coverage. You should uninstall the `gsheetsdb` module and install the `shillelagh` module in its place. Shillelagh is a drop-in replacement, so no modifications are needed to be done on existing queries, datasets or charts.
## 1.0.0 ## 1.0.0

View File

@ -129,7 +129,7 @@ setup(
"elasticsearch": ["elasticsearch-dbapi>=0.2.0, <0.3.0"], "elasticsearch": ["elasticsearch-dbapi>=0.2.0, <0.3.0"],
"exasol": ["sqlalchemy-exasol>=2.1.0, <2.2"], "exasol": ["sqlalchemy-exasol>=2.1.0, <2.2"],
"excel": ["xlrd>=1.2.0, <1.3"], "excel": ["xlrd>=1.2.0, <1.3"],
"gsheets": ["gsheetsdb>=0.1.9"], "gsheets": ["shillelagh>=0.2, <0.3"],
"hana": ["hdbcli==2.4.162", "sqlalchemy_hana==0.4.0"], "hana": ["hdbcli==2.4.162", "sqlalchemy_hana==0.4.0"],
"hive": ["pyhive[hive]>=0.6.1", "tableschema", "thrift>=0.11.0, <1.0.0"], "hive": ["pyhive[hive]>=0.6.1", "tableschema", "thrift>=0.11.0, <1.0.0"],
"impala": ["impyla>0.16.2, <0.17"], "impala": ["impyla>0.16.2, <0.17"],

View File

@ -38,6 +38,7 @@ export const ErrorTypeEnum = {
// Security access errors // Security access errors
TABLE_SECURITY_ACCESS_ERROR: 'TABLE_SECURITY_ACCESS_ERROR', TABLE_SECURITY_ACCESS_ERROR: 'TABLE_SECURITY_ACCESS_ERROR',
DATASOURCE_SECURITY_ACCESS_ERROR: 'DATASOURCE_SECURITY_ACCESS_ERROR', DATASOURCE_SECURITY_ACCESS_ERROR: 'DATASOURCE_SECURITY_ACCESS_ERROR',
DATABASE_SECURITY_ACCESS_ERROR: 'DATABASE_SECURITY_ACCESS_ERROR',
MISSING_OWNERSHIP_ERROR: 'MISSING_OWNERSHIP_ERROR', MISSING_OWNERSHIP_ERROR: 'MISSING_OWNERSHIP_ERROR',
// Other errors // Other errors

View File

@ -31,8 +31,8 @@ from superset.databases.commands.exceptions import (
DatabaseTestConnectionUnexpectedError, DatabaseTestConnectionUnexpectedError,
) )
from superset.databases.dao import DatabaseDAO from superset.databases.dao import DatabaseDAO
from superset.exceptions import SupersetSecurityException
from superset.models.core import Database from superset.models.core import Database
from superset.security.analytics_db_safety import DBSecurityException
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -70,7 +70,7 @@ class TestConnectionDatabaseCommand(BaseCommand):
) )
except DBAPIError: except DBAPIError:
raise DatabaseTestConnectionFailedError() raise DatabaseTestConnectionFailedError()
except DBSecurityException as ex: except SupersetSecurityException as ex:
raise DatabaseSecurityUnsafeError(message=str(ex)) raise DatabaseSecurityUnsafeError(message=str(ex))
except Exception: except Exception:
raise DatabaseTestConnectionUnexpectedError() raise DatabaseTestConnectionUnexpectedError()

View File

@ -27,8 +27,9 @@ from sqlalchemy import MetaData
from sqlalchemy.engine.url import make_url from sqlalchemy.engine.url import make_url
from sqlalchemy.exc import ArgumentError from sqlalchemy.exc import ArgumentError
from superset.exceptions import CertificateException from superset.exceptions import CertificateException, SupersetSecurityException
from superset.models.core import PASSWORD_MASK from superset.models.core import PASSWORD_MASK
from superset.security.analytics_db_safety import check_sqlalchemy_uri
from superset.utils.core import markdown, parse_ssl_cert from superset.utils.core import markdown, parse_ssl_cert
database_schemas_query_schema = { database_schemas_query_schema = {
@ -133,7 +134,7 @@ def sqlalchemy_uri_validator(value: str) -> str:
Validate if it's a valid SQLAlchemy URI and refuse SQLLite by default Validate if it's a valid SQLAlchemy URI and refuse SQLLite by default
""" """
try: try:
make_url(value.strip()) uri = make_url(value.strip())
except (ArgumentError, AttributeError, ValueError): except (ArgumentError, AttributeError, ValueError):
raise ValidationError( raise ValidationError(
[ [
@ -143,17 +144,11 @@ def sqlalchemy_uri_validator(value: str) -> str:
) )
] ]
) )
if current_app.config.get("PREVENT_UNSAFE_DB_CONNECTIONS", True) and value: if current_app.config.get("PREVENT_UNSAFE_DB_CONNECTIONS", True):
if value.startswith("sqlite"): try:
raise ValidationError( check_sqlalchemy_uri(uri)
[ except SupersetSecurityException as ex:
_( raise ValidationError([str(ex)])
"SQLite database cannot be used as a data source for "
"security reasons."
)
]
)
return value return value

View File

@ -48,6 +48,7 @@ class SupersetErrorType(str, Enum):
# Security access errors # Security access errors
TABLE_SECURITY_ACCESS_ERROR = "TABLE_SECURITY_ACCESS_ERROR" TABLE_SECURITY_ACCESS_ERROR = "TABLE_SECURITY_ACCESS_ERROR"
DATASOURCE_SECURITY_ACCESS_ERROR = "DATASOURCE_SECURITY_ACCESS_ERROR" DATASOURCE_SECURITY_ACCESS_ERROR = "DATASOURCE_SECURITY_ACCESS_ERROR"
DATABASE_SECURITY_ACCESS_ERROR = "DATABASE_SECURITY_ACCESS_ERROR"
MISSING_OWNERSHIP_ERROR = "MISSING_OWNERSHIP_ERROR" MISSING_OWNERSHIP_ERROR = "MISSING_OWNERSHIP_ERROR"
# Other errors # Other errors

View File

@ -14,20 +14,37 @@
# KIND, either express or implied. See the License for the # KIND, either express or implied. See the License for the
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
from flask_babel import lazy_gettext as _
from sqlalchemy.engine.url import URL from sqlalchemy.engine.url import URL
from sqlalchemy.exc import NoSuchModuleError
from superset.exceptions import SupersetException from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
# list of unsafe SQLAlchemy dialects
class DBSecurityException(SupersetException): BLOCKLIST = {
""" Exception to prevent a security issue with connecting to a DB """ # sqlite creates a local DB, which allows mapping server's filesystem
"sqlite",
status = 400 # shillelagh allows opening local files (eg, 'SELECT * FROM "csv:///etc/passwd"')
"shillelagh",
"shillelagh+apsw",
}
def check_sqlalchemy_uri(uri: URL) -> None: def check_sqlalchemy_uri(uri: URL) -> None:
if uri.startswith("sqlite"): if uri.drivername in BLOCKLIST:
# sqlite creates a local DB, which allows mapping server's filesystem try:
raise DBSecurityException( dialect = uri.get_dialect().__name__
"SQLite database cannot be used as a data source for security reasons." except NoSuchModuleError:
dialect = uri.drivername
raise SupersetSecurityException(
SupersetError(
error_type=SupersetErrorType.DATABASE_SECURITY_ACCESS_ERROR,
message=_(
"%(dialect)s cannot be used as a data source for security reasons.",
dialect=dialect,
),
level=ErrorLevel.ERROR,
)
) )

View File

@ -92,10 +92,7 @@ from superset.models.slice import Slice
from superset.models.sql_lab import Query, TabState from superset.models.sql_lab import Query, TabState
from superset.models.user_attributes import UserAttribute from superset.models.user_attributes import UserAttribute
from superset.queries.dao import QueryDAO from superset.queries.dao import QueryDAO
from superset.security.analytics_db_safety import ( from superset.security.analytics_db_safety import check_sqlalchemy_uri
check_sqlalchemy_uri,
DBSecurityException,
)
from superset.sql_parse import CtasMethod, ParsedQuery, Table from superset.sql_parse import CtasMethod, ParsedQuery, Table
from superset.sql_validators import get_validator_by_name from superset.sql_validators import get_validator_by_name
from superset.tasks.async_queries import load_explore_json_into_cache from superset.tasks.async_queries import load_explore_json_into_cache
@ -1234,7 +1231,7 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
uri = request.json.get("uri") uri = request.json.get("uri")
try: try:
if app.config["PREVENT_UNSAFE_DB_CONNECTIONS"]: if app.config["PREVENT_UNSAFE_DB_CONNECTIONS"]:
check_sqlalchemy_uri(uri) check_sqlalchemy_uri(make_url(uri))
# if the database already exists in the database, only its safe # if the database already exists in the database, only its safe
# (password-masked) URI would be shown in the UI and would be passed in the # (password-masked) URI would be shown in the UI and would be passed in the
# form data so if the database already exists and the form was submitted # form data so if the database already exists and the form was submitted
@ -1294,7 +1291,7 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
return json_error_response( return json_error_response(
_("Connection failed, please check your connection settings"), 400 _("Connection failed, please check your connection settings"), 400
) )
except DBSecurityException as ex: except SupersetSecurityException as ex:
logger.warning("Stopped an unsafe database connection") logger.warning("Stopped an unsafe database connection")
return json_error_response(_(str(ex)), 400) return json_error_response(_(str(ex)), 400)
except Exception as ex: # pylint: disable=broad-except except Exception as ex: # pylint: disable=broad-except

View File

@ -19,6 +19,7 @@ import inspect
from flask import Markup from flask import Markup
from flask_babel import lazy_gettext as _ from flask_babel import lazy_gettext as _
from sqlalchemy import MetaData from sqlalchemy import MetaData
from sqlalchemy.engine.url import make_url
from superset import app, security_manager from superset import app, security_manager
from superset.databases.filters import DatabaseFilter from superset.databases.filters import DatabaseFilter
@ -205,7 +206,7 @@ class DatabaseMixin:
def _pre_add_update(self, database: Database) -> None: def _pre_add_update(self, database: Database) -> None:
if app.config["PREVENT_UNSAFE_DB_CONNECTIONS"]: if app.config["PREVENT_UNSAFE_DB_CONNECTIONS"]:
check_sqlalchemy_uri(database.sqlalchemy_uri) check_sqlalchemy_uri(make_url(database.sqlalchemy_uri))
self.check_extra(database) self.check_extra(database)
self.check_encrypted_extra(database) self.check_encrypted_extra(database)
if database.server_cert: if database.server_cert:

View File

@ -545,7 +545,7 @@ class TestCore(SupersetTestCase):
self.assertEqual(400, response.status_code) self.assertEqual(400, response.status_code)
response_body = json.loads(response.data.decode("utf-8")) response_body = json.loads(response.data.decode("utf-8"))
expected_body = { expected_body = {
"error": "SQLite database cannot be used as a data source for security reasons." "error": "SQLiteDialect_pysqlite cannot be used as a data source for security reasons."
} }
self.assertEqual(expected_body, response_body) self.assertEqual(expected_body, response_body)

View File

@ -387,7 +387,7 @@ class TestDatabaseApi(SupersetTestCase):
expected_response = { expected_response = {
"message": { "message": {
"sqlalchemy_uri": [ "sqlalchemy_uri": [
"SQLite database cannot be used as a data source " "SQLiteDialect_pysqlite cannot be used as a data source "
"for security reasons." "for security reasons."
] ]
} }
@ -858,7 +858,7 @@ class TestDatabaseApi(SupersetTestCase):
expected_response = { expected_response = {
"message": { "message": {
"sqlalchemy_uri": [ "sqlalchemy_uri": [
"SQLite database cannot be used as a data source for security reasons." "SQLiteDialect_pysqlite cannot be used as a data source for security reasons."
] ]
} }
} }

View File

@ -15,17 +15,29 @@
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
from superset.security.analytics_db_safety import ( import pytest
check_sqlalchemy_uri, from sqlalchemy.engine.url import make_url
DBSecurityException,
) from superset.exceptions import SupersetSecurityException
from superset.security.analytics_db_safety import check_sqlalchemy_uri
from tests.base_tests import SupersetTestCase from tests.base_tests import SupersetTestCase
class TestDBConnections(SupersetTestCase): class TestDBConnections(SupersetTestCase):
def test_check_sqlalchemy_uri_ok(self): def test_check_sqlalchemy_uri_ok(self):
check_sqlalchemy_uri("postgres://user:password@test.com") check_sqlalchemy_uri(make_url("postgres://user:password@test.com"))
def test_check_sqlalchemy_url_sqlite(self): def test_check_sqlalchemy_url_sqlite(self):
with self.assertRaises(DBSecurityException): with pytest.raises(SupersetSecurityException) as excinfo:
check_sqlalchemy_uri("sqlite:///home/superset/bad.db") check_sqlalchemy_uri(make_url("sqlite:///home/superset/bad.db"))
assert (
str(excinfo.value)
== "SQLiteDialect_pysqlite cannot be used as a data source for security reasons."
)
with pytest.raises(SupersetSecurityException) as excinfo:
check_sqlalchemy_uri(make_url("shillelagh:///home/superset/bad.db"))
assert (
str(excinfo.value)
== "shillelagh cannot be used as a data source for security reasons."
)