fix: dataset safe URL for explore_url (#24686)

This commit is contained in:
Daniel Vaz Gaspar 2023-08-23 13:31:44 +01:00 committed by GitHub
parent c92a975e4b
commit a9efd4b2e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 85 additions and 147 deletions

View File

@ -45,6 +45,7 @@ assists people when migrating to a new version.
### Breaking Changes
- [24686]https://github.com/apache/superset/pull/24686): All dataset's custom explore_url are handled as relative URLs on the frontend, behaviour controlled by PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET.
- [24262](https://github.com/apache/superset/pull/24262): Enabled `TALISMAN_ENABLED` flag by default and provided stricter default Content Security Policy
- [24415](https://github.com/apache/superset/pull/24415): Removed the obsolete Druid NoSQL REGEX operator.
- [24423](https://github.com/apache/superset/pull/24423): Removed deprecated APIs `/superset/slice_json/...`, `/superset/annotation_json/...`

View File

@ -35,6 +35,7 @@ import IndeterminateCheckbox from 'src/components/IndeterminateCheckbox';
import waitForComponentToPaint from 'spec/helpers/waitForComponentToPaint';
import { act } from 'react-dom/test-utils';
import SubMenu from 'src/features/home/SubMenu';
import * as reactRedux from 'react-redux';
// store needed for withToasts(DatasetList)
const mockStore = configureStore([thunk]);
@ -47,13 +48,15 @@ const datasetsDuplicateEndpoint = 'glob:*/api/v1/dataset/duplicate*';
const databaseEndpoint = 'glob:*/api/v1/dataset/related/database*';
const datasetsEndpoint = 'glob:*/api/v1/dataset/?*';
const useSelectorMock = jest.spyOn(reactRedux, 'useSelector');
const mockdatasets = [...new Array(3)].map((_, i) => ({
changed_by_name: 'user',
kind: i === 0 ? 'virtual' : 'physical', // ensure there is 1 virtual
changed_by: 'user',
changed_on: new Date().toISOString(),
database_name: `db ${i}`,
explore_url: `/explore/?datasource_type=table&datasource_id=${i}`,
explore_url: `https://www.google.com?${i}`,
id: i,
schema: `schema ${i}`,
table_name: `coolest table ${i}`,
@ -280,3 +283,58 @@ describe('RTL', () => {
expect(importTooltip).toBeInTheDocument();
});
});
describe('Prevent unsafe URLs', () => {
const mockedProps = {};
let wrapper: any;
it('Check prevent unsafe is on renders relative links', async () => {
const tdColumnsNumber = 9;
useSelectorMock.mockReturnValue(true);
wrapper = await mountAndWait(mockedProps);
const tdElements = wrapper.find(ListView).find('td');
expect(
tdElements
.at(0 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('/https://www.google.com?0');
expect(
tdElements
.at(1 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('/https://www.google.com?1');
expect(
tdElements
.at(2 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('/https://www.google.com?2');
});
it('Check prevent unsafe is off renders absolute links', async () => {
const tdColumnsNumber = 9;
useSelectorMock.mockReturnValue(false);
wrapper = await mountAndWait(mockedProps);
const tdElements = wrapper.find(ListView).find('td');
expect(
tdElements
.at(0 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('https://www.google.com?0');
expect(
tdElements
.at(1 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('https://www.google.com?1');
expect(
tdElements
.at(2 * tdColumnsNumber + 1)
.find('a')
.prop('href'),
).toBe('https://www.google.com?2');
});
});

View File

@ -30,7 +30,7 @@ import React, {
useMemo,
useCallback,
} from 'react';
import { useHistory } from 'react-router-dom';
import { Link, useHistory } from 'react-router-dom';
import rison from 'rison';
import {
createFetchRelated,
@ -69,6 +69,7 @@ import {
CONFIRM_OVERWRITE_MESSAGE,
} from 'src/features/datasets/constants';
import DuplicateDatasetModal from 'src/features/datasets/DuplicateDatasetModal';
import { useSelector } from 'react-redux';
const extensionsRegistry = getExtensionsRegistry();
const DatasetDeleteRelatedExtension = extensionsRegistry.get(
@ -181,6 +182,11 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
setSSHTunnelPrivateKeyPasswordFields,
] = useState<string[]>([]);
const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector<any, boolean>(
state =>
state.common?.conf?.PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET || false,
);
const openDatasetImportModal = () => {
showImportModal(true);
};
@ -309,11 +315,20 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
},
},
}: any) => {
const titleLink = (
// exploreUrl can be a link to Explore or an external link
// in the first case use SPA routing, else use HTML anchor
<GenericLink to={exploreURL}>{datasetTitle}</GenericLink>
);
let titleLink: JSX.Element;
if (PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET) {
titleLink = (
<Link data-test="internal-link" to={exploreURL}>
{datasetTitle}
</Link>
);
} else {
titleLink = (
// exploreUrl can be a link to Explore or an external link
// in the first case use SPA routing, else use HTML anchor
<GenericLink to={exploreURL}>{datasetTitle}</GenericLink>
);
}
try {
const parsedExtra = JSON.parse(extra);
return (

View File

@ -1471,7 +1471,7 @@ STATIC_ASSETS_PREFIX = ""
# Typically these should not be allowed.
PREVENT_UNSAFE_DB_CONNECTIONS = True
# Prevents unsafe default endpoints to be registered on datasets.
# If true all default urls on datasets will be handled as relative URLs by the frontend
PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = True
# Define a list of allowed URLs for dataset data imports (v1).

View File

@ -61,23 +61,6 @@ class DatasetExistsValidationError(ValidationError):
)
class DatasetEndpointUnsafeValidationError(ValidationError):
"""
Marshmallow validation error for unsafe dataset default endpoint
"""
def __init__(self) -> None:
super().__init__(
[
_(
"The submitted URL is not considered safe,"
" only use URLs with the same domain as Superset."
)
],
field_name="default_endpoint",
)
class DatasetColumnNotFoundValidationError(ValidationError):
"""
Marshmallow validation error when dataset column for update does not exist

View File

@ -18,7 +18,6 @@ import logging
from collections import Counter
from typing import Any, Optional
from flask import current_app
from flask_appbuilder.models.sqla import Model
from marshmallow import ValidationError
@ -32,7 +31,6 @@ from superset.datasets.commands.exceptions import (
DatasetColumnNotFoundValidationError,
DatasetColumnsDuplicateValidationError,
DatasetColumnsExistsValidationError,
DatasetEndpointUnsafeValidationError,
DatasetExistsValidationError,
DatasetForbiddenError,
DatasetInvalidError,
@ -43,7 +41,6 @@ from superset.datasets.commands.exceptions import (
DatasetUpdateFailedError,
)
from superset.exceptions import SupersetSecurityException
from superset.utils.urls import is_safe_url
logger = logging.getLogger(__name__)
@ -104,15 +101,6 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand):
self._properties["owners"] = owners
except ValidationError as ex:
exceptions.append(ex)
# Validate default URL safety
default_endpoint = self._properties.get("default_endpoint")
if (
default_endpoint
and not is_safe_url(default_endpoint)
and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"]
):
exceptions.append(DatasetEndpointUnsafeValidationError())
# Validate columns
if columns := self._properties.get("columns"):
self._validate_columns(columns, exceptions)

View File

@ -14,12 +14,10 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import unicodedata
import urllib
from typing import Any
from urllib.parse import urlparse
from flask import current_app, request, url_for
from flask import current_app, url_for
def get_url_host(user_friendly: bool = False) -> str:
@ -52,18 +50,3 @@ def modify_url_query(url: str, **kwargs: Any) -> str:
f"{k}={urllib.parse.quote(str(v[0]))}" for k, v in params.items()
)
return urllib.parse.urlunsplit(parts)
def is_safe_url(url: str) -> bool:
if url.startswith("///"):
return False
try:
ref_url = urlparse(request.host_url)
test_url = urlparse(url)
except ValueError:
return False
if unicodedata.category(url[0])[0] == "C":
return False
if test_url.scheme != ref_url.scheme or ref_url.netloc != test_url.netloc:
return False
return True

View File

@ -121,6 +121,7 @@ FRONTEND_CONF_KEYS = (
"ALERT_REPORTS_DEFAULT_RETENTION",
"ALERT_REPORTS_DEFAULT_WORKING_TIMEOUT",
"NATIVE_FILTER_DEFAULT_ROW_LIMIT",
"PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET",
)
logger = logging.getLogger(__name__)

View File

@ -18,7 +18,7 @@ import json
from collections import Counter
from typing import Any
from flask import current_app, redirect, request
from flask import redirect, request
from flask_appbuilder import expose, permission_name
from flask_appbuilder.api import rison
from flask_appbuilder.security.decorators import has_access, has_access_api
@ -40,7 +40,6 @@ from superset.exceptions import SupersetException, SupersetSecurityException
from superset.models.core import Database
from superset.superset_typing import FlaskResponse
from superset.utils.core import DatasourceType
from superset.utils.urls import is_safe_url
from superset.views.base import (
api,
BaseSupersetView,
@ -82,20 +81,6 @@ class Datasource(BaseSupersetView):
datasource_id = datasource_dict.get("id")
datasource_type = datasource_dict.get("type")
database_id = datasource_dict["database"].get("id")
default_endpoint = datasource_dict["default_endpoint"]
if (
default_endpoint
and not is_safe_url(default_endpoint)
and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"]
):
return json_error_response(
_(
"The submitted URL is not considered safe,"
" only use URLs with the same domain as Superset."
),
status=400,
)
orm_datasource = DatasourceDAO.get_datasource(
db.session, DatasourceType(datasource_type), datasource_id
)

View File

@ -1449,32 +1449,6 @@ class TestDatasetApi(SupersetTestCase):
db.session.delete(ab_user)
db.session.commit()
def test_update_dataset_unsafe_default_endpoint(self):
"""
Dataset API: Test unsafe default endpoint
"""
if backend() == "sqlite":
return
dataset = self.insert_default_dataset()
self.login(username="admin")
uri = f"api/v1/dataset/{dataset.id}"
table_data = {"default_endpoint": "http://www.google.com"}
rv = self.client.put(uri, json=table_data)
data = json.loads(rv.data.decode("utf-8"))
assert rv.status_code == 422
expected_response = {
"message": {
"default_endpoint": [
"The submitted URL is not considered safe,"
" only use URLs with the same domain as Superset."
]
}
}
assert data == expected_response
db.session.delete(dataset)
db.session.commit()
@patch("superset.daos.dataset.DatasetDAO.update")
def test_update_dataset_sqlalchemy_error(self, mock_dao_update):
"""

View File

@ -302,32 +302,6 @@ class TestDatasource(SupersetTestCase):
print(k)
self.assertEqual(resp[k], datasource_post[k])
def test_save_default_endpoint_validation_fail(self):
self.login(username="admin")
tbl_id = self.get_table(name="birth_names").id
datasource_post = get_datasource_post()
datasource_post["id"] = tbl_id
datasource_post["owners"] = [1]
datasource_post["default_endpoint"] = "http://www.google.com"
data = dict(data=json.dumps(datasource_post))
resp = self.client.post("/datasource/save/", data=data)
assert resp.status_code == 400
def test_save_default_endpoint_validation_unsafe(self):
self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = False
self.login(username="admin")
tbl_id = self.get_table(name="birth_names").id
datasource_post = get_datasource_post()
datasource_post["id"] = tbl_id
datasource_post["owners"] = [1]
datasource_post["default_endpoint"] = "http://www.google.com"
data = dict(data=json.dumps(datasource_post))
resp = self.client.post("/datasource/save/", data=data)
assert resp.status_code == 200
self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = True
def test_save_default_endpoint_validation_success(self):
self.login(username="admin")
tbl_id = self.get_table(name="birth_names").id

View File

@ -39,27 +39,3 @@ def test_convert_dashboard_link() -> None:
def test_convert_dashboard_link_with_integer() -> None:
test_url = modify_url_query(EXPLORE_DASHBOARD_LINK, standalone=0)
assert test_url == "http://localhost:9000/superset/dashboard/3/?standalone=0"
@pytest.mark.parametrize(
"url,is_safe",
[
("http://localhost/", True),
("http://localhost/superset/1", True),
("https://localhost/", False),
("https://localhost/superset/1", False),
("localhost/superset/1", False),
("ftp://localhost/superset/1", False),
("http://external.com", False),
("https://external.com", False),
("external.com", False),
("///localhost", False),
("xpto://localhost:[3/1/", False),
],
)
def test_is_safe_url(url: str, is_safe: bool) -> None:
from superset import app
from superset.utils.urls import is_safe_url
with app.test_request_context("/"):
assert is_safe_url(url) == is_safe