diff --git a/.pylintrc b/.pylintrc index 848767fe5..b39335c56 100644 --- a/.pylintrc +++ b/.pylintrc @@ -300,7 +300,7 @@ ignore-mixin-members=yes # (useful for modules/projects where namespaces are manipulated during runtime # and thus existing member attributes cannot be deduced by static analysis. It # supports qualified module names, as well as Unix pattern matching. -ignored-modules=numpy,pandas,alembic.op,sqlalchemy,alembic.context,flask_appbuilder.security.sqla.PermissionView.role,flask_appbuilder.Model.metadata,flask_appbuilder.Base.metadata,distutils +ignored-modules=numpy,pandas,alembic.op,sqlalchemy,alembic.context,flask_appbuilder.security.sqla.PermissionView.role,flask_appbuilder.Model.metadata,flask_appbuilder.Base.metadata # List of class names for which member attributes should not be checked (useful # for classes with dynamically set attributes). This supports the use of diff --git a/requirements/base.txt b/requirements/base.txt index 720a77d57..34478f105 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -193,6 +193,7 @@ ordered-set==4.1.0 # via flask-limiter packaging==23.1 # via + # apache-superset # deprecation # limits pandas==1.5.3 diff --git a/setup.py b/setup.py index bc99c2695..b66b87181 100644 --- a/setup.py +++ b/setup.py @@ -101,6 +101,7 @@ setup( "msgpack>=1.0.0, <1.1", "nh3>=0.2.11, <0.3", "numpy==1.23.5", + "packaging", "pandas>=1.5.3, <1.6", "parsedatetime", "pgsanity", diff --git a/superset/db_engine_specs/elasticsearch.py b/superset/db_engine_specs/elasticsearch.py index c96d0b36a..934aa0bb0 100644 --- a/superset/db_engine_specs/elasticsearch.py +++ b/superset/db_engine_specs/elasticsearch.py @@ -16,9 +16,9 @@ # under the License. import logging from datetime import datetime -from distutils.version import StrictVersion from typing import Any, Dict, Optional, Type +from packaging.version import Version from sqlalchemy import types from superset.db_engine_specs.base import BaseEngineSpec @@ -79,9 +79,7 @@ class ElasticSearchEngineSpec(BaseEngineSpec): # pylint: disable=abstract-metho supports_dttm_parse = False try: if es_version: - supports_dttm_parse = StrictVersion(es_version) >= StrictVersion( - "7.8" - ) + supports_dttm_parse = Version(es_version) >= Version("7.8") except Exception as ex: # pylint: disable=broad-except logger.error("Unexpected error while convert es_version", exc_info=True) logger.exception(ex) diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index 0889dd653..8e2116ccd 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -23,7 +23,6 @@ import time from abc import ABCMeta from collections import defaultdict, deque from datetime import datetime -from distutils.version import StrictVersion from textwrap import dedent from typing import ( Any, @@ -43,6 +42,7 @@ import pandas as pd import simplejson as json from flask import current_app from flask_babel import gettext as __, lazy_gettext as _ +from packaging.version import Version from sqlalchemy import Column, literal_column, types from sqlalchemy.engine.base import Engine from sqlalchemy.engine.reflection import Inspector @@ -470,8 +470,7 @@ class PrestoBaseEngineSpec(BaseEngineSpec, metaclass=ABCMeta): # Default to the new syntax if version is unset. partition_select_clause = ( f'SELECT * FROM "{table_name}$partitions"' - if not presto_version - or StrictVersion(presto_version) >= StrictVersion("0.199") + if not presto_version or Version(presto_version) >= Version("0.199") else f"SHOW PARTITIONS FROM {table_name}" ) @@ -705,7 +704,7 @@ class PrestoEngineSpec(PrestoBaseEngineSpec): @classmethod def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool: version = extra.get("version") - return version is not None and StrictVersion(version) >= StrictVersion("0.319") + return version is not None and Version(version) >= Version("0.319") @classmethod def update_impersonation_config( diff --git a/superset/utils/core.py b/superset/utils/core.py index 4569031f3..8451eaaa6 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -38,7 +38,6 @@ import zlib from contextlib import contextmanager from dataclasses import dataclass from datetime import date, datetime, time, timedelta -from distutils.util import strtobool from email.mime.application import MIMEApplication from email.mime.image import MIMEImage from email.mime.multipart import MIMEMultipart @@ -1191,6 +1190,7 @@ def merge_extra_filters(form_data: Dict[str, Any]) -> None: "__time_grain": "time_grain_sqla", "__granularity": "granularity", } + # Grab list of existing filters 'keyed' on the column and operator def get_filter_key(f: Dict[str, Any]) -> str: @@ -1788,7 +1788,7 @@ def indexed( def is_test() -> bool: - return strtobool(os.environ.get("SUPERSET_TESTENV", "false")) # type: ignore + return parse_boolean_string(os.environ.get("SUPERSET_TESTENV", "false")) def get_time_filter_status( @@ -1952,10 +1952,7 @@ def parse_boolean_string(bool_str: Optional[str]) -> bool: """ if bool_str is None: return False - try: - return bool(strtobool(bool_str.lower())) - except ValueError: - return False + return bool_str.lower() in ("y", "Y", "yes", "True", "t", "true", "On", "on", "1") def apply_max_row_limit( diff --git a/superset/utils/pandas_postprocessing/flatten.py b/superset/utils/pandas_postprocessing/flatten.py index 1026164e4..da9954ef1 100644 --- a/superset/utils/pandas_postprocessing/flatten.py +++ b/superset/utils/pandas_postprocessing/flatten.py @@ -15,10 +15,10 @@ # specific language governing permissions and limitations # under the License. -from typing import Sequence, Union +from collections.abc import Iterable +from typing import Any, Sequence, Union import pandas as pd -from numpy.distutils.misc_util import is_sequence from superset.utils.pandas_postprocessing.utils import ( _is_multi_index_on_columns, @@ -27,6 +27,13 @@ from superset.utils.pandas_postprocessing.utils import ( ) +def is_sequence(seq: Any) -> bool: + if isinstance(seq, str): + return False + + return isinstance(seq, Iterable) + + def flatten( df: pd.DataFrame, reset_index: bool = True, @@ -85,7 +92,7 @@ def flatten( _columns = [] for series in df.columns.to_flat_index(): _cells = [] - for cell in series if is_sequence(series) else [series]: # type: ignore + for cell in series if is_sequence(series) else [series]: if pd.notnull(cell): # every cell should be converted to string and escape comma _cells.append(escape_separator(str(cell))) diff --git a/tests/unit_tests/utils/test_core.py b/tests/unit_tests/utils/test_core.py index 6845bb2fc..363698315 100644 --- a/tests/unit_tests/utils/test_core.py +++ b/tests/unit_tests/utils/test_core.py @@ -15,11 +15,17 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from typing import Any, Dict +import os +from typing import Any, Dict, Optional import pytest -from superset.utils.core import QueryObjectFilterClause, remove_extra_adhoc_filters +from superset.utils.core import ( + is_test, + parse_boolean_string, + QueryObjectFilterClause, + remove_extra_adhoc_filters, +) ADHOC_FILTER: QueryObjectFilterClause = { "col": "foo", @@ -84,3 +90,46 @@ def test_remove_extra_adhoc_filters( ) -> None: remove_extra_adhoc_filters(original) assert expected == original + + +def test_is_test(): + orig_value = os.getenv("SUPERSET_TESTENV") + + os.environ["SUPERSET_TESTENV"] = "true" + assert is_test() + os.environ["SUPERSET_TESTENV"] = "false" + assert not is_test() + os.environ["SUPERSET_TESTENV"] = "" + assert not is_test() + + if orig_value is not None: + os.environ["SUPERSET_TESTENV"] = orig_value + + +@pytest.mark.parametrize( + "test_input,expected", + [ + ("y", True), + ("Y", True), + ("yes", True), + ("True", True), + ("t", True), + ("true", True), + ("On", True), + ("on", True), + ("1", True), + ("n", False), + ("N", False), + ("no", False), + ("False", False), + ("f", False), + ("false", False), + ("Off", False), + ("off", False), + ("0", False), + ("foo", False), + (None, False), + ], +) +def test_parse_boolean_string(test_input: Optional[str], expected: bool): + assert parse_boolean_string(test_input) == expected