diff --git a/.github/actions/setup-backend/action.yml b/.github/actions/setup-backend/action.yml index 2d2f993ff..73345481d 100644 --- a/.github/actions/setup-backend/action.yml +++ b/.github/actions/setup-backend/action.yml @@ -26,11 +26,12 @@ runs: shell: bash run: | if [ "${{ inputs.python-version }}" = "current" ]; then - echo "PYTHON_VERSION=3.10" >> $GITHUB_ENV - elif [ "${{ inputs.python-version }}" = "next" ]; then echo "PYTHON_VERSION=3.11" >> $GITHUB_ENV + elif [ "${{ inputs.python-version }}" = "next" ]; then + # currently disabled in GHA matrixes because of library compatibility issues + echo "PYTHON_VERSION=3.12" >> $GITHUB_ENV elif [ "${{ inputs.python-version }}" = "previous" ]; then - echo "PYTHON_VERSION=3.9" >> $GITHUB_ENV + echo "PYTHON_VERSION=3.10" >> $GITHUB_ENV else echo "PYTHON_VERSION=${{ inputs.python-version }}" >> $GITHUB_ENV fi @@ -43,6 +44,7 @@ runs: run: | if [ "${{ inputs.install-superset }}" = "true" ]; then sudo apt-get update && sudo apt-get -y install libldap2-dev libsasl2-dev + pip install --upgrade pip setuptools wheel uv if [ "${{ inputs.requirements-type }}" = "dev" ]; then diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index e102e630c..640468d2a 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["current", "next", "previous"] + python-version: ["current", "previous"] steps: - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" uses: actions/checkout@v4 diff --git a/.github/workflows/superset-python-integrationtest.yml b/.github/workflows/superset-python-integrationtest.yml index a511882e6..3a7488966 100644 --- a/.github/workflows/superset-python-integrationtest.yml +++ b/.github/workflows/superset-python-integrationtest.yml @@ -77,7 +77,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["current", "next", "previous"] + python-version: ["current", "previous"] env: PYTHONPATH: ${{ github.workspace }} SUPERSET_CONFIG: tests.integration_tests.superset_test_config diff --git a/.github/workflows/superset-python-unittest.yml b/.github/workflows/superset-python-unittest.yml index c7bb82a73..c4cef8de2 100644 --- a/.github/workflows/superset-python-unittest.yml +++ b/.github/workflows/superset-python-unittest.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - python-version: ["current", "next"] + python-version: ["previous", "current"] env: PYTHONPATH: ${{ github.workspace }} steps: diff --git a/Dockerfile b/Dockerfile index 7297ad139..76983ae77 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ ###################################################################### # Node stage to deal with static asset construction ###################################################################### -ARG PY_VER=3.10-slim-bookworm +ARG PY_VER=3.11-slim-bookworm # If BUILDPLATFORM is null, set it to 'amd64' (or leave as is otherwise). ARG BUILDPLATFORM=${BUILDPLATFORM:-amd64} diff --git a/UPDATING.md b/UPDATING.md index b680f2701..ee9ec1fd9 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -32,6 +32,7 @@ assists people when migrating to a new version. - [31262](https://github.com/apache/superset/pull/31262) NOTE: deprecated `pylint` in favor of `ruff` as our only python linter. Only affect development workflows positively (not the release itself). It should cover most important rules, be much faster, but some things linting rules that were enforced before may not be enforce in the exact same way as before. - [31173](https://github.com/apache/superset/pull/31173) Modified `fetch_csrf_token` to align with HTTP standards, particularly regarding how cookies are handled. If you encounter any issues related to CSRF functionality, please report them as a new issue and reference this PR for context. - [31385](https://github.com/apache/superset/pull/31385) Significant docker refactor, reducing access levels for the `superset` user, streamlining layer building, ... +- [31503](https://github.com/apache/superset/pull/31503) Deprecating python 3.9.x support, 3.11 is now the recommended version and 3.10 is still supported over the Superset 5.0 lifecycle. ### Potential Downtime diff --git a/pyproject.toml b/pyproject.toml index 453134f9a..715e0f3db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,13 +24,12 @@ name = "apache-superset" description = "A modern, enterprise-ready business intelligence web application" readme = "README.md" dynamic = ["version", "scripts", "entry-points"] -requires-python = ">=3.9" +requires-python = ">=3.10" license = { file="LICENSE.txt" } authors = [ { name = "Apache Software Foundation", email = "dev@superset.apache.org" }, ] classifiers = [ - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] @@ -67,7 +66,7 @@ dependencies = [ "markdown>=3.0", "msgpack>=1.0.0, <1.1", "nh3>=0.2.11, <0.3", - "numpy==1.23.5", + "numpy>1.23.5, <2", "packaging", # -------------------------- # pandas and related (wanting pandas[performance] without numba as it's 100+MB and not needed) @@ -275,8 +274,8 @@ exclude = [ line-length = 88 indent-width = 4 -# Assume Python 3.9 -target-version = "py39" +# Assume Python 3.10 +target-version = "py310" [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. diff --git a/requirements/base.in b/requirements/base.in index 17f5379cc..3cf359215 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -23,8 +23,3 @@ numexpr>=2.9.0 # 5.0.0 has a sensitive deprecation used in other libs # -> https://github.com/aio-libs/async-timeout/blob/master/CHANGES.rst#500-2024-10-31 async_timeout>=4.0.0,<5.0.0 - -# playwright requires greenlet==3.0.3 -# submitted a PR to relax deps in 11/2024 -# https://github.com/microsoft/playwright-python/pull/2669 -greenlet==3.0.3 diff --git a/requirements/base.txt b/requirements/base.txt index 821655c17..554094259 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -153,7 +153,6 @@ google-auth==2.36.0 # via shillelagh greenlet==3.0.3 # via - # -r requirements/base.in # apache-superset (pyproject.toml) # shillelagh # sqlalchemy @@ -230,7 +229,7 @@ nh3==0.2.19 # via apache-superset (pyproject.toml) numexpr==2.10.2 # via -r requirements/base.in -numpy==1.23.5 +numpy==1.26.4 # via # apache-superset (pyproject.toml) # bottleneck diff --git a/requirements/development.txt b/requirements/development.txt index 3b2203f46..234342873 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -463,7 +463,7 @@ nh3==0.2.19 # apache-superset nodeenv==1.8.0 # via pre-commit -numpy==1.23.5 +numpy==1.26.4 # via # -c requirements/base.txt # apache-superset diff --git a/superset/commands/dashboard/export.py b/superset/commands/dashboard/export.py index 93cc490ad..719aed6be 100644 --- a/superset/commands/dashboard/export.py +++ b/superset/commands/dashboard/export.py @@ -83,7 +83,7 @@ def append_charts(position: dict[str, Any], charts: set[Slice]) -> dict[str, Any "parents": ["ROOT_ID", "GRID_ID"], } - for chart_hash, chart in zip(chart_hashes, charts): + for chart_hash, chart in zip(chart_hashes, charts, strict=False): position[chart_hash] = { "children": [], "id": chart_hash, diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index bb2767871..2acc7b12b 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1907,6 +1907,7 @@ class SqlaTable( for method, perms in zip( (SqlaTable.perm, SqlaTable.schema_perm, SqlaTable.catalog_perm), (permissions, schema_perms, catalog_perms), + strict=False, ) if perms ] diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py index 9491ff588..55e118740 100644 --- a/superset/db_engine_specs/hive.py +++ b/superset/db_engine_specs/hive.py @@ -440,7 +440,7 @@ class HiveEngineSpec(PrestoEngineSpec): # table is not partitioned return None if values is not None and columns is not None: - for col_name, value in zip(col_names, values): + for col_name, value in zip(col_names, values, strict=False): for clm in columns: if clm.get("name") == col_name: query = query.where(Column(col_name) == value) diff --git a/superset/db_engine_specs/ocient.py b/superset/db_engine_specs/ocient.py index e740ca938..a7b97ed69 100644 --- a/superset/db_engine_specs/ocient.py +++ b/superset/db_engine_specs/ocient.py @@ -348,7 +348,9 @@ class OcientEngineSpec(BaseEngineSpec): rows = [ tuple( sanitize_func(val) - for sanitize_func, val in zip(sanitization_functions, row) + for sanitize_func, val in zip( + sanitization_functions, row, strict=False + ) ) for row in rows ] diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py index 3a3dadbbd..6f27503a2 100644 --- a/superset/db_engine_specs/presto.py +++ b/superset/db_engine_specs/presto.py @@ -545,7 +545,7 @@ class PrestoBaseEngineSpec(BaseEngineSpec, metaclass=ABCMeta): column.get("column_name"): column.get("type") for column in columns or [] } - for col_name, value in zip(col_names, values): + for col_name, value in zip(col_names, values, strict=False): col_type = column_type_by_name.get(col_name) if isinstance(col_type, str): @@ -1240,7 +1240,7 @@ class PrestoEngineSpec(PrestoBaseEngineSpec): if isinstance(values, str): values = cast(Optional[list[Any]], destringify(values)) row[name] = values - for value, col in zip(values or [], expanded): + for value, col in zip(values or [], expanded, strict=False): row[col["column_name"]] = value data = [ @@ -1271,7 +1271,7 @@ class PrestoEngineSpec(PrestoBaseEngineSpec): metadata["partitions"] = { "cols": sorted(indexes[0].get("column_names", [])), - "latest": dict(zip(col_names, latest_parts)), + "latest": dict(zip(col_names, latest_parts, strict=False)), "partitionQuery": cls._partition_query( table=table, indexes=indexes, diff --git a/superset/db_engine_specs/redshift.py b/superset/db_engine_specs/redshift.py index 8b5a35759..d1a09cf78 100644 --- a/superset/db_engine_specs/redshift.py +++ b/superset/db_engine_specs/redshift.py @@ -131,7 +131,7 @@ class RedshiftEngineSpec(BasicParametersMixin, PostgresBaseEngineSpec): # uses the max size for redshift nvarchar(65335) # the default object and string types create a varchar(256) col_name: NVARCHAR(length=65535) - for col_name, type in zip(df.columns, df.dtypes) + for col_name, type in zip(df.columns, df.dtypes, strict=False) if isinstance(type, pd.StringDtype) } diff --git a/superset/db_engine_specs/trino.py b/superset/db_engine_specs/trino.py index e4567082e..c7ae2b0f8 100644 --- a/superset/db_engine_specs/trino.py +++ b/superset/db_engine_specs/trino.py @@ -111,7 +111,7 @@ class TrinoEngineSpec(PrestoBaseEngineSpec): } ) ), - "latest": dict(zip(col_names, latest_parts)), + "latest": dict(zip(col_names, latest_parts, strict=False)), "partitionQuery": cls._partition_query( table=table, indexes=indexes, diff --git a/superset/extensions/metadb.py b/superset/extensions/metadb.py index 3a95ab5d7..8409aed24 100644 --- a/superset/extensions/metadb.py +++ b/superset/extensions/metadb.py @@ -412,7 +412,7 @@ class SupersetShillelaghAdapter(Adapter): connection = engine.connect() rows = connection.execute(query) for i, row in enumerate(rows): - data = dict(zip(self.columns, row)) + data = dict(zip(self.columns, row, strict=False)) data["rowid"] = data[self._rowid] if self._rowid else i yield data diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 3d8f109c9..1cad46ca6 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -1976,7 +1976,7 @@ class ExploreMixin: # pylint: disable=too-many-public-methods self.make_orderby_compatible(select_exprs, orderby_exprs) - for col, (orig_col, ascending) in zip(orderby_exprs, orderby): # noqa: B007 + for col, (orig_col, ascending) in zip(orderby_exprs, orderby, strict=False): # noqa: B007 if not db_engine_spec.allows_alias_in_orderby and isinstance(col, Label): # if engine does not allow using SELECT alias in ORDER BY # revert to the underlying column diff --git a/superset/result_set.py b/superset/result_set.py index eca00de4f..f6daa4b99 100644 --- a/superset/result_set.py +++ b/superset/result_set.py @@ -123,7 +123,9 @@ class SupersetResultSet: # fix cursor descriptor with the deduped names deduped_cursor_desc = [ tuple([column_name, *list(description)[1:]]) # noqa: C409 - for column_name, description in zip(column_names, cursor_description) + for column_name, description in zip( + column_names, cursor_description, strict=False + ) ] # generate numpy structured array dtype diff --git a/superset/utils/excel.py b/superset/utils/excel.py index 602549975..d34446832 100644 --- a/superset/utils/excel.py +++ b/superset/utils/excel.py @@ -56,7 +56,7 @@ def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any: def apply_column_types( df: pd.DataFrame, column_types: list[GenericDataType] ) -> pd.DataFrame: - for column, column_type in zip(df.columns, column_types): + for column, column_type in zip(df.columns, column_types, strict=False): if column_type == GenericDataType.NUMERIC: try: df[column] = pd.to_numeric(df[column]) diff --git a/superset/utils/mock_data.py b/superset/utils/mock_data.py index 88c9d5a57..b156273dc 100644 --- a/superset/utils/mock_data.py +++ b/superset/utils/mock_data.py @@ -221,8 +221,11 @@ def get_column_objects(columns: list[ColumnInfo]) -> list[Column]: def generate_data(columns: list[ColumnInfo], num_rows: int) -> list[dict[str, Any]]: keys = [column["name"] for column in columns] return [ - dict(zip(keys, row)) - for row in zip(*[generate_column_data(column, num_rows) for column in columns]) + dict(zip(keys, row, strict=False)) + for row in zip( + *[generate_column_data(column, num_rows) for column in columns], + strict=False, + ) ] diff --git a/superset/utils/pandas_postprocessing/compare.py b/superset/utils/pandas_postprocessing/compare.py index 64442280b..22b345bb3 100644 --- a/superset/utils/pandas_postprocessing/compare.py +++ b/superset/utils/pandas_postprocessing/compare.py @@ -59,7 +59,7 @@ def compare( # pylint: disable=too-many-arguments if len(source_columns) == 0: return df - for s_col, c_col in zip(source_columns, compare_columns): + for s_col, c_col in zip(source_columns, compare_columns, strict=False): s_df = df.loc[:, [s_col]] s_df.rename(columns={s_col: "__intermediate"}, inplace=True) c_df = df.loc[:, [c_col]] diff --git a/superset/utils/pandas_postprocessing/geography.py b/superset/utils/pandas_postprocessing/geography.py index 79046cb71..c5f46cd49 100644 --- a/superset/utils/pandas_postprocessing/geography.py +++ b/superset/utils/pandas_postprocessing/geography.py @@ -40,7 +40,7 @@ def geohash_decode( try: lonlat_df = DataFrame() lonlat_df["latitude"], lonlat_df["longitude"] = zip( - *df[geohash].apply(geohash_lib.decode) + *df[geohash].apply(geohash_lib.decode), strict=False ) return _append_columns( df, lonlat_df, {"latitude": latitude, "longitude": longitude} @@ -109,7 +109,7 @@ def geodetic_parse( geodetic_df["latitude"], geodetic_df["longitude"], geodetic_df["altitude"], - ) = zip(*df[geodetic].apply(_parse_location)) + ) = zip(*df[geodetic].apply(_parse_location), strict=False) columns = {"latitude": latitude, "longitude": longitude} if altitude: columns["altitude"] = altitude diff --git a/superset/utils/pandas_postprocessing/histogram.py b/superset/utils/pandas_postprocessing/histogram.py index 74fc68e22..f55f0d076 100644 --- a/superset/utils/pandas_postprocessing/histogram.py +++ b/superset/utils/pandas_postprocessing/histogram.py @@ -71,7 +71,7 @@ def histogram( if len(groupby) == 0: # without grouping - hist_dict = dict(zip(bin_edges_str, hist_values(df[column]))) + hist_dict = dict(zip(bin_edges_str, hist_values(df[column]), strict=False)) histogram_df = DataFrame(hist_dict, index=[0]) else: # with grouping diff --git a/superset/viz.py b/superset/viz.py index 7e13402f6..a42ce9420 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -1483,6 +1483,7 @@ class MapboxViz(BaseViz): df[self.form_data.get("all_columns_y")], metric_col, point_radius_col, + strict=False, ) ], } @@ -1606,6 +1607,7 @@ class BaseDeckGLViz(BaseViz): zip( pd.to_numeric(df[spatial.get("lonCol")], errors="coerce"), pd.to_numeric(df[spatial.get("latCol")], errors="coerce"), + strict=False, ) ) elif spatial.get("type") == "delimited": diff --git a/tests/integration_tests/core_tests.py b/tests/integration_tests/core_tests.py index ec2f0d6bd..af59118aa 100644 --- a/tests/integration_tests/core_tests.py +++ b/tests/integration_tests/core_tests.py @@ -679,7 +679,9 @@ class TestCore(SupersetTestCase): count_ds = series["values"] if series["key"] == "COUNT(name)": count_name = series["values"] - for expected, actual_ds, actual_name in zip(resp["data"], count_ds, count_name): + for expected, actual_ds, actual_name in zip( + resp["data"], count_ds, count_name, strict=False + ): assert expected["count_name"] == actual_name["y"] assert expected["count_ds"] == actual_ds["y"] diff --git a/tests/integration_tests/db_engine_specs/presto_tests.py b/tests/integration_tests/db_engine_specs/presto_tests.py index 6ee639552..c57bec880 100644 --- a/tests/integration_tests/db_engine_specs/presto_tests.py +++ b/tests/integration_tests/db_engine_specs/presto_tests.py @@ -87,7 +87,7 @@ class TestPrestoDbEngineSpec(TestDbEngineSpec): inspector.bind.execute.return_value.fetchall = mock.Mock(return_value=[row]) results = PrestoEngineSpec.get_columns(inspector, Table("", "")) assert len(expected_results) == len(results) - for expected_result, result in zip(expected_results, results): + for expected_result, result in zip(expected_results, results, strict=False): assert expected_result[0] == result["column_name"] assert expected_result[1] == str(result["type"]) @@ -191,7 +191,9 @@ class TestPrestoDbEngineSpec(TestDbEngineSpec): "label": 'column."quoted.nested obj"', }, ] - for actual_result, expected_result in zip(actual_results, expected_results): + for actual_result, expected_result in zip( + actual_results, expected_results, strict=False + ): assert actual_result.element.name == expected_result["column_name"] assert actual_result.name == expected_result["label"] diff --git a/tests/integration_tests/dict_import_export_tests.py b/tests/integration_tests/dict_import_export_tests.py index bff144630..c9f1436f0 100644 --- a/tests/integration_tests/dict_import_export_tests.py +++ b/tests/integration_tests/dict_import_export_tests.py @@ -80,7 +80,8 @@ class TestDictImportExport(SupersetTestCase): "id": id, "params": json.dumps(params), "columns": [ - {"column_name": c, "uuid": u} for c, u in zip(cols_names, cols_uuids) + {"column_name": c, "uuid": u} + for c, u in zip(cols_names, cols_uuids, strict=False) ], "metrics": [{"metric_name": c, "expression": ""} for c in metric_names], } @@ -88,7 +89,7 @@ class TestDictImportExport(SupersetTestCase): table = SqlaTable( id=id, schema=schema, table_name=name, params=json.dumps(params) ) - for col_name, uuid in zip(cols_names, cols_uuids): + for col_name, uuid in zip(cols_names, cols_uuids, strict=False): table.columns.append(TableColumn(column_name=col_name, uuid=uuid)) for metric_name in metric_names: table.metrics.append(SqlMetric(metric_name=metric_name, expression="")) diff --git a/tests/integration_tests/import_export_tests.py b/tests/integration_tests/import_export_tests.py index d4acc0103..89b295708 100644 --- a/tests/integration_tests/import_export_tests.py +++ b/tests/integration_tests/import_export_tests.py @@ -153,7 +153,7 @@ class TestImportExport(SupersetTestCase): assert len(expected_dash.slices) == len(actual_dash.slices) expected_slices = sorted(expected_dash.slices, key=lambda s: s.slice_name or "") actual_slices = sorted(actual_dash.slices, key=lambda s: s.slice_name or "") - for e_slc, a_slc in zip(expected_slices, actual_slices): + for e_slc, a_slc in zip(expected_slices, actual_slices, strict=False): self.assert_slice_equals(e_slc, a_slc) if check_position: assert expected_dash.position_json == actual_dash.position_json @@ -212,7 +212,7 @@ class TestImportExport(SupersetTestCase): """ expected_slices = sorted(expected_dash.slices, key=lambda s: s.slice_name or "") actual_slices = sorted(actual_dash.slices, key=lambda s: s.slice_name or "") - for e_slc, a_slc in zip(expected_slices, actual_slices): + for e_slc, a_slc in zip(expected_slices, actual_slices, strict=False): params = a_slc.params_dict assert e_slc.datasource.name == params["datasource_name"] assert e_slc.datasource.schema == params["schema"] diff --git a/tests/unit_tests/sql_parse_tests.py b/tests/unit_tests/sql_parse_tests.py index 3b44c1c2c..9c814a0f4 100644 --- a/tests/unit_tests/sql_parse_tests.py +++ b/tests/unit_tests/sql_parse_tests.py @@ -1507,7 +1507,7 @@ def test_insert_rls_as_subquery( else candidate_table.table ) for left, right in zip( - candidate_table_name.split(".")[::-1], table.split(".")[::-1] + candidate_table_name.split(".")[::-1], table.split(".")[::-1], strict=False ): if left != right: return None @@ -1719,7 +1719,9 @@ def test_insert_rls_in_predicate( Return the RLS ``condition`` if ``candidate`` matches ``table``. """ # compare ignoring schema - for left, right in zip(str(candidate).split(".")[::-1], table.split(".")[::-1]): + for left, right in zip( + str(candidate).split(".")[::-1], table.split(".")[::-1], strict=False + ): if left != right: return None return condition