diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index 807886849..4f6e1910f 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -32,7 +32,7 @@ from superset.charts.dao import ChartDAO from superset.common.chart_data import ChartDataResultFormat from superset.common.db_query_status import QueryStatus from superset.common.query_actions import get_query_results -from superset.common.utils import dataframe_utils as df_utils +from superset.common.utils import dataframe_utils from superset.common.utils.query_cache_manager import QueryCacheManager from superset.connectors.base.models import BaseDatasource from superset.constants import CacheRegion @@ -231,7 +231,7 @@ class QueryContextProcessor: ) if self.enforce_numerical_metrics: - df_utils.df_metrics_to_num(df, query_object) + dataframe_utils.df_metrics_to_num(df, query_object) df.replace([np.inf, -np.inf], np.nan, inplace=True) @@ -322,9 +322,7 @@ class QueryContextProcessor: # multi-dimensional charts granularity = query_object.granularity index = granularity if granularity in df.columns else DTTM_ALIAS - if not pd.api.types.is_datetime64_any_dtype( - offset_metrics_df.get(index) - ): + if not dataframe_utils.is_datetime_series(offset_metrics_df.get(index)): raise QueryObjectValidationError( _( "A time column must be specified " @@ -337,7 +335,7 @@ class QueryContextProcessor: ) # df left join `offset_metrics_df` - offset_df = df_utils.left_join_df( + offset_df = dataframe_utils.left_join_df( left_df=df, right_df=offset_metrics_df, join_keys=join_keys, diff --git a/superset/common/utils/dataframe_utils.py b/superset/common/utils/dataframe_utils.py index a0216ad54..4dd62e3b5 100644 --- a/superset/common/utils/dataframe_utils.py +++ b/superset/common/utils/dataframe_utils.py @@ -16,7 +16,8 @@ # under the License. 
def is_datetime_series(series: Any) -> bool:
    """Return whether ``series`` is a pandas Series of date-like values.

    A series qualifies when it has a ``datetime64`` dtype (naive or
    tz-aware), or when every non-null element is a ``datetime.date``
    instance (which also covers ``datetime.datetime`` and
    ``pd.Timestamp``, both subclasses of ``date``).

    :param series: the object to inspect; anything other than a
        ``pd.Series`` (including ``None`` and ``pd.DataFrame``) yields False
    :returns: True if the series holds temporal data, False otherwise;
        an empty or all-null series is never considered temporal
    """
    if not isinstance(series, pd.Series):
        return False

    # An empty or entirely null column gives no evidence of being temporal.
    if series.isnull().all():
        return False

    if pd.api.types.is_datetime64_any_dtype(series):
        return True

    # Object-dtype columns may mix dates with any missing-value marker
    # (None, NaN, NaT, pd.NA); drop them all rather than special-casing None,
    # so the check agrees with the isnull() test above.
    return all(isinstance(value, datetime.date) for value in series.dropna())
def test_is_datetime_series():
    """Exercise ``dataframe_utils.is_datetime_series`` on rejected and accepted inputs."""
    # Inputs that are not a Series at all.
    assert not dataframe_utils.is_datetime_series(None)
    assert not dataframe_utils.is_datetime_series(pd.DataFrame({"foo": [1]}))

    # Series whose values are not dates.
    assert not dataframe_utils.is_datetime_series(pd.Series([1, 2, 3]))
    assert not dataframe_utils.is_datetime_series(pd.Series(["1", "2", "3"]))

    # Empty and all-null series. The dtype is given explicitly because a bare
    # pd.Series() warns on pandas 1.x and its default dtype varies by version.
    assert not dataframe_utils.is_datetime_series(pd.Series(dtype=object))
    assert not dataframe_utils.is_datetime_series(pd.Series([None, None]))

    # Python date objects, with and without a missing value.
    assert dataframe_utils.is_datetime_series(
        pd.Series([datetime.date(2018, 1, 1), datetime.date(2018, 1, 2), None])
    )
    assert dataframe_utils.is_datetime_series(
        pd.Series([datetime.date(2018, 1, 1), datetime.date(2018, 1, 2)])
    )

    # Python datetime objects, with and without a missing value.
    assert dataframe_utils.is_datetime_series(
        pd.Series([datetime.datetime(2018, 1, 1), datetime.datetime(2018, 1, 2), None])
    )
    assert dataframe_utils.is_datetime_series(
        pd.Series([datetime.datetime(2018, 1, 1), datetime.datetime(2018, 1, 2)])
    )

    # Series derived from a DatetimeIndex.
    assert dataframe_utils.is_datetime_series(
        pd.date_range(datetime.date(2018, 1, 1), datetime.date(2018, 2, 1)).to_series()
    )
    assert dataframe_utils.is_datetime_series(
        pd.date_range(
            datetime.datetime(2018, 1, 1), datetime.datetime(2018, 2, 1)
        ).to_series()
    )