fix(sqllab/charts): handle out-of-bounds timestamps when casting from timestamp[us] to timestamp[ns] (#18873)

* Fix out-of-bounds timestamp errors in SQL Lab and charts when casting from timestamp[us] to timestamp[ns]

* Add unit tests

* Lint changes and parameter variable rename

* Fix linting
This commit is contained in:
Yeachan Park 2022-05-04 08:47:12 +02:00 committed by GitHub
parent 24e4ab6a1f
commit 8b72354654
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 63 additions and 3 deletions

View File

@ -174,7 +174,10 @@ class SupersetResultSet:
@staticmethod
def convert_table_to_df(table: pa.Table) -> pd.DataFrame:
    """Convert a pyarrow table into a pandas DataFrame.

    The conversion is first attempted with pandas' native timestamp dtype.
    If pyarrow raises ``ArrowInvalid`` (for example when casting
    ``timestamp[us]`` to ``timestamp[ns]`` would produce an out-of-bounds
    timestamp), the conversion is retried with ``timestamp_as_object=True``
    so that timestamps are returned as Python ``datetime`` objects instead.

    :param table: the pyarrow table to convert
    :returns: the equivalent pandas DataFrame; integer columns containing
        nulls are kept as object columns (``integer_object_nulls=True``)
    """
    try:
        return table.to_pandas(integer_object_nulls=True)
    except pa.lib.ArrowInvalid:
        # Fall back to plain datetime objects, which are not limited to the
        # nanosecond-resolution range of pandas timestamps.
        return table.to_pandas(
            integer_object_nulls=True, timestamp_as_object=True
        )
@staticmethod
def first_nonempty(items: List[Any]) -> Any:

View File

@ -1746,14 +1746,14 @@ def normalize_dttm_col(
# Column is formatted as a numeric value
unit = timestamp_format.replace("epoch_", "")
df[DTTM_ALIAS] = pd.to_datetime(
dttm_col, utc=False, unit=unit, origin="unix"
dttm_col, utc=False, unit=unit, origin="unix", errors="coerce"
)
else:
# Column has already been formatted as a timestamp.
df[DTTM_ALIAS] = dttm_col.apply(pd.Timestamp)
else:
df[DTTM_ALIAS] = pd.to_datetime(
df[DTTM_ALIAS], utc=False, format=timestamp_format
df[DTTM_ALIAS], utc=False, format=timestamp_format, errors="coerce"
)
if offset:
df[DTTM_ALIAS] += timedelta(hours=offset)

View File

@ -1093,3 +1093,8 @@ class TestUtils(SupersetTestCase):
# test numeric epoch_ms format
df = pd.DataFrame([{"__timestamp": ts.timestamp() * 1000, "a": 1}])
assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts
# test that out-of-bounds timestamps are coerced to NaT (null) instead of
# raising an error
df = pd.DataFrame([{"__timestamp": "1677-09-21 00:00:00", "a": 1}])
assert pd.isnull(normalize_col(df, None, 0, None)[DTTM_ALIAS][0])

View File

@ -15,6 +15,11 @@
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument, import-outside-toplevel
from datetime import datetime
import pytest
from pandas import Timestamp
from superset.dataframe import df_to_records
from superset.superset_typing import DbapiDescription
@ -53,3 +58,50 @@ def test_js_max_int(app_context: None) -> None:
{"a": 1, "b": "1239162456494753670", "c": "c1"},
{"a": 2, "b": 100, "c": "c2"},
]
@pytest.mark.parametrize(
    "input_, expected",
    [
        pytest.param(
            [
                (datetime.strptime("1677-09-22 00:12:43", "%Y-%m-%d %H:%M:%S"), 1),
                (datetime.strptime("2262-04-11 23:47:17", "%Y-%m-%d %H:%M:%S"), 2),
            ],
            [
                {
                    "a": datetime.strptime("1677-09-22 00:12:43", "%Y-%m-%d %H:%M:%S"),
                    "b": 1,
                },
                {
                    "a": datetime.strptime("2262-04-11 23:47:17", "%Y-%m-%d %H:%M:%S"),
                    "b": 2,
                },
            ],
            id="timestamp conversion fail",
        ),
        pytest.param(
            [
                (datetime.strptime("1677-09-22 00:12:44", "%Y-%m-%d %H:%M:%S"), 1),
                (datetime.strptime("2262-04-11 23:47:16", "%Y-%m-%d %H:%M:%S"), 2),
            ],
            [
                {"a": Timestamp("1677-09-22 00:12:44"), "b": 1},
                {"a": Timestamp("2262-04-11 23:47:16"), "b": 2},
            ],
            id="timestamp conversion success",
        ),
    ],
)
def test_max_pandas_timestamp(input_, expected) -> None:
    """Check result-set conversion for timestamps near the pandas limits.

    In the "fail" case the expected records hold plain ``datetime`` objects;
    in the "success" case they hold ``pandas.Timestamp`` values.
    """
    # Imported lazily, as elsewhere in this test module.
    from superset.db_engine_specs import BaseEngineSpec
    from superset.result_set import SupersetResultSet

    # Two columns: a datetime column "a" and an integer column "b".
    description: DbapiDescription = [
        ("a", "datetime", None, None, None, None, False),
        ("b", "int", None, None, None, None, False),
    ]

    frame = SupersetResultSet(input_, description, BaseEngineSpec).to_pandas_df()
    assert df_to_records(frame) == expected