fix(sqllab/charts): casting from timestamp[us] to timestamp[ns] would result in out of bounds timestamp (#18873)
* fix casting from timestamp[us] to timestamp[ns] would result in out of bounds timestamp from sqllab and charts * Add unittests * Lint changes and parameter variable rename * Fix linting
This commit is contained in:
parent
24e4ab6a1f
commit
8b72354654
|
|
@ -174,7 +174,10 @@ class SupersetResultSet:
|
|||
|
||||
@staticmethod
|
||||
def convert_table_to_df(table: pa.Table) -> pd.DataFrame:
|
||||
return table.to_pandas(integer_object_nulls=True)
|
||||
try:
|
||||
return table.to_pandas(integer_object_nulls=True)
|
||||
except pa.lib.ArrowInvalid:
|
||||
return table.to_pandas(integer_object_nulls=True, timestamp_as_object=True)
|
||||
|
||||
@staticmethod
|
||||
def first_nonempty(items: List[Any]) -> Any:
|
||||
|
|
|
|||
|
|
@ -1746,14 +1746,14 @@ def normalize_dttm_col(
|
|||
# Column is formatted as a numeric value
|
||||
unit = timestamp_format.replace("epoch_", "")
|
||||
df[DTTM_ALIAS] = pd.to_datetime(
|
||||
dttm_col, utc=False, unit=unit, origin="unix"
|
||||
dttm_col, utc=False, unit=unit, origin="unix", errors="coerce"
|
||||
)
|
||||
else:
|
||||
# Column has already been formatted as a timestamp.
|
||||
df[DTTM_ALIAS] = dttm_col.apply(pd.Timestamp)
|
||||
else:
|
||||
df[DTTM_ALIAS] = pd.to_datetime(
|
||||
df[DTTM_ALIAS], utc=False, format=timestamp_format
|
||||
df[DTTM_ALIAS], utc=False, format=timestamp_format, errors="coerce"
|
||||
)
|
||||
if offset:
|
||||
df[DTTM_ALIAS] += timedelta(hours=offset)
|
||||
|
|
|
|||
|
|
@ -1093,3 +1093,8 @@ class TestUtils(SupersetTestCase):
|
|||
# test numeric epoch_ms format
|
||||
df = pd.DataFrame([{"__timestamp": ts.timestamp() * 1000, "a": 1}])
|
||||
assert normalize_col(df, "epoch_ms", 0, None)[DTTM_ALIAS][0] == ts
|
||||
|
||||
# test that out of bounds timestamps are coerced to None instead of
|
||||
# erroring out
|
||||
df = pd.DataFrame([{"__timestamp": "1677-09-21 00:00:00", "a": 1}])
|
||||
assert pd.isnull(normalize_col(df, None, 0, None)[DTTM_ALIAS][0])
|
||||
|
|
|
|||
|
|
@ -15,6 +15,11 @@
|
|||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=unused-argument, import-outside-toplevel
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
from pandas import Timestamp
|
||||
|
||||
from superset.dataframe import df_to_records
|
||||
from superset.superset_typing import DbapiDescription
|
||||
|
||||
|
|
@ -53,3 +58,50 @@ def test_js_max_int(app_context: None) -> None:
|
|||
{"a": 1, "b": "1239162456494753670", "c": "c1"},
|
||||
{"a": 2, "b": 100, "c": "c2"},
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"input_, expected",
|
||||
[
|
||||
pytest.param(
|
||||
[
|
||||
(datetime.strptime("1677-09-22 00:12:43", "%Y-%m-%d %H:%M:%S"), 1),
|
||||
(datetime.strptime("2262-04-11 23:47:17", "%Y-%m-%d %H:%M:%S"), 2),
|
||||
],
|
||||
[
|
||||
{
|
||||
"a": datetime.strptime("1677-09-22 00:12:43", "%Y-%m-%d %H:%M:%S"),
|
||||
"b": 1,
|
||||
},
|
||||
{
|
||||
"a": datetime.strptime("2262-04-11 23:47:17", "%Y-%m-%d %H:%M:%S"),
|
||||
"b": 2,
|
||||
},
|
||||
],
|
||||
id="timestamp conversion fail",
|
||||
),
|
||||
pytest.param(
|
||||
[
|
||||
(datetime.strptime("1677-09-22 00:12:44", "%Y-%m-%d %H:%M:%S"), 1),
|
||||
(datetime.strptime("2262-04-11 23:47:16", "%Y-%m-%d %H:%M:%S"), 2),
|
||||
],
|
||||
[
|
||||
{"a": Timestamp("1677-09-22 00:12:44"), "b": 1},
|
||||
{"a": Timestamp("2262-04-11 23:47:16"), "b": 2},
|
||||
],
|
||||
id="timestamp conversion success",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_max_pandas_timestamp(input_, expected) -> None:
|
||||
from superset.db_engine_specs import BaseEngineSpec
|
||||
from superset.result_set import SupersetResultSet
|
||||
|
||||
cursor_descr: DbapiDescription = [
|
||||
("a", "datetime", None, None, None, None, False),
|
||||
("b", "int", None, None, None, None, False),
|
||||
]
|
||||
results = SupersetResultSet(input_, cursor_descr, BaseEngineSpec)
|
||||
df = results.to_pandas_df()
|
||||
|
||||
assert df_to_records(df) == expected
|
||||
|
|
|
|||
Loading…
Reference in New Issue