fix: convert <NA> values to None instead of stringifying (#22321)

This commit is contained in:
Elizabeth Thompson 2022-12-02 17:15:20 -08:00 committed by GitHub
parent 6d3591cb9e
commit 1c20206057
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 1 deletions

View File

@ -67,7 +67,11 @@ def stringify_values(array: np.ndarray) -> np.ndarray:
with np.nditer(result, flags=["refs_ok"], op_flags=["readwrite"]) as it:
for obj in it:
obj[...] = stringify(obj)
if pd.isna(obj):
# pandas <NA> type cannot be converted to string
obj[pd.isna(obj)] = None
else:
obj[...] = stringify(obj)
return result

View File

@ -18,6 +18,13 @@
# pylint: disable=import-outside-toplevel, unused-argument
import numpy as np
import pandas as pd
from numpy.core.multiarray import array
from superset.result_set import stringify_values
def test_column_names_as_bytes() -> None:
"""
Test that we can handle column names as bytes.
@ -65,3 +72,37 @@ def test_column_names_as_bytes() -> None:
| 1 | 2016-01-27 | 392.444 | 396.843 | 391.782 | 394.972 | 394.972 | 47424400 |
""".strip()
)
def test_stringify_with_null_integers() -> None:
    """
    Test that stringify_values handles columns containing pandas ``<NA>``.

    Each column of a structured object array is passed through
    ``stringify_values`` on its own. The expected output is:

    - string cells come back wrapped in double quotes,
    - ``pd.NA`` and ``None`` cells come back as ``None``,
    - ``True`` comes back as the string ``"true"``.
    """
    data = [
        ("foo", "bar", pd.NA, None),
        ("foo", "bar", pd.NA, True),
        ("foo", "bar", pd.NA, None),
    ]
    # Every field is declared as ``object`` so pd.NA / None / bool can coexist
    # with strings inside the same structured array.
    numpy_dtype = [
        ("id", "object"),
        ("value", "object"),
        ("num", "object"),
        ("bool", "object"),
    ]
    array2 = np.array(data, dtype=numpy_dtype)
    column_names = ["id", "value", "num", "bool"]

    # One stringified 1-D array per column, stacked into a single object array.
    result_set = np.array([stringify_values(array2[column]) for column in column_names])

    # Use the public ``np.array`` constructor rather than the private
    # ``numpy.core.multiarray.array`` alias; they are the same callable.
    expected = np.array(
        [
            np.array(['"foo"', '"foo"', '"foo"'], dtype=object),
            np.array(['"bar"', '"bar"', '"bar"'], dtype=object),
            np.array([None, None, None], dtype=object),
            np.array([None, "true", None], dtype=object),
        ]
    )
    assert np.array_equal(result_set, expected)