refactor: speed up conversion from dataframe to list of records (#12806)
This commit is contained in:
parent
8ccf2e8f1e
commit
b56aec763d
|
|
@ -16,6 +16,7 @@
|
|||
# under the License.
|
||||
""" Superset utilities for pandas.DataFrame.
|
||||
"""
|
||||
import warnings
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import pandas as pd
|
||||
|
|
@ -23,13 +24,32 @@ import pandas as pd
|
|||
from superset.utils.core import JS_MAX_INTEGER
|
||||
|
||||
|
||||
def _convert_big_integers(val: Any) -> Any:
    """
    Turn integers that are too big for JavaScript to handle into strings.

    :param val: the value to inspect
    :returns: ``str(val)`` when ``val`` is an ``int`` whose absolute value
        exceeds ``JS_MAX_INTEGER``; otherwise ``val`` itself, untouched
    """
    # Non-integers short-circuit on the isinstance check and pass through.
    if isinstance(val, int) and abs(val) > JS_MAX_INTEGER:
        return str(val)
    return val
|
||||
|
||||
|
||||
def df_to_records(dframe: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Convert a DataFrame to a list of records.

    Every value is passed through ``_convert_big_integers`` on its way into
    the resulting dictionaries.

    :param dframe: the DataFrame to convert
    :returns: a list of dictionaries reflecting each single row of the DataFrame
    """
    columns = dframe.columns
    if not columns.is_unique:
        warnings.warn(
            "DataFrame columns are not unique, some columns will be omitted.",
            UserWarning,
            stacklevel=2,
        )

    # Select columns by position rather than by label: with duplicated labels
    # ``dframe[label]`` returns a DataFrame, and iterating a DataFrame yields
    # its column labels instead of the cell values.
    series = [dframe.iloc[:, position] for position in range(len(columns))]

    # ``dict`` keeps the last value seen for a repeated label, which is the
    # omission announced by the warning above.
    return [
        dict(zip(columns, map(_convert_big_integers, row)))
        for row in zip(*series)
    ]
|
||||
|
|
|
|||
Loading…
Reference in New Issue