refactor: speed up conversion from dataframe to list of records (#12806)

This commit is contained in:
Tom 2021-02-07 20:01:28 +00:00 committed by GitHub
parent 8ccf2e8f1e
commit b56aec763d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 29 additions and 9 deletions

View File

@ -16,6 +16,7 @@
# under the License.
""" Superset utilities for pandas.DataFrame.
"""
import warnings
from typing import Any, Dict, List
import pandas as pd
@ -23,13 +24,32 @@ import pandas as pd
from superset.utils.core import JS_MAX_INTEGER
def _convert_big_integers(val: Any) -> Any:
    """
    Stringify integers whose magnitude exceeds ``JS_MAX_INTEGER``.

    :param val: the value to inspect
    :returns: ``str(val)`` when ``val`` is an integer with an absolute value
        greater than ``JS_MAX_INTEGER``, otherwise ``val`` unchanged
    """
    # Anything that is not an integer passes through untouched.
    if not isinstance(val, int):
        return val
    # Integers within the allowed magnitude are returned as-is.
    if abs(val) <= JS_MAX_INTEGER:
        return val
    return str(val)
def df_to_records(dframe: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Convert a DataFrame to a set of records.

    Every cell value is passed through ``_convert_big_integers`` so that
    integers too big for JavaScript to handle are returned as strings.

    :param dframe: the DataFrame to convert
    :returns: a list of dictionaries reflecting each single row of the DataFrame
    """
    columns = dframe.columns
    if not columns.is_unique:
        warnings.warn(
            "DataFrame columns are not unique, some columns will be omitted.",
            UserWarning,
            stacklevel=2,
        )

    # Select columns by position rather than by label: with duplicated labels
    # ``dframe[label]`` returns a DataFrame, and iterating a DataFrame yields
    # its column names instead of the cell values.
    series = [dframe.iloc[:, position] for position in range(len(columns))]

    # Walk the columns in lockstep to produce one tuple per row. When labels
    # repeat, the later column overwrites the earlier one in the row's dict.
    return [
        dict(zip(columns, map(_convert_big_integers, row)))
        for row in zip(*series)
    ]