fix: Histogram chart not able to use decimal datatype column (#30416)
This commit is contained in:
parent
bdd50c7553
commit
4834390e6a
|
|
@ -25,7 +25,6 @@ export default function buildQuery(formData: HistogramFormData) {
|
|||
return buildQueryContext(formData, baseQueryObject => [
|
||||
{
|
||||
...baseQueryObject,
|
||||
extras: { where: `${column} IS NOT NULL` },
|
||||
columns: [...groupby, column],
|
||||
post_processing: [histogramOperator(formData, baseQueryObject)],
|
||||
metrics: undefined,
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
from pandas import DataFrame, Series
|
||||
from pandas import DataFrame, Series, to_numeric
|
||||
|
||||
|
||||
# pylint: disable=too-many-arguments
|
||||
|
|
@ -48,12 +48,15 @@ def histogram(
|
|||
if groupby is None:
|
||||
groupby = []
|
||||
|
||||
# check if the column is numeric
|
||||
if not np.issubdtype(df[column].dtype, np.number):
|
||||
raise ValueError(f"The column '{column}' must be numeric.")
|
||||
# convert to numeric, coercing errors to NaN
|
||||
df[column] = to_numeric(df[column], errors="coerce")
|
||||
|
||||
# check if the column contains non-numeric values
|
||||
if df[column].isna().any():
|
||||
raise ValueError(f"Column '{column}' contains non-numeric values")
|
||||
|
||||
# calculate the histogram bin edges
|
||||
bin_edges = np.histogram_bin_edges(df[column].dropna(), bins=bins)
|
||||
bin_edges = np.histogram_bin_edges(df[column], bins=bins)
|
||||
|
||||
# convert the bin edges to strings
|
||||
bin_edges_str = [
|
||||
|
|
@ -62,6 +65,7 @@ def histogram(
|
|||
]
|
||||
|
||||
def hist_values(series: Series) -> np.ndarray:
    """Count the non-null values of ``series`` that fall into each bin.

    The bins come from ``bin_edges`` in the enclosing scope. When the
    enclosing ``cumulative`` flag is truthy the running total of the counts
    is returned instead of the per-bin counts.
    """
    # grouping can introduce NaN values, so discard them before counting
    counts, _ = np.histogram(series.dropna(), bins=bin_edges)
    if cumulative:
        return np.cumsum(counts)
    return counts
|
||||
|
||||
|
|
|
|||
|
|
@ -117,28 +117,20 @@ def test_histogram_with_groupby_and_cumulative_and_normalize():
|
|||
|
||||
def test_histogram_with_non_numeric_column():
    """histogram() must reject a column whose values cannot be made numeric.

    The "group" column of the module-level ``data`` fixture is used as the
    histogram column (presumably it holds string labels -- confirm against
    the fixture definition), so a ValueError naming that column is expected.
    """
    try:
        histogram(data, "group", None, bins)
    except ValueError as ex:
        assert str(ex) == "Column 'group' contains non-numeric values"
    else:
        # Without this branch the test passes silently when nothing is raised.
        raise AssertionError("histogram() did not raise ValueError")
|
||||
|
||||
|
||||
def test_histogram_with_some_non_numeric_values():
    """A mostly numeric column with one unparseable value must raise ValueError."""
    # The last value has to be a string that cannot be parsed as a number:
    # a numeric-looking string such as "10" is converted successfully by
    # to_numeric(errors="coerce") and would not trigger the error under test.
    data_with_non_numeric = DataFrame(
        {
            "group": ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"],
            "a": [1, 2, 3, 4, 5, 6, 7, 8, 9, "ten"],
            "b": [1, 2, 3, 4, 5, 6, 7, 8, 9, "ten"],
        }
    )
    try:
        histogram(data_with_non_numeric, "a", ["group"], bins)
    except ValueError as ex:
        # The message names the column passed to histogram(), i.e. "a".
        assert str(ex) == "Column 'a' contains non-numeric values"
    else:
        # Without this branch the test passes silently when nothing is raised.
        raise AssertionError("histogram() did not raise ValueError")
|
||||
|
|
|
|||
Loading…
Reference in New Issue