fix: Histogram chart not able to use decimal datatype column (#30416)

This commit is contained in:
Michael S. Molina 2024-09-30 09:04:55 -03:00 committed by GitHub
parent bdd50c7553
commit 4834390e6a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 19 additions and 24 deletions

View File

@ -25,7 +25,6 @@ export default function buildQuery(formData: HistogramFormData) {
return buildQueryContext(formData, baseQueryObject => [
{
...baseQueryObject,
extras: { where: `${column} IS NOT NULL` },
columns: [...groupby, column],
post_processing: [histogramOperator(formData, baseQueryObject)],
metrics: undefined,

View File

@ -17,7 +17,7 @@
from __future__ import annotations
import numpy as np
from pandas import DataFrame, Series
from pandas import DataFrame, Series, to_numeric
# pylint: disable=too-many-arguments
@ -48,12 +48,15 @@ def histogram(
if groupby is None:
groupby = []
# check if the column is numeric
if not np.issubdtype(df[column].dtype, np.number):
raise ValueError(f"The column '{column}' must be numeric.")
# convert to numeric, coercing errors to NaN
df[column] = to_numeric(df[column], errors="coerce")
# check if the column contains non-numeric values
if df[column].isna().any():
raise ValueError(f"Column '{column}' contains non-numeric values")
# calculate the histogram bin edges
bin_edges = np.histogram_bin_edges(df[column].dropna(), bins=bins)
bin_edges = np.histogram_bin_edges(df[column], bins=bins)
# convert the bin edges to strings
bin_edges_str = [
@ -62,6 +65,7 @@ def histogram(
]
def hist_values(series: Series) -> np.ndarray:
# Per-series helper: counts how many values of `series` fall into each bin,
# using the `bin_edges` computed above from the whole column so that every
# series is measured against the same bins.
# we might have NaN values as the result of grouping so we need to drop them
result = np.histogram(series.dropna(), bins=bin_edges)[0]
# `[0]` keeps only the per-bin counts returned by np.histogram; when
# `cumulative` is set the counts are turned into a running total.
return result if not cumulative else np.cumsum(result)

View File

@ -117,28 +117,20 @@ def test_histogram_with_groupby_and_cumulative_and_normalize():
def test_histogram_with_non_numeric_column():
try:
histogram(data, "b", ["group"], bins)
histogram(data, "group", None, bins)
except ValueError as e:
assert str(e) == "The column 'b' must be numeric."
assert str(e) == "Column 'group' contains non-numeric values"
# test histogram ignore null values
def test_histogram_ignore_null_values():
data_with_null = DataFrame(
def test_histogram_with_some_non_numeric_values():
data_with_non_numeric = DataFrame(
{
"group": ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"],
"a": [1, 2, 3, 4, 5, 6, 7, 8, 9, None],
"b": [1, 2, 3, 4, 5, 6, 7, 8, 9, None],
"a": [1, 2, 3, 4, 5, 6, 7, 8, 9, "10"],
"b": [1, 2, 3, 4, 5, 6, 7, 8, 9, "10"],
}
)
result = histogram(data_with_null, "a", ["group"], bins)
assert result.shape == (2, bins + 1)
assert result.columns.tolist() == [
"group",
"1 - 2",
"2 - 4",
"4 - 5",
"5 - 7",
"7 - 9",
]
assert result.values.tolist() == [["A", 2, 0, 1, 1, 1], ["B", 0, 2, 0, 1, 1]]
try:
histogram(data_with_non_numeric, "a", ["group"], bins)
except ValueError as e:
assert str(e) == "Column 'group' contains non-numeric values"