feat: Adds Histogram chart migration logic (#28780)

This commit is contained in:
Michael S. Molina 2024-06-05 13:33:50 -03:00 committed by GitHub
parent dabb4e064f
commit df0b1cb8ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 118 additions and 6 deletions

View File

@ -23,7 +23,7 @@ import { PostProcessingFactory } from './types';
export const histogramOperator: PostProcessingFactory<
PostProcessingHistogram
> = (formData, queryObject) => {
const { bins, column, cumulative, groupby, normalize } = formData;
const { bins, column, cumulative, groupby = [], normalize } = formData;
const parsedBins = Number.isNaN(Number(bins)) ? 5 : Number(bins);
const parsedColumn = getColumnLabel(column);
const parsedGroupBy = groupby!.map(getColumnLabel);

View File

@ -37,6 +37,13 @@ test('matches formData', () => {
});
});
// With `groupby` explicitly undefined in the form data, the operator's
// options are expected to carry an empty `groupby` list.
test('sets default groupby', () => {
  const formDataWithoutGroupby = { ...formData, groupby: undefined };
  const operation = histogramOperator(formDataWithoutGroupby, {});

  expect(operation?.options?.groupby).toEqual([]);
});
test('defaults to 5 bins', () => {
expect(
histogramOperator(omit(formData, ['bins']) as SqlaFormData, {}),

View File

@ -25,8 +25,10 @@ export default function buildQuery(formData: HistogramFormData) {
return buildQueryContext(formData, baseQueryObject => [
{
...baseQueryObject,
extras: { where: `${column} IS NOT NULL` },
columns: [...groupby, column],
post_processing: [histogramOperator(formData, baseQueryObject)],
metrics: undefined,
},
]);
}

View File

@ -136,9 +136,10 @@ export default function transformProps(
const echartOptions: EChartsOption = {
grid: {
...defaultGrid,
bottom: 30,
left: 30,
right: 30,
left: '5%',
right: '5%',
top: '10%',
bottom: '10%',
},
xAxis: {
data: xAxisData,

View File

@ -124,6 +124,7 @@ const ColumnSelectPopoverTrigger = ({
isTemporal,
onColumnEdit,
popoverLabel,
disabledTabs,
],
);

View File

@ -207,6 +207,7 @@ function DndColumnSelect(props: DndColumnSelectProps) {
closePopover={closePopover}
visible={newColumnPopoverVisible}
isTemporal={isTemporal}
disabledTabs={disabledTabs}
>
<div />
</ColumnSelectPopoverTrigger>

View File

@ -30,6 +30,7 @@ class VizType(str, Enum):
DIST_BAR = "dist_bar"
DUAL_LINE = "dual_line"
HEATMAP = "heatmap"
HISTOGRAM = "histogram"
LINE = "line"
PIVOT_TABLE = "pivot_table"
SUNBURST = "sunburst"
@ -85,6 +86,7 @@ def migrate(viz_type: VizType, is_downgrade: bool = False) -> None:
MigrateDistBarChart,
MigrateDualLine,
MigrateHeatmapChart,
MigrateHistogramChart,
MigrateLineChart,
MigratePivotTable,
MigrateSunburst,
@ -98,6 +100,7 @@ def migrate(viz_type: VizType, is_downgrade: bool = False) -> None:
VizType.DIST_BAR: MigrateDistBarChart,
VizType.DUAL_LINE: MigrateDualLine,
VizType.HEATMAP: MigrateHeatmapChart,
VizType.HISTOGRAM: MigrateHistogramChart,
VizType.LINE: MigrateLineChart,
VizType.PIVOT_TABLE: MigratePivotTable,
VizType.SUNBURST: MigrateSunburst,

View File

@ -280,3 +280,26 @@ class MigrateHeatmapChart(MigrateViz):
def _pre_action(self) -> None:
self.data["legend_type"] = "continuous"
class MigrateHistogramChart(MigrateViz):
    """Migration definition from the ``histogram`` chart to ``histogram_v2``.

    The class attributes declare which form-data keys are renamed and which
    are removed; ``_pre_action`` fills in the ``column``, ``bins`` and
    ``groupby`` keys from the legacy form data.
    """

    source_viz_type = "histogram"
    target_viz_type = "histogram_v2"
    rename_keys = {
        "x_axis_label": "x_axis_title",
        "y_axis_label": "y_axis_title",
        "normalized": "normalize",
    }
    remove_keys = {"all_columns_x", "link_length", "queryFields"}

    def _pre_action(self) -> None:
        """Derive ``column``, ``bins`` and ``groupby`` in ``self.data``."""
        # Only the first entry of the legacy column list is carried over.
        legacy_columns = self.data.get("all_columns_x")
        if legacy_columns:
            self.data["column"] = legacy_columns[0]

        # ``link_length`` holds the legacy bin count; fall back to 5 bins
        # when it is missing or empty.
        legacy_bins = self.data.get("link_length")
        if legacy_bins:
            self.data["bins"] = int(legacy_bins)
        else:
            self.data["bins"] = 5

        # Guarantee that ``groupby`` is always a list.
        if not self.data.get("groupby"):
            self.data["groupby"] = []

View File

@ -53,7 +53,7 @@ def histogram(
raise ValueError(f"The column '{column}' must be numeric.")
# calculate the histogram bin edges
bin_edges = np.histogram_bin_edges(df[column], bins=bins)
bin_edges = np.histogram_bin_edges(df[column].dropna(), bins=bins)
# convert the bin edges to strings
bin_edges_str = [
@ -62,7 +62,7 @@ def histogram(
]
def hist_values(series: Series) -> np.ndarray:
result = np.histogram(series, bins=bin_edges)[0]
result = np.histogram(series.dropna(), bins=bin_edges)[0]
return result if not cumulative else np.cumsum(result)
if len(groupby) == 0:

View File

@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any
from superset.migrations.shared.migrate_viz import MigrateHistogramChart
from tests.unit_tests.migrations.viz.utils import migrate_and_assert
# Form data of a legacy ``histogram`` chart, used as the migration input.
SOURCE_FORM_DATA: dict[str, Any] = {
    "all_columns_x": ["category"],
    "adhoc_filters": [],
    "cumulative": True,
    "linear_color_scheme": "blue",
    "link_length": "5",
    "normalized": True,
    "row_limit": 100,
    "viz_type": "histogram",
    "x_axis_label": "X",
    "y_axis_label": "Y",
}

# Form data expected for the ``histogram_v2`` chart; the untouched input is
# referenced under ``form_data_bak``.
TARGET_FORM_DATA: dict[str, Any] = {
    "adhoc_filters": [],
    "bins": 5,
    "column": "category",
    "cumulative": True,
    "form_data_bak": SOURCE_FORM_DATA,
    "groupby": [],
    "linear_color_scheme": "blue",
    "normalize": True,
    "row_limit": 100,
    "viz_type": "histogram_v2",
    "x_axis_title": "X",
    "y_axis_title": "Y",
}
def test_migration() -> None:
    """Run the shared migration check for ``MigrateHistogramChart``.

    Passes the legacy source form data and the expected target form data to
    ``migrate_and_assert``.
    """
    migrate_and_assert(
        MigrateHistogramChart,
        SOURCE_FORM_DATA,
        TARGET_FORM_DATA,
    )

View File

@ -120,3 +120,25 @@ def test_histogram_with_non_numeric_column():
histogram(data, "b", ["group"], bins)
except ValueError as e:
assert str(e) == "The column 'b' must be numeric."
# test that histogram ignores null values
def test_histogram_ignore_null_values():
    """A null entry in the histogram column must not be counted in any bin.

    Column ``a`` holds the values 1..9 plus one ``None``; the expected counts
    per group only account for the nine non-null values.
    """
    group_labels = ["A", "A", "B", "B", "A", "A", "B", "B", "A", "A"]
    values_with_null = [1, 2, 3, 4, 5, 6, 7, 8, 9, None]
    data_with_null = DataFrame(
        {
            "group": group_labels,
            "a": list(values_with_null),
            "b": list(values_with_null),
        }
    )
    expected_columns = ["group", "1 - 2", "2 - 4", "4 - 5", "5 - 7", "7 - 9"]
    expected_rows = [["A", 2, 0, 1, 1, 1], ["B", 0, 2, 0, 1, 1]]

    result = histogram(data_with_null, "a", ["group"], bins)

    # One row per group; one column for the group label plus one per bin.
    assert result.shape == (2, bins + 1)
    assert result.columns.tolist() == expected_columns
    assert result.values.tolist() == expected_rows