From c2be54c335d14521afc1221749e8554ff23c215b Mon Sep 17 00:00:00 2001 From: Elizabeth Thompson Date: Fri, 8 Jul 2022 16:33:53 -0700 Subject: [PATCH] fix: pandas bug when data is blank on post-processing (#20629) * fix pandas bug when data is blank on post-processing * account for multiple queries when data is blank --- superset/charts/post_processing.py | 9 +- .../unit_tests/charts/test_post_processing.py | 601 +++++++++++++++++- 2 files changed, 606 insertions(+), 4 deletions(-) diff --git a/superset/charts/post_processing.py b/superset/charts/post_processing.py index 715f46557..81d2a09b0 100644 --- a/superset/charts/post_processing.py +++ b/superset/charts/post_processing.py @@ -324,12 +324,17 @@ def apply_post_process( post_processor = post_processors[viz_type] for query in result["queries"]: + if query["result_format"] not in (rf.value for rf in ChartDataResultFormat): + raise Exception(f"Result format {query['result_format']} not supported") + + if not query["data"]: + # do not try to process empty data + continue + if query["result_format"] == ChartDataResultFormat.JSON: df = pd.DataFrame.from_dict(query["data"]) elif query["result_format"] == ChartDataResultFormat.CSV: df = pd.read_csv(StringIO(query["data"])) - else: - raise Exception(f"Result format {query['result_format']} not supported") processed_df = post_processor(df, form_data, datasource) diff --git a/tests/unit_tests/charts/test_post_processing.py b/tests/unit_tests/charts/test_post_processing.py index f8586be1f..05d3fe6e0 100644 --- a/tests/unit_tests/charts/test_post_processing.py +++ b/tests/unit_tests/charts/test_post_processing.py @@ -15,9 +15,15 @@ # specific language governing permissions and limitations # under the License. -import pandas as pd +import json -from superset.charts.post_processing import pivot_df, table +import pandas as pd +from numpy import True_ +from pytest import raises + +from superset.charts.post_processing import apply_post_process, pivot_df, table +from superset.common.chart_data import ChartDataResultFormat +from superset.utils.core import GenericDataType def test_pivot_df_no_cols_no_rows_single_metric(): @@ -1363,3 +1369,594 @@ def test_table(): | 0 | 80,679,663 | """.strip() ) + + +def test_apply_post_process_no_form_invalid_viz_type(): + """ + Test with invalid viz type. It should just return the result + """ + result = {"foo": "bar"} + form_data = {"viz_type": "baz"} + assert apply_post_process(result, form_data) == result + + +def test_apply_post_process_without_result_format(): + """ + A query without result_format should raise an exception + """ + result = {"queries": [{"result_format": "foo"}]} + form_data = {"viz_type": "pivot_table"} + + with raises(Exception) as ex: + apply_post_process(result, form_data) + + assert ex.match("Result format foo not supported") == True + + +def test_apply_post_process_json_format(): + """ + It should be able to process json results + """ + + result = { + "queries": [ + { + "result_format": ChartDataResultFormat.JSON, + "data": { + "result": [ + { + "data": [{"COUNT(is_software_dev)": 4725}], + "colnames": ["COUNT(is_software_dev)"], + "coltypes": [0], + } + ] + }, + } + ] + } + form_data = { + "datasource": "19__table", + "viz_type": "pivot_table_v2", + "slice_id": 69, + "url_params": {}, + "granularity_sqla": "time_start", + "time_grain_sqla": "P1D", + "time_range": "No filter", + "groupbyColumns": [], + "groupbyRows": [], + "metrics": [ + { + "aggregate": "COUNT", + "column": { + "column_name": "is_software_dev", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 1463, + "is_dttm": False, + "python_date_format": None, + "type": "DOUBLE PRECISION", + "verbose_name": None, + }, + "expressionType": "SIMPLE", + "hasCustomLabel": False, + "isNew": False, + "label": "COUNT(is_software_dev)", + "optionName": "metric_9i1kctig9yr_sizo6ihd2o", + "sqlExpression": None, + } + ], + "metricsLayout": "COLUMNS", + "adhoc_filters": [ + { + "clause": "WHERE", + "comparator": "Currently A Developer", + "expressionType": "SIMPLE", + "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", + "isExtra": False, + "isNew": False, + "operator": "==", + "sqlExpression": None, + "subject": "developer_type", + } + ], + "row_limit": 10000, + "order_desc": True, + "aggregateFunction": "Sum", + "valueFormat": "SMART_NUMBER", + "date_format": "smart_date", + "rowOrder": "key_a_to_z", + "colOrder": "key_a_to_z", + "extra_form_data": {}, + "force": False, + "result_format": "json", + "result_type": "results", + } + + assert apply_post_process(result, form_data) == { + "queries": [ + { + "result_format": ChartDataResultFormat.JSON, + "data": { + "result": { + "Total (Sum)": { + "data": [{"COUNT(is_software_dev)": 4725}], + "colnames": ["COUNT(is_software_dev)"], + "coltypes": [0], + } + } + }, + "colnames": [("result",)], + "indexnames": [("Total (Sum)",)], + "coltypes": [GenericDataType.STRING], + "rowcount": 1, + } + ] + } + + +def test_apply_post_process_csv_format(): + """ + It should be able to process csv results + """ + + result = { + "queries": [ + { + "result_format": ChartDataResultFormat.CSV, + "data": """ +COUNT(is_software_dev) +4725 +""", + } + ] + } + form_data = { + "datasource": "19__table", + "viz_type": "pivot_table_v2", + "slice_id": 69, + "url_params": {}, + "granularity_sqla": "time_start", + "time_grain_sqla": "P1D", + "time_range": "No filter", + "groupbyColumns": [], + "groupbyRows": [], + "metrics": [ + { + "aggregate": "COUNT", + "column": { + "column_name": "is_software_dev", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 1463, + "is_dttm": False, + "python_date_format": None, + "type": "DOUBLE PRECISION", + "verbose_name": None, + }, + "expressionType": "SIMPLE", + "hasCustomLabel": False, + "isNew": False, + "label": "COUNT(is_software_dev)", + "optionName": "metric_9i1kctig9yr_sizo6ihd2o", + "sqlExpression": None, + } + ], + "metricsLayout": "COLUMNS", + "adhoc_filters": [ + { + "clause": "WHERE", + "comparator": "Currently A Developer", + "expressionType": "SIMPLE", + "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", + "isExtra": False, + "isNew": False, + "operator": "==", + "sqlExpression": None, + "subject": "developer_type", + } + ], + "row_limit": 10000, + "order_desc": True, + "aggregateFunction": "Sum", + "valueFormat": "SMART_NUMBER", + "date_format": "smart_date", + "rowOrder": "key_a_to_z", + "colOrder": "key_a_to_z", + "extra_form_data": {}, + "force": False, + "result_format": "json", + "result_type": "results", + } + + assert apply_post_process(result, form_data) == { + "queries": [ + { + "result_format": ChartDataResultFormat.CSV, + "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n", + "colnames": [("COUNT(is_software_dev)",)], + "indexnames": [("Total (Sum)",)], + "coltypes": [GenericDataType.NUMERIC], + "rowcount": 1, + } + ] + } + + +def test_apply_post_process_csv_format_empty_string(): + """ + It should be able to process csv results with no data + """ + + result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]} + form_data = { + "datasource": "19__table", + "viz_type": "pivot_table_v2", + "slice_id": 69, + "url_params": {}, + "granularity_sqla": "time_start", + "time_grain_sqla": "P1D", + "time_range": "No filter", + "groupbyColumns": [], + "groupbyRows": [], + "metrics": [ + { + "aggregate": "COUNT", + "column": { + "column_name": "is_software_dev", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 1463, + "is_dttm": False, + "python_date_format": None, + "type": "DOUBLE PRECISION", + "verbose_name": None, + }, + "expressionType": "SIMPLE", + "hasCustomLabel": False, + "isNew": False, + "label": "COUNT(is_software_dev)", + "optionName": "metric_9i1kctig9yr_sizo6ihd2o", + "sqlExpression": None, + } + ], + "metricsLayout": "COLUMNS", + "adhoc_filters": [ + { + "clause": "WHERE", + "comparator": "Currently A Developer", + "expressionType": "SIMPLE", + "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", + "isExtra": False, + "isNew": False, + "operator": "==", + "sqlExpression": None, + "subject": "developer_type", + } + ], + "row_limit": 10000, + "order_desc": True, + "aggregateFunction": "Sum", + "valueFormat": "SMART_NUMBER", + "date_format": "smart_date", + "rowOrder": "key_a_to_z", + "colOrder": "key_a_to_z", + "extra_form_data": {}, + "force": False, + "result_format": "json", + "result_type": "results", + } + + assert apply_post_process(result, form_data) == { + "queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}] + } + + +def test_apply_post_process_csv_format_no_data(): + """ + It should be able to process csv results with no data + """ + + result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": None}]} + form_data = { + "datasource": "19__table", + "viz_type": "pivot_table_v2", + "slice_id": 69, + "url_params": {}, + "granularity_sqla": "time_start", + "time_grain_sqla": "P1D", + "time_range": "No filter", + "groupbyColumns": [], + "groupbyRows": [], + "metrics": [ + { + "aggregate": "COUNT", + "column": { + "column_name": "is_software_dev", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 1463, + "is_dttm": False, + "python_date_format": None, + "type": "DOUBLE PRECISION", + "verbose_name": None, + }, + "expressionType": "SIMPLE", + "hasCustomLabel": False, + "isNew": False, + "label": "COUNT(is_software_dev)", + "optionName": "metric_9i1kctig9yr_sizo6ihd2o", + "sqlExpression": None, + } + ], + "metricsLayout": "COLUMNS", + "adhoc_filters": [ + { + "clause": "WHERE", + "comparator": "Currently A Developer", + "expressionType": "SIMPLE", + "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", + "isExtra": False, + "isNew": False, + "operator": "==", + "sqlExpression": None, + "subject": "developer_type", + } + ], + "row_limit": 10000, + "order_desc": True, + "aggregateFunction": "Sum", + "valueFormat": "SMART_NUMBER", + "date_format": "smart_date", + "rowOrder": "key_a_to_z", + "colOrder": "key_a_to_z", + "extra_form_data": {}, + "force": False, + "result_format": "json", + "result_type": "results", + } + + assert apply_post_process(result, form_data) == { + "queries": [{"result_format": ChartDataResultFormat.CSV, "data": None}] + } + + +def test_apply_post_process_csv_format_no_data_multiple_queries(): + """ + It should be able to process csv results multiple queries if one query has no data + """ + + result = { + "queries": [ + {"result_format": ChartDataResultFormat.CSV, "data": ""}, + { + "result_format": ChartDataResultFormat.CSV, + "data": """ +COUNT(is_software_dev) +4725 +""", + }, + ] + } + form_data = { + "datasource": "19__table", + "viz_type": "pivot_table_v2", + "slice_id": 69, + "url_params": {}, + "granularity_sqla": "time_start", + "time_grain_sqla": "P1D", + "time_range": "No filter", + "groupbyColumns": [], + "groupbyRows": [], + "metrics": [ + { + "aggregate": "COUNT", + "column": { + "column_name": "is_software_dev", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 1463, + "is_dttm": False, + "python_date_format": None, + "type": "DOUBLE PRECISION", + "verbose_name": None, + }, + "expressionType": "SIMPLE", + "hasCustomLabel": False, + "isNew": False, + "label": "COUNT(is_software_dev)", + "optionName": "metric_9i1kctig9yr_sizo6ihd2o", + "sqlExpression": None, + } + ], + "metricsLayout": "COLUMNS", + "adhoc_filters": [ + { + "clause": "WHERE", + "comparator": "Currently A Developer", + "expressionType": "SIMPLE", + "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", + "isExtra": False, + "isNew": False, + "operator": "==", + "sqlExpression": None, + "subject": "developer_type", + } + ], + "row_limit": 10000, + "order_desc": True, + "aggregateFunction": "Sum", + "valueFormat": "SMART_NUMBER", + "date_format": "smart_date", + "rowOrder": "key_a_to_z", + "colOrder": "key_a_to_z", + "extra_form_data": {}, + "force": False, + "result_format": "json", + "result_type": "results", + } + + assert apply_post_process(result, form_data) == { + "queries": [ + {"result_format": ChartDataResultFormat.CSV, "data": ""}, + { + "result_format": ChartDataResultFormat.CSV, + "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n", + "colnames": [("COUNT(is_software_dev)",)], + "indexnames": [("Total (Sum)",)], + "coltypes": [GenericDataType.NUMERIC], + "rowcount": 1, + }, + ] + } + + +def test_apply_post_process_json_format_empty_string(): + """ + It should be able to process json results with no data + """ + + result = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]} + form_data = { + "datasource": "19__table", + "viz_type": "pivot_table_v2", + "slice_id": 69, + "url_params": {}, + "granularity_sqla": "time_start", + "time_grain_sqla": "P1D", + "time_range": "No filter", + "groupbyColumns": [], + "groupbyRows": [], + "metrics": [ + { + "aggregate": "COUNT", + "column": { + "column_name": "is_software_dev", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 1463, + "is_dttm": False, + "python_date_format": None, + "type": "DOUBLE PRECISION", + "verbose_name": None, + }, + "expressionType": "SIMPLE", + "hasCustomLabel": False, + "isNew": False, + "label": "COUNT(is_software_dev)", + "optionName": "metric_9i1kctig9yr_sizo6ihd2o", + "sqlExpression": None, + } + ], + "metricsLayout": "COLUMNS", + "adhoc_filters": [ + { + "clause": "WHERE", + "comparator": "Currently A Developer", + "expressionType": "SIMPLE", + "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", + "isExtra": False, + "isNew": False, + "operator": "==", + "sqlExpression": None, + "subject": "developer_type", + } + ], + "row_limit": 10000, + "order_desc": True, + "aggregateFunction": "Sum", + "valueFormat": "SMART_NUMBER", + "date_format": "smart_date", + "rowOrder": "key_a_to_z", + "colOrder": "key_a_to_z", + "extra_form_data": {}, + "force": False, + "result_format": "json", + "result_type": "results", + } + + assert apply_post_process(result, form_data) == { + "queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}] + } + + +def test_apply_post_process_json_format_data_is_none(): + """ + It should be able to process json results with no data + """ + + result = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}]} + form_data = { + "datasource": "19__table", + "viz_type": "pivot_table_v2", + "slice_id": 69, + "url_params": {}, + "granularity_sqla": "time_start", + "time_grain_sqla": "P1D", + "time_range": "No filter", + "groupbyColumns": [], + "groupbyRows": [], + "metrics": [ + { + "aggregate": "COUNT", + "column": { + "column_name": "is_software_dev", + "description": None, + "expression": None, + "filterable": True, + "groupby": True, + "id": 1463, + "is_dttm": False, + "python_date_format": None, + "type": "DOUBLE PRECISION", + "verbose_name": None, + }, + "expressionType": "SIMPLE", + "hasCustomLabel": False, + "isNew": False, + "label": "COUNT(is_software_dev)", + "optionName": "metric_9i1kctig9yr_sizo6ihd2o", + "sqlExpression": None, + } + ], + "metricsLayout": "COLUMNS", + "adhoc_filters": [ + { + "clause": "WHERE", + "comparator": "Currently A Developer", + "expressionType": "SIMPLE", + "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", + "isExtra": False, + "isNew": False, + "operator": "==", + "sqlExpression": None, + "subject": "developer_type", + } + ], + "row_limit": 10000, + "order_desc": True, + "aggregateFunction": "Sum", + "valueFormat": "SMART_NUMBER", + "date_format": "smart_date", + "rowOrder": "key_a_to_z", + "colOrder": "key_a_to_z", + "extra_form_data": {}, + "force": False, + "result_format": "json", + "result_type": "results", + } + + assert apply_post_process(result, form_data) == { + "queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}] + }