diff --git a/superset/examples/birth_names.py b/superset/examples/birth_names.py index 70c553afd..7cc9093d5 100644 --- a/superset/examples/birth_names.py +++ b/superset/examples/birth_names.py @@ -52,9 +52,10 @@ def gen_filter( } -def load_data(tbl_name: str, database: Database) -> None: +def load_data(tbl_name: str, database: Database, sample: bool = False) -> None: pdf = pd.read_json(get_example_data("birth_names.json.gz")) pdf.ds = pd.to_datetime(pdf.ds, unit="ms") + pdf = pdf.head(100) if sample else pdf pdf.to_sql( tbl_name, database.get_sqla_engine(), @@ -72,7 +73,9 @@ def load_data(tbl_name: str, database: Database) -> None: print("-" * 80) -def load_birth_names(only_metadata: bool = False, force: bool = False) -> None: +def load_birth_names( + only_metadata: bool = False, force: bool = False, sample: bool = False +) -> None: """Loading birth name dataset from a zip file in the repo""" # pylint: disable=too-many-locals tbl_name = "birth_names" @@ -80,7 +83,7 @@ def load_birth_names(only_metadata: bool = False, force: bool = False) -> None: table_exists = database.has_table_by_name(tbl_name) if not only_metadata and (not table_exists or force): - load_data(tbl_name, database) + load_data(tbl_name, database, sample=sample) obj = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not obj: diff --git a/superset/examples/energy.py b/superset/examples/energy.py index b27c45ebb..977afe2db 100644 --- a/superset/examples/energy.py +++ b/superset/examples/energy.py @@ -29,7 +29,9 @@ from superset.utils import core as utils from .helpers import get_example_data, merge_slice, misc_dash_slices, TBL -def load_energy(only_metadata: bool = False, force: bool = False) -> None: +def load_energy( + only_metadata: bool = False, force: bool = False, sample: bool = False +) -> None: """Loads an energy related dataset to use with sankey and graphs""" tbl_name = "energy_usage" database = utils.get_example_database() @@ -38,6 +40,7 @@ def load_energy(only_metadata: bool 
= False, force: bool = False) -> None: if not only_metadata and (not table_exists or force): data = get_example_data("energy.json.gz") pdf = pd.read_json(data) + pdf = pdf.head(100) if sample else pdf pdf.to_sql( tbl_name, database.get_sqla_engine(), diff --git a/superset/examples/unicode_test_data.py b/superset/examples/unicode_test_data.py index 70d5e72d3..b8b12feae 100644 --- a/superset/examples/unicode_test_data.py +++ b/superset/examples/unicode_test_data.py @@ -36,7 +36,9 @@ from .helpers import ( ) -def load_unicode_test_data(only_metadata: bool = False, force: bool = False) -> None: +def load_unicode_test_data( + only_metadata: bool = False, force: bool = False, sample: bool = False +) -> None: """Loading unicode test dataset from a csv file in the repo""" tbl_name = "unicode_test" database = utils.get_example_database() @@ -50,6 +52,7 @@ def load_unicode_test_data(only_metadata: bool = False, force: bool = False) -> # generate date/numeric data df["dttm"] = datetime.datetime.now().date() df["value"] = [random.randint(1, 100) for _ in range(len(df))] + df = df.head(100) if sample else df df.to_sql( # pylint: disable=no-member tbl_name, database.get_sqla_engine(), diff --git a/superset/examples/world_bank.py b/superset/examples/world_bank.py index 1764e1122..24f32734c 100644 --- a/superset/examples/world_bank.py +++ b/superset/examples/world_bank.py @@ -41,8 +41,8 @@ from .helpers import ( ) -def load_world_bank_health_n_pop( # pylint: disable=too-many-locals - only_metadata: bool = False, force: bool = False +def load_world_bank_health_n_pop( # pylint: disable=too-many-locals, too-many-statements + only_metadata: bool = False, force: bool = False, sample: bool = False, ) -> None: """Loads the world bank health dataset, slices and a dashboard""" tbl_name = "wb_health_population" @@ -54,6 +54,7 @@ def load_world_bank_health_n_pop( # pylint: disable=too-many-locals pdf = pd.read_json(data) pdf.columns = [col.replace(".", "_") for col in pdf.columns] pdf.year = 
pd.to_datetime(pdf.year) + pdf = pdf.head(100) if sample else pdf pdf.to_sql( tbl_name, database.get_sqla_engine(), diff --git a/tests/charts/api_tests.py b/tests/charts/api_tests.py index 6db100b0e..8ccbc7f75 100644 --- a/tests/charts/api_tests.py +++ b/tests/charts/api_tests.py @@ -677,7 +677,7 @@ class TestChartApi(SupersetTestCase, ApiOwnersTestCaseMixin): rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data") self.assertEqual(rv.status_code, 200) data = json.loads(rv.data.decode("utf-8")) - self.assertEqual(data["result"][0]["rowcount"], 100) + self.assertEqual(data["result"][0]["rowcount"], 45) def test_chart_data_limit_offset(self): """ diff --git a/tests/load_examples_test.py b/tests/load_examples_test.py index 7fe94e98d..028d13c8a 100644 --- a/tests/load_examples_test.py +++ b/tests/load_examples_test.py @@ -28,13 +28,13 @@ class TestSupersetDataFrame(SupersetTestCase): self.examples.load_css_templates() def test_load_energy(self): - self.examples.load_energy() + self.examples.load_energy(sample=True) def test_load_world_bank_health_n_pop(self): - self.examples.load_world_bank_health_n_pop() + self.examples.load_world_bank_health_n_pop(sample=True) def test_load_birth_names(self): - self.examples.load_birth_names() + self.examples.load_birth_names(sample=True) def test_load_test_users_run(self): from superset.cli import load_test_users_run @@ -42,4 +42,4 @@ class TestSupersetDataFrame(SupersetTestCase): load_test_users_run() def test_load_unicode_test_data(self): - self.examples.load_unicode_test_data() + self.examples.load_unicode_test_data(sample=True) diff --git a/tests/model_tests.py b/tests/model_tests.py index 2b10b8777..c0f6327e9 100644 --- a/tests/model_tests.py +++ b/tests/model_tests.py @@ -231,7 +231,7 @@ class TestSqlaTableModel(SupersetTestCase): spec.allows_joins = inner_join arbitrary_gby = "state || gender || '_test'" arbitrary_metric = dict( - label="arbitrary", expressionType="SQL", sqlExpression="COUNT(1)" + 
label="arbitrary", expressionType="SQL", sqlExpression="SUM(sum_boys)" ) query_obj = dict( groupby=[arbitrary_gby, "name"], @@ -264,13 +264,30 @@ class TestSqlaTableModel(SupersetTestCase): return ret df1 = self.query_with_expr_helper(is_timeseries=True, inner_join=True) + name_list1 = cannonicalize_df(df1).name.values.tolist() df2 = self.query_with_expr_helper(is_timeseries=True, inner_join=False) + name_list2 = cannonicalize_df(df2).name.values.tolist() self.assertFalse(df2.empty) - # df1 can be empty if the db does not support join - if not df1.empty: - pandas.testing.assert_frame_equal( - cannonicalize_df(df1), cannonicalize_df(df2) - ) + + expected_namelist = [ + "Anthony", + "Brian", + "Christopher", + "Daniel", + "David", + "Eric", + "James", + "Jeffrey", + "John", + "Joseph", + "Kenneth", + "Kevin", + "Mark", + "Michael", + "Paul", + ] + assert name_list2 == expected_namelist + assert name_list1 == expected_namelist def test_query_with_expr_groupby(self): self.query_with_expr_helper(is_timeseries=False) diff --git a/tests/sqllab_tests.py b/tests/sqllab_tests.py index 071d7b96c..405faf8bc 100644 --- a/tests/sqllab_tests.py +++ b/tests/sqllab_tests.py @@ -96,7 +96,7 @@ class TestSqlLab(SupersetTestCase): f"SELECT * FROM admin_database.{tmp_table_name}" ).fetchall() self.assertEqual( - 75691, len(data) + 100, len(data) ) # SQL_MAX_ROW not applied due to the SQLLAB_CTAS_NO_LIMIT set to True # cleanup