Sample test data (#10487)

Co-authored-by: bogdan kyryliuk <bogdankyryliuk@dropbox.com>
This commit is contained in:
Bogdan 2020-08-03 09:08:49 -07:00 committed by GitHub
parent 821916a681
commit ab404ea2cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 46 additions and 19 deletions

View File

@@ -52,9 +52,10 @@ def gen_filter(
}
def load_data(tbl_name: str, database: Database) -> None:
def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
pdf = pd.read_json(get_example_data("birth_names.json.gz"))
pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
pdf = pdf.head(100) if sample else pdf
pdf.to_sql(
tbl_name,
database.get_sqla_engine(),
@@ -72,7 +73,9 @@ def load_data(tbl_name: str, database: Database) -> None:
print("-" * 80)
def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
def load_birth_names(
only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
"""Loading birth name dataset from a zip file in the repo"""
# pylint: disable=too-many-locals
tbl_name = "birth_names"
@@ -80,7 +83,7 @@ def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
table_exists = database.has_table_by_name(tbl_name)
if not only_metadata and (not table_exists or force):
load_data(tbl_name, database)
load_data(tbl_name, database, sample=sample)
obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
if not obj:

View File

@@ -29,7 +29,9 @@ from superset.utils import core as utils
from .helpers import get_example_data, merge_slice, misc_dash_slices, TBL
def load_energy(only_metadata: bool = False, force: bool = False) -> None:
def load_energy(
only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
"""Loads an energy related dataset to use with sankey and graphs"""
tbl_name = "energy_usage"
database = utils.get_example_database()
@@ -38,6 +40,7 @@ def load_energy(only_metadata: bool = False, force: bool = False) -> None:
if not only_metadata and (not table_exists or force):
data = get_example_data("energy.json.gz")
pdf = pd.read_json(data)
pdf = pdf.head(100) if sample else pdf
pdf.to_sql(
tbl_name,
database.get_sqla_engine(),

View File

@@ -36,7 +36,9 @@ from .helpers import (
)
def load_unicode_test_data(only_metadata: bool = False, force: bool = False) -> None:
def load_unicode_test_data(
only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
"""Loading unicode test dataset from a csv file in the repo"""
tbl_name = "unicode_test"
database = utils.get_example_database()
@@ -50,6 +52,7 @@ def load_unicode_test_data(only_metadata: bool = False, force: bool = False) ->
# generate date/numeric data
df["dttm"] = datetime.datetime.now().date()
df["value"] = [random.randint(1, 100) for _ in range(len(df))]
df = df.head(100) if sample else df
df.to_sql( # pylint: disable=no-member
tbl_name,
database.get_sqla_engine(),

View File

@@ -41,8 +41,8 @@ from .helpers import (
)
def load_world_bank_health_n_pop( # pylint: disable=too-many-locals
only_metadata: bool = False, force: bool = False
def load_world_bank_health_n_pop( # pylint: disable=too-many-locals, too-many-statements
only_metadata: bool = False, force: bool = False, sample: bool = False,
) -> None:
"""Loads the world bank health dataset, slices and a dashboard"""
tbl_name = "wb_health_population"
@@ -54,6 +54,7 @@ def load_world_bank_health_n_pop( # pylint: disable=too-many-locals
pdf = pd.read_json(data)
pdf.columns = [col.replace(".", "_") for col in pdf.columns]
pdf.year = pd.to_datetime(pdf.year)
pdf = pdf.head(100) if sample else pdf
pdf.to_sql(
tbl_name,
database.get_sqla_engine(),

View File

@@ -677,7 +677,7 @@ class TestChartApi(SupersetTestCase, ApiOwnersTestCaseMixin):
rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data")
self.assertEqual(rv.status_code, 200)
data = json.loads(rv.data.decode("utf-8"))
self.assertEqual(data["result"][0]["rowcount"], 100)
self.assertEqual(data["result"][0]["rowcount"], 45)
def test_chart_data_limit_offset(self):
"""

View File

@@ -28,13 +28,13 @@ class TestSupersetDataFrame(SupersetTestCase):
self.examples.load_css_templates()
def test_load_energy(self):
self.examples.load_energy()
self.examples.load_energy(sample=True)
def test_load_world_bank_health_n_pop(self):
self.examples.load_world_bank_health_n_pop()
self.examples.load_world_bank_health_n_pop(sample=True)
def test_load_birth_names(self):
self.examples.load_birth_names()
self.examples.load_birth_names(sample=True)
def test_load_test_users_run(self):
from superset.cli import load_test_users_run
@@ -42,4 +42,4 @@ class TestSupersetDataFrame(SupersetTestCase):
load_test_users_run()
def test_load_unicode_test_data(self):
self.examples.load_unicode_test_data()
self.examples.load_unicode_test_data(sample=True)

View File

@@ -231,7 +231,7 @@ class TestSqlaTableModel(SupersetTestCase):
spec.allows_joins = inner_join
arbitrary_gby = "state || gender || '_test'"
arbitrary_metric = dict(
label="arbitrary", expressionType="SQL", sqlExpression="COUNT(1)"
label="arbitrary", expressionType="SQL", sqlExpression="SUM(sum_boys)"
)
query_obj = dict(
groupby=[arbitrary_gby, "name"],
@@ -264,13 +264,30 @@ class TestSqlaTableModel(SupersetTestCase):
return ret
df1 = self.query_with_expr_helper(is_timeseries=True, inner_join=True)
name_list1 = cannonicalize_df(df1).name.values.tolist()
df2 = self.query_with_expr_helper(is_timeseries=True, inner_join=False)
name_list2 = cannonicalize_df(df1).name.values.tolist()
self.assertFalse(df2.empty)
# df1 can be empty if the db does not support join
if not df1.empty:
pandas.testing.assert_frame_equal(
cannonicalize_df(df1), cannonicalize_df(df2)
)
expected_namelist = [
"Anthony",
"Brian",
"Christopher",
"Daniel",
"David",
"Eric",
"James",
"Jeffrey",
"John",
"Joseph",
"Kenneth",
"Kevin",
"Mark",
"Michael",
"Paul",
]
assert name_list2 == expected_namelist
assert name_list1 == expected_namelist
def test_query_with_expr_groupby(self):
self.query_with_expr_helper(is_timeseries=False)

View File

@@ -96,7 +96,7 @@ class TestSqlLab(SupersetTestCase):
f"SELECT * FROM admin_database.{tmp_table_name}"
).fetchall()
self.assertEqual(
75691, len(data)
100, len(data)
) # SQL_MAX_ROW not applied due to the SQLLAB_CTAS_NO_LIMIT set to True
# cleanup