Sample test data (#10487)

Co-authored-by: bogdan kyryliuk <bogdankyryliuk@dropbox.com>
This commit is contained in:
Bogdan 2020-08-03 09:08:49 -07:00 committed by GitHub
parent 821916a681
commit ab404ea2cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 46 additions and 19 deletions

View File

@@ -52,9 +52,10 @@ def gen_filter(
}
def load_data(tbl_name: str, database: Database) -> None:
def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
pdf = pd.read_json(get_example_data("birth_names.json.gz"))
pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
pdf = pdf.head(100) if sample else pdf
pdf.to_sql(
tbl_name,
database.get_sqla_engine(),
@@ -72,7 +73,9 @@ def load_data(tbl_name: str, database: Database) -> None:
print("-" * 80)
def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
def load_birth_names(
only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
"""Loading birth name dataset from a zip file in the repo"""
# pylint: disable=too-many-locals
tbl_name = "birth_names"
@@ -80,7 +83,7 @@ def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
table_exists = database.has_table_by_name(tbl_name)
if not only_metadata and (not table_exists or force):
load_data(tbl_name, database)
load_data(tbl_name, database, sample=sample)
obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
if not obj:

View File

@@ -29,7 +29,9 @@ from superset.utils import core as utils
from .helpers import get_example_data, merge_slice, misc_dash_slices, TBL
def load_energy(only_metadata: bool = False, force: bool = False) -> None:
def load_energy(
only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
"""Loads an energy related dataset to use with sankey and graphs"""
tbl_name = "energy_usage"
database = utils.get_example_database()
@@ -38,6 +40,7 @@ def load_energy(only_metadata: bool = False, force: bool = False) -> None:
if not only_metadata and (not table_exists or force):
data = get_example_data("energy.json.gz")
pdf = pd.read_json(data)
pdf = pdf.head(100) if sample else pdf
pdf.to_sql(
tbl_name,
database.get_sqla_engine(),

View File

@@ -36,7 +36,9 @@ from .helpers import (
)
def load_unicode_test_data(only_metadata: bool = False, force: bool = False) -> None:
def load_unicode_test_data(
only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
"""Loading unicode test dataset from a csv file in the repo"""
tbl_name = "unicode_test"
database = utils.get_example_database()
@@ -50,6 +52,7 @@ def load_unicode_test_data(only_metadata: bool = False, force: bool = False) ->
# generate date/numeric data
df["dttm"] = datetime.datetime.now().date()
df["value"] = [random.randint(1, 100) for _ in range(len(df))]
df = df.head(100) if sample else df
df.to_sql( # pylint: disable=no-member
tbl_name,
database.get_sqla_engine(),

View File

@@ -41,8 +41,8 @@ from .helpers import (
)
def load_world_bank_health_n_pop( # pylint: disable=too-many-locals
only_metadata: bool = False, force: bool = False
def load_world_bank_health_n_pop( # pylint: disable=too-many-locals, too-many-statements
only_metadata: bool = False, force: bool = False, sample: bool = False,
) -> None:
"""Loads the world bank health dataset, slices and a dashboard"""
tbl_name = "wb_health_population"
@@ -54,6 +54,7 @@ def load_world_bank_health_n_pop( # pylint: disable=too-many-locals
pdf = pd.read_json(data)
pdf.columns = [col.replace(".", "_") for col in pdf.columns]
pdf.year = pd.to_datetime(pdf.year)
pdf = pdf.head(100) if sample else pdf
pdf.to_sql(
tbl_name,
database.get_sqla_engine(),

View File

@@ -677,7 +677,7 @@ class TestChartApi(SupersetTestCase, ApiOwnersTestCaseMixin):
rv = self.post_assert_metric(CHART_DATA_URI, request_payload, "data")
self.assertEqual(rv.status_code, 200)
data = json.loads(rv.data.decode("utf-8"))
self.assertEqual(data["result"][0]["rowcount"], 100)
self.assertEqual(data["result"][0]["rowcount"], 45)
def test_chart_data_limit_offset(self):
"""

View File

@@ -28,13 +28,13 @@ class TestSupersetDataFrame(SupersetTestCase):
self.examples.load_css_templates()
def test_load_energy(self):
self.examples.load_energy()
self.examples.load_energy(sample=True)
def test_load_world_bank_health_n_pop(self):
self.examples.load_world_bank_health_n_pop()
self.examples.load_world_bank_health_n_pop(sample=True)
def test_load_birth_names(self):
self.examples.load_birth_names()
self.examples.load_birth_names(sample=True)
def test_load_test_users_run(self):
from superset.cli import load_test_users_run
@@ -42,4 +42,4 @@ class TestSupersetDataFrame(SupersetTestCase):
load_test_users_run()
def test_load_unicode_test_data(self):
self.examples.load_unicode_test_data()
self.examples.load_unicode_test_data(sample=True)

View File

@@ -231,7 +231,7 @@ class TestSqlaTableModel(SupersetTestCase):
spec.allows_joins = inner_join
arbitrary_gby = "state || gender || '_test'"
arbitrary_metric = dict(
label="arbitrary", expressionType="SQL", sqlExpression="COUNT(1)"
label="arbitrary", expressionType="SQL", sqlExpression="SUM(sum_boys)"
)
query_obj = dict(
groupby=[arbitrary_gby, "name"],
@@ -264,13 +264,30 @@ class TestSqlaTableModel(SupersetTestCase):
return ret
df1 = self.query_with_expr_helper(is_timeseries=True, inner_join=True)
name_list1 = cannonicalize_df(df1).name.values.tolist()
df2 = self.query_with_expr_helper(is_timeseries=True, inner_join=False)
name_list2 = cannonicalize_df(df1).name.values.tolist()
self.assertFalse(df2.empty)
# df1 can be empty if the db does not support join
if not df1.empty:
pandas.testing.assert_frame_equal(
cannonicalize_df(df1), cannonicalize_df(df2)
)
expected_namelist = [
"Anthony",
"Brian",
"Christopher",
"Daniel",
"David",
"Eric",
"James",
"Jeffrey",
"John",
"Joseph",
"Kenneth",
"Kevin",
"Mark",
"Michael",
"Paul",
]
assert name_list2 == expected_namelist
assert name_list1 == expected_namelist
def test_query_with_expr_groupby(self):
self.query_with_expr_helper(is_timeseries=False)

View File

@@ -96,7 +96,7 @@ class TestSqlLab(SupersetTestCase):
f"SELECT * FROM admin_database.{tmp_table_name}"
).fetchall()
self.assertEqual(
75691, len(data)
100, len(data)
) # SQL_MAX_ROW not applied due to the SQLLAB_CTAS_NO_LIMIT set to True
# cleanup