feat: add progress bar to benchmark script (#15719)

* rough draft of benchmark script

* revisions

* revisions

* rough draft of benchmark script

* revisions

* Update requirements/development.in

Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>

* Update superset/utils/mock_data.py

Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>

* more revisions

Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>
This commit is contained in:
AAfghahi 2021-07-19 19:34:58 -04:00 committed by GitHub
parent 9a14aed152
commit 80dd525026
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 51 additions and 92 deletions

View File

@ -25,3 +25,4 @@ psycopg2-binary==2.8.5
tableschema
thrift>=0.11.0,<1.0.0
pygithub>=1.54.1,<2.0.0
progress>=1.5,<2

View File

@ -1,4 +1,4 @@
# SHA1:1b4d15a41f3498d2eb930ac3d3d4ce5d1f218a2f
# SHA1:c470411e2e9cb04b412a94f80a6a9d870bece74d
#
# This file is autogenerated by pip-compile-multi
# To update, run:
@ -6,85 +6,39 @@
# pip-compile-multi
#
-r base.txt
-e file:.
# via -r requirements/base.in
boto3==1.16.10
# via tabulator
botocore==1.19.10
# via
# boto3
# s3transfer
cached-property==1.5.2
# via tableschema
certifi==2020.6.20
# via requests
deprecated==1.2.11
# via pygithub
et-xmlfile==1.0.1
# via openpyxl
flask-cors==3.0.9
# via -r requirements/development.in
future==0.18.2
# via pyhive
ijson==3.1.2.post0
# via tabulator
jdcal==1.4.1
# via openpyxl
jmespath==0.10.0
# via
# boto3
# botocore
jsonlines==1.2.0
# via tabulator
linear-tsv==1.1.0
# via tabulator
mysqlclient==1.4.2.post1
# via -r requirements/development.in
openpyxl==3.0.5
# via tabulator
pillow==7.2.0
# via -r requirements/development.in
psycopg2-binary==2.8.5
# via -r requirements/development.in
pydruid==0.6.1
# via -r requirements/development.in
pygithub==1.54.1
# via -r requirements/development.in
pyhive[hive]==0.6.3
# via -r requirements/development.in
requests==2.24.0
# via
# pydruid
# pygithub
# tableschema
# tabulator
rfc3986==1.4.0
# via tableschema
s3transfer==0.3.3
# via boto3
sasl==0.2.1
# via
# pyhive
# thrift-sasl
tableschema==1.20.0
# via -r requirements/development.in
tabulator==1.52.5
# via tableschema
thrift==0.13.0
# via
# -r requirements/development.in
# pyhive
# thrift-sasl
thrift-sasl==0.4.2
# via pyhive
unicodecsv==0.14.1
# via
# tableschema
# tabulator
wrapt==1.12.1
# via deprecated
xlrd==1.2.0
# via tabulator
-e file:. # via -r requirements/base.in
boto3==1.16.10 # via tabulator
botocore==1.19.10 # via boto3, s3transfer
cached-property==1.5.2 # via tableschema
certifi==2020.6.20 # via requests
deprecated==1.2.11 # via pygithub
et-xmlfile==1.0.1 # via openpyxl
flask-cors==3.0.9 # via -r requirements/development.in
future==0.18.2 # via pyhive
ijson==3.1.2.post0 # via tabulator
jdcal==1.4.1 # via openpyxl
jmespath==0.10.0 # via boto3, botocore
jsonlines==1.2.0 # via tabulator
linear-tsv==1.1.0 # via tabulator
mysqlclient==1.4.2.post1 # via -r requirements/development.in
openpyxl==3.0.5 # via tabulator
pillow==7.2.0 # via -r requirements/development.in
progress==1.5 # via -r requirements/development.in
psycopg2-binary==2.8.5 # via -r requirements/development.in
pydruid==0.6.1 # via -r requirements/development.in
pygithub==1.54.1 # via -r requirements/development.in
pyhive[hive]==0.6.3 # via -r requirements/development.in
requests==2.24.0 # via pydruid, pygithub, tableschema, tabulator
rfc3986==1.4.0 # via tableschema
s3transfer==0.3.3 # via boto3
sasl==0.2.1 # via pyhive, thrift-sasl
tableschema==1.20.0 # via -r requirements/development.in
tabulator==1.52.5 # via tableschema
thrift-sasl==0.4.2 # via pyhive
thrift==0.13.0 # via -r requirements/development.in, pyhive, thrift-sasl
unicodecsv==0.14.1 # via tableschema, tabulator
wrapt==1.12.1 # via deprecated
xlrd==1.2.0 # via tabulator
# The following packages are considered to be unsafe in a requirements file:
# setuptools

View File

@ -29,6 +29,7 @@ from flask import current_app
from flask_appbuilder import Model
from flask_migrate import downgrade, upgrade
from graphlib import TopologicalSorter # pylint: disable=wrong-import-order
from progress.bar import ChargingBar
from sqlalchemy import create_engine, inspect, Table
from sqlalchemy.ext.automap import automap_base
@ -177,18 +178,23 @@ def main(
for model in models:
missing = min_entities - model_rows[model]
if missing > 0:
entities: List[Model] = []
print(f"- Adding {missing} entities to the {model.__name__} model")
bar = ChargingBar("Processing", max=missing)
try:
added_models = add_sample_rows(session, model, missing)
for entity in add_sample_rows(session, model, missing):
entities.append(entity)
bar.next()
except Exception:
session.rollback()
raise
bar.finish()
model_rows[model] = min_entities
session.add_all(entities)
session.commit()
if auto_cleanup:
new_models[model].extend(added_models)
new_models[model].extend(entities)
start = time.time()
upgrade(revision=revision)
duration = time.time() - start

View File

@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false

View File

@ -22,7 +22,7 @@ import random
import string
import sys
from datetime import date, datetime, time, timedelta
from typing import Any, Callable, cast, Dict, List, Optional, Type
from typing import Any, Callable, cast, Dict, Iterator, List, Optional, Type
from uuid import uuid4
import sqlalchemy.sql.sqltypes
@ -232,10 +232,11 @@ def generate_column_data(column: ColumnInfo, num_rows: int) -> List[Any]:
return [gen() for _ in range(num_rows)]
def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Model]:
def add_sample_rows(
session: Session, model: Type[Model], count: int
) -> Iterator[Model]:
"""
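Generate sample entities of a given model, yielding them one at a time.
The entities are only constructed here; adding them to the session and
committing is left to the caller.
:param Session session: session used to query existing rows as samples
:param Model model: a Superset/FAB model
:param int count: how many entities to generate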
"""
@ -245,7 +246,6 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo
relationships = inspector.relationships.items()
samples = session.query(model).limit(count).all() if relationships else []
entities: List[Model] = []
max_primary_key: Optional[int] = None
for i in range(count):
sample = samples[i % len(samples)] if samples else None
@ -276,10 +276,8 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo
else:
kwargs[column.name] = generate_value(column)
entities.append(model(**kwargs))
session.add_all(entities)
return entities
entity = model(**kwargs)
yield entity
def get_valid_foreign_key(column: Column) -> Any: