feat: adding Progress Bar to Benchmark script (#15719)
* rough draft of benchmark script
* revisions
* revisions
* rough draft of benchmark script
* revisions
* Update requirements/development.in
  Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>
* Update superset/utils/mock_data.py
  Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>
* more revisions
Co-authored-by: Beto Dealmeida <roberto@dealmeida.net>
This commit is contained in:
parent
9a14aed152
commit
80dd525026
|
|
@ -25,3 +25,4 @@ psycopg2-binary==2.8.5
|
|||
tableschema
|
||||
thrift>=0.11.0,<1.0.0
|
||||
pygithub>=1.54.1,<2.0.0
|
||||
progress>=1.5,<2
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
# SHA1:1b4d15a41f3498d2eb930ac3d3d4ce5d1f218a2f
|
||||
# SHA1:c470411e2e9cb04b412a94f80a6a9d870bece74d
|
||||
#
|
||||
# This file is autogenerated by pip-compile-multi
|
||||
# To update, run:
|
||||
|
|
@ -6,85 +6,39 @@
|
|||
# pip-compile-multi
|
||||
#
|
||||
-r base.txt
|
||||
-e file:.
|
||||
# via -r requirements/base.in
|
||||
boto3==1.16.10
|
||||
# via tabulator
|
||||
botocore==1.19.10
|
||||
# via
|
||||
# boto3
|
||||
# s3transfer
|
||||
cached-property==1.5.2
|
||||
# via tableschema
|
||||
certifi==2020.6.20
|
||||
# via requests
|
||||
deprecated==1.2.11
|
||||
# via pygithub
|
||||
et-xmlfile==1.0.1
|
||||
# via openpyxl
|
||||
flask-cors==3.0.9
|
||||
# via -r requirements/development.in
|
||||
future==0.18.2
|
||||
# via pyhive
|
||||
ijson==3.1.2.post0
|
||||
# via tabulator
|
||||
jdcal==1.4.1
|
||||
# via openpyxl
|
||||
jmespath==0.10.0
|
||||
# via
|
||||
# boto3
|
||||
# botocore
|
||||
jsonlines==1.2.0
|
||||
# via tabulator
|
||||
linear-tsv==1.1.0
|
||||
# via tabulator
|
||||
mysqlclient==1.4.2.post1
|
||||
# via -r requirements/development.in
|
||||
openpyxl==3.0.5
|
||||
# via tabulator
|
||||
pillow==7.2.0
|
||||
# via -r requirements/development.in
|
||||
psycopg2-binary==2.8.5
|
||||
# via -r requirements/development.in
|
||||
pydruid==0.6.1
|
||||
# via -r requirements/development.in
|
||||
pygithub==1.54.1
|
||||
# via -r requirements/development.in
|
||||
pyhive[hive]==0.6.3
|
||||
# via -r requirements/development.in
|
||||
requests==2.24.0
|
||||
# via
|
||||
# pydruid
|
||||
# pygithub
|
||||
# tableschema
|
||||
# tabulator
|
||||
rfc3986==1.4.0
|
||||
# via tableschema
|
||||
s3transfer==0.3.3
|
||||
# via boto3
|
||||
sasl==0.2.1
|
||||
# via
|
||||
# pyhive
|
||||
# thrift-sasl
|
||||
tableschema==1.20.0
|
||||
# via -r requirements/development.in
|
||||
tabulator==1.52.5
|
||||
# via tableschema
|
||||
thrift==0.13.0
|
||||
# via
|
||||
# -r requirements/development.in
|
||||
# pyhive
|
||||
# thrift-sasl
|
||||
thrift-sasl==0.4.2
|
||||
# via pyhive
|
||||
unicodecsv==0.14.1
|
||||
# via
|
||||
# tableschema
|
||||
# tabulator
|
||||
wrapt==1.12.1
|
||||
# via deprecated
|
||||
xlrd==1.2.0
|
||||
# via tabulator
|
||||
-e file:. # via -r requirements/base.in
|
||||
boto3==1.16.10 # via tabulator
|
||||
botocore==1.19.10 # via boto3, s3transfer
|
||||
cached-property==1.5.2 # via tableschema
|
||||
certifi==2020.6.20 # via requests
|
||||
deprecated==1.2.11 # via pygithub
|
||||
et-xmlfile==1.0.1 # via openpyxl
|
||||
flask-cors==3.0.9 # via -r requirements/development.in
|
||||
future==0.18.2 # via pyhive
|
||||
ijson==3.1.2.post0 # via tabulator
|
||||
jdcal==1.4.1 # via openpyxl
|
||||
jmespath==0.10.0 # via boto3, botocore
|
||||
jsonlines==1.2.0 # via tabulator
|
||||
linear-tsv==1.1.0 # via tabulator
|
||||
mysqlclient==1.4.2.post1 # via -r requirements/development.in
|
||||
openpyxl==3.0.5 # via tabulator
|
||||
pillow==7.2.0 # via -r requirements/development.in
|
||||
progress==1.5 # via -r requirements/development.in
|
||||
psycopg2-binary==2.8.5 # via -r requirements/development.in
|
||||
pydruid==0.6.1 # via -r requirements/development.in
|
||||
pygithub==1.54.1 # via -r requirements/development.in
|
||||
pyhive[hive]==0.6.3 # via -r requirements/development.in
|
||||
requests==2.24.0 # via pydruid, pygithub, tableschema, tabulator
|
||||
rfc3986==1.4.0 # via tableschema
|
||||
s3transfer==0.3.3 # via boto3
|
||||
sasl==0.2.1 # via pyhive, thrift-sasl
|
||||
tableschema==1.20.0 # via -r requirements/development.in
|
||||
tabulator==1.52.5 # via tableschema
|
||||
thrift-sasl==0.4.2 # via pyhive
|
||||
thrift==0.13.0 # via -r requirements/development.in, pyhive, thrift-sasl
|
||||
unicodecsv==0.14.1 # via tableschema, tabulator
|
||||
wrapt==1.12.1 # via deprecated
|
||||
xlrd==1.2.0 # via tabulator
|
||||
|
||||
# The following packages are considered to be unsafe in a requirements file:
|
||||
# setuptools
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ from flask import current_app
|
|||
from flask_appbuilder import Model
|
||||
from flask_migrate import downgrade, upgrade
|
||||
from graphlib import TopologicalSorter # pylint: disable=wrong-import-order
|
||||
from progress.bar import ChargingBar
|
||||
from sqlalchemy import create_engine, inspect, Table
|
||||
from sqlalchemy.ext.automap import automap_base
|
||||
|
||||
|
|
@ -177,18 +178,23 @@ def main(
|
|||
for model in models:
|
||||
missing = min_entities - model_rows[model]
|
||||
if missing > 0:
|
||||
entities: List[Model] = []
|
||||
print(f"- Adding {missing} entities to the {model.__name__} model")
|
||||
bar = ChargingBar("Processing", max=missing)
|
||||
try:
|
||||
added_models = add_sample_rows(session, model, missing)
|
||||
for entity in add_sample_rows(session, model, missing):
|
||||
entities.append(entity)
|
||||
bar.next()
|
||||
except Exception:
|
||||
session.rollback()
|
||||
raise
|
||||
bar.finish()
|
||||
model_rows[model] = min_entities
|
||||
session.add_all(entities)
|
||||
session.commit()
|
||||
|
||||
if auto_cleanup:
|
||||
new_models[model].extend(added_models)
|
||||
|
||||
new_models[model].extend(entities)
|
||||
start = time.time()
|
||||
upgrade(revision=revision)
|
||||
duration = time.time() - start
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ combine_as_imports = true
|
|||
include_trailing_comma = true
|
||||
line_length = 88
|
||||
known_first_party = superset
|
||||
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
|
||||
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,contextlib2,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pathlib2,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,pyhive,pyparsing,pytest,pytz,redis,requests,retry,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,werkzeug,wtforms,wtforms_json,yaml
|
||||
multi_line_output = 3
|
||||
order_by_type = false
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import random
|
|||
import string
|
||||
import sys
|
||||
from datetime import date, datetime, time, timedelta
|
||||
from typing import Any, Callable, cast, Dict, List, Optional, Type
|
||||
from typing import Any, Callable, cast, Dict, Iterator, List, Optional, Type
|
||||
from uuid import uuid4
|
||||
|
||||
import sqlalchemy.sql.sqltypes
|
||||
|
|
@ -232,10 +232,11 @@ def generate_column_data(column: ColumnInfo, num_rows: int) -> List[Any]:
|
|||
return [gen() for _ in range(num_rows)]
|
||||
|
||||
|
||||
def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Model]:
|
||||
def add_sample_rows(
|
||||
session: Session, model: Type[Model], count: int
|
||||
) -> Iterator[Model]:
|
||||
"""
|
||||
Add entities of a given model.
|
||||
|
||||
:param Model model: a Superset/FAB model
|
||||
:param int count: how many entities to generate and insert
|
||||
"""
|
||||
|
|
@ -245,7 +246,6 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo
|
|||
relationships = inspector.relationships.items()
|
||||
samples = session.query(model).limit(count).all() if relationships else []
|
||||
|
||||
entities: List[Model] = []
|
||||
max_primary_key: Optional[int] = None
|
||||
for i in range(count):
|
||||
sample = samples[i % len(samples)] if samples else None
|
||||
|
|
@ -276,10 +276,8 @@ def add_sample_rows(session: Session, model: Type[Model], count: int) -> List[Mo
|
|||
else:
|
||||
kwargs[column.name] = generate_value(column)
|
||||
|
||||
entities.append(model(**kwargs))
|
||||
|
||||
session.add_all(entities)
|
||||
return entities
|
||||
entity = model(**kwargs)
|
||||
yield entity
|
||||
|
||||
|
||||
def get_valid_foreign_key(column: Column) -> Any:
|
||||
|
|
|
|||
Loading…
Reference in New Issue