From 1e37f0b41782d35d68f657bfa87aeb9055d1e6e7 Mon Sep 17 00:00:00 2001 From: John Bodley <4567245+john-bodley@users.noreply.github.com> Date: Tue, 31 Oct 2023 10:17:51 -0700 Subject: [PATCH] chore(celery): Cleanup config and async query specifications (#25314) --- docs/docs/installation/alerts-reports.mdx | 31 +++++++------- .../installation/async-queries-celery.mdx | 25 +++--------- docs/docs/installation/cache.mdx | 5 ++- .../installation/running-on-kubernetes.mdx | 40 +++++++++---------- helm/superset/Chart.yaml | 2 +- helm/superset/README.md | 2 +- helm/superset/templates/_helpers.tpl | 13 +++--- superset/config.py | 16 ++------ .../integration_tests/superset_test_config.py | 9 ++--- .../superset_test_config_thumbnails.py | 7 ++-- 10 files changed, 62 insertions(+), 88 deletions(-) diff --git a/docs/docs/installation/alerts-reports.mdx b/docs/docs/installation/alerts-reports.mdx index 41ee19a53..234ac6c9b 100644 --- a/docs/docs/installation/alerts-reports.mdx +++ b/docs/docs/installation/alerts-reports.mdx @@ -104,30 +104,27 @@ REDIS_HOST = "superset_cache" REDIS_PORT = "6379" class CeleryConfig: - broker_url = 'redis://%s:%s/0' % (REDIS_HOST, REDIS_PORT) - imports = ('superset.sql_lab', "superset.tasks", "superset.tasks.thumbnails", ) - result_backend = 'redis://%s:%s/0' % (REDIS_HOST, REDIS_PORT) + broker_url = f"redis://{REDIS_HOST}:{REDIS_PORT}/0" + imports = ( + "superset.sql_lab", + "superset.tasks.scheduler", + ) + result_backend = f"redis://{REDIS_HOST}:{REDIS_PORT}/0" worker_prefetch_multiplier = 10 task_acks_late = True task_annotations = { - 'sql_lab.get_sql_results': { - 'rate_limit': '100/s', - }, - 'email_reports.send': { - 'rate_limit': '1/s', - 'time_limit': 600, - 'soft_time_limit': 600, - 'ignore_result': True, + "sql_lab.get_sql_results": { + "rate_limit": "100/s", }, } beat_schedule = { - 'reports.scheduler': { - 'task': 'reports.scheduler', - 'schedule': crontab(minute='*', hour='*'), + "reports.scheduler": { + "task": "reports.scheduler", + "schedule": crontab(minute="*", hour="*"), }, - 'reports.prune_log': { - 'task': 'reports.prune_log', - 'schedule': crontab(minute=0, hour=0), + "reports.prune_log": { + "task": "reports.prune_log", + "schedule": crontab(minute=0, hour=0), }, } CELERY_CONFIG = CeleryConfig diff --git a/docs/docs/installation/async-queries-celery.mdx b/docs/docs/installation/async-queries-celery.mdx index 49f1ce72a..8d3d62867 100644 --- a/docs/docs/installation/async-queries-celery.mdx +++ b/docs/docs/installation/async-queries-celery.mdx @@ -23,30 +23,17 @@ and web server processes should have the same configuration. ```python class CeleryConfig(object): - broker_url = 'redis://localhost:6379/0' + broker_url = "redis://localhost:6379/0" imports = ( - 'superset.sql_lab', - 'superset.tasks', + "superset.sql_lab", + "superset.tasks.scheduler", ) - result_backend = 'redis://localhost:6379/0' - worker_log_level = 'DEBUG' + result_backend = "redis://localhost:6379/0" worker_prefetch_multiplier = 10 task_acks_late = True task_annotations = { - 'sql_lab.get_sql_results': { - 'rate_limit': '100/s', - }, - 'email_reports.send': { - 'rate_limit': '1/s', - 'time_limit': 120, - 'soft_time_limit': 150, - 'ignore_result': True, - }, - } - beat_schedule = { - 'email_reports.schedule_hourly': { - 'task': 'email_reports.schedule_hourly', - 'schedule': crontab(minute=1, hour='*'), + "sql_lab.get_sql_results": { + "rate_limit": "100/s", }, } diff --git a/docs/docs/installation/cache.mdx b/docs/docs/installation/cache.mdx index 9d50c67b5..0d64ca721 100644 --- a/docs/docs/installation/cache.mdx +++ b/docs/docs/installation/cache.mdx @@ -114,7 +114,10 @@ from s3cache.s3cache import S3Cache class CeleryConfig(object): broker_url = "redis://localhost:6379/0" - imports = ("superset.sql_lab", "superset.tasks", "superset.tasks.thumbnails") + imports = ( + "superset.sql_lab", + "superset.tasks.thumbnails", + ) result_backend = "redis://localhost:6379/0" worker_prefetch_multiplier = 10 task_acks_late = True diff --git a/docs/docs/installation/running-on-kubernetes.mdx b/docs/docs/installation/running-on-kubernetes.mdx index 534565ba7..c9b39ce4c 100644 --- a/docs/docs/installation/running-on-kubernetes.mdx +++ b/docs/docs/installation/running-on-kubernetes.mdx @@ -367,37 +367,35 @@ configOverrides: celery_conf: | from celery.schedules import crontab - class CeleryConfig(object): + class CeleryConfig: broker_url = f"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" - imports = ('superset.sql_lab', "superset.tasks", "superset.tasks.thumbnails", ) + imports = ( + "superset.sql_lab", + "superset.tasks.cache", + "superset.tasks.scheduler", + ) result_backend = f"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" task_annotations = { - 'sql_lab.get_sql_results': { - 'rate_limit': '100/s', - }, - 'email_reports.send': { - 'rate_limit': '1/s', - 'time_limit': 600, - 'soft_time_limit': 600, - 'ignore_result': True, + "sql_lab.get_sql_results": { + "rate_limit": "100/s", }, } beat_schedule = { - 'reports.scheduler': { - 'task': 'reports.scheduler', - 'schedule': crontab(minute='*', hour='*'), + "reports.scheduler": { + "task": "reports.scheduler", + "schedule": crontab(minute="*", hour="*"), }, - 'reports.prune_log': { - 'task': 'reports.prune_log', + "reports.prune_log": { + "task": "reports.prune_log", 'schedule': crontab(minute=0, hour=0), }, 'cache-warmup-hourly': { - 'task': 'cache-warmup', - 'schedule': crontab(minute='*/30', hour='*'), - 'kwargs': { - 'strategy_name': 'top_n_dashboards', - 'top_n': 10, - 'since': '7 days ago', + "task": "cache-warmup", + "schedule": crontab(minute="*/30", hour="*"), + "kwargs": { + "strategy_name": "top_n_dashboards", + "top_n": 10, + "since": "7 days ago", }, } } diff --git a/helm/superset/Chart.yaml b/helm/superset/Chart.yaml index 2d44a1c31..60e2510eb 100644 --- a/helm/superset/Chart.yaml +++ b/helm/superset/Chart.yaml @@ -29,7 +29,7 @@ maintainers: - name: craig-rueda email: craig@craigrueda.com url: https://github.com/craig-rueda -version: 0.10.13 +version: 0.10.14 dependencies: - name: postgresql version: 12.1.6 diff --git a/helm/superset/README.md b/helm/superset/README.md index 9040a1a73..d32ee985f 100644 --- a/helm/superset/README.md +++ b/helm/superset/README.md @@ -23,7 +23,7 @@ NOTE: This file is generated by helm-docs: https://github.com/norwoodj/helm-docs # superset -![Version: 0.10.13](https://img.shields.io/badge/Version-0.10.13-informational?style=flat-square) +![Version: 0.10.14](https://img.shields.io/badge/Version-0.10.14-informational?style=flat-square) Apache Superset is a modern, enterprise-ready business intelligence web application diff --git a/helm/superset/templates/_helpers.tpl b/helm/superset/templates/_helpers.tpl index b450ec3ef..40b769054 100644 --- a/helm/superset/templates/_helpers.tpl +++ b/helm/superset/templates/_helpers.tpl @@ -84,15 +84,14 @@ SQLALCHEMY_DATABASE_URI = f"postgresql+psycopg2://{env('DB_USER')}:{env('DB_PASS SQLALCHEMY_TRACK_MODIFICATIONS = True SECRET_KEY = env('SECRET_KEY', 'thisISaSECRET_1234') -class CeleryConfig(object): - CELERY_IMPORTS = ('superset.sql_lab', ) - CELERY_ANNOTATIONS = {'tasks.add': {'rate_limit': '10/s'}} +class CeleryConfig: + imports = ("superset.sql_lab", ) {{- if .Values.supersetNode.connections.redis_password }} - BROKER_URL = f"redis://:{env('REDIS_PASSWORD')}@{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" - CELERY_RESULT_BACKEND = f"redis://:{env('REDIS_PASSWORD')}@{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" + broker_url = f"redis://:{env('REDIS_PASSWORD')}@{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" + result_backend = f"redis://:{env('REDIS_PASSWORD')}@{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" {{- else }} - BROKER_URL = f"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" - CELERY_RESULT_BACKEND = f"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" + broker_url = f"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" + result_backend = f"redis://{env('REDIS_HOST')}:{env('REDIS_PORT')}/0" {{- end }} CELERY_CONFIG = CeleryConfig diff --git a/superset/config.py b/superset/config.py index a85cbe82e..401dfd2f3 100644 --- a/superset/config.py +++ b/superset/config.py @@ -940,24 +940,16 @@ CELERY_BEAT_SCHEDULER_EXPIRES = timedelta(weeks=1) class CeleryConfig: # pylint: disable=too-few-public-methods broker_url = "sqla+sqlite:///celerydb.sqlite" - imports = ("superset.sql_lab",) + imports = ("superset.sql_lab", "superset.tasks.scheduler") result_backend = "db+sqlite:///celery_results.sqlite" worker_prefetch_multiplier = 1 task_acks_late = False task_annotations = { - "sql_lab.get_sql_results": {"rate_limit": "100/s"}, - "email_reports.send": { - "rate_limit": "1/s", - "time_limit": int(timedelta(seconds=120).total_seconds()), - "soft_time_limit": int(timedelta(seconds=150).total_seconds()), - "ignore_result": True, + "sql_lab.get_sql_results": { + "rate_limit": "100/s", }, } beat_schedule = { - "email_reports.schedule_hourly": { - "task": "email_reports.schedule_hourly", - "schedule": crontab(minute=1, hour="*"), - }, "reports.scheduler": { "task": "reports.scheduler", "schedule": crontab(minute="*", hour="*"), @@ -1558,7 +1550,7 @@ GLOBAL_ASYNC_QUERIES_JWT_COOKIE_SAMESITE: None | ( ) = None GLOBAL_ASYNC_QUERIES_JWT_COOKIE_DOMAIN = None GLOBAL_ASYNC_QUERIES_JWT_SECRET = "test-secret-change-me" -GLOBAL_ASYNC_QUERIES_TRANSPORT = "polling" +GLOBAL_ASYNC_QUERIES_TRANSPORT: Literal["polling", "ws"] = "polling" GLOBAL_ASYNC_QUERIES_POLLING_DELAY = int( timedelta(milliseconds=500).total_seconds() * 1000 ) diff --git a/tests/integration_tests/superset_test_config.py b/tests/integration_tests/superset_test_config.py index bcc314608..89287be66 100644 --- a/tests/integration_tests/superset_test_config.py +++ b/tests/integration_tests/superset_test_config.py @@ -133,11 +133,10 @@ ALERT_REPORTS_QUERY_EXECUTION_MAX_TRIES = 3 class CeleryConfig: - BROKER_URL = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CELERY_DB}" - CELERY_IMPORTS = ("superset.sql_lab",) - CELERY_RESULT_BACKEND = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_RESULTS_DB}" - CELERY_ANNOTATIONS = {"sql_lab.add": {"rate_limit": "10/s"}} - CONCURRENCY = 1 + broker_url = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CELERY_DB}" + imports = ("superset.sql_lab",) + result_backend = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_RESULTS_DB}" + concurrency = 1 CELERY_CONFIG = CeleryConfig diff --git a/tests/integration_tests/superset_test_config_thumbnails.py b/tests/integration_tests/superset_test_config_thumbnails.py index 5bd02e7b0..a761ef661 100644 --- a/tests/integration_tests/superset_test_config_thumbnails.py +++ b/tests/integration_tests/superset_test_config_thumbnails.py @@ -62,10 +62,9 @@ REDIS_RESULTS_DB = os.environ.get("REDIS_RESULTS_DB", 3) class CeleryConfig: - BROKER_URL = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CELERY_DB}" - CELERY_IMPORTS = ("superset.sql_lab", "superset.tasks.thumbnails") - CELERY_ANNOTATIONS = {"sql_lab.add": {"rate_limit": "10/s"}} - CONCURRENCY = 1 + broker_url = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CELERY_DB}" + imports = ("superset.sql_lab", "superset.tasks.thumbnails") + concurrency = 1 CELERY_CONFIG = CeleryConfig