diff --git a/UPDATING.md b/UPDATING.md index e00532fb4..c3d1de14d 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -26,6 +26,7 @@ assists people when migrating to a new version. ### Breaking Changes +- [18976](https://github.com/apache/superset/pull/18976): When running the app in debug mode, the app will default to use `SimpleCache` for `FILTER_STATE_CACHE_CONFIG` and `EXPLORE_FORM_DATA_CACHE_CONFIG`. When running in non-debug mode, a cache backend will need to be defined, otherwise the application will fail to start. For installations using Redis or other caching backends, it is recommended to use the same backend for both cache configs. - [17881](https://github.com/apache/superset/pull/17881): Previously simple adhoc filter values on string columns were stripped of enclosing single and double quotes. To fully support literal quotes in filters, both single and double quotes will no longer be removed from filter values. - [17984](https://github.com/apache/superset/pull/17984): Default Flask SECRET_KEY has changed for security reasons. You should always override with your own secret. Set `PREVIOUS_SECRET_KEY` (ex: PREVIOUS_SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h") with your previous key and use `superset re-encrypt-secrets` to rotate you current secrets - [15254](https://github.com/apache/superset/pull/15254): Previously `QUERY_COST_FORMATTERS_BY_ENGINE`, `SQL_VALIDATORS_BY_ENGINE` and `SCHEDULED_QUERIES` were expected to be defined in the feature flag dictionary in the `config.py` file. These should now be defined as a top-level config, with the feature flag dictionary being reserved for boolean only values. diff --git a/docs/docs/installation/cache.mdx b/docs/docs/installation/cache.mdx index 4a4258a60..e86382b3c 100644 --- a/docs/docs/installation/cache.mdx +++ b/docs/docs/installation/cache.mdx @@ -7,20 +7,25 @@ version: 1 ## Caching -Superset uses [Flask-Caching](https://flask-caching.readthedocs.io/) for caching purpose. 
For security reasons, -there are two separate cache configs for Superset's own metadata (`CACHE_CONFIG`) and charting data queried from -connected datasources (`DATA_CACHE_CONFIG`). However, Query results from SQL Lab are stored in another backend -called `RESULTS_BACKEND`, See [Async Queries via Celery](/docs/installation/async-queries-celery) for details. - -Configuring caching is as easy as providing `CACHE_CONFIG` and `DATA_CACHE_CONFIG` in your +Superset uses [Flask-Caching](https://flask-caching.readthedocs.io/) for caching purposes. Configuring caching is as easy as providing a custom cache config in your `superset_config.py` that complies with [the Flask-Caching specifications](https://flask-caching.readthedocs.io/en/latest/#configuring-flask-caching). - Flask-Caching supports various caching backends, including Redis, Memcached, SimpleCache (in-memory), or the -local filesystem. +local filesystem. Custom cache backends are also supported. See [here](https://flask-caching.readthedocs.io/en/latest/#custom-cache-backends) for specifics. +The following cache configurations can be customized: +- Metadata cache (optional): `CACHE_CONFIG` +- Charting data queried from datasets (optional): `DATA_CACHE_CONFIG` +- SQL Lab query results (optional): `RESULTS_BACKEND`. See [Async Queries via Celery](/docs/installation/async-queries-celery) for details +- Dashboard filter state (required): `FILTER_STATE_CACHE_CONFIG` +- Explore chart form data (required): `EXPLORE_FORM_DATA_CACHE_CONFIG` +Please note that Dashboard and Explore caching is required. When running Superset in debug mode, both Explore and Dashboard caches will default to `SimpleCache`; +however, trying to run Superset in non-debug mode without defining a cache for these will cause the application to fail on startup. When running +Superset in single-worker mode, any cache backend is supported. However, when running Superset in a multi-worker setup, a dedicated cache is required. 
For this +we recommend using either Redis or Memcached: + +- Redis (recommended): we recommend the [redis](https://pypi.python.org/pypi/redis) Python package - Memcached: we recommend using [pylibmc](https://pypi.org/project/pylibmc/) client library as `python-memcached` does not handle storing binary data correctly. -- Redis: we recommend the [redis](https://pypi.python.org/pypi/redis) Python package Both of these libraries can be installed using pip. @@ -28,16 +33,7 @@ For chart data, Superset goes up a “timeout search path”, from a slice's con to the datasource’s, the database’s, then ultimately falls back to the global default defined in `DATA_CACHE_CONFIG`. -``` -DATA_CACHE_CONFIG = { - 'CACHE_TYPE': 'redis', - 'CACHE_DEFAULT_TIMEOUT': 60 * 60 * 24, # 1 day default (in secs) - 'CACHE_KEY_PREFIX': 'superset_results', - 'CACHE_REDIS_URL': 'redis://localhost:6379/0', -} -``` - -Custom cache backends are also supported. See [here](https://flask-caching.readthedocs.io/en/latest/#custom-cache-backends) for specifics. +## Celery beat Superset has a Celery task that will periodically warm up the cache based on different strategies. 
To use it, add the following to the `CELERYBEAT_SCHEDULE` section in `config.py`: diff --git a/superset/config.py b/superset/config.py index bad83615c..86d7b45c4 100644 --- a/superset/config.py +++ b/superset/config.py @@ -544,7 +544,7 @@ EXTRA_SEQUENTIAL_COLOR_SCHEMES: List[Dict[str, Any]] = [] # --------------------------------------------------- THUMBNAIL_SELENIUM_USER = "admin" THUMBNAIL_CACHE_CONFIG: CacheConfig = { - "CACHE_TYPE": "null", + "CACHE_TYPE": "NullCache", "CACHE_NO_NULL_WARNING": True, } @@ -581,26 +581,24 @@ IMG_UPLOAD_URL = "/static/uploads/" CACHE_DEFAULT_TIMEOUT = int(timedelta(days=1).total_seconds()) # Default cache for Superset objects -CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "null"} +CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"} # Cache for datasource metadata and query results -DATA_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "null"} +DATA_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"} -# Cache for filters state +# Cache for dashboard filter state (`CACHE_TYPE` defaults to `SimpleCache` when +# running in debug mode unless overridden) FILTER_STATE_CACHE_CONFIG: CacheConfig = { - "CACHE_TYPE": "FileSystemCache", - "CACHE_DIR": os.path.join(DATA_DIR, "cache"), "CACHE_DEFAULT_TIMEOUT": int(timedelta(days=90).total_seconds()), - "CACHE_THRESHOLD": 0, + # should the timeout be reset when retrieving a cached value "REFRESH_TIMEOUT_ON_RETRIEVAL": True, } -# Cache for chart form data +# Cache for explore form data state (`CACHE_TYPE` defaults to `SimpleCache` when +# running in debug mode unless overridden) EXPLORE_FORM_DATA_CACHE_CONFIG: CacheConfig = { - "CACHE_TYPE": "FileSystemCache", - "CACHE_DIR": os.path.join(DATA_DIR, "cache"), "CACHE_DEFAULT_TIMEOUT": int(timedelta(days=7).total_seconds()), - "CACHE_THRESHOLD": 0, + # should the timeout be reset when retrieving a cached value "REFRESH_TIMEOUT_ON_RETRIEVAL": True, } diff --git a/superset/typing.py b/superset/typing.py index 66b6cd4c3..253d2b635 100644 --- 
a/superset/typing.py +++ b/superset/typing.py @@ -15,20 +15,8 @@ # specific language governing permissions and limitations # under the License. from datetime import datetime -from typing import ( - Any, - Callable, - Dict, - List, - Optional, - Sequence, - Tuple, - TYPE_CHECKING, - Union, -) +from typing import Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union -from flask import Flask -from flask_caching import Cache from typing_extensions import Literal, TypedDict from werkzeug.wrappers import Response @@ -69,7 +57,7 @@ class AdhocColumn(TypedDict, total=False): sqlExpression: Optional[str] -CacheConfig = Union[Callable[[Flask], Cache], Dict[str, Any]] +CacheConfig = Dict[str, Any] DbapiDescriptionRow = Tuple[ str, str, Optional[str], Optional[str], Optional[int], Optional[int], bool ] diff --git a/superset/utils/cache.py b/superset/utils/cache.py index e7bdc35ac..c10f296e1 100644 --- a/superset/utils/cache.py +++ b/superset/utils/cache.py @@ -55,7 +55,11 @@ def set_and_log_cache( if isinstance(cache_instance.cache, NullCache): return - timeout = cache_timeout if cache_timeout else config["CACHE_DEFAULT_TIMEOUT"] + timeout = ( + cache_timeout + if cache_timeout is not None + else app.config["CACHE_DEFAULT_TIMEOUT"] + ) try: dttm = datetime.utcnow().isoformat().split(".")[0] value = {**cache_value, "dttm": dttm} diff --git a/superset/utils/cache_manager.py b/superset/utils/cache_manager.py index e92d930d2..a0c759035 100644 --- a/superset/utils/cache_manager.py +++ b/superset/utils/cache_manager.py @@ -14,9 +14,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+import logging
+import math
+
 from flask import Flask
+from flask_babel import gettext as _
 from flask_caching import Cache
 
+logger = logging.getLogger(__name__)
+
 
 class CacheManager:
     def __init__(self) -> None:
@@ -28,41 +34,57 @@ class CacheManager:
         self._filter_state_cache = Cache()
         self._explore_form_data_cache = Cache()
 
+    @staticmethod
+    def _init_cache(
+        app: Flask, cache: Cache, cache_config_key: str, required: bool = False
+    ) -> None:
+        """
+        Initialize `cache` from the config stored under `cache_config_key`.
+
+        In debug mode a missing `CACHE_TYPE` falls back to `SimpleCache`. A
+        required cache must resolve to a real backend: a missing or null
+        `CACHE_TYPE` raises instead of letting the app start without it.
+        """
+        cache_config = app.config[cache_config_key]
+        cache_type = cache_config.get("CACHE_TYPE")
+        if app.debug and cache_type is None:
+            cache_type = "SimpleCache"
+            cache_threshold = cache_config.get("CACHE_THRESHOLD", math.inf)
+            cache_config.update(
+                {"CACHE_TYPE": cache_type, "CACHE_THRESHOLD": cache_threshold}
+            )
+
+        if "CACHE_DEFAULT_TIMEOUT" not in cache_config:
+            default_timeout = app.config.get("CACHE_DEFAULT_TIMEOUT")
+            cache_config["CACHE_DEFAULT_TIMEOUT"] = default_timeout
+
+        # A required cache with no configured backend (`None`) is as unusable
+        # as an explicit null cache, so both are rejected here.
+        if required and cache_type in (None, "null", "NullCache"):
+            raise Exception(
+                _(
+                    "The CACHE_TYPE `%(cache_type)s` for `%(cache_config_key)s` is not "
+                    "supported. It is recommended to use `RedisCache`, "
+                    "`MemcachedCache` or another dedicated caching backend for "
+                    "production deployments",
+                    cache_type=cache_type,
+                    cache_config_key=cache_config_key,
+                ),
+            )
+        cache.init_app(app, cache_config)
+
     def init_app(self, app: Flask) -> None:
-        self._cache.init_app(
-            app,
-            {
-                "CACHE_DEFAULT_TIMEOUT": app.config["CACHE_DEFAULT_TIMEOUT"],
-                **app.config["CACHE_CONFIG"],
-            },
+        self._init_cache(app, self._cache, "CACHE_CONFIG")
+        self._init_cache(app, self._data_cache, "DATA_CACHE_CONFIG")
+        self._init_cache(app, self._thumbnail_cache, "THUMBNAIL_CACHE_CONFIG")
+        self._init_cache(
+            app, self._filter_state_cache, "FILTER_STATE_CACHE_CONFIG", required=True
         )
-        self._data_cache.init_app(
+        self._init_cache(
             app,
-            {
-                "CACHE_DEFAULT_TIMEOUT": app.config["CACHE_DEFAULT_TIMEOUT"],
-                **app.config["DATA_CACHE_CONFIG"],
-            },
-        )
-        self._thumbnail_cache.init_app(
-            app,
-            {
-                "CACHE_DEFAULT_TIMEOUT": app.config["CACHE_DEFAULT_TIMEOUT"],
-                **app.config["THUMBNAIL_CACHE_CONFIG"],
-            },
-        )
-        self._filter_state_cache.init_app(
-            app,
-            {
-                "CACHE_DEFAULT_TIMEOUT": app.config["CACHE_DEFAULT_TIMEOUT"],
-                **app.config["FILTER_STATE_CACHE_CONFIG"],
-            },
-        )
-        self._explore_form_data_cache.init_app(
-            app,
-            {
-                "CACHE_DEFAULT_TIMEOUT": app.config["CACHE_DEFAULT_TIMEOUT"],
-                **app.config["EXPLORE_FORM_DATA_CACHE_CONFIG"],
-            },
+            self._explore_form_data_cache,
+            "EXPLORE_FORM_DATA_CACHE_CONFIG",
+            required=True,
         )
 
     @property
diff --git a/tests/integration_tests/cache_tests.py b/tests/integration_tests/cache_tests.py
index 62edb514b..a7da8a50d 100644
--- a/tests/integration_tests/cache_tests.py
+++ b/tests/integration_tests/cache_tests.py
@@ -43,7 +43,7 @@ class TestCache(SupersetTestCase):
     @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
     def test_no_data_cache(self):
         data_cache_config = app.config["DATA_CACHE_CONFIG"]
-        app.config["DATA_CACHE_CONFIG"] = {"CACHE_TYPE": "null"}
+        app.config["DATA_CACHE_CONFIG"] =
{"CACHE_TYPE": "NullCache"} cache_manager.init_app(app) slc = self.get_slice("Girls", db.session) @@ -68,9 +68,8 @@ class TestCache(SupersetTestCase): cache_default_timeout = app.config["CACHE_DEFAULT_TIMEOUT"] app.config["CACHE_DEFAULT_TIMEOUT"] = 100 app.config["DATA_CACHE_CONFIG"] = { - "CACHE_TYPE": "simple", + "CACHE_TYPE": "SimpleCache", "CACHE_DEFAULT_TIMEOUT": 10, - "CACHE_KEY_PREFIX": "superset_data_cache", } cache_manager.init_app(app) diff --git a/tests/integration_tests/dashboards/filter_state/api_tests.py b/tests/integration_tests/dashboards/filter_state/api_tests.py index f89efce29..3816f6ac8 100644 --- a/tests/integration_tests/dashboards/filter_state/api_tests.py +++ b/tests/integration_tests/dashboards/filter_state/api_tests.py @@ -62,8 +62,6 @@ def admin_id() -> int: @pytest.fixture(autouse=True) def cache(dashboard_id, admin_id): - app.config["FILTER_STATE_CACHE_CONFIG"] = {"CACHE_TYPE": "SimpleCache"} - cache_manager.init_app(app) entry: Entry = {"owner": admin_id, "value": value} cache_manager.filter_state_cache.set(cache_key(dashboard_id, key), entry) diff --git a/tests/integration_tests/explore/form_data/api_tests.py b/tests/integration_tests/explore/form_data/api_tests.py index 5e97aae6b..4b646a035 100644 --- a/tests/integration_tests/explore/form_data/api_tests.py +++ b/tests/integration_tests/explore/form_data/api_tests.py @@ -74,8 +74,6 @@ def dataset_id() -> int: @pytest.fixture(autouse=True) def cache(chart_id, admin_id, dataset_id): - app.config["EXPLORE_FORM_DATA_CACHE_CONFIG"] = {"CACHE_TYPE": "SimpleCache"} - cache_manager.init_app(app) entry: TemporaryExploreState = { "owner": admin_id, "dataset_id": dataset_id, diff --git a/tests/integration_tests/superset_test_config.py b/tests/integration_tests/superset_test_config.py index 698440c36..7c8623282 100644 --- a/tests/integration_tests/superset_test_config.py +++ b/tests/integration_tests/superset_test_config.py @@ -15,6 +15,7 @@ # specific language governing permissions and 
limitations # under the License. # type: ignore +import math from copy import copy from datetime import timedelta @@ -85,10 +86,9 @@ REDIS_CELERY_DB = os.environ.get("REDIS_CELERY_DB", 2) REDIS_RESULTS_DB = os.environ.get("REDIS_RESULTS_DB", 3) REDIS_CACHE_DB = os.environ.get("REDIS_CACHE_DB", 4) -CACHE_DEFAULT_TIMEOUT = int(timedelta(minutes=10).total_seconds()) CACHE_CONFIG = { - "CACHE_TYPE": "redis", + "CACHE_TYPE": "RedisCache", "CACHE_DEFAULT_TIMEOUT": int(timedelta(minutes=1).total_seconds()), "CACHE_KEY_PREFIX": "superset_cache", "CACHE_REDIS_URL": f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CACHE_DB}", @@ -100,6 +100,18 @@ DATA_CACHE_CONFIG = { "CACHE_KEY_PREFIX": "superset_data_cache", } +FILTER_STATE_CACHE_CONFIG = { + "CACHE_TYPE": "SimpleCache", + "CACHE_THRESHOLD": math.inf, + "CACHE_DEFAULT_TIMEOUT": int(timedelta(minutes=10).total_seconds()), +} + +EXPLORE_FORM_DATA_CACHE_CONFIG = { + "CACHE_TYPE": "SimpleCache", + "CACHE_THRESHOLD": math.inf, + "CACHE_DEFAULT_TIMEOUT": int(timedelta(minutes=10).total_seconds()), +} + GLOBAL_ASYNC_QUERIES_JWT_SECRET = "test-secret-change-me-test-secret-change-me" ALERT_REPORTS_WORKING_TIME_OUT_KILL = True diff --git a/tests/integration_tests/superset_test_config_thumbnails.py b/tests/integration_tests/superset_test_config_thumbnails.py index 964164cf7..9f621efab 100644 --- a/tests/integration_tests/superset_test_config_thumbnails.py +++ b/tests/integration_tests/superset_test_config_thumbnails.py @@ -53,7 +53,7 @@ PUBLIC_ROLE_LIKE = "Gamma" AUTH_ROLE_PUBLIC = "Public" EMAIL_NOTIFICATIONS = False -CACHE_CONFIG = {"CACHE_TYPE": "simple"} +CACHE_CONFIG = {"CACHE_TYPE": "SimpleCache"} REDIS_HOST = os.environ.get("REDIS_HOST", "localhost") REDIS_PORT = os.environ.get("REDIS_PORT", "6379") @@ -79,7 +79,7 @@ FEATURE_FLAGS = { } THUMBNAIL_CACHE_CONFIG = { - "CACHE_TYPE": "redis", + "CACHE_TYPE": "RedisCache", "CACHE_DEFAULT_TIMEOUT": 10000, "CACHE_KEY_PREFIX": "superset_thumbnails_", "CACHE_REDIS_HOST": REDIS_HOST,