241 lines
9.5 KiB
Python
241 lines
9.5 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from functools import wraps
|
|
from typing import Any, Callable, Dict, Optional, TYPE_CHECKING, Union
|
|
|
|
from flask import current_app as app, request
|
|
from flask_caching import Cache
|
|
from flask_caching.backends import NullCache
|
|
from werkzeug.wrappers.etag import ETagResponseMixin
|
|
|
|
from superset import db
|
|
from superset.extensions import cache_manager
|
|
from superset.models.cache import CacheKey
|
|
from superset.utils.core import json_int_dttm_ser
|
|
from superset.utils.hashing import md5_sha_from_dict
|
|
|
|
if TYPE_CHECKING:
|
|
from superset.stats_logger import BaseStatsLogger
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Configuration of the current Flask application; read once at import time.
config = app.config

# Stats sink taken from the ``STATS_LOGGER`` configuration entry.
stats_logger: BaseStatsLogger = config["STATS_LOGGER"]
|
|
|
|
|
|
def generate_cache_key(values_dict: Dict[str, Any], key_prefix: str = "") -> str:
    """Build a cache key from a dictionary of values.

    The key is ``key_prefix`` followed by the hash string that
    ``md5_sha_from_dict`` produces for ``values_dict`` (with
    ``json_int_dttm_ser`` as the fallback serializer).

    :param values_dict: the values that identify the cached entry.
    :param key_prefix: optional string placed in front of the hash.
    :returns: the prefixed hash string.
    """
    return f"{key_prefix}{md5_sha_from_dict(values_dict, default=json_int_dttm_ser)}"
|
|
|
|
|
|
def set_and_log_cache(
    cache_instance: Cache,
    cache_key: str,
    cache_value: Dict[str, Any],
    cache_timeout: Optional[int] = None,
    datasource_uid: Optional[str] = None,
) -> None:
    """Store a value in the cache and record that the key was written.

    The stored value is a copy of ``cache_value`` with an extra ``"dttm"``
    entry holding the current UTC time (ISO format, microseconds dropped).
    On success the ``set_cache_key`` counter is incremented on the stats
    logger. When ``datasource_uid`` is given and the
    ``STORE_CACHE_KEYS_IN_METADATA_DB`` setting is truthy, a ``CacheKey`` row
    is added to the database session (it is not committed here).

    Nothing happens when the cache backend is a ``NullCache``. Failures are
    logged and swallowed: caching is best-effort and never raises.

    :param cache_instance: the cache to write to.
    :param cache_key: key under which the value is stored.
    :param cache_value: dictionary to cache; it is not mutated.
    :param cache_timeout: timeout in seconds; ``None`` falls back to the
        ``CACHE_DEFAULT_TIMEOUT`` setting.
    :param datasource_uid: identifier recorded alongside the cache key.
    """
    if isinstance(cache_instance.cache, NullCache):
        return

    # Fall back to the configured default only when no timeout was supplied.
    # An explicit 0 is a meaningful value (Flask-Caching caches forever) and
    # must not be replaced by the default.
    timeout = (
        cache_timeout
        if cache_timeout is not None
        else config["CACHE_DEFAULT_TIMEOUT"]
    )
    try:
        dttm = datetime.utcnow().isoformat().split(".")[0]
        value = {**cache_value, "dttm": dttm}
        cache_instance.set(cache_key, value, timeout=timeout)
        stats_logger.incr("set_cache_key")

        if datasource_uid and config["STORE_CACHE_KEYS_IN_METADATA_DB"]:
            ck = CacheKey(
                cache_key=cache_key,
                cache_timeout=cache_timeout,
                datasource_uid=datasource_uid,
            )
            db.session.add(ck)
    except Exception as ex:  # pylint: disable=broad-except
        # cache.set call can fail if the backend is down or if
        # the key is too large or whatever other reasons
        logger.warning("Could not cache key %s", cache_key)
        logger.exception(ex)
|
|
|
|
|
|
# If a user sets `max_age` to 0, for how long should the browser cache the
# resource? Flask-Caching will cache forever, but for the HTTP header we need
# to specify a "far future" date.
ONE_YEAR = 365 * 24 * 60 * 60  # 1 year in seconds
|
|
|
|
|
|
def view_cache_key(*args: Any, **kwargs: Any) -> str:  # pylint: disable=unused-argument
    """Return a cache key for the current request.

    The key has the form ``view/<request path>/<digest>``, where the digest is
    computed by ``generate_cache_key`` from the request's query arguments
    (first value of each argument).

    The builtin ``hash()`` is deliberately not used: string hashes are salted
    per interpreter process, so keys built from it differ between processes
    and restarts and cannot be shared through a common cache backend.

    The positional and keyword arguments are accepted so the function can be
    used as a ``key`` callable, but they are ignored.
    """
    return generate_cache_key(
        request.args.to_dict(), key_prefix=f"view/{request.path}/"
    )
|
|
|
|
|
|
def memoized_func(
    key: Callable[..., str] = view_cache_key,
    cache: Cache = cache_manager.cache,
) -> Callable[..., Any]:
    """Use this decorator to cache functions that have predefined first arg.

    The decorated function understands three optional keyword arguments. They
    are read by the wrapper and are also passed on unchanged to both the
    ``key`` callable and the decorated function:

    ``cache`` is treated as True by default; when ``cache=False`` is passed,
    the decorated function is called directly and the cache is neither read
    nor written.

    ``force`` is treated as False by default; when ``force=True`` is passed,
    the cache is not consulted and the freshly computed result overwrites the
    stored one.

    ``cache_timeout`` (seconds) is forwarded as the ``timeout`` of the cache
    write. When it is not passed, ``None`` is forwarded, in which case the
    cache backend's own default is expected to apply.

    A cached value of ``None`` is indistinguishable from a miss, so functions
    returning ``None`` are recomputed on every call.

    :param key: a callable function that takes function arguments and returns
                the caching key.
    :param cache: a FlaskCache instance that will store the cache.
    :returns: a decorator for functions whose first argument is ``self``.
    """

    def wrap(f: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(f)
        def wrapped_f(self: Any, *args: Any, **kwargs: Any) -> Any:
            if not kwargs.get("cache", True):
                return f(self, *args, **kwargs)

            cache_key = key(self, *args, **kwargs)
            # Only read the cache when the caller did not ask for a refresh.
            if not kwargs.get("force"):
                cached = cache.get(cache_key)
                if cached is not None:
                    return cached

            obj = f(self, *args, **kwargs)
            cache.set(cache_key, obj, timeout=kwargs.get("cache_timeout"))
            return obj

        return wrapped_f

    return wrap
|
|
|
|
|
|
def etag_cache(
    cache: Cache = cache_manager.cache,
    get_last_modified: Optional[Callable[..., datetime]] = None,
    max_age: Optional[Union[int, float]] = None,
    raise_for_access: Optional[Callable[..., Any]] = None,
    skip: Optional[Callable[..., bool]] = None,
) -> Callable[..., Any]:
    """
    A decorator for caching views and handling etag conditional requests.

    The decorator adds headers to GET requests that help with caching: Last-
    Modified, Expires and ETag. It also handles conditional requests (for
    example when the client sends an If-None-Match or If-Modified-Since
    header) by returning the response through ``make_conditional``.

    If a cache is set, the decorator will cache GET responses, bypassing the
    dataframe serialization. POST requests will still benefit from the
    dataframe cache for requests that produce the same SQL.

    :param cache: the cache in which responses are stored.
    :param get_last_modified: optional callable, invoked with the view's
        arguments, returning when the content last changed. A cached response
        whose Last-Modified is older than that is discarded.
    :param max_age: cache timeout in seconds, also used to compute Expires.
        Defaults to the ``CACHE_DEFAULT_TIMEOUT`` setting; 0 results in an
        Expires date one year ahead.
    :param raise_for_access: optional callable, invoked with the view's
        arguments. If it raises, the cache is bypassed and the view is called
        directly so that it can produce the error response itself.
    :param skip: optional callable, invoked with the view's arguments. A
        truthy result bypasses the cache for that request.
    :returns: a decorator. The decorated view additionally exposes
        ``uncached``, ``cache_timeout`` and ``make_cache_key`` attributes.
    """
    if max_age is None:
        max_age = app.config["CACHE_DEFAULT_TIMEOUT"]

    def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(f)
        def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
            # Check if the user can access the resource
            if raise_for_access:
                try:
                    raise_for_access(*args, **kwargs)
                except Exception:  # pylint: disable=broad-except
                    # If there's no access, bypass the cache and let the function
                    # handle the response.
                    return f(*args, **kwargs)

            # for POST requests we can't set cache headers, use the response
            # cache nor use conditional requests; this will still use the
            # dataframe cache in `superset/viz.py`, though.
            if request.method == "POST" or (skip and skip(*args, **kwargs)):
                return f(*args, **kwargs)

            response = None
            # Stays None when the key could not be built, so that the store
            # step below is skipped instead of failing on an unbound name.
            cache_key = None
            try:
                # build the cache key from the function arguments and any
                # other additional GET arguments (like `form_data`, eg).
                key_kwargs = kwargs.copy()
                key_kwargs.update(request.args)
                cache_key = wrapper.make_cache_key(  # type: ignore
                    f, *args, **key_kwargs
                )
                response = cache.get(cache_key)
            except Exception:  # pylint: disable=broad-except
                if app.debug:
                    raise
                logger.exception("Exception possibly due to cache backend.")

            # Check if the cache is stale. Default the content_changed_time to now
            # if we don't know when it was last modified.
            content_changed_time = datetime.utcnow()
            if get_last_modified:
                content_changed_time = get_last_modified(*args, **kwargs)
                if (
                    response
                    and response.last_modified
                    and response.last_modified.timestamp()
                    < content_changed_time.timestamp()
                ):
                    # Bypass the cache if the response is stale
                    response = None

            # if no response was cached, compute it using the wrapped function
            if response is None:
                response = f(*args, **kwargs)

                # add headers for caching: Last Modified, Expires and ETag
                # always revalidate the cache if we're checking permissions or
                # if the response was modified
                if get_last_modified or raise_for_access:
                    # `Cache-Control: no-cache` asks the browser to always store
                    # the cache, but also must validate it with the server.
                    response.cache_control.no_cache = True
                else:
                    # `Cache-Control: Public` asks the browser to always store
                    # the cache.
                    response.cache_control.public = True

                response.last_modified = content_changed_time
                expiration = max_age or ONE_YEAR  # max_age=0 also means far future
                response.expires = response.last_modified + timedelta(
                    seconds=expiration
                )
                response.add_etag()

                # if we have a cache (and a key), store the response from the
                # request
                if cache_key is not None:
                    try:
                        cache.set(cache_key, response, timeout=max_age)
                    except Exception:  # pylint: disable=broad-except
                        if app.debug:
                            raise
                        logger.exception("Exception possibly due to cache backend.")

            return response.make_conditional(request)

        wrapper.uncached = f  # type: ignore
        wrapper.cache_timeout = max_age  # type: ignore
        wrapper.make_cache_key = cache._memoize_make_cache_key(  # type: ignore # pylint: disable=protected-access
            make_name=None, timeout=max_age
        )

        return wrapper

    return decorator
|