fix: add datasource.changed_on to cache_key (#8901)
* Add datasource.changed_on to cache_key and add/fix related unit tests
* Add note to UPDATING.md
* Remove redundant comment about metric names
parent 2b0c7a1b73
commit c087a48d52
UPDATING.md
@@ -22,8 +22,12 @@ This file documents any backwards-incompatible changes in Superset and
 assists people when migrating to a new version.
 
 ## Next
+* [8901](https://github.com/apache/incubator-superset/pull/8901): The datasource's update
+timestamp has been added to the query object's cache key to ensure updates to
+datasources are always reflected in associated query results. As a consequence all
+previously cached results will be invalidated when updating to the next version.
 
 * [8732](https://github.com/apache/incubator-superset/pull/8732): Swagger user interface is now enabled by default.
 A new permission `show on SwaggerView` is created by `superset init` and given to the `Admin` Role. To disable the UI,
 set `FAB_API_SWAGGER_UI = False` on config.
 
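The invalidation warning above follows directly from how keys are built: the cache key is an md5 digest of the query's serialized attributes (see `BaseViz.cache_key` further down), so adding a new field changes every digest. A minimal sketch of that effect — the payload shape and timestamp are illustrative, not Superset's exact internals:

```python
import hashlib
import json
from datetime import datetime

def md5_cache_key(cache_dict: dict) -> str:
    # Deterministic serialization followed by an md5 digest, mirroring the
    # json_dumps + hashlib.md5 scheme visible in BaseViz.cache_key below.
    json_data = json.dumps(cache_dict, sort_keys=True, default=str)
    return hashlib.md5(json_data.encode("utf-8")).hexdigest()

payload = {"datasource": "1__table", "metrics": ["sum__num"]}

# Once changed_on is part of the payload, every key differs from the
# pre-upgrade keys, so previously cached results can no longer be hit.
key_without = md5_cache_key(payload)
key_with = md5_cache_key({**payload, "changed_on": datetime(2020, 1, 17)})
assert key_without != key_with
```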
@@ -90,9 +94,9 @@ which adds missing non-nullable fields to the `datasources` table. Depending on
 the integrity of the data, manual intervention may be required.
 
 * [5452](https://github.com/apache/incubator-superset/pull/5452): a change
 which adds missing non-nullable fields and uniqueness constraints (which may be
 case insensitive depending on your database configuration) to the `columns` and
 `table_columns` tables. Depending on the integrity of the data, manual
 intervention may be required.
 * `fabmanager` command line is deprecated since Flask-AppBuilder 2.0.0, use
 the new `flask fab <command>` integrated with *Flask cli*.

@@ -100,7 +104,7 @@ the new `flask fab <command>` integrated with *Flask cli*.
 `FAB_UPDATE_PERMS` config boolean key. To disable automatic
 creation of permissions set `FAB_UPDATE_PERMS = False` on config.
 * [5453](https://github.com/apache/incubator-superset/pull/5453): a change
 which adds missing non-nullable fields and uniqueness constraints (which may be
 case insensitive depending on your database configuration) to the metrics
 and sql_metrics tables. Depending on the integrity of the data, manual
 intervention may be required.

setup.cfg
@@ -45,7 +45,7 @@ combine_as_imports = true
 include_trailing_comma = true
 line_length = 88
 known_first_party = superset
-known_third_party =alembic,backoff,bleach,celery,click,colorama,contextlib2,croniter,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parsedatetime,pathlib2,polyline,prison,psycopg2,pyarrow,pyhive,pytz,retry,selenium,setuptools,simplejson,sphinx_rtd_theme,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml
+known_third_party =alembic,backoff,bleach,celery,click,colorama,contextlib2,croniter,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytz,retry,selenium,setuptools,simplejson,sphinx_rtd_theme,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml
 multi_line_output = 3
 order_by_type = false
 
superset/common/query_context.py
@@ -156,20 +156,25 @@ class QueryContext:
             return self.datasource.database.cache_timeout
         return config["CACHE_DEFAULT_TIMEOUT"]
 
-    def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
-        self, query_obj: QueryObject, **kwargs
-    ) -> Dict[str, Any]:
-        """Handles caching around the df payload retrieval"""
+    def cache_key(self, query_obj: QueryObject, **kwargs) -> Optional[str]:
         extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict())
         cache_key = (
             query_obj.cache_key(
                 datasource=self.datasource.uid,
                 extra_cache_keys=extra_cache_keys,
+                changed_on=self.datasource.changed_on,
                 **kwargs
             )
             if query_obj
             else None
         )
+        return cache_key
+
+    def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
+        self, query_obj: QueryObject, **kwargs
+    ) -> Dict[str, Any]:
+        """Handles caching around the df payload retrieval"""
+        cache_key = self.cache_key(query_obj, **kwargs)
         logging.info("Cache key: %s", cache_key)
         is_loaded = False
         stacktrace = None
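Factoring `cache_key` out of `get_df_payload` makes the key computable without running the query, which the new unit test below relies on. A usage sketch under assumed data (the datasource id is illustrative and must exist; this needs a Flask app context and a configured metadata database):

```python
from superset.common.query_context import QueryContext

# Illustrative payload, matching the dict shape the tests below build.
query_context = QueryContext(
    datasource={"id": 1, "type": "table"},  # hypothetical datasource id
    queries=[
        {
            "granularity": "ds",
            "groupby": ["name"],
            "metrics": ["sum__num"],
            "filters": [],
            "row_limit": 100,
        }
    ],
)
query_object = query_context.queries[0]

# The key now folds in datasource.uid, extra_cache_keys, and the
# datasource's changed_on timestamp.
cache_key = query_context.cache_key(query_object)
```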
superset/common/query_object.py
@@ -81,12 +81,7 @@ class QueryObject:
         self.time_shift = utils.parse_human_timedelta(time_shift)
         self.groupby = groupby or []
 
-        # Temporal solution for backward compatability issue
-        # due the new format of non-ad-hoc metric.
-        self.metrics = [
-            metric if "expressionType" in metric else metric["label"]  # type: ignore
-            for metric in metrics
-        ]
+        self.metrics = [utils.get_metric_name(metric) for metric in metrics]
         self.row_limit = row_limit
         self.filter = filters or []
         self.timeseries_limit = timeseries_limit
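For context, `utils.get_metric_name` replaces the inline backward-compatibility shim: ad-hoc metrics arrive as dicts and contribute their label, while saved metrics are plain strings. A rough sketch of that behavior (an assumption about the helper, not the exact implementation in `superset.utils`):

```python
from typing import Any, Dict, Union

Metric = Union[Dict[str, Any], str]

def get_metric_name_sketch(metric: Metric) -> str:
    # Ad-hoc metrics are dicts carrying an expressionType and a label;
    # pre-defined metrics are already plain metric names.
    if isinstance(metric, dict):
        return metric["label"]
    return metric

assert get_metric_name_sketch("sum__num") == "sum__num"
assert get_metric_name_sketch(
    {"expressionType": "SIMPLE", "label": "num_sum"}
) == "num_sum"
```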
superset/viz.py
@@ -372,6 +372,7 @@ class BaseViz:
         cache_dict["time_range"] = self.form_data.get("time_range")
         cache_dict["datasource"] = self.datasource.uid
         cache_dict["extra_cache_keys"] = self.datasource.get_extra_cache_keys(query_obj)
+        cache_dict["changed_on"] = self.datasource.changed_on
         json_data = self.json_dumps(cache_dict, sort_keys=True)
         return hashlib.md5(json_data.encode("utf-8")).hexdigest()
 
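One subtlety: `changed_on` is a datetime, so the serializer behind `self.json_dumps` must know how to encode it; plain `json.dumps` would raise. A minimal illustration of the requirement, using `default=str` as a stand-in for whatever datetime-aware default the helper actually passes:

```python
import json
from datetime import datetime

cache_dict = {"changed_on": datetime(2020, 1, 17, 12, 0)}

try:
    json.dumps(cache_dict)  # stdlib default raises TypeError on datetime
except TypeError:
    pass

# A datetime-aware default makes the payload serializable, and therefore
# hashable into a cache key.
json_data = json.dumps(cache_dict, sort_keys=True, default=str)
```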
tests/core_tests.py
@@ -28,15 +28,17 @@ import pytz
 import random
 import re
 import string
+from typing import Any, Dict
 import unittest
 from unittest import mock
 
 import pandas as pd
-import psycopg2
 import sqlalchemy as sqla
 
 from tests.test_app import app
 from superset import dataframe, db, jinja_context, security_manager, sql_lab
+from superset.common.query_context import QueryContext
+from superset.connectors.connector_registry import ConnectorRegistry
 from superset.connectors.sqla.models import SqlaTable
 from superset.db_engine_specs.base import BaseEngineSpec
 from superset.db_engine_specs.mssql import MssqlEngineSpec

@@ -98,7 +100,23 @@ class CoreTests(SupersetTestCase):
         resp = self.client.get("/superset/slice/-1/")
         assert resp.status_code == 404
 
-    def test_cache_key(self):
+    def _get_query_context_dict(self) -> Dict[str, Any]:
+        self.login(username="admin")
+        slc = self.get_slice("Girl Name Cloud", db.session)
+        return {
+            "datasource": {"id": slc.datasource_id, "type": slc.datasource_type},
+            "queries": [
+                {
+                    "granularity": "ds",
+                    "groupby": ["name"],
+                    "metrics": ["sum__num"],
+                    "filters": [],
+                    "row_limit": 100,
+                }
+            ],
+        }
+
+    def test_viz_cache_key(self):
         self.login(username="admin")
         slc = self.get_slice("Girls", db.session)
 
@@ -110,30 +128,40 @@ class CoreTests(SupersetTestCase):
         qobj["groupby"] = []
         self.assertNotEqual(cache_key, viz.cache_key(qobj))
 
+    def test_cache_key_changes_when_datasource_is_updated(self):
+        qc_dict = self._get_query_context_dict()
+
+        # construct baseline cache_key
+        query_context = QueryContext(**qc_dict)
+        query_object = query_context.queries[0]
+        cache_key_original = query_context.cache_key(query_object)
+
+        # make temporary change and revert it to refresh the changed_on property
+        datasource = ConnectorRegistry.get_datasource(
+            datasource_type=qc_dict["datasource"]["type"],
+            datasource_id=qc_dict["datasource"]["id"],
+            session=db.session,
+        )
+        description_original = datasource.description
+        datasource.description = "temporary description"
+        db.session.commit()
+        datasource.description = description_original
+        db.session.commit()
+
+        # create new QueryContext with unchanged attributes and extract new cache_key
+        query_context = QueryContext(**qc_dict)
+        query_object = query_context.queries[0]
+        cache_key_new = query_context.cache_key(query_object)
+
+        # the new cache_key should be different due to updated datasource
+        self.assertNotEqual(cache_key_original, cache_key_new)
+
     def test_api_v1_query_endpoint(self):
         self.login(username="admin")
-        slc = self.get_slice("Girl Name Cloud", db.session)
-        form_data = slc.form_data
-        data = json.dumps(
-            {
-                "datasource": {"id": slc.datasource_id, "type": slc.datasource_type},
-                "queries": [
-                    {
-                        "granularity": "ds",
-                        "groupby": ["name"],
-                        "metrics": ["sum__num"],
-                        "filters": [],
-                        "time_range": "{} : {}".format(
-                            form_data.get("since"), form_data.get("until")
-                        ),
-                        "limit": 100,
-                    }
-                ],
-            }
-        )
-        # TODO: update once get_data is implemented for QueryObject
-        with self.assertRaises(Exception):
-            self.get_resp("/api/v1/query/", {"query_context": data})
+        qc_dict = self._get_query_context_dict()
+        data = json.dumps(qc_dict)
+        resp = json.loads(self.get_resp("/api/v1/query/", {"query_context": data}))
+        self.assertEqual(resp[0]["rowcount"], 100)
 
     def test_old_slice_json_endpoint(self):
         self.login(username="admin")