From 77d362d306178ceb1377a615c29798fef440d20f Mon Sep 17 00:00:00 2001 From: Erik Ritter Date: Fri, 4 Dec 2020 07:59:28 -0800 Subject: [PATCH] fix: Remove expensive logs table migration (#11920) --- UPDATING.md | 1 + superset/migrations/shared/utils.py | 40 ++++++++++++++++ .../811494c0cc23_remove_path_from_logs.py | 46 +++++++++++++++++++ .../versions/a8173232b786_add_path_to_logs.py | 20 ++++---- superset/models/core.py | 3 -- superset/utils/log.py | 19 +++----- tests/event_logger_tests.py | 5 +- 7 files changed, 110 insertions(+), 24 deletions(-) create mode 100644 superset/migrations/shared/utils.py create mode 100644 superset/migrations/versions/811494c0cc23_remove_path_from_logs.py diff --git a/UPDATING.md b/UPDATING.md index 1bbb93fc7..79777b79f 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -24,6 +24,7 @@ assists people when migrating to a new version. ## Next +- [11920](https://github.com/apache/incubator-superset/pull/11920): Undos the DB migration from [11714](https://github.com/apache/incubator-superset/pull/11714) to prevent adding new columns to the logs table. Deploying a sha between these two PRs may result in locking your DB. - [11704](https://github.com/apache/incubator-superset/pull/11704) Breaking change: Jinja templating for SQL queries has been updated, removing default modules such as `datetime` and `random` and enforcing static template values. To restore or extend functionality, use `JINJA_CONTEXT_ADDONS` and `CUSTOM_TEMPLATE_PROCESSORS` in `superset_config.py`. - [11714](https://github.com/apache/incubator-superset/pull/11714): Logs significantly more analytics events (roughly double?), and when diff --git a/superset/migrations/shared/utils.py b/superset/migrations/shared/utils.py new file mode 100644 index 000000000..033171811 --- /dev/null +++ b/superset/migrations/shared/utils.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from alembic import op +from sqlalchemy import engine_from_config +from sqlalchemy.engine import reflection +from sqlalchemy.exc import NoSuchTableError + + +def table_has_column(table: str, column: str) -> bool: + """ + Checks if a column exists in a given table. + + :param table: A table name + :param column: A column name + :returns: True iff the column exists in the table + """ + + config = op.get_context().config + engine = engine_from_config( + config.get_section(config.config_ini_section), prefix="sqlalchemy." + ) + insp = reflection.Inspector.from_engine(engine) + try: + return any(col["name"] == column for col in insp.get_columns(table)) + except NoSuchTableError: + return False diff --git a/superset/migrations/versions/811494c0cc23_remove_path_from_logs.py b/superset/migrations/versions/811494c0cc23_remove_path_from_logs.py new file mode 100644 index 000000000..f0731f926 --- /dev/null +++ b/superset/migrations/versions/811494c0cc23_remove_path_from_logs.py @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Remove path, path_no_int, and ref from logs + +Revision ID: 811494c0cc23 +Revises: 8ee129739cf9 +Create Date: 2020-12-03 16:21:06.771684 + +""" + +# revision identifiers, used by Alembic. +revision = "811494c0cc23" +down_revision = "8ee129739cf9" + +import sqlalchemy as sa +from alembic import op + +from superset.migrations.shared import utils + + +def upgrade(): + with op.batch_alter_table("logs") as batch_op: + if utils.table_has_column("logs", "path"): + batch_op.drop_column("path") + if utils.table_has_column("logs", "path_no_int"): + batch_op.drop_column("path_no_int") + if utils.table_has_column("logs", "ref"): + batch_op.drop_column("ref") + + +def downgrade(): + pass diff --git a/superset/migrations/versions/a8173232b786_add_path_to_logs.py b/superset/migrations/versions/a8173232b786_add_path_to_logs.py index d88f3245d..ea4900a16 100644 --- a/superset/migrations/versions/a8173232b786_add_path_to_logs.py +++ b/superset/migrations/versions/a8173232b786_add_path_to_logs.py @@ -30,16 +30,20 @@ import sqlalchemy as sa from alembic import op from sqlalchemy.dialects import mysql +from superset.migrations.shared import utils + def upgrade(): - op.add_column("logs", sa.Column("path", sa.String(length=256), nullable=True)) - op.add_column( - "logs", sa.Column("path_no_int", sa.String(length=256), nullable=True) - ) - op.add_column("logs", sa.Column("ref", sa.String(length=256), nullable=True)) + # This migration was modified post hoc to avoid locking the large logs table + # during migrations. + pass def downgrade(): - op.drop_column("logs", "path") - op.drop_column("logs", "path_no_int") - op.drop_column("logs", "ref") + with op.batch_alter_table("logs") as batch_op: + if utils.table_has_column("logs", "path"): + batch_op.drop_column("path") + if utils.table_has_column("logs", "path_no_int"): + batch_op.drop_column("path_no_int") + if utils.table_has_column("logs", "ref"): + batch_op.drop_column("ref") diff --git a/superset/models/core.py b/superset/models/core.py index 2ceff5456..c4a8369a4 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -715,9 +715,6 @@ class Log(Model): # pylint: disable=too-few-public-methods dttm = Column(DateTime, default=datetime.utcnow) duration_ms = Column(Integer) referrer = Column(String(1024)) - path = Column(String(256)) - path_no_int = Column(String(256)) - ref = Column(String(256)) class FavStarClassName(str, Enum): diff --git a/superset/utils/log.py b/superset/utils/log.py index 24c9161f6..1604739ef 100644 --- a/superset/utils/log.py +++ b/superset/utils/log.py @@ -46,9 +46,6 @@ class AbstractEventLogger(ABC): dashboard_id: Optional[int], duration_ms: Optional[int], slice_id: Optional[int], - path: Optional[str], - path_no_int: Optional[str], - ref: Optional[str], referrer: Optional[str], *args: Any, **kwargs: Any, @@ -92,6 +89,13 @@ class AbstractEventLogger(ABC): if log_to_statsd: self.stats_logger.incr(action) + payload.update( + { + "path": request.path, + "path_no_param": strip_int_from_path(request.path), + "ref": ref, + } + ) # bulk insert try: explode_by = payload.get("explode") @@ -107,9 +111,6 @@ class AbstractEventLogger(ABC): slice_id=slice_id, duration_ms=round((time.time() - start_time) * 1000), referrer=referrer, - path=request.path, - path_no_int=strip_int_from_path(request.path), - ref=ref, ) def _wrapper( @@ -210,9 +211,6 @@ class DBEventLogger(AbstractEventLogger): dashboard_id: Optional[int], duration_ms: Optional[int], slice_id: Optional[int], - path: Optional[str], - path_no_int: Optional[str], - ref: Optional[str], referrer: Optional[str], *args: Any, **kwargs: Any, @@ -236,9 +234,6 @@ class DBEventLogger(AbstractEventLogger): duration_ms=duration_ms, referrer=referrer, user_id=user_id, - path=path, - path_no_int=path_no_int, - ref=ref, ) logs.append(log) try: diff --git a/tests/event_logger_tests.py b/tests/event_logger_tests.py index 23f374124..9c6c5e621 100644 --- a/tests/event_logger_tests.py +++ b/tests/event_logger_tests.py @@ -79,5 +79,8 @@ class TestEventLogger(unittest.TestCase): result = test_func(1, karg1=2) # pylint: disable=no-value-for-parameter self.assertEqual(result, 2) # should contain only manual payload - self.assertEqual(mock_log.call_args[1]["records"], [{"foo": "bar"}]) + self.assertEqual( + mock_log.call_args[1]["records"], + [{"foo": "bar", "path": "/", "path_no_param": "/", "ref": None}], + ) assert mock_log.call_args[1]["duration_ms"] >= 100