fix: Remove expensive logs table migration (#11920)

This commit is contained in:
Erik Ritter 2020-12-04 07:59:28 -08:00 committed by GitHub
parent 327a2817d3
commit 77d362d306
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 110 additions and 24 deletions

View File

@ -24,6 +24,7 @@ assists people when migrating to a new version.
## Next
- [11920](https://github.com/apache/incubator-superset/pull/11920): Undos the DB migration from [11714](https://github.com/apache/incubator-superset/pull/11714) to prevent adding new columns to the logs table. Deploying a sha between these two PRs may result in locking your DB.
- [11704](https://github.com/apache/incubator-superset/pull/11704) Breaking change: Jinja templating for SQL queries has been updated, removing default modules such as `datetime` and `random` and enforcing static template values. To restore or extend functionality, use `JINJA_CONTEXT_ADDONS` and `CUSTOM_TEMPLATE_PROCESSORS` in `superset_config.py`.
- [11714](https://github.com/apache/incubator-superset/pull/11714): Logs
significantly more analytics events (roughly double?), and when

View File

@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from alembic import op
from sqlalchemy import engine_from_config
from sqlalchemy.engine import reflection
from sqlalchemy.exc import NoSuchTableError
def table_has_column(table: str, column: str) -> bool:
"""
Checks if a column exists in a given table.
:param table: A table name
:param column: A column name
:returns: True iff the column exists in the table
"""
config = op.get_context().config
engine = engine_from_config(
config.get_section(config.config_ini_section), prefix="sqlalchemy."
)
insp = reflection.Inspector.from_engine(engine)
try:
return any(col["name"] == column for col in insp.get_columns(table))
except NoSuchTableError:
return False

View File

@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Remove path, path_no_int, and ref from logs
Revision ID: 811494c0cc23
Revises: 8ee129739cf9
Create Date: 2020-12-03 16:21:06.771684
"""
# revision identifiers, used by Alembic.
revision = "811494c0cc23"
down_revision = "8ee129739cf9"
import sqlalchemy as sa
from alembic import op
from superset.migrations.shared import utils
def upgrade():
with op.batch_alter_table("logs") as batch_op:
if utils.table_has_column("logs", "path"):
batch_op.drop_column("path")
if utils.table_has_column("logs", "path_no_int"):
batch_op.drop_column("path_no_int")
if utils.table_has_column("logs", "ref"):
batch_op.drop_column("ref")
def downgrade():
pass

View File

@ -30,16 +30,20 @@ import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import mysql
from superset.migrations.shared import utils
def upgrade():
op.add_column("logs", sa.Column("path", sa.String(length=256), nullable=True))
op.add_column(
"logs", sa.Column("path_no_int", sa.String(length=256), nullable=True)
)
op.add_column("logs", sa.Column("ref", sa.String(length=256), nullable=True))
# This migration was modified post hoc to avoid locking the large logs table
# during migrations.
pass
def downgrade():
op.drop_column("logs", "path")
op.drop_column("logs", "path_no_int")
op.drop_column("logs", "ref")
with op.batch_alter_table("logs") as batch_op:
if utils.table_has_column("logs", "path"):
batch_op.drop_column("path")
if utils.table_has_column("logs", "path_no_int"):
batch_op.drop_column("path_no_int")
if utils.table_has_column("logs", "ref"):
batch_op.drop_column("ref")

View File

@ -715,9 +715,6 @@ class Log(Model): # pylint: disable=too-few-public-methods
dttm = Column(DateTime, default=datetime.utcnow)
duration_ms = Column(Integer)
referrer = Column(String(1024))
path = Column(String(256))
path_no_int = Column(String(256))
ref = Column(String(256))
class FavStarClassName(str, Enum):

View File

@ -46,9 +46,6 @@ class AbstractEventLogger(ABC):
dashboard_id: Optional[int],
duration_ms: Optional[int],
slice_id: Optional[int],
path: Optional[str],
path_no_int: Optional[str],
ref: Optional[str],
referrer: Optional[str],
*args: Any,
**kwargs: Any,
@ -92,6 +89,13 @@ class AbstractEventLogger(ABC):
if log_to_statsd:
self.stats_logger.incr(action)
payload.update(
{
"path": request.path,
"path_no_param": strip_int_from_path(request.path),
"ref": ref,
}
)
# bulk insert
try:
explode_by = payload.get("explode")
@ -107,9 +111,6 @@ class AbstractEventLogger(ABC):
slice_id=slice_id,
duration_ms=round((time.time() - start_time) * 1000),
referrer=referrer,
path=request.path,
path_no_int=strip_int_from_path(request.path),
ref=ref,
)
def _wrapper(
@ -210,9 +211,6 @@ class DBEventLogger(AbstractEventLogger):
dashboard_id: Optional[int],
duration_ms: Optional[int],
slice_id: Optional[int],
path: Optional[str],
path_no_int: Optional[str],
ref: Optional[str],
referrer: Optional[str],
*args: Any,
**kwargs: Any,
@ -236,9 +234,6 @@ class DBEventLogger(AbstractEventLogger):
duration_ms=duration_ms,
referrer=referrer,
user_id=user_id,
path=path,
path_no_int=path_no_int,
ref=ref,
)
logs.append(log)
try:

View File

@ -79,5 +79,8 @@ class TestEventLogger(unittest.TestCase):
result = test_func(1, karg1=2) # pylint: disable=no-value-for-parameter
self.assertEqual(result, 2)
# should contain only manual payload
self.assertEqual(mock_log.call_args[1]["records"], [{"foo": "bar"}])
self.assertEqual(
mock_log.call_args[1]["records"],
[{"foo": "bar", "path": "/", "path_no_param": "/", "ref": None}],
)
assert mock_log.call_args[1]["duration_ms"] >= 100