fix(reports): increase crontab size and alert fixes (#12056)

* fix(reports): increase crontab size

* update to current alembic revision

* Merge branch 'master' into feat/security-converge-datasets

# Conflicts:
#	tests/security_tests.py

* Merge branch 'master' into feat/security-converge-datasets

# Conflicts:
#	tests/security_tests.py

* Merge branch 'master' into feat/security-converge-datasets

# Conflicts:
#	tests/security_tests.py

* lint

* update alembic revision

* fix related fields

* fix test
This commit is contained in:
Daniel Vaz Gaspar 2020-12-17 18:03:05 +00:00 committed by GitHub
parent a3be325b99
commit 1a20552c2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 100 additions and 12 deletions

View File

@ -803,6 +803,10 @@ ENABLE_SCHEDULED_EMAIL_REPORTS = False
# if it meets the criteria
ENABLE_ALERTS = False
# Used for Alerts/Reports (Feature flask ALERT_REPORTS) to set the size for the
# sliding cron window size, should be synced with the celery beat config minus 1 second
ALERT_REPORTS_CRON_WINDOW_SIZE = 59
# Slack API token for the superset reports
SLACK_API_TOKEN = None
SLACK_PROXY = None

View File

@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""reports alter crontab size
Revision ID: ab104a954a8f
Revises: 5daced1f0e76
Create Date: 2020-12-15 09:07:24.730545
"""
# revision identifiers, used by Alembic.
revision = "ab104a954a8f"
down_revision = "e37912a26567"
import sqlalchemy as sa
from alembic import op
def upgrade():
with op.batch_alter_table("report_schedule") as batch_op:
batch_op.alter_column(
"crontab",
existing_type=sa.VARCHAR(length=50),
type_=sa.VARCHAR(length=1000),
existing_nullable=False,
)
def downgrade():
with op.batch_alter_table("report_schedule") as batch_op:
batch_op.alter_column(
"crontab",
existing_type=sa.VARCHAR(length=1000),
type_=sa.VARCHAR(length=50),
existing_nullable=False,
)

View File

@ -98,7 +98,7 @@ class ReportSchedule(Model, AuditMixinNullable):
description = Column(Text)
context_markdown = Column(Text)
active = Column(Boolean, default=True, index=True)
crontab = Column(String(50), nullable=False)
crontab = Column(String(1000), nullable=False)
sql = Column(Text())
# (Alerts/Reports) M-O to chart
chart_id = Column(Integer, ForeignKey("slices.id"), nullable=True)

View File

@ -26,6 +26,7 @@ from marshmallow import ValidationError
from superset.charts.filters import ChartFilter
from superset.constants import MODEL_API_RW_METHOD_PERMISSION_MAP, RouteMethod
from superset.dashboards.filters import DashboardFilter
from superset.databases.filters import DatabaseFilter
from superset.models.reports import ReportSchedule
from superset.reports.commands.bulk_delete import BulkDeleteReportScheduleCommand
from superset.reports.commands.create import CreateReportScheduleCommand
@ -47,7 +48,12 @@ from superset.reports.schemas import (
ReportSchedulePostSchema,
ReportSchedulePutSchema,
)
from superset.views.base_api import BaseSupersetModelRestApi, statsd_metrics
from superset.views.base_api import (
BaseSupersetModelRestApi,
RelatedFieldFilter,
statsd_metrics,
)
from superset.views.filters import FilterRelatedOwners
logger = logging.getLogger(__name__)
@ -155,12 +161,23 @@ class ReportScheduleRestApi(BaseSupersetModelRestApi):
]
search_columns = ["name", "active", "created_by", "type", "last_state"]
search_filters = {"name": [ReportScheduleAllTextFilter]}
allowed_rel_fields = {"created_by", "chart", "dashboard"}
allowed_rel_fields = {"owners", "chart", "dashboard", "database"}
filter_rel_fields = {
"chart": [["id", ChartFilter, lambda: []]],
"dashboard": [["id", DashboardFilter, lambda: []]],
"database": [["id", DatabaseFilter, lambda: []]],
}
text_field_rel_fields = {
"dashboard": "dashboard_title",
"chart": "slice_name",
"database": "database_name",
}
related_field_filters = {
"dashboard": "dashboard_title",
"chart": "slice_name",
"database": "database_name",
"owners": RelatedFieldFilter("first_name", FilterRelatedOwners),
}
text_field_rel_fields = {"dashboard": "dashboard_title"}
apispec_parameter_schemas = {
"get_delete_ids_schema": get_delete_ids_schema,

View File

@ -26,6 +26,7 @@ from superset import jinja_context
from superset.commands.base import BaseCommand
from superset.models.reports import ReportSchedule, ReportScheduleValidatorType
from superset.reports.commands.exceptions import (
AlertQueryError,
AlertQueryInvalidTypeError,
AlertQueryMultipleColumnsError,
AlertQueryMultipleRowsError,
@ -47,7 +48,7 @@ class AlertCommand(BaseCommand):
self.validate()
if self._report_schedule.validator_type == ReportScheduleValidatorType.NOT_NULL:
self._report_schedule.last_value_row_json = self._result
self._report_schedule.last_value_row_json = str(self._result)
return self._result not in (0, None, np.nan)
self._report_schedule.last_value = self._result
try:
@ -60,9 +61,11 @@ class AlertCommand(BaseCommand):
raise AlertValidatorConfigError()
def _validate_not_null(self, rows: np.recarray) -> None:
self._validate_result(rows)
self._result = rows[0][1]
def _validate_operator(self, rows: np.recarray) -> None:
@staticmethod
def _validate_result(rows: np.recarray) -> None:
# check if query return more then one row
if len(rows) > 1:
raise AlertQueryMultipleRowsError(
@ -80,6 +83,9 @@ class AlertCommand(BaseCommand):
% (len(rows[0]) - 1)
)
)
def _validate_operator(self, rows: np.recarray) -> None:
self._validate_result(rows)
if rows[0][1] is None:
return
try:
@ -97,7 +103,10 @@ class AlertCommand(BaseCommand):
database=self._report_schedule.database
)
rendered_sql = sql_template.process_template(self._report_schedule.sql)
df = self._report_schedule.database.get_df(rendered_sql)
try:
df = self._report_schedule.database.get_df(rendered_sql)
except Exception as ex:
raise AlertQueryError(message=str(ex))
if df.empty:
return

View File

@ -151,6 +151,10 @@ class AlertQueryInvalidTypeError(CommandException):
message = _("Alert query returned a non-number value.")
class AlertQueryError(CommandException):
message = _("Alert found an error while executing a query.")
class ReportScheduleAlertGracePeriodError(CommandException):
message = _("Alert fired during grace period.")

View File

@ -22,7 +22,7 @@ from flask_appbuilder.api import expose, permission_name, protect, rison, safe
from flask_appbuilder.api.schemas import get_item_schema, get_list_schema
from flask_appbuilder.models.sqla.interface import SQLAInterface
from superset.constants import RouteMethod
from superset.constants import MODEL_API_RW_METHOD_PERMISSION_MAP, RouteMethod
from superset.models.reports import ReportExecutionLog
from superset.reports.logs.schemas import openapi_spec_methods_override
from superset.views.base_api import BaseSupersetModelRestApi
@ -34,6 +34,8 @@ class ReportExecutionLogRestApi(BaseSupersetModelRestApi):
datamodel = SQLAInterface(ReportExecutionLog)
include_route_methods = {RouteMethod.GET, RouteMethod.GET_LIST}
method_permission_name = MODEL_API_RW_METHOD_PERMISSION_MAP
class_permission_name = "ReportSchedule"
resource_name = "report"
allow_browser_login = True

View File

@ -139,7 +139,7 @@ class ReportSchedulePostSchema(Schema):
active = fields.Boolean()
crontab = fields.String(
description=crontab_description,
validate=[validate_crontab, Length(1, 50)],
validate=[validate_crontab, Length(1, 1000)],
example="*/5 * * * *",
allow_none=False,
required=True,
@ -192,7 +192,7 @@ class ReportSchedulePutSchema(Schema):
active = fields.Boolean(required=False)
crontab = fields.String(
description=crontab_description,
validate=[validate_crontab, Length(1, 50)],
validate=[validate_crontab, Length(1, 1000)],
required=False,
)
sql = fields.String(

View File

@ -20,6 +20,7 @@ from typing import Iterator
import croniter
from superset import app
from superset.commands.exceptions import CommandException
from superset.extensions import celery_app
from superset.reports.commands.execute import AsyncExecuteReportScheduleCommand
@ -30,7 +31,8 @@ from superset.utils.celery import session_scope
logger = logging.getLogger(__name__)
def cron_schedule_window(cron: str, window_size: int = 10) -> Iterator[datetime]:
def cron_schedule_window(cron: str) -> Iterator[datetime]:
window_size = app.config["ALERT_REPORTS_CRON_WINDOW_SIZE"]
utc_now = datetime.utcnow()
start_at = utc_now - timedelta(seconds=1)
stop_at = utc_now + timedelta(seconds=window_size)

View File

@ -370,7 +370,7 @@ class TestReportSchedulesApi(SupersetTestCase):
ReportSchedule Api: Test get releated report schedule
"""
self.login(username="admin")
related_columns = ["created_by", "chart", "dashboard"]
related_columns = ["owners", "chart", "dashboard", "database"]
for related_column in related_columns:
uri = f"api/v1/report/related/{related_column}"
rv = self.client.get(uri)