Fix lint in `superset/db_engine_specs` (#8338)
* Enable lint checking for files in db_engine_specs that have few to no lint issues * Enable lint and fix issue in db_engine_specs/mysql.py * Enable pylint and fix lint for db_engine_specs/pinot.py * Enable lint and fix issues for db_engine_specs/hive.py * Enable lint and fix for db_engine_specs/presto.py * Re-enable lint on base.py, fix/disable specific failures, including one bad method signature * Make flake8 happy after a number of pylint fixes * Update db_engine_spec_test test cases related to Presto to support different method naming * automated reformatting * One more pylint disable for druid.py * Find the magic invocation that makes all the lint tools happy
This commit is contained in:
parent
65a05ca47e
commit
ec86d9de17
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
# pylint: disable=unused-argument
|
||||
from contextlib import closing
|
||||
from datetime import datetime
|
||||
import hashlib
|
||||
|
|
@ -42,10 +42,10 @@ from superset.utils import core as utils
|
|||
|
||||
if TYPE_CHECKING:
|
||||
# prevent circular imports
|
||||
from superset.models.core import Database
|
||||
from superset.models.core import Database # pylint: disable=unused-import
|
||||
|
||||
|
||||
class TimeGrain(NamedTuple):
|
||||
class TimeGrain(NamedTuple): # pylint: disable=too-few-public-methods
|
||||
name: str # TODO: redundant field, remove
|
||||
label: str
|
||||
function: str
|
||||
|
|
@ -79,7 +79,9 @@ builtin_time_grains: Dict[Optional[str], str] = {
|
|||
}
|
||||
|
||||
|
||||
class TimestampExpression(ColumnClause):
|
||||
class TimestampExpression(
|
||||
ColumnClause
|
||||
): # pylint: disable=abstract-method,too-many-ancestors,too-few-public-methods
|
||||
def __init__(self, expr: str, col: ColumnClause, **kwargs):
|
||||
"""Sqlalchemy class that can be can be used to render native column elements
|
||||
respecting engine-specific quoting rules as part of a string-based expression.
|
||||
|
|
@ -106,7 +108,7 @@ def compile_timegrain_expression(
|
|||
return element.name.replace("{col}", compiler.process(element.col, **kw))
|
||||
|
||||
|
||||
class LimitMethod(object):
|
||||
class LimitMethod(object): # pylint: disable=too-few-public-methods
|
||||
"""Enum the ways that limits can be applied"""
|
||||
|
||||
FETCH_MANY = "fetch_many"
|
||||
|
|
@ -114,7 +116,7 @@ class LimitMethod(object):
|
|||
FORCE_LIMIT = "force_limit"
|
||||
|
||||
|
||||
class BaseEngineSpec:
|
||||
class BaseEngineSpec: # pylint: disable=too-many-public-methods
|
||||
"""Abstract class for database engine specific configurations"""
|
||||
|
||||
engine = "base" # str as defined in sqlalchemy.engine.engine
|
||||
|
|
@ -128,7 +130,7 @@ class BaseEngineSpec:
|
|||
force_column_alias_quotes = False
|
||||
arraysize = 0
|
||||
max_column_name_length = 0
|
||||
try_remove_schema_from_table_name = True
|
||||
try_remove_schema_from_table_name = True # pylint: disable=invalid-name
|
||||
|
||||
@classmethod
|
||||
def get_allow_cost_estimate(cls, version: str = None) -> bool:
|
||||
|
|
@ -287,7 +289,7 @@ class BaseEngineSpec:
|
|||
:param type_code: Type code from cursor description
|
||||
:return: String representation of type code
|
||||
"""
|
||||
if isinstance(type_code, str) and len(type_code):
|
||||
if isinstance(type_code, str) and type_code != "":
|
||||
return type_code.upper()
|
||||
return None
|
||||
|
||||
|
|
@ -375,7 +377,7 @@ class BaseEngineSpec:
|
|||
return df
|
||||
|
||||
@classmethod
|
||||
def df_to_sql(cls, df: pd.DataFrame, **kwargs):
|
||||
def df_to_sql(cls, df: pd.DataFrame, **kwargs): # pylint: disable=invalid-name
|
||||
""" Upload data from a Pandas DataFrame to a database. For
|
||||
regular engines this calls the DataFrame.to_sql() method. Can be
|
||||
overridden for engines that don't work well with to_sql(), e.g.
|
||||
|
|
@ -449,35 +451,35 @@ class BaseEngineSpec:
|
|||
|
||||
@classmethod
|
||||
def get_all_datasource_names(
|
||||
cls, db, datasource_type: str
|
||||
cls, database, datasource_type: str
|
||||
) -> List[utils.DatasourceName]:
|
||||
"""Returns a list of all tables or views in database.
|
||||
|
||||
:param db: Database instance
|
||||
:param database: Database instance
|
||||
:param datasource_type: Datasource_type can be 'table' or 'view'
|
||||
:return: List of all datasources in database or schema
|
||||
"""
|
||||
# TODO: Fix circular import caused by importing Database
|
||||
schemas = db.get_all_schema_names(
|
||||
cache=db.schema_cache_enabled,
|
||||
cache_timeout=db.schema_cache_timeout,
|
||||
schemas = database.get_all_schema_names(
|
||||
cache=database.schema_cache_enabled,
|
||||
cache_timeout=database.schema_cache_timeout,
|
||||
force=True,
|
||||
)
|
||||
all_datasources: List[utils.DatasourceName] = []
|
||||
for schema in schemas:
|
||||
if datasource_type == "table":
|
||||
all_datasources += db.get_all_table_names_in_schema(
|
||||
all_datasources += database.get_all_table_names_in_schema(
|
||||
schema=schema,
|
||||
force=True,
|
||||
cache=db.table_cache_enabled,
|
||||
cache_timeout=db.table_cache_timeout,
|
||||
cache=database.table_cache_enabled,
|
||||
cache_timeout=database.table_cache_timeout,
|
||||
)
|
||||
elif datasource_type == "view":
|
||||
all_datasources += db.get_all_view_names_in_schema(
|
||||
all_datasources += database.get_all_view_names_in_schema(
|
||||
schema=schema,
|
||||
force=True,
|
||||
cache=db.table_cache_enabled,
|
||||
cache_timeout=db.table_cache_timeout,
|
||||
cache=database.table_cache_enabled,
|
||||
cache_timeout=database.table_cache_timeout,
|
||||
)
|
||||
else:
|
||||
raise Exception(f"Unsupported datasource_type: {datasource_type}")
|
||||
|
|
@ -588,7 +590,7 @@ class BaseEngineSpec:
|
|||
return inspector.get_columns(table_name, schema)
|
||||
|
||||
@classmethod
|
||||
def where_latest_partition(
|
||||
def where_latest_partition( # pylint: disable=too-many-arguments
|
||||
cls,
|
||||
table_name: str,
|
||||
schema: Optional[str],
|
||||
|
|
@ -615,7 +617,7 @@ class BaseEngineSpec:
|
|||
return [column(c.get("name")) for c in cols]
|
||||
|
||||
@classmethod
|
||||
def select_star(
|
||||
def select_star( # pylint: disable=too-many-arguments,too-many-locals
|
||||
cls,
|
||||
database,
|
||||
table_name: str,
|
||||
|
|
@ -727,7 +729,7 @@ class BaseEngineSpec:
|
|||
url.username = username
|
||||
|
||||
@classmethod
|
||||
def get_configuration_for_impersonation(
|
||||
def get_configuration_for_impersonation( # pylint: disable=invalid-name
|
||||
cls, uri: str, impersonate_user: bool, username: str
|
||||
) -> Dict[str, str]:
|
||||
"""
|
||||
|
|
@ -830,8 +832,9 @@ class BaseEngineSpec:
|
|||
cls, sqla_column_type: TypeEngine, dialect: Dialect
|
||||
) -> str:
|
||||
"""
|
||||
Convert sqlalchemy column type to string representation. Can be overridden to remove
|
||||
unnecessary details, especially collation info (see mysql, mssql).
|
||||
Convert sqlalchemy column type to string representation.
|
||||
Can be overridden to remove unnecessary details, especially
|
||||
collation info (see mysql, mssql).
|
||||
|
||||
:param sqla_column_type: SqlAlchemy column type
|
||||
:param dialect: Sqlalchemy dialect
|
||||
|
|
|
|||
|
|
@ -14,13 +14,12 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
|
||||
class ClickHouseEngineSpec(BaseEngineSpec):
|
||||
class ClickHouseEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
"""Dialect for ClickHouse analytical DB."""
|
||||
|
||||
engine = "clickhouse"
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
|
||||
from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
from urllib import parse
|
||||
|
||||
|
|
|
|||
|
|
@ -14,11 +14,10 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
|
||||
class DruidEngineSpec(BaseEngineSpec):
|
||||
class DruidEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
"""Engine spec for Druid.io"""
|
||||
|
||||
engine = "druid"
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from superset.db_engine_specs.sqlite import SqliteEngineSpec
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -81,9 +80,9 @@ class HiveEngineSpec(PrestoEngineSpec):
|
|||
|
||||
@classmethod
|
||||
def get_all_datasource_names(
|
||||
cls, db, datasource_type: str
|
||||
cls, database, datasource_type: str
|
||||
) -> List[utils.DatasourceName]:
|
||||
return BaseEngineSpec.get_all_datasource_names(db, datasource_type)
|
||||
return BaseEngineSpec.get_all_datasource_names(database, datasource_type)
|
||||
|
||||
@classmethod
|
||||
def fetch_data(cls, cursor, limit: int) -> List[Tuple]:
|
||||
|
|
@ -99,7 +98,7 @@ class HiveEngineSpec(PrestoEngineSpec):
|
|||
return []
|
||||
|
||||
@classmethod
|
||||
def create_table_from_csv(cls, form, table):
|
||||
def create_table_from_csv(cls, form, table): # pylint: disable=too-many-locals
|
||||
"""Uploads a csv file and creates a superset datasource in Hive."""
|
||||
|
||||
def convert_to_hive_type(col_type):
|
||||
|
|
@ -223,7 +222,7 @@ class HiveEngineSpec(PrestoEngineSpec):
|
|||
reduce_progress = int(match.groupdict()["reduce_progress"])
|
||||
stages[stage_number] = (map_progress + reduce_progress) / 2
|
||||
logging.info(
|
||||
"Progress detail: {}, "
|
||||
"Progress detail: {}, " # pylint: disable=logging-format-interpolation
|
||||
"current job {}, "
|
||||
"total jobs: {}".format(stages, current_job, total_jobs)
|
||||
)
|
||||
|
|
@ -239,9 +238,10 @@ class HiveEngineSpec(PrestoEngineSpec):
|
|||
for line in log_lines:
|
||||
if lkp in line:
|
||||
return line.split(lkp)[1]
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def handle_cursor(cls, cursor, query, session):
|
||||
def handle_cursor(cls, cursor, query, session): # pylint: disable=too-many-locals
|
||||
"""Updates progress information"""
|
||||
from pyhive import hive # pylint: disable=no-name-in-module
|
||||
|
||||
|
|
@ -302,33 +302,33 @@ class HiveEngineSpec(PrestoEngineSpec):
|
|||
return inspector.get_columns(table_name, schema)
|
||||
|
||||
@classmethod
|
||||
def where_latest_partition(
|
||||
def where_latest_partition( # pylint: disable=too-many-arguments
|
||||
cls,
|
||||
table_name: str,
|
||||
schema: Optional[str],
|
||||
database,
|
||||
qry: Select,
|
||||
query: Select,
|
||||
columns: Optional[List] = None,
|
||||
) -> Optional[Select]:
|
||||
try:
|
||||
col_names, values = cls.latest_partition(
|
||||
table_name, schema, database, show_first=True
|
||||
)
|
||||
except Exception:
|
||||
except Exception: # pylint: disable=broad-except
|
||||
# table is not partitioned
|
||||
return None
|
||||
if values is not None and columns is not None:
|
||||
for col_name, value in zip(col_names, values):
|
||||
for c in columns:
|
||||
if c.get("name") == col_name:
|
||||
qry = qry.where(Column(col_name) == value)
|
||||
for clm in columns:
|
||||
if clm.get("name") == col_name:
|
||||
query = query.where(Column(col_name) == value)
|
||||
|
||||
return qry
|
||||
return query
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]:
|
||||
return BaseEngineSpec._get_fields(cols)
|
||||
return BaseEngineSpec._get_fields(cols) # pylint: disable=protected-access
|
||||
|
||||
@classmethod
|
||||
def latest_sub_partition(cls, table_name, schema, database, **kwargs):
|
||||
|
|
@ -343,11 +343,13 @@ class HiveEngineSpec(PrestoEngineSpec):
|
|||
return None
|
||||
|
||||
@classmethod
|
||||
def _partition_query(cls, table_name, limit=0, order_by=None, filters=None):
|
||||
def _partition_query( # pylint: disable=too-many-arguments
|
||||
cls, table_name, database, limit=0, order_by=None, filters=None
|
||||
):
|
||||
return f"SHOW PARTITIONS {table_name}"
|
||||
|
||||
@classmethod
|
||||
def select_star(
|
||||
def select_star( # pylint: disable=too-many-arguments
|
||||
cls,
|
||||
database,
|
||||
table_name: str,
|
||||
|
|
@ -413,6 +415,8 @@ class HiveEngineSpec(PrestoEngineSpec):
|
|||
return configuration
|
||||
|
||||
@staticmethod
|
||||
def execute(cursor, query: str, async_: bool = False):
|
||||
def execute(
|
||||
cursor, query: str, async_: bool = False
|
||||
): # pylint: disable=arguments-differ
|
||||
kwargs = {"async": async_}
|
||||
cursor.execute(query, **kwargs)
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
|
||||
|
|
|
|||
|
|
@ -14,13 +14,12 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
|
||||
from superset.db_engine_specs.base import BaseEngineSpec
|
||||
|
||||
|
||||
class KylinEngineSpec(BaseEngineSpec):
|
||||
class KylinEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
"""Dialect for Apache Kylin"""
|
||||
|
||||
engine = "kylin"
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
import re
|
||||
from typing import List, Optional, Tuple
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
from urllib import parse
|
||||
|
|
@ -77,7 +76,7 @@ class MySQLEngineSpec(BaseEngineSpec):
|
|||
datatype = type_code
|
||||
if isinstance(type_code, int):
|
||||
datatype = cls.type_code_map.get(type_code)
|
||||
if datatype and isinstance(datatype, str) and len(datatype):
|
||||
if datatype and isinstance(datatype, str) and datatype:
|
||||
return datatype
|
||||
return None
|
||||
|
||||
|
|
@ -92,7 +91,7 @@ class MySQLEngineSpec(BaseEngineSpec):
|
|||
try:
|
||||
if isinstance(e.args, tuple) and len(e.args) > 1:
|
||||
message = e.args[1]
|
||||
except Exception:
|
||||
except Exception: # pylint: disable=broad-except
|
||||
pass
|
||||
return message
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
|
||||
from superset.db_engine_specs.base import LimitMethod
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from sqlalchemy.sql.expression import ColumnClause, ColumnElement
|
||||
|
|
@ -22,7 +21,7 @@ from sqlalchemy.sql.expression import ColumnClause, ColumnElement
|
|||
from superset.db_engine_specs.base import BaseEngineSpec, TimestampExpression
|
||||
|
||||
|
||||
class PinotEngineSpec(BaseEngineSpec):
|
||||
class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
|
||||
engine = "pinot"
|
||||
allows_subqueries = False
|
||||
allows_joins = False
|
||||
|
|
@ -66,10 +65,10 @@ class PinotEngineSpec(BaseEngineSpec):
|
|||
# Pinot does not want the group by expr's to appear in the select clause
|
||||
select_sans_groupby = []
|
||||
# We want identity and not equality, so doing the filtering manually
|
||||
for s in select_exprs:
|
||||
for sel in select_exprs:
|
||||
for gr in groupby_exprs:
|
||||
if s is gr:
|
||||
if sel is gr:
|
||||
break
|
||||
else:
|
||||
select_sans_groupby.append(s)
|
||||
select_sans_groupby.append(sel)
|
||||
return select_sans_groupby
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Tuple, TYPE_CHECKING
|
||||
|
||||
|
|
@ -24,7 +23,7 @@ from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod
|
|||
|
||||
if TYPE_CHECKING:
|
||||
# prevent circular imports
|
||||
from superset.models.core import Database
|
||||
from superset.models.core import Database # pylint: disable=unused-import
|
||||
|
||||
|
||||
class PostgresBaseEngineSpec(BaseEngineSpec):
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from collections import defaultdict, deque, OrderedDict
|
||||
from contextlib import closing
|
||||
from datetime import datetime
|
||||
|
|
@ -42,7 +41,7 @@ from superset.utils import core as utils
|
|||
|
||||
if TYPE_CHECKING:
|
||||
# prevent circular imports
|
||||
from superset.models.core import Database
|
||||
from superset.models.core import Database # pylint: disable=unused-import
|
||||
|
||||
QueryStatus = utils.QueryStatus
|
||||
config = app.config
|
||||
|
|
@ -80,7 +79,7 @@ def get_children(column: Dict[str, str]) -> List[Dict[str, str]]:
|
|||
:param column: dictionary representing a Presto column
|
||||
:return: list of dictionaries representing children columns
|
||||
"""
|
||||
pattern = re.compile("(?P<type>\w+)\((?P<children>.*)\)")
|
||||
pattern = re.compile(r"(?P<type>\w+)\((?P<children>.*)\)")
|
||||
match = pattern.match(column["type"])
|
||||
if not match:
|
||||
raise Exception(f"Unable to parse column type {column['type']}")
|
||||
|
|
@ -157,7 +156,10 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
return []
|
||||
|
||||
if schema:
|
||||
sql = "SELECT table_name FROM information_schema.views WHERE table_schema=%(schema)s"
|
||||
sql = (
|
||||
"SELECT table_name FROM information_schema.views"
|
||||
"WHERE table_schema=%(schema)s"
|
||||
)
|
||||
params = {"schema": schema}
|
||||
else:
|
||||
sql = "SELECT table_name FROM information_schema.views"
|
||||
|
|
@ -220,7 +222,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def _parse_structural_column(
|
||||
def _parse_structural_column( # pylint: disable=too-many-locals,too-many-branches
|
||||
cls, parent_column_name: str, parent_data_type: str, result: List[dict]
|
||||
) -> None:
|
||||
"""
|
||||
|
|
@ -243,7 +245,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
inner_types = cls._split_data_type(data_type, r"\)")
|
||||
for inner_type in inner_types:
|
||||
# We have finished parsing multiple structural data types
|
||||
if not inner_type and len(stack) > 0:
|
||||
if not inner_type and stack:
|
||||
stack.pop()
|
||||
elif cls._has_nested_data_types(inner_type):
|
||||
# split on comma , to get individual data types
|
||||
|
|
@ -283,11 +285,11 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
if not (inner_type.endswith("array") or inner_type.endswith("row")):
|
||||
stack.pop()
|
||||
# We have an array of row objects (i.e. array(row(...)))
|
||||
elif "array" == inner_type or "row" == inner_type:
|
||||
elif inner_type == "array" or inner_type == "row":
|
||||
# Push a dummy object to represent the structural data type
|
||||
stack.append(("", inner_type))
|
||||
# We have an array of a basic data type (i.e. array(varchar)).
|
||||
elif len(stack) > 0:
|
||||
elif stack:
|
||||
# Because it is an array of a basic data type. We have finished
|
||||
# parsing the structural data type and can move on.
|
||||
stack.pop()
|
||||
|
|
@ -348,7 +350,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
column_type = presto_type_map[column.Type]()
|
||||
except KeyError:
|
||||
logging.info(
|
||||
"Did not recognize type {} of column {}".format(
|
||||
"Did not recognize type {} of column {}".format( # pylint: disable=logging-format-interpolation
|
||||
column.Type, column.Column
|
||||
)
|
||||
)
|
||||
|
|
@ -439,7 +441,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
return filtered_cols, array_cols
|
||||
|
||||
@classmethod
|
||||
def select_star(
|
||||
def select_star( # pylint: disable=too-many-arguments
|
||||
cls,
|
||||
database,
|
||||
table_name: str,
|
||||
|
|
@ -476,7 +478,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def estimate_statement_cost(
|
||||
def estimate_statement_cost( # pylint: disable=too-many-locals
|
||||
cls, statement: str, database, cursor, user_name: str
|
||||
) -> Dict[str, str]:
|
||||
"""
|
||||
|
|
@ -490,9 +492,9 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
parsed_query = ParsedQuery(statement)
|
||||
sql = parsed_query.stripped()
|
||||
|
||||
SQL_QUERY_MUTATOR = config.get("SQL_QUERY_MUTATOR")
|
||||
if SQL_QUERY_MUTATOR:
|
||||
sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)
|
||||
sql_query_mutator = config.get("SQL_QUERY_MUTATOR")
|
||||
if sql_query_mutator:
|
||||
sql = sql_query_mutator(sql, user_name, security_manager, database)
|
||||
|
||||
sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {sql}"
|
||||
cursor.execute(sql)
|
||||
|
|
@ -569,9 +571,9 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
|
||||
@classmethod
|
||||
def get_all_datasource_names(
|
||||
cls, db, datasource_type: str
|
||||
cls, database, datasource_type: str
|
||||
) -> List[utils.DatasourceName]:
|
||||
datasource_df = db.get_df(
|
||||
datasource_df = database.get_df(
|
||||
"SELECT table_schema, table_name FROM INFORMATION_SCHEMA.{}S "
|
||||
"ORDER BY concat(table_schema, '.', table_name)".format(
|
||||
datasource_type.upper()
|
||||
|
|
@ -579,7 +581,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
None,
|
||||
)
|
||||
datasource_names: List[utils.DatasourceName] = []
|
||||
for unused, row in datasource_df.iterrows():
|
||||
for _unused, row in datasource_df.iterrows():
|
||||
datasource_names.append(
|
||||
utils.DatasourceName(
|
||||
schema=row["table_schema"], table=row["table_name"]
|
||||
|
|
@ -599,7 +601,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
be root nodes
|
||||
:param column_hierarchy: dictionary representing the graph
|
||||
"""
|
||||
if len(columns) == 0:
|
||||
if not columns:
|
||||
return
|
||||
root = columns.pop(0)
|
||||
root_info = {"type": root["type"], "children": []}
|
||||
|
|
@ -697,7 +699,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
datum[row_child] = ""
|
||||
|
||||
@classmethod
|
||||
def _split_array_columns_by_process_state(
|
||||
def _split_ary_cols_by_proc_state(
|
||||
cls, array_columns: List[str], array_column_hierarchy: dict, datum: dict
|
||||
) -> Tuple[List[str], Set[str]]:
|
||||
"""
|
||||
|
|
@ -727,7 +729,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
return array_columns_to_process, unprocessed_array_columns
|
||||
|
||||
@classmethod
|
||||
def _convert_data_list_to_array_data_dict(
|
||||
def _convert_data_lst_to_ary_dict(
|
||||
cls, data: List[dict], array_columns_to_process: List[str]
|
||||
) -> dict:
|
||||
"""
|
||||
|
|
@ -755,7 +757,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
return array_data_dict
|
||||
|
||||
@classmethod
|
||||
def _process_array_data(
|
||||
def _process_array_data( # pylint: disable=too-many-locals,too-many-branches
|
||||
cls, data: List[dict], all_columns: List[dict], array_column_hierarchy: dict
|
||||
) -> dict:
|
||||
"""
|
||||
|
|
@ -793,16 +795,16 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
# Determine what columns are ready to be processed. This is necessary for
|
||||
# array columns that contain rows with nested arrays. We first process
|
||||
# the outer arrays before processing inner arrays.
|
||||
array_columns_to_process, unprocessed_array_columns = cls._split_array_columns_by_process_state(
|
||||
array_columns_to_process, unprocessed_array_columns = cls._split_ary_cols_by_proc_state( # pylint: disable=line-too-long
|
||||
array_columns, array_column_hierarchy, data[0]
|
||||
)
|
||||
|
||||
# Pull out array data that is ready to be processed into a dictionary.
|
||||
all_array_data = cls._convert_data_list_to_array_data_dict(
|
||||
all_array_data = cls._convert_data_lst_to_ary_dict(
|
||||
data, array_columns_to_process
|
||||
)
|
||||
|
||||
for original_data_index, expanded_array_data in all_array_data.items():
|
||||
for expanded_array_data in all_array_data.values():
|
||||
for array_column in array_columns:
|
||||
if array_column in unprocessed_array_columns:
|
||||
continue
|
||||
|
|
@ -841,47 +843,6 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
array_value[array_child] = ""
|
||||
return all_array_data
|
||||
|
||||
@classmethod
|
||||
def _consolidate_array_data_into_data(
|
||||
cls, data: List[dict], array_data: dict
|
||||
) -> None:
|
||||
"""
|
||||
Consolidate data given a list representing rows of data and a dictionary
|
||||
representing expanded array data
|
||||
Example:
|
||||
Original data set = [
|
||||
{'ColumnA': [1, 2], 'ColumnB': [3]},
|
||||
{'ColumnA': [11, 22], 'ColumnB': [33]}
|
||||
]
|
||||
array_data = {
|
||||
0: [
|
||||
{'ColumnA': 1, 'ColumnB': 3},
|
||||
{'ColumnA': 2, 'ColumnB': ''},
|
||||
],
|
||||
1: [
|
||||
{'ColumnA': 11, 'ColumnB': 33},
|
||||
{'ColumnA': 22, 'ColumnB': ''},
|
||||
],
|
||||
}
|
||||
Final data set = [
|
||||
{'ColumnA': 1, 'ColumnB': 3},
|
||||
{'ColumnA': 2, 'ColumnB': ''},
|
||||
{'ColumnA': 11, 'ColumnB': 33},
|
||||
{'ColumnA': 22, 'ColumnB': ''},
|
||||
]
|
||||
:param data: list representing rows of data
|
||||
:param array_data: dictionary representing expanded array data
|
||||
:return: list where data and array_data are combined
|
||||
"""
|
||||
data_index = 0
|
||||
original_data_index = 0
|
||||
while data_index < len(data):
|
||||
data[data_index].update(array_data[original_data_index][0])
|
||||
array_data[original_data_index].pop(0)
|
||||
data[data_index + 1 : data_index + 1] = array_data[original_data_index]
|
||||
data_index = data_index + len(array_data[original_data_index]) + 1
|
||||
original_data_index = original_data_index + 1
|
||||
|
||||
@classmethod
|
||||
def _remove_processed_array_columns(
|
||||
cls, unprocessed_array_columns: Set[str], array_column_hierarchy: dict
|
||||
|
|
@ -899,7 +860,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
del array_column_hierarchy[array_column]
|
||||
|
||||
@classmethod
|
||||
def expand_data(
|
||||
def expand_data( # pylint: disable=too-many-locals
|
||||
cls, columns: List[dict], data: List[dict]
|
||||
) -> Tuple[List[dict], List[dict], List[dict]]:
|
||||
"""
|
||||
|
|
@ -926,7 +887,8 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
if not is_feature_enabled("PRESTO_EXPAND_DATA"):
|
||||
return columns, data, []
|
||||
|
||||
# process each column, unnesting ARRAY types and expanding ROW types into new columns
|
||||
# process each column, unnesting ARRAY types and
|
||||
# expanding ROW types into new columns
|
||||
to_process = deque((column, 0) for column in columns)
|
||||
all_columns: List[dict] = []
|
||||
expanded_columns = []
|
||||
|
|
@ -937,10 +899,10 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
all_columns.append(column)
|
||||
|
||||
# When unnesting arrays we need to keep track of how many extra rows
|
||||
# were added, for each original row. This is necessary when we expand multiple
|
||||
# arrays, so that the arrays after the first reuse the rows added by
|
||||
# the first. every time we change a level in the nested arrays we
|
||||
# reinitialize this.
|
||||
# were added, for each original row. This is necessary when we expand
|
||||
# multiple arrays, so that the arrays after the first reuse the rows
|
||||
# added by the first. Every time we change a level in the nested arrays
|
||||
# we reinitialize this.
|
||||
if level != current_array_level:
|
||||
unnested_rows: Dict[int, int] = defaultdict(int)
|
||||
current_array_level = level
|
||||
|
|
@ -1085,7 +1047,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
if total_splits and completed_splits:
|
||||
progress = 100 * (completed_splits / total_splits)
|
||||
logging.info(
|
||||
"Query {} progress: {} / {} "
|
||||
"Query {} progress: {} / {} " # pylint: disable=logging-format-interpolation
|
||||
"splits".format(query_id, completed_splits, total_splits)
|
||||
)
|
||||
if progress > query.progress:
|
||||
|
|
@ -1108,17 +1070,13 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
error_dict.get("errorLocation"),
|
||||
error_dict.get("message"),
|
||||
)
|
||||
if (
|
||||
type(e).__name__ == "DatabaseError"
|
||||
and hasattr(e, "args")
|
||||
and len(e.args) > 0
|
||||
):
|
||||
if type(e).__name__ == "DatabaseError" and hasattr(e, "args") and e.args:
|
||||
error_dict = e.args[0]
|
||||
return error_dict.get("message")
|
||||
return utils.error_msg_from_exception(e)
|
||||
|
||||
@classmethod
|
||||
def _partition_query(
|
||||
def _partition_query( # pylint: disable=too-many-arguments,too-many-locals
|
||||
cls, table_name, database, limit=0, order_by=None, filters=None
|
||||
):
|
||||
"""Returns a partition query
|
||||
|
|
@ -1170,7 +1128,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
return sql
|
||||
|
||||
@classmethod
|
||||
def where_latest_partition(
|
||||
def where_latest_partition( # pylint: disable=too-many-arguments
|
||||
cls,
|
||||
table_name: str,
|
||||
schema: str,
|
||||
|
|
@ -1182,7 +1140,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
col_names, values = cls.latest_partition(
|
||||
table_name, schema, database, show_first=True
|
||||
)
|
||||
except Exception:
|
||||
except Exception: # pylint: disable=broad-except
|
||||
# table is not partitioned
|
||||
return None
|
||||
|
||||
|
|
@ -1196,7 +1154,9 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
return query
|
||||
|
||||
@classmethod
|
||||
def _latest_partition_from_df(cls, df) -> Optional[List[str]]:
|
||||
def _latest_partition_from_df( # pylint: disable=invalid-name
|
||||
cls, df
|
||||
) -> Optional[List[str]]:
|
||||
if not df.empty:
|
||||
return df.to_records(index=False)[0].item()
|
||||
return None
|
||||
|
|
@ -1264,7 +1224,7 @@ class PrestoEngineSpec(BaseEngineSpec):
|
|||
"""
|
||||
indexes = database.get_indexes(table_name, schema)
|
||||
part_fields = indexes[0]["column_names"]
|
||||
for k in kwargs.keys():
|
||||
for k in kwargs.keys(): # pylint: disable=consider-iterating-dictionary
|
||||
if k not in k in part_fields:
|
||||
msg = "Field [{k}] is not part of the portioning key"
|
||||
raise SupersetTemplateException(msg)
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from superset.db_engine_specs.postgres import PostgresBaseEngineSpec
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from urllib import parse
|
||||
|
||||
from superset.db_engine_specs.postgres import PostgresBaseEngineSpec
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from datetime import datetime
|
||||
from typing import List, TYPE_CHECKING
|
||||
|
||||
|
|
@ -25,7 +24,7 @@ from superset.utils import core as utils
|
|||
|
||||
if TYPE_CHECKING:
|
||||
# prevent circular imports
|
||||
from superset.models.core import Database
|
||||
from superset.models.core import Database # pylint: disable=unused-import
|
||||
|
||||
|
||||
class SqliteEngineSpec(BaseEngineSpec):
|
||||
|
|
@ -50,27 +49,27 @@ class SqliteEngineSpec(BaseEngineSpec):
|
|||
|
||||
@classmethod
|
||||
def get_all_datasource_names(
|
||||
cls, db, datasource_type: str
|
||||
cls, database, datasource_type: str
|
||||
) -> List[utils.DatasourceName]:
|
||||
schemas = db.get_all_schema_names(
|
||||
cache=db.schema_cache_enabled,
|
||||
cache_timeout=db.schema_cache_timeout,
|
||||
schemas = database.get_all_schema_names(
|
||||
cache=database.schema_cache_enabled,
|
||||
cache_timeout=database.schema_cache_timeout,
|
||||
force=True,
|
||||
)
|
||||
schema = schemas[0]
|
||||
if datasource_type == "table":
|
||||
return db.get_all_table_names_in_schema(
|
||||
return database.get_all_table_names_in_schema(
|
||||
schema=schema,
|
||||
force=True,
|
||||
cache=db.table_cache_enabled,
|
||||
cache_timeout=db.table_cache_timeout,
|
||||
cache=database.table_cache_enabled,
|
||||
cache_timeout=database.table_cache_timeout,
|
||||
)
|
||||
elif datasource_type == "view":
|
||||
return db.get_all_view_names_in_schema(
|
||||
return database.get_all_view_names_in_schema(
|
||||
schema=schema,
|
||||
force=True,
|
||||
cache=db.table_cache_enabled,
|
||||
cache_timeout=db.table_cache_timeout,
|
||||
cache=database.table_cache_enabled,
|
||||
cache_timeout=database.table_cache_timeout,
|
||||
)
|
||||
else:
|
||||
raise Exception(f"Unsupported datasource_type: {datasource_type}")
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@
|
|||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# pylint: disable=C,R,W
|
||||
from superset.db_engine_specs.postgres import PostgresBaseEngineSpec
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -528,7 +528,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
|
|||
}
|
||||
self.assertEqual(datum, expected_datum)
|
||||
|
||||
def test_split_array_columns_by_process_state(self):
|
||||
def test_presto_split_ary_cols_by_proc_state(self):
|
||||
array_cols = ["array_column", "array_column.nested_array"]
|
||||
array_col_hierarchy = {
|
||||
"array_column": {
|
||||
|
|
@ -541,7 +541,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
|
|||
},
|
||||
}
|
||||
datum = {"array_column": [[[1], [2]]]}
|
||||
actual_array_cols_to_process, actual_unprocessed_array_cols = PrestoEngineSpec._split_array_columns_by_process_state( # noqa ignore: E50
|
||||
actual_array_cols_to_process, actual_unprocessed_array_cols = PrestoEngineSpec._split_ary_cols_by_proc_state( # noqa ignore: E50
|
||||
array_cols, array_col_hierarchy, datum
|
||||
)
|
||||
expected_array_cols_to_process = ["array_column"]
|
||||
|
|
@ -549,13 +549,13 @@ class DbEngineSpecsTestCase(SupersetTestCase):
|
|||
self.assertEqual(actual_array_cols_to_process, expected_array_cols_to_process)
|
||||
self.assertEqual(actual_unprocessed_array_cols, expected_unprocessed_array_cols)
|
||||
|
||||
def test_presto_convert_data_list_to_array_data_dict(self):
|
||||
def test_presto_convert_data_lst_to_ary_dict(self):
|
||||
data = [
|
||||
{"array_column": [1, 2], "int_column": 3},
|
||||
{"array_column": [11, 22], "int_column": 33},
|
||||
]
|
||||
array_columns_to_process = ["array_column"]
|
||||
actual_array_data_dict = PrestoEngineSpec._convert_data_list_to_array_data_dict(
|
||||
actual_array_data_dict = PrestoEngineSpec._convert_data_lst_to_ary_dict(
|
||||
data, array_columns_to_process
|
||||
)
|
||||
expected_array_data_dict = {
|
||||
|
|
@ -592,30 +592,6 @@ class DbEngineSpecsTestCase(SupersetTestCase):
|
|||
}
|
||||
self.assertEqual(actual_array_data, expected_array_data)
|
||||
|
||||
def test_presto_consolidate_array_data_into_data(self):
|
||||
data = [
|
||||
{"arr_col": [[1], [2]], "int_col": 3},
|
||||
{"arr_col": [[11], [22]], "int_col": 33},
|
||||
]
|
||||
array_data = {
|
||||
0: [
|
||||
{"arr_col": [[1], [2]], "arr_col.nested_row": 1},
|
||||
{"arr_col": "", "arr_col.nested_row": 2, "int_col": ""},
|
||||
],
|
||||
1: [
|
||||
{"arr_col": [[11], [22]], "arr_col.nested_row": 11},
|
||||
{"arr_col": "", "arr_col.nested_row": 22, "int_col": ""},
|
||||
],
|
||||
}
|
||||
PrestoEngineSpec._consolidate_array_data_into_data(data, array_data)
|
||||
expected_data = [
|
||||
{"arr_col": [[1], [2]], "arr_col.nested_row": 1, "int_col": 3},
|
||||
{"arr_col": "", "arr_col.nested_row": 2, "int_col": ""},
|
||||
{"arr_col": [[11], [22]], "arr_col.nested_row": 11, "int_col": 33},
|
||||
{"arr_col": "", "arr_col.nested_row": 22, "int_col": ""},
|
||||
]
|
||||
self.assertEqual(data, expected_data)
|
||||
|
||||
def test_presto_remove_processed_array_columns(self):
|
||||
array_col_hierarchy = {
|
||||
"array_column": {
|
||||
|
|
|
|||
Loading…
Reference in New Issue