Fix lint in `superset/db_engine_spec` (#8338)

* Enable lint checking for files in db_engine_spec that have few to no
lint issues

* Enable lint and fix issue in db_engine_spec/mysql.py

* Enable pylint and fix lint for db_engine_spec/pinot.py

* Enable lint and fix issues for db_engine_specs/hive.py

* Enable lint and fix for db_engine_spec/presto.py

* Re-enable lint on base.py, fix/disable specific failures, including one
bad method signature

* Make flake8 happy after a number of pylint fixes

* Update db_engine_spec_test test cases related to Presto to support
different method naming

* automated reformatting

* One more pylint disable for druid.py

* Find the magic invocation that makes all the lint tools happy
This commit is contained in:
Will Barrett 2019-10-04 09:19:21 -07:00 committed by Beto Dealmeida
parent 65a05ca47e
commit ec86d9de17
22 changed files with 116 additions and 191 deletions

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from superset.db_engine_specs.base import BaseEngineSpec

View File

@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
# pylint: disable=unused-argument
from contextlib import closing
from datetime import datetime
import hashlib
@ -42,10 +42,10 @@ from superset.utils import core as utils
if TYPE_CHECKING:
# prevent circular imports
from superset.models.core import Database
from superset.models.core import Database # pylint: disable=unused-import
class TimeGrain(NamedTuple):
class TimeGrain(NamedTuple): # pylint: disable=too-few-public-methods
name: str # TODO: redundant field, remove
label: str
function: str
@ -79,7 +79,9 @@ builtin_time_grains: Dict[Optional[str], str] = {
}
class TimestampExpression(ColumnClause):
class TimestampExpression(
ColumnClause
): # pylint: disable=abstract-method,too-many-ancestors,too-few-public-methods
def __init__(self, expr: str, col: ColumnClause, **kwargs):
"""Sqlalchemy class that can be can be used to render native column elements
respeting engine-specific quoting rules as part of a string-based expression.
@ -106,7 +108,7 @@ def compile_timegrain_expression(
return element.name.replace("{col}", compiler.process(element.col, **kw))
class LimitMethod(object):
class LimitMethod(object): # pylint: disable=too-few-public-methods
"""Enum the ways that limits can be applied"""
FETCH_MANY = "fetch_many"
@ -114,7 +116,7 @@ class LimitMethod(object):
FORCE_LIMIT = "force_limit"
class BaseEngineSpec:
class BaseEngineSpec: # pylint: disable=too-many-public-methods
"""Abstract class for database engine specific configurations"""
engine = "base" # str as defined in sqlalchemy.engine.engine
@ -128,7 +130,7 @@ class BaseEngineSpec:
force_column_alias_quotes = False
arraysize = 0
max_column_name_length = 0
try_remove_schema_from_table_name = True
try_remove_schema_from_table_name = True # pylint: disable=invalid-name
@classmethod
def get_allow_cost_estimate(cls, version: str = None) -> bool:
@ -287,7 +289,7 @@ class BaseEngineSpec:
:param type_code: Type code from cursor description
:return: String representation of type code
"""
if isinstance(type_code, str) and len(type_code):
if isinstance(type_code, str) and type_code != "":
return type_code.upper()
return None
@ -375,7 +377,7 @@ class BaseEngineSpec:
return df
@classmethod
def df_to_sql(cls, df: pd.DataFrame, **kwargs):
def df_to_sql(cls, df: pd.DataFrame, **kwargs): # pylint: disable=invalid-name
""" Upload data from a Pandas DataFrame to a database. For
regular engines this calls the DataFrame.to_sql() method. Can be
overridden for engines that don't work well with to_sql(), e.g.
@ -449,35 +451,35 @@ class BaseEngineSpec:
@classmethod
def get_all_datasource_names(
cls, db, datasource_type: str
cls, database, datasource_type: str
) -> List[utils.DatasourceName]:
"""Returns a list of all tables or views in database.
:param db: Database instance
:param database: Database instance
:param datasource_type: Datasource_type can be 'table' or 'view'
:return: List of all datasources in database or schema
"""
# TODO: Fix circular import caused by importing Database
schemas = db.get_all_schema_names(
cache=db.schema_cache_enabled,
cache_timeout=db.schema_cache_timeout,
schemas = database.get_all_schema_names(
cache=database.schema_cache_enabled,
cache_timeout=database.schema_cache_timeout,
force=True,
)
all_datasources: List[utils.DatasourceName] = []
for schema in schemas:
if datasource_type == "table":
all_datasources += db.get_all_table_names_in_schema(
all_datasources += database.get_all_table_names_in_schema(
schema=schema,
force=True,
cache=db.table_cache_enabled,
cache_timeout=db.table_cache_timeout,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
elif datasource_type == "view":
all_datasources += db.get_all_view_names_in_schema(
all_datasources += database.get_all_view_names_in_schema(
schema=schema,
force=True,
cache=db.table_cache_enabled,
cache_timeout=db.table_cache_timeout,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
else:
raise Exception(f"Unsupported datasource_type: {datasource_type}")
@ -588,7 +590,7 @@ class BaseEngineSpec:
return inspector.get_columns(table_name, schema)
@classmethod
def where_latest_partition(
def where_latest_partition( # pylint: disable=too-many-arguments
cls,
table_name: str,
schema: Optional[str],
@ -615,7 +617,7 @@ class BaseEngineSpec:
return [column(c.get("name")) for c in cols]
@classmethod
def select_star(
def select_star( # pylint: disable=too-many-arguments,too-many-locals
cls,
database,
table_name: str,
@ -727,7 +729,7 @@ class BaseEngineSpec:
url.username = username
@classmethod
def get_configuration_for_impersonation(
def get_configuration_for_impersonation( # pylint: disable=invalid-name
cls, uri: str, impersonate_user: bool, username: str
) -> Dict[str, str]:
"""
@ -830,8 +832,9 @@ class BaseEngineSpec:
cls, sqla_column_type: TypeEngine, dialect: Dialect
) -> str:
"""
Convert sqlalchemy column type to string representation. Can be overridden to remove
unnecessary details, especially collation info (see mysql, mssql).
Convert sqlalchemy column type to string representation.
Can be overridden to remove unnecessary details, especially
collation info (see mysql, mssql).
:param sqla_column_type: SqlAlchemy column type
:param dialect: Sqlalchemy dialect

View File

@ -14,13 +14,12 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from superset.db_engine_specs.base import BaseEngineSpec
class ClickHouseEngineSpec(BaseEngineSpec):
class ClickHouseEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
"""Dialect for ClickHouse analytical DB."""
engine = "clickhouse"

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from urllib import parse

View File

@ -14,11 +14,10 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from superset.db_engine_specs.base import BaseEngineSpec
class DruidEngineSpec(BaseEngineSpec):
class DruidEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
"""Engine spec for Druid.io"""
engine = "druid"

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from superset.db_engine_specs.sqlite import SqliteEngineSpec

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
import logging
import os
@ -81,9 +80,9 @@ class HiveEngineSpec(PrestoEngineSpec):
@classmethod
def get_all_datasource_names(
cls, db, datasource_type: str
cls, database, datasource_type: str
) -> List[utils.DatasourceName]:
return BaseEngineSpec.get_all_datasource_names(db, datasource_type)
return BaseEngineSpec.get_all_datasource_names(database, datasource_type)
@classmethod
def fetch_data(cls, cursor, limit: int) -> List[Tuple]:
@ -99,7 +98,7 @@ class HiveEngineSpec(PrestoEngineSpec):
return []
@classmethod
def create_table_from_csv(cls, form, table):
def create_table_from_csv(cls, form, table): # pylint: disable=too-many-locals
"""Uploads a csv file and creates a superset datasource in Hive."""
def convert_to_hive_type(col_type):
@ -223,7 +222,7 @@ class HiveEngineSpec(PrestoEngineSpec):
reduce_progress = int(match.groupdict()["reduce_progress"])
stages[stage_number] = (map_progress + reduce_progress) / 2
logging.info(
"Progress detail: {}, "
"Progress detail: {}, " # pylint: disable=logging-format-interpolation
"current job {}, "
"total jobs: {}".format(stages, current_job, total_jobs)
)
@ -239,9 +238,10 @@ class HiveEngineSpec(PrestoEngineSpec):
for line in log_lines:
if lkp in line:
return line.split(lkp)[1]
return None
@classmethod
def handle_cursor(cls, cursor, query, session):
def handle_cursor(cls, cursor, query, session): # pylint: disable=too-many-locals
"""Updates progress information"""
from pyhive import hive # pylint: disable=no-name-in-module
@ -302,33 +302,33 @@ class HiveEngineSpec(PrestoEngineSpec):
return inspector.get_columns(table_name, schema)
@classmethod
def where_latest_partition(
def where_latest_partition( # pylint: disable=too-many-arguments
cls,
table_name: str,
schema: Optional[str],
database,
qry: Select,
query: Select,
columns: Optional[List] = None,
) -> Optional[Select]:
try:
col_names, values = cls.latest_partition(
table_name, schema, database, show_first=True
)
except Exception:
except Exception: # pylint: disable=broad-except
# table is not partitioned
return None
if values is not None and columns is not None:
for col_name, value in zip(col_names, values):
for c in columns:
if c.get("name") == col_name:
qry = qry.where(Column(col_name) == value)
for clm in columns:
if clm.get("name") == col_name:
query = query.where(Column(col_name) == value)
return qry
return query
return None
@classmethod
def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]:
return BaseEngineSpec._get_fields(cols)
return BaseEngineSpec._get_fields(cols) # pylint: disable=protected-access
@classmethod
def latest_sub_partition(cls, table_name, schema, database, **kwargs):
@ -343,11 +343,13 @@ class HiveEngineSpec(PrestoEngineSpec):
return None
@classmethod
def _partition_query(cls, table_name, limit=0, order_by=None, filters=None):
def _partition_query( # pylint: disable=too-many-arguments
cls, table_name, database, limit=0, order_by=None, filters=None
):
return f"SHOW PARTITIONS {table_name}"
@classmethod
def select_star(
def select_star( # pylint: disable=too-many-arguments
cls,
database,
table_name: str,
@ -413,6 +415,8 @@ class HiveEngineSpec(PrestoEngineSpec):
return configuration
@staticmethod
def execute(cursor, query: str, async_: bool = False):
def execute(
cursor, query: str, async_: bool = False
): # pylint: disable=arguments-differ
kwargs = {"async": async_}
cursor.execute(query, **kwargs)

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from typing import List

View File

@ -14,13 +14,12 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from superset.db_engine_specs.base import BaseEngineSpec
class KylinEngineSpec(BaseEngineSpec):
class KylinEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
"""Dialect for Apache Kylin"""
engine = "kylin"

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
import re
from typing import List, Optional, Tuple

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from typing import Any, Dict, Optional
from urllib import parse
@ -77,7 +76,7 @@ class MySQLEngineSpec(BaseEngineSpec):
datatype = type_code
if isinstance(type_code, int):
datatype = cls.type_code_map.get(type_code)
if datatype and isinstance(datatype, str) and len(datatype):
if datatype and isinstance(datatype, str) and datatype:
return datatype
return None
@ -92,7 +91,7 @@ class MySQLEngineSpec(BaseEngineSpec):
try:
if isinstance(e.args, tuple) and len(e.args) > 1:
message = e.args[1]
except Exception:
except Exception: # pylint: disable=broad-except
pass
return message

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from superset.db_engine_specs.base import LimitMethod

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from typing import Dict, List, Optional
from sqlalchemy.sql.expression import ColumnClause, ColumnElement
@ -22,7 +21,7 @@ from sqlalchemy.sql.expression import ColumnClause, ColumnElement
from superset.db_engine_specs.base import BaseEngineSpec, TimestampExpression
class PinotEngineSpec(BaseEngineSpec):
class PinotEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
engine = "pinot"
allows_subqueries = False
allows_joins = False
@ -66,10 +65,10 @@ class PinotEngineSpec(BaseEngineSpec):
# Pinot does not want the group by expr's to appear in the select clause
select_sans_groupby = []
# We want identity and not equality, so doing the filtering manually
for s in select_exprs:
for sel in select_exprs:
for gr in groupby_exprs:
if s is gr:
if sel is gr:
break
else:
select_sans_groupby.append(s)
select_sans_groupby.append(sel)
return select_sans_groupby

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from typing import List, Optional, Tuple, TYPE_CHECKING
@ -24,7 +23,7 @@ from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod
if TYPE_CHECKING:
# prevent circular imports
from superset.models.core import Database
from superset.models.core import Database # pylint: disable=unused-import
class PostgresBaseEngineSpec(BaseEngineSpec):

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from collections import defaultdict, deque, OrderedDict
from contextlib import closing
from datetime import datetime
@ -42,7 +41,7 @@ from superset.utils import core as utils
if TYPE_CHECKING:
# prevent circular imports
from superset.models.core import Database
from superset.models.core import Database # pylint: disable=unused-import
QueryStatus = utils.QueryStatus
config = app.config
@ -80,7 +79,7 @@ def get_children(column: Dict[str, str]) -> List[Dict[str, str]]:
:param column: dictionary representing a Presto column
:return: list of dictionaries representing children columns
"""
pattern = re.compile("(?P<type>\w+)\((?P<children>.*)\)")
pattern = re.compile(r"(?P<type>\w+)\((?P<children>.*)\)")
match = pattern.match(column["type"])
if not match:
raise Exception(f"Unable to parse column type {column['type']}")
@ -157,7 +156,10 @@ class PrestoEngineSpec(BaseEngineSpec):
return []
if schema:
sql = "SELECT table_name FROM information_schema.views WHERE table_schema=%(schema)s"
sql = (
"SELECT table_name FROM information_schema.views "
"WHERE table_schema=%(schema)s"
)
params = {"schema": schema}
else:
sql = "SELECT table_name FROM information_schema.views"
@ -220,7 +222,7 @@ class PrestoEngineSpec(BaseEngineSpec):
)
@classmethod
def _parse_structural_column(
def _parse_structural_column( # pylint: disable=too-many-locals,too-many-branches
cls, parent_column_name: str, parent_data_type: str, result: List[dict]
) -> None:
"""
@ -243,7 +245,7 @@ class PrestoEngineSpec(BaseEngineSpec):
inner_types = cls._split_data_type(data_type, r"\)")
for inner_type in inner_types:
# We have finished parsing multiple structural data types
if not inner_type and len(stack) > 0:
if not inner_type and stack:
stack.pop()
elif cls._has_nested_data_types(inner_type):
# split on comma , to get individual data types
@ -283,11 +285,11 @@ class PrestoEngineSpec(BaseEngineSpec):
if not (inner_type.endswith("array") or inner_type.endswith("row")):
stack.pop()
# We have an array of row objects (i.e. array(row(...)))
elif "array" == inner_type or "row" == inner_type:
elif inner_type == "array" or inner_type == "row":
# Push a dummy object to represent the structural data type
stack.append(("", inner_type))
# We have an array of a basic data types(i.e. array(varchar)).
elif len(stack) > 0:
elif stack:
# Because it is an array of a basic data type. We have finished
# parsing the structural data type and can move on.
stack.pop()
@ -348,7 +350,7 @@ class PrestoEngineSpec(BaseEngineSpec):
column_type = presto_type_map[column.Type]()
except KeyError:
logging.info(
"Did not recognize type {} of column {}".format(
"Did not recognize type {} of column {}".format( # pylint: disable=logging-format-interpolation
column.Type, column.Column
)
)
@ -439,7 +441,7 @@ class PrestoEngineSpec(BaseEngineSpec):
return filtered_cols, array_cols
@classmethod
def select_star(
def select_star( # pylint: disable=too-many-arguments
cls,
database,
table_name: str,
@ -476,7 +478,7 @@ class PrestoEngineSpec(BaseEngineSpec):
)
@classmethod
def estimate_statement_cost(
def estimate_statement_cost( # pylint: disable=too-many-locals
cls, statement: str, database, cursor, user_name: str
) -> Dict[str, str]:
"""
@ -490,9 +492,9 @@ class PrestoEngineSpec(BaseEngineSpec):
parsed_query = ParsedQuery(statement)
sql = parsed_query.stripped()
SQL_QUERY_MUTATOR = config.get("SQL_QUERY_MUTATOR")
if SQL_QUERY_MUTATOR:
sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)
sql_query_mutator = config.get("SQL_QUERY_MUTATOR")
if sql_query_mutator:
sql = sql_query_mutator(sql, user_name, security_manager, database)
sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {sql}"
cursor.execute(sql)
@ -569,9 +571,9 @@ class PrestoEngineSpec(BaseEngineSpec):
@classmethod
def get_all_datasource_names(
cls, db, datasource_type: str
cls, database, datasource_type: str
) -> List[utils.DatasourceName]:
datasource_df = db.get_df(
datasource_df = database.get_df(
"SELECT table_schema, table_name FROM INFORMATION_SCHEMA.{}S "
"ORDER BY concat(table_schema, '.', table_name)".format(
datasource_type.upper()
@ -579,7 +581,7 @@ class PrestoEngineSpec(BaseEngineSpec):
None,
)
datasource_names: List[utils.DatasourceName] = []
for unused, row in datasource_df.iterrows():
for _unused, row in datasource_df.iterrows():
datasource_names.append(
utils.DatasourceName(
schema=row["table_schema"], table=row["table_name"]
@ -599,7 +601,7 @@ class PrestoEngineSpec(BaseEngineSpec):
be root nodes
:param column_hierarchy: dictionary representing the graph
"""
if len(columns) == 0:
if not columns:
return
root = columns.pop(0)
root_info = {"type": root["type"], "children": []}
@ -697,7 +699,7 @@ class PrestoEngineSpec(BaseEngineSpec):
datum[row_child] = ""
@classmethod
def _split_array_columns_by_process_state(
def _split_ary_cols_by_proc_state(
cls, array_columns: List[str], array_column_hierarchy: dict, datum: dict
) -> Tuple[List[str], Set[str]]:
"""
@ -727,7 +729,7 @@ class PrestoEngineSpec(BaseEngineSpec):
return array_columns_to_process, unprocessed_array_columns
@classmethod
def _convert_data_list_to_array_data_dict(
def _convert_data_lst_to_ary_dict(
cls, data: List[dict], array_columns_to_process: List[str]
) -> dict:
"""
@ -755,7 +757,7 @@ class PrestoEngineSpec(BaseEngineSpec):
return array_data_dict
@classmethod
def _process_array_data(
def _process_array_data( # pylint: disable=too-many-locals,too-many-branches
cls, data: List[dict], all_columns: List[dict], array_column_hierarchy: dict
) -> dict:
"""
@ -793,16 +795,16 @@ class PrestoEngineSpec(BaseEngineSpec):
# Determine what columns are ready to be processed. This is necessary for
# array columns that contain rows with nested arrays. We first process
# the outer arrays before processing inner arrays.
array_columns_to_process, unprocessed_array_columns = cls._split_array_columns_by_process_state(
array_columns_to_process, unprocessed_array_columns = cls._split_ary_cols_by_proc_state( # pylint: disable=line-too-long
array_columns, array_column_hierarchy, data[0]
)
# Pull out array data that is ready to be processed into a dictionary.
all_array_data = cls._convert_data_list_to_array_data_dict(
all_array_data = cls._convert_data_lst_to_ary_dict(
data, array_columns_to_process
)
for original_data_index, expanded_array_data in all_array_data.items():
for expanded_array_data in all_array_data.values():
for array_column in array_columns:
if array_column in unprocessed_array_columns:
continue
@ -841,47 +843,6 @@ class PrestoEngineSpec(BaseEngineSpec):
array_value[array_child] = ""
return all_array_data
@classmethod
def _consolidate_array_data_into_data(
cls, data: List[dict], array_data: dict
) -> None:
"""
Consolidate data given a list representing rows of data and a dictionary
representing expanded array data
Example:
Original data set = [
{'ColumnA': [1, 2], 'ColumnB': [3]},
{'ColumnA': [11, 22], 'ColumnB': [33]}
]
array_data = {
0: [
{'ColumnA': 1, 'ColumnB': 3},
{'ColumnA': 2, 'ColumnB': ''},
],
1: [
{'ColumnA': 11, 'ColumnB': 33},
{'ColumnA': 22, 'ColumnB': ''},
],
}
Final data set = [
{'ColumnA': 1, 'ColumnB': 3},
{'ColumnA': 2, 'ColumnB': ''},
{'ColumnA': 11, 'ColumnB': 33},
{'ColumnA': 22, 'ColumnB': ''},
]
:param data: list representing rows of data
:param array_data: dictionary representing expanded array data
:return: list where data and array_data are combined
"""
data_index = 0
original_data_index = 0
while data_index < len(data):
data[data_index].update(array_data[original_data_index][0])
array_data[original_data_index].pop(0)
data[data_index + 1 : data_index + 1] = array_data[original_data_index]
data_index = data_index + len(array_data[original_data_index]) + 1
original_data_index = original_data_index + 1
@classmethod
def _remove_processed_array_columns(
cls, unprocessed_array_columns: Set[str], array_column_hierarchy: dict
@ -899,7 +860,7 @@ class PrestoEngineSpec(BaseEngineSpec):
del array_column_hierarchy[array_column]
@classmethod
def expand_data(
def expand_data( # pylint: disable=too-many-locals
cls, columns: List[dict], data: List[dict]
) -> Tuple[List[dict], List[dict], List[dict]]:
"""
@ -926,7 +887,8 @@ class PrestoEngineSpec(BaseEngineSpec):
if not is_feature_enabled("PRESTO_EXPAND_DATA"):
return columns, data, []
# process each column, unnesting ARRAY types and expanding ROW types into new columns
# process each column, unnesting ARRAY types and
# expanding ROW types into new columns
to_process = deque((column, 0) for column in columns)
all_columns: List[dict] = []
expanded_columns = []
@ -937,10 +899,10 @@ class PrestoEngineSpec(BaseEngineSpec):
all_columns.append(column)
# When unnesting arrays we need to keep track of how many extra rows
# were added, for each original row. This is necessary when we expand multiple
# arrays, so that the arrays after the first reuse the rows added by
# the first. every time we change a level in the nested arrays we
# reinitialize this.
# were added, for each original row. This is necessary when we expand
# multiple arrays, so that the arrays after the first reuse the rows
# added by the first. every time we change a level in the nested arrays
# we reinitialize this.
if level != current_array_level:
unnested_rows: Dict[int, int] = defaultdict(int)
current_array_level = level
@ -1085,7 +1047,7 @@ class PrestoEngineSpec(BaseEngineSpec):
if total_splits and completed_splits:
progress = 100 * (completed_splits / total_splits)
logging.info(
"Query {} progress: {} / {} "
"Query {} progress: {} / {} " # pylint: disable=logging-format-interpolation
"splits".format(query_id, completed_splits, total_splits)
)
if progress > query.progress:
@ -1108,17 +1070,13 @@ class PrestoEngineSpec(BaseEngineSpec):
error_dict.get("errorLocation"),
error_dict.get("message"),
)
if (
type(e).__name__ == "DatabaseError"
and hasattr(e, "args")
and len(e.args) > 0
):
if type(e).__name__ == "DatabaseError" and hasattr(e, "args") and e.args:
error_dict = e.args[0]
return error_dict.get("message")
return utils.error_msg_from_exception(e)
@classmethod
def _partition_query(
def _partition_query( # pylint: disable=too-many-arguments,too-many-locals
cls, table_name, database, limit=0, order_by=None, filters=None
):
"""Returns a partition query
@ -1170,7 +1128,7 @@ class PrestoEngineSpec(BaseEngineSpec):
return sql
@classmethod
def where_latest_partition(
def where_latest_partition( # pylint: disable=too-many-arguments
cls,
table_name: str,
schema: str,
@ -1182,7 +1140,7 @@ class PrestoEngineSpec(BaseEngineSpec):
col_names, values = cls.latest_partition(
table_name, schema, database, show_first=True
)
except Exception:
except Exception: # pylint: disable=broad-except
# table is not partitioned
return None
@ -1196,7 +1154,9 @@ class PrestoEngineSpec(BaseEngineSpec):
return query
@classmethod
def _latest_partition_from_df(cls, df) -> Optional[List[str]]:
def _latest_partition_from_df( # pylint: disable=invalid-name
cls, df
) -> Optional[List[str]]:
if not df.empty:
return df.to_records(index=False)[0].item()
return None
@ -1264,7 +1224,7 @@ class PrestoEngineSpec(BaseEngineSpec):
"""
indexes = database.get_indexes(table_name, schema)
part_fields = indexes[0]["column_names"]
for k in kwargs.keys():
for k in kwargs.keys(): # pylint: disable=consider-iterating-dictionary
if k not in part_fields:
msg = f"Field [{k}] is not part of the partitioning key"
raise SupersetTemplateException(msg)

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from superset.db_engine_specs.postgres import PostgresBaseEngineSpec

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from urllib import parse
from superset.db_engine_specs.postgres import PostgresBaseEngineSpec

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from datetime import datetime
from typing import List, TYPE_CHECKING
@ -25,7 +24,7 @@ from superset.utils import core as utils
if TYPE_CHECKING:
# prevent circular imports
from superset.models.core import Database
from superset.models.core import Database # pylint: disable=unused-import
class SqliteEngineSpec(BaseEngineSpec):
@ -50,27 +49,27 @@ class SqliteEngineSpec(BaseEngineSpec):
@classmethod
def get_all_datasource_names(
cls, db, datasource_type: str
cls, database, datasource_type: str
) -> List[utils.DatasourceName]:
schemas = db.get_all_schema_names(
cache=db.schema_cache_enabled,
cache_timeout=db.schema_cache_timeout,
schemas = database.get_all_schema_names(
cache=database.schema_cache_enabled,
cache_timeout=database.schema_cache_timeout,
force=True,
)
schema = schemas[0]
if datasource_type == "table":
return db.get_all_table_names_in_schema(
return database.get_all_table_names_in_schema(
schema=schema,
force=True,
cache=db.table_cache_enabled,
cache_timeout=db.table_cache_timeout,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
elif datasource_type == "view":
return db.get_all_view_names_in_schema(
return database.get_all_view_names_in_schema(
schema=schema,
force=True,
cache=db.table_cache_enabled,
cache_timeout=db.table_cache_timeout,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
else:
raise Exception(f"Unsupported datasource_type: {datasource_type}")

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from superset.db_engine_specs.base import BaseEngineSpec, LimitMethod

View File

@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from superset.db_engine_specs.postgres import PostgresBaseEngineSpec

View File

@ -528,7 +528,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
}
self.assertEqual(datum, expected_datum)
def test_split_array_columns_by_process_state(self):
def test_presto_split_ary_cols_by_proc_state(self):
array_cols = ["array_column", "array_column.nested_array"]
array_col_hierarchy = {
"array_column": {
@ -541,7 +541,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
},
}
datum = {"array_column": [[[1], [2]]]}
actual_array_cols_to_process, actual_unprocessed_array_cols = PrestoEngineSpec._split_array_columns_by_process_state( # noqa ignore: E50
actual_array_cols_to_process, actual_unprocessed_array_cols = PrestoEngineSpec._split_ary_cols_by_proc_state( # noqa ignore: E50
array_cols, array_col_hierarchy, datum
)
expected_array_cols_to_process = ["array_column"]
@ -549,13 +549,13 @@ class DbEngineSpecsTestCase(SupersetTestCase):
self.assertEqual(actual_array_cols_to_process, expected_array_cols_to_process)
self.assertEqual(actual_unprocessed_array_cols, expected_unprocessed_array_cols)
def test_presto_convert_data_list_to_array_data_dict(self):
def test_presto_convert_data_lst_to_ary_dict(self):
data = [
{"array_column": [1, 2], "int_column": 3},
{"array_column": [11, 22], "int_column": 33},
]
array_columns_to_process = ["array_column"]
actual_array_data_dict = PrestoEngineSpec._convert_data_list_to_array_data_dict(
actual_array_data_dict = PrestoEngineSpec._convert_data_lst_to_ary_dict(
data, array_columns_to_process
)
expected_array_data_dict = {
@ -592,30 +592,6 @@ class DbEngineSpecsTestCase(SupersetTestCase):
}
self.assertEqual(actual_array_data, expected_array_data)
def test_presto_consolidate_array_data_into_data(self):
data = [
{"arr_col": [[1], [2]], "int_col": 3},
{"arr_col": [[11], [22]], "int_col": 33},
]
array_data = {
0: [
{"arr_col": [[1], [2]], "arr_col.nested_row": 1},
{"arr_col": "", "arr_col.nested_row": 2, "int_col": ""},
],
1: [
{"arr_col": [[11], [22]], "arr_col.nested_row": 11},
{"arr_col": "", "arr_col.nested_row": 22, "int_col": ""},
],
}
PrestoEngineSpec._consolidate_array_data_into_data(data, array_data)
expected_data = [
{"arr_col": [[1], [2]], "arr_col.nested_row": 1, "int_col": 3},
{"arr_col": "", "arr_col.nested_row": 2, "int_col": ""},
{"arr_col": [[11], [22]], "arr_col.nested_row": 11, "int_col": 33},
{"arr_col": "", "arr_col.nested_row": 22, "int_col": ""},
]
self.assertEqual(data, expected_data)
def test_presto_remove_processed_array_columns(self):
array_col_hierarchy = {
"array_column": {