feat: CSV File Upload form updates (#21922)

This commit is contained in:
Antonio Rivero Martinez 2022-11-21 12:51:18 -03:00 committed by GitHub
parent f40e6d1054
commit 6bb4d87deb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 105 additions and 107 deletions

View File

@ -17,7 +17,7 @@ specific language governing permissions and limitations
under the License.
#}
<script>
var db = $("#con");
var db = $("#database");
var schema = $("#schema");
// this element is a text input

View File

@ -105,18 +105,9 @@ class UploadToDatabaseForm(DynamicForm):
class CsvToDatabaseForm(UploadToDatabaseForm):
name = StringField(
_("Table Name"),
description=_("Name of table to be created from csv data."),
validators=[
DataRequired(),
Regexp(r"^[^\.]+$", message=_("Table name cannot contain a schema")),
],
widget=BS3TextFieldWidget(),
)
csv_file = FileField(
_("CSV File"),
description=_("Select a CSV file to be uploaded to a database."),
_("CSV Upload"),
description=_("Select a file to be uploaded to the database"),
validators=[
FileRequired(),
FileAllowed(
@ -133,31 +124,37 @@ class CsvToDatabaseForm(UploadToDatabaseForm):
),
],
)
con = QuerySelectField(
table_name = StringField(
_("Table Name"),
description=_("Name of table to be created with CSV file"),
validators=[
DataRequired(),
Regexp(r"^[^\.]+$", message=_("Table name cannot contain a schema")),
],
widget=BS3TextFieldWidget(),
)
database = QuerySelectField(
_("Database"),
description=_("Select a database to upload the file to"),
query_factory=UploadToDatabaseForm.file_allowed_dbs,
get_pk=lambda a: a.id,
get_label=lambda a: a.database_name,
)
schema = StringField(
_("Schema"),
description=_("Specify a schema (if database flavor supports this)."),
description=_("Select a schema if the database supports this"),
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
sep = StringField(
delimiter = StringField(
_("Delimiter"),
description=_("Delimiter used by CSV file (for whitespace use \\s+)."),
description=_("Enter a delimiter for this data"),
validators=[DataRequired()],
widget=BS3TextFieldWidget(),
)
if_exists = SelectField(
_("Table Exists"),
description=_(
"If table exists do one of the following: "
"Fail (do nothing), Replace (drop and recreate table) "
"or Append (insert data)."
),
_("If Table Already Exists"),
description=_("What should happen if the table already exists"),
choices=[
("fail", _("Fail")),
("replace", _("Replace")),
@ -165,97 +162,98 @@ class CsvToDatabaseForm(UploadToDatabaseForm):
],
validators=[DataRequired()],
)
header = IntegerField(
_("Header Row"),
skip_initial_space = BooleanField(
_("Skip Initial Space"), description=_("Skip spaces after delimiter")
)
skip_blank_lines = BooleanField(
_("Skip Blank Lines"),
description=_(
"Row containing the headers to use as "
"column names (0 is first line of data). "
"Leave empty if there is no header row."
"Skip blank lines rather than interpreting them as Not A Number values"
),
validators=[Optional(), NumberRange(min=0)],
)
parse_dates = CommaSeparatedListField(
_("Columns To Be Parsed as Dates"),
description=_(
"A comma separated list of columns that should be parsed as dates"
),
filters=[filter_not_empty_values],
)
infer_datetime_format = BooleanField(
_("Interpret Datetime Format Automatically"),
description=_("Interpret the datetime format automatically"),
)
decimal = StringField(
_("Decimal Character"),
default=".",
description=_("Character to interpret as decimal point"),
validators=[Optional(), Length(min=1, max=1)],
widget=BS3TextFieldWidget(),
)
null_values = JsonListField(
_("Null Values"),
default=config["CSV_DEFAULT_NA_NAMES"],
description=_(
"Json list of the values that should be treated as null. "
'Examples: [""] for empty strings, ["None", "N/A"], ["nan", "null"]. '
"Warning: Hive database supports only a single value"
),
)
index_col = IntegerField(
_("Index Column"),
description=_(
"Column to use as the row labels of the "
"dataframe. Leave empty if no index column."
"dataframe. Leave empty if no index column"
),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
mangle_dupe_cols = BooleanField(
_("Mangle Duplicate Columns"),
description=_('Specify duplicate columns as "X.0, X.1".'),
)
usecols = JsonListField(
_("Use Columns"),
default=None,
description=_(
"Json list of the column names that should be read. "
"If not None, only these columns will be read from the file."
),
validators=[Optional()],
)
skipinitialspace = BooleanField(
_("Skip Initial Space"), description=_("Skip spaces after delimiter.")
)
skiprows = IntegerField(
_("Skip Rows"),
description=_("Number of rows to skip at start of file."),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
nrows = IntegerField(
_("Rows to Read"),
description=_("Number of rows of file to read."),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
skip_blank_lines = BooleanField(
_("Skip Blank Lines"),
description=_("Skip blank lines rather than interpreting them as NaN values."),
)
parse_dates = CommaSeparatedListField(
_("Parse Dates"),
description=_(
"A comma separated list of columns that should be parsed as dates."
),
filters=[filter_not_empty_values],
)
infer_datetime_format = BooleanField(
_("Infer Datetime Format"),
description=_("Use Pandas to interpret the datetime format automatically."),
)
decimal = StringField(
_("Decimal Character"),
default=".",
description=_("Character to interpret as decimal point."),
validators=[Optional(), Length(min=1, max=1)],
widget=BS3TextFieldWidget(),
)
index = BooleanField(
_("Dataframe Index"), description=_("Write dataframe index as a column.")
dataframe_index = BooleanField(
_("Dataframe Index"), description=_("Write dataframe index as a column")
)
index_label = StringField(
_("Column Label(s)"),
description=_(
"Column label for index column(s). If None is given "
"and Dataframe Index is True, Index Names are used."
"and Dataframe Index is checked, Index Names are used"
),
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
null_values = JsonListField(
_("Null values"),
default=config["CSV_DEFAULT_NA_NAMES"],
use_cols = JsonListField(
_("Columns To Read"),
default=None,
description=_("Json list of the column names that should be read"),
validators=[Optional()],
)
overwrite_duplicate = BooleanField(
_("Overwrite Duplicate Columns"),
description=_(
"Json list of the values that should be treated as null. "
'Examples: [""], ["None", "N/A"], ["nan", "null"]. '
"Warning: Hive database supports only single value. "
'Use [""] for empty string.'
"If duplicate columns are not overridden, "
'they will be presented as "X.1, X.2 ...X.x"'
),
)
header = IntegerField(
_("Header Row"),
description=_(
"Row containing the headers to use as "
"column names (0 is first line of data). "
"Leave empty if there is no header row"
),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
nrows = IntegerField(
_("Rows to Read"),
description=_("Number of rows of file to read"),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
skiprows = IntegerField(
_("Skip Rows"),
description=_("Number of rows to skip at start of file"),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
class ExcelToDatabaseForm(UploadToDatabaseForm):

View File

@ -116,18 +116,18 @@ class CsvToDatabaseView(SimpleFormView):
add_columns = ["database", "schema", "table_name"]
def form_get(self, form: CsvToDatabaseForm) -> None:
form.sep.data = ","
form.delimiter.data = ","
form.header.data = 0
form.mangle_dupe_cols.data = True
form.skipinitialspace.data = False
form.overwrite_duplicate.data = True
form.skip_initial_space.data = False
form.skip_blank_lines.data = True
form.infer_datetime_format.data = True
form.decimal.data = "."
form.if_exists.data = "fail"
def form_post(self, form: CsvToDatabaseForm) -> Response:
database = form.con.data
csv_table = Table(table=form.name.data, schema=form.schema.data)
database = form.database.data
csv_table = Table(table=form.table_name.data, schema=form.schema.data)
if not schema_allows_file_upload(database, csv_table.schema):
message = __(
@ -150,21 +150,21 @@ class CsvToDatabaseView(SimpleFormView):
infer_datetime_format=form.infer_datetime_format.data,
iterator=True,
keep_default_na=not form.null_values.data,
mangle_dupe_cols=form.mangle_dupe_cols.data,
usecols=form.usecols.data if form.usecols.data else None,
mangle_dupe_cols=form.overwrite_duplicate.data,
usecols=form.use_cols.data if form.use_cols.data else None,
na_values=form.null_values.data if form.null_values.data else None,
nrows=form.nrows.data,
parse_dates=form.parse_dates.data,
sep=form.sep.data,
sep=form.delimiter.data,
skip_blank_lines=form.skip_blank_lines.data,
skipinitialspace=form.skipinitialspace.data,
skipinitialspace=form.skip_initial_space.data,
skiprows=form.skiprows.data,
)
)
database = (
db.session.query(models.Database)
.filter_by(id=form.data.get("con").data.get("id"))
.filter_by(id=form.data.get("database").data.get("id"))
.one()
)
@ -175,7 +175,7 @@ class CsvToDatabaseView(SimpleFormView):
to_sql_kwargs={
"chunksize": 1000,
"if_exists": form.if_exists.data,
"index": form.index.data,
"index": form.dataframe_index.data,
"index_label": form.index_label.data,
},
)
@ -221,7 +221,7 @@ class CsvToDatabaseView(SimpleFormView):
'"%(table_name)s" in database "%(db_name)s". '
"Error message: %(error_msg)s",
filename=form.csv_file.data.filename,
table_name=form.name.data,
table_name=form.table_name.data,
db_name=database.database_name,
error_msg=str(ex),
)
@ -241,9 +241,9 @@ class CsvToDatabaseView(SimpleFormView):
flash(message, "info")
event_logger.log_with_context(
action="successful_csv_upload",
database=form.con.data.name,
database=form.database.data.name,
schema=form.schema.data,
table=form.name.data,
table=form.table_name.data,
)
return redirect("/tablemodelview/list/")

View File

@ -122,12 +122,12 @@ def upload_csv(filename: str, table_name: str, extra: Optional[Dict[str, str]] =
schema = utils.get_example_default_schema()
form_data = {
"csv_file": open(filename, "rb"),
"sep": ",",
"name": table_name,
"con": csv_upload_db_id,
"delimiter": ",",
"table_name": table_name,
"database": csv_upload_db_id,
"if_exists": "fail",
"index_label": "test_label",
"mangle_dupe_cols": False,
"overwrite_duplicate": False,
}
if schema:
form_data["schema"] = schema