feat: CSV File Upload form updates (#21922)
This commit is contained in:
parent
f40e6d1054
commit
6bb4d87deb
|
|
@ -17,7 +17,7 @@ specific language governing permissions and limitations
|
|||
under the License.
|
||||
#}
|
||||
<script>
|
||||
var db = $("#con");
|
||||
var db = $("#database");
|
||||
var schema = $("#schema");
|
||||
|
||||
// this element is a text input
|
||||
|
|
|
|||
|
|
@ -105,18 +105,9 @@ class UploadToDatabaseForm(DynamicForm):
|
|||
|
||||
|
||||
class CsvToDatabaseForm(UploadToDatabaseForm):
|
||||
name = StringField(
|
||||
_("Table Name"),
|
||||
description=_("Name of table to be created from csv data."),
|
||||
validators=[
|
||||
DataRequired(),
|
||||
Regexp(r"^[^\.]+$", message=_("Table name cannot contain a schema")),
|
||||
],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
csv_file = FileField(
|
||||
_("CSV File"),
|
||||
description=_("Select a CSV file to be uploaded to a database."),
|
||||
_("CSV Upload"),
|
||||
description=_("Select a file to be uploaded to the database"),
|
||||
validators=[
|
||||
FileRequired(),
|
||||
FileAllowed(
|
||||
|
|
@ -133,31 +124,37 @@ class CsvToDatabaseForm(UploadToDatabaseForm):
|
|||
),
|
||||
],
|
||||
)
|
||||
con = QuerySelectField(
|
||||
table_name = StringField(
|
||||
_("Table Name"),
|
||||
description=_("Name of table to be created with CSV file"),
|
||||
validators=[
|
||||
DataRequired(),
|
||||
Regexp(r"^[^\.]+$", message=_("Table name cannot contain a schema")),
|
||||
],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
database = QuerySelectField(
|
||||
_("Database"),
|
||||
description=_("Select a database to upload the file to"),
|
||||
query_factory=UploadToDatabaseForm.file_allowed_dbs,
|
||||
get_pk=lambda a: a.id,
|
||||
get_label=lambda a: a.database_name,
|
||||
)
|
||||
schema = StringField(
|
||||
_("Schema"),
|
||||
description=_("Specify a schema (if database flavor supports this)."),
|
||||
description=_("Select a schema if the database supports this"),
|
||||
validators=[Optional()],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
sep = StringField(
|
||||
delimiter = StringField(
|
||||
_("Delimiter"),
|
||||
description=_("Delimiter used by CSV file (for whitespace use \\s+)."),
|
||||
description=_("Enter a delimiter for this data"),
|
||||
validators=[DataRequired()],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
if_exists = SelectField(
|
||||
_("Table Exists"),
|
||||
description=_(
|
||||
"If table exists do one of the following: "
|
||||
"Fail (do nothing), Replace (drop and recreate table) "
|
||||
"or Append (insert data)."
|
||||
),
|
||||
_("If Table Already Exists"),
|
||||
description=_("What should happen if the table already exists"),
|
||||
choices=[
|
||||
("fail", _("Fail")),
|
||||
("replace", _("Replace")),
|
||||
|
|
@ -165,97 +162,98 @@ class CsvToDatabaseForm(UploadToDatabaseForm):
|
|||
],
|
||||
validators=[DataRequired()],
|
||||
)
|
||||
header = IntegerField(
|
||||
_("Header Row"),
|
||||
skip_initial_space = BooleanField(
|
||||
_("Skip Initial Space"), description=_("Skip spaces after delimiter")
|
||||
)
|
||||
skip_blank_lines = BooleanField(
|
||||
_("Skip Blank Lines"),
|
||||
description=_(
|
||||
"Row containing the headers to use as "
|
||||
"column names (0 is first line of data). "
|
||||
"Leave empty if there is no header row."
|
||||
"Skip blank lines rather than interpreting them as Not A Number values"
|
||||
),
|
||||
validators=[Optional(), NumberRange(min=0)],
|
||||
)
|
||||
parse_dates = CommaSeparatedListField(
|
||||
_("Columns To Be Parsed as Dates"),
|
||||
description=_(
|
||||
"A comma separated list of columns that should be parsed as dates"
|
||||
),
|
||||
filters=[filter_not_empty_values],
|
||||
)
|
||||
infer_datetime_format = BooleanField(
|
||||
_("Interpret Datetime Format Automatically"),
|
||||
description=_("Interpret the datetime format automatically"),
|
||||
)
|
||||
decimal = StringField(
|
||||
_("Decimal Character"),
|
||||
default=".",
|
||||
description=_("Character to interpret as decimal point"),
|
||||
validators=[Optional(), Length(min=1, max=1)],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
null_values = JsonListField(
|
||||
_("Null Values"),
|
||||
default=config["CSV_DEFAULT_NA_NAMES"],
|
||||
description=_(
|
||||
"Json list of the values that should be treated as null. "
|
||||
'Examples: [""] for empty strings, ["None", "N/A"], ["nan", "null"]. '
|
||||
"Warning: Hive database supports only a single value"
|
||||
),
|
||||
)
|
||||
index_col = IntegerField(
|
||||
_("Index Column"),
|
||||
description=_(
|
||||
"Column to use as the row labels of the "
|
||||
"dataframe. Leave empty if no index column."
|
||||
"dataframe. Leave empty if no index column"
|
||||
),
|
||||
validators=[Optional(), NumberRange(min=0)],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
mangle_dupe_cols = BooleanField(
|
||||
_("Mangle Duplicate Columns"),
|
||||
description=_('Specify duplicate columns as "X.0, X.1".'),
|
||||
)
|
||||
usecols = JsonListField(
|
||||
_("Use Columns"),
|
||||
default=None,
|
||||
description=_(
|
||||
"Json list of the column names that should be read. "
|
||||
"If not None, only these columns will be read from the file."
|
||||
),
|
||||
validators=[Optional()],
|
||||
)
|
||||
skipinitialspace = BooleanField(
|
||||
_("Skip Initial Space"), description=_("Skip spaces after delimiter.")
|
||||
)
|
||||
skiprows = IntegerField(
|
||||
_("Skip Rows"),
|
||||
description=_("Number of rows to skip at start of file."),
|
||||
validators=[Optional(), NumberRange(min=0)],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
nrows = IntegerField(
|
||||
_("Rows to Read"),
|
||||
description=_("Number of rows of file to read."),
|
||||
validators=[Optional(), NumberRange(min=0)],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
skip_blank_lines = BooleanField(
|
||||
_("Skip Blank Lines"),
|
||||
description=_("Skip blank lines rather than interpreting them as NaN values."),
|
||||
)
|
||||
parse_dates = CommaSeparatedListField(
|
||||
_("Parse Dates"),
|
||||
description=_(
|
||||
"A comma separated list of columns that should be parsed as dates."
|
||||
),
|
||||
filters=[filter_not_empty_values],
|
||||
)
|
||||
infer_datetime_format = BooleanField(
|
||||
_("Infer Datetime Format"),
|
||||
description=_("Use Pandas to interpret the datetime format automatically."),
|
||||
)
|
||||
decimal = StringField(
|
||||
_("Decimal Character"),
|
||||
default=".",
|
||||
description=_("Character to interpret as decimal point."),
|
||||
validators=[Optional(), Length(min=1, max=1)],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
index = BooleanField(
|
||||
_("Dataframe Index"), description=_("Write dataframe index as a column.")
|
||||
dataframe_index = BooleanField(
|
||||
_("Dataframe Index"), description=_("Write dataframe index as a column")
|
||||
)
|
||||
index_label = StringField(
|
||||
_("Column Label(s)"),
|
||||
description=_(
|
||||
"Column label for index column(s). If None is given "
|
||||
"and Dataframe Index is True, Index Names are used."
|
||||
"and Dataframe Index is checked, Index Names are used"
|
||||
),
|
||||
validators=[Optional()],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
null_values = JsonListField(
|
||||
_("Null values"),
|
||||
default=config["CSV_DEFAULT_NA_NAMES"],
|
||||
use_cols = JsonListField(
|
||||
_("Columns To Read"),
|
||||
default=None,
|
||||
description=_("Json list of the column names that should be read"),
|
||||
validators=[Optional()],
|
||||
)
|
||||
overwrite_duplicate = BooleanField(
|
||||
_("Overwrite Duplicate Columns"),
|
||||
description=_(
|
||||
"Json list of the values that should be treated as null. "
|
||||
'Examples: [""], ["None", "N/A"], ["nan", "null"]. '
|
||||
"Warning: Hive database supports only single value. "
|
||||
'Use [""] for empty string.'
|
||||
"If duplicate columns are not overridden, "
|
||||
'they will be presented as "X.1, X.2 ...X.x"'
|
||||
),
|
||||
)
|
||||
header = IntegerField(
|
||||
_("Header Row"),
|
||||
description=_(
|
||||
"Row containing the headers to use as "
|
||||
"column names (0 is first line of data). "
|
||||
"Leave empty if there is no header row"
|
||||
),
|
||||
validators=[Optional(), NumberRange(min=0)],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
nrows = IntegerField(
|
||||
_("Rows to Read"),
|
||||
description=_("Number of rows of file to read"),
|
||||
validators=[Optional(), NumberRange(min=0)],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
skiprows = IntegerField(
|
||||
_("Skip Rows"),
|
||||
description=_("Number of rows to skip at start of file"),
|
||||
validators=[Optional(), NumberRange(min=0)],
|
||||
widget=BS3TextFieldWidget(),
|
||||
)
|
||||
|
||||
|
||||
class ExcelToDatabaseForm(UploadToDatabaseForm):
|
||||
|
|
|
|||
|
|
@ -116,18 +116,18 @@ class CsvToDatabaseView(SimpleFormView):
|
|||
add_columns = ["database", "schema", "table_name"]
|
||||
|
||||
def form_get(self, form: CsvToDatabaseForm) -> None:
|
||||
form.sep.data = ","
|
||||
form.delimiter.data = ","
|
||||
form.header.data = 0
|
||||
form.mangle_dupe_cols.data = True
|
||||
form.skipinitialspace.data = False
|
||||
form.overwrite_duplicate.data = True
|
||||
form.skip_initial_space.data = False
|
||||
form.skip_blank_lines.data = True
|
||||
form.infer_datetime_format.data = True
|
||||
form.decimal.data = "."
|
||||
form.if_exists.data = "fail"
|
||||
|
||||
def form_post(self, form: CsvToDatabaseForm) -> Response:
|
||||
database = form.con.data
|
||||
csv_table = Table(table=form.name.data, schema=form.schema.data)
|
||||
database = form.database.data
|
||||
csv_table = Table(table=form.table_name.data, schema=form.schema.data)
|
||||
|
||||
if not schema_allows_file_upload(database, csv_table.schema):
|
||||
message = __(
|
||||
|
|
@ -150,21 +150,21 @@ class CsvToDatabaseView(SimpleFormView):
|
|||
infer_datetime_format=form.infer_datetime_format.data,
|
||||
iterator=True,
|
||||
keep_default_na=not form.null_values.data,
|
||||
mangle_dupe_cols=form.mangle_dupe_cols.data,
|
||||
usecols=form.usecols.data if form.usecols.data else None,
|
||||
mangle_dupe_cols=form.overwrite_duplicate.data,
|
||||
usecols=form.use_cols.data if form.use_cols.data else None,
|
||||
na_values=form.null_values.data if form.null_values.data else None,
|
||||
nrows=form.nrows.data,
|
||||
parse_dates=form.parse_dates.data,
|
||||
sep=form.sep.data,
|
||||
sep=form.delimiter.data,
|
||||
skip_blank_lines=form.skip_blank_lines.data,
|
||||
skipinitialspace=form.skipinitialspace.data,
|
||||
skipinitialspace=form.skip_initial_space.data,
|
||||
skiprows=form.skiprows.data,
|
||||
)
|
||||
)
|
||||
|
||||
database = (
|
||||
db.session.query(models.Database)
|
||||
.filter_by(id=form.data.get("con").data.get("id"))
|
||||
.filter_by(id=form.data.get("database").data.get("id"))
|
||||
.one()
|
||||
)
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ class CsvToDatabaseView(SimpleFormView):
|
|||
to_sql_kwargs={
|
||||
"chunksize": 1000,
|
||||
"if_exists": form.if_exists.data,
|
||||
"index": form.index.data,
|
||||
"index": form.dataframe_index.data,
|
||||
"index_label": form.index_label.data,
|
||||
},
|
||||
)
|
||||
|
|
@ -221,7 +221,7 @@ class CsvToDatabaseView(SimpleFormView):
|
|||
'"%(table_name)s" in database "%(db_name)s". '
|
||||
"Error message: %(error_msg)s",
|
||||
filename=form.csv_file.data.filename,
|
||||
table_name=form.name.data,
|
||||
table_name=form.table_name.data,
|
||||
db_name=database.database_name,
|
||||
error_msg=str(ex),
|
||||
)
|
||||
|
|
@ -241,9 +241,9 @@ class CsvToDatabaseView(SimpleFormView):
|
|||
flash(message, "info")
|
||||
event_logger.log_with_context(
|
||||
action="successful_csv_upload",
|
||||
database=form.con.data.name,
|
||||
database=form.database.data.name,
|
||||
schema=form.schema.data,
|
||||
table=form.name.data,
|
||||
table=form.table_name.data,
|
||||
)
|
||||
return redirect("/tablemodelview/list/")
|
||||
|
||||
|
|
|
|||
|
|
@ -122,12 +122,12 @@ def upload_csv(filename: str, table_name: str, extra: Optional[Dict[str, str]] =
|
|||
schema = utils.get_example_default_schema()
|
||||
form_data = {
|
||||
"csv_file": open(filename, "rb"),
|
||||
"sep": ",",
|
||||
"name": table_name,
|
||||
"con": csv_upload_db_id,
|
||||
"delimiter": ",",
|
||||
"table_name": table_name,
|
||||
"database": csv_upload_db_id,
|
||||
"if_exists": "fail",
|
||||
"index_label": "test_label",
|
||||
"mangle_dupe_cols": False,
|
||||
"overwrite_duplicate": False,
|
||||
}
|
||||
if schema:
|
||||
form_data["schema"] = schema
|
||||
|
|
|
|||
Loading…
Reference in New Issue