feat: new Columnar upload form and API (#28192)
parent f5843fe588
commit 9a339f08a7
@@ -138,7 +138,6 @@
        "use-event-callback": "^0.1.0",
        "use-immer": "^0.9.0",
        "use-query-params": "^1.1.9",
        "xlsx": "^0.18.5",
        "yargs": "^17.7.2"
      },
      "devDependencies": {

@@ -25339,14 +25338,6 @@
        "node": ">= 0.12.0"
      }
    },
    "node_modules/adler-32": {
      "version": "1.3.1",
      "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz",
      "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==",
      "engines": {
        "node": ">=0.8"
      }
    },
    "node_modules/agent-base": {
      "version": "6.0.2",
      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz",

@@ -28096,18 +28087,6 @@
        "url": "https://github.com/sponsors/wooorm"
      }
    },
    "node_modules/cfb": {
      "version": "1.2.2",
      "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz",
      "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==",
      "dependencies": {
        "adler-32": "~1.3.0",
        "crc-32": "~1.2.0"
      },
      "engines": {
        "node": ">=0.8"
      }
    },
    "node_modules/chainsaw": {
      "version": "0.1.0",
      "resolved": "https://registry.npmjs.org/chainsaw/-/chainsaw-0.1.0.tgz",

@@ -28906,14 +28885,6 @@
        "node": ">=0.10.0"
      }
    },
    "node_modules/codepage": {
      "version": "1.15.0",
      "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz",
      "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==",
      "engines": {
        "node": ">=0.8"
      }
    },
    "node_modules/collect-v8-coverage": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz",

@@ -29885,17 +29856,6 @@
        "node": ">=8"
      }
    },
    "node_modules/crc-32": {
      "version": "1.2.2",
      "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz",
      "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==",
      "bin": {
        "crc32": "bin/crc32.njs"
      },
      "engines": {
        "node": ">=0.8"
      }
    },
    "node_modules/create-emotion": {
      "version": "10.0.27",
      "resolved": "https://registry.npmjs.org/create-emotion/-/create-emotion-10.0.27.tgz",

@@ -37175,14 +37135,6 @@
        "node": ">=12.20.0"
      }
    },
    "node_modules/frac": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz",
      "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==",
      "engines": {
        "node": ">=0.8"
      }
    },
    "node_modules/fragment-cache": {
      "version": "0.2.1",
      "resolved": "https://registry.npmjs.org/fragment-cache/-/fragment-cache-0.2.1.tgz",

@@ -62001,17 +61953,6 @@
      "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",
      "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw="
    },
    "node_modules/ssf": {
      "version": "0.11.2",
      "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz",
      "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==",
      "dependencies": {
        "frac": "~1.1.2"
      },
      "engines": {
        "node": ">=0.8"
      }
    },
    "node_modules/sshpk": {
      "version": "1.15.2",
      "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz",

@@ -66195,22 +66136,6 @@
      "integrity": "sha512-JcKqAHLPxcdb9KM49dufGXn2x3ssnfjbcaQdLlfZsL9rH9wgDQjUtDxbo8NE0F6SFvydeu1VhZe7hZuHsB2/pw==",
      "dev": true
    },
    "node_modules/wmf": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz",
      "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==",
      "engines": {
        "node": ">=0.8"
      }
    },
    "node_modules/word": {
      "version": "0.3.0",
      "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz",
      "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==",
      "engines": {
        "node": ">=0.8"
      }
    },
    "node_modules/wordwrap": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz",

@@ -66525,26 +66450,6 @@
        "url": "https://opencollective.com/node-fetch"
      }
    },
    "node_modules/xlsx": {
      "version": "0.18.5",
      "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz",
      "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==",
      "dependencies": {
        "adler-32": "~1.3.0",
        "cfb": "~1.2.1",
        "codepage": "~1.15.0",
        "crc-32": "~1.2.1",
        "ssf": "~0.11.2",
        "wmf": "~1.0.1",
        "word": "~0.3.0"
      },
      "bin": {
        "xlsx": "bin/xlsx.njs"
      },
      "engines": {
        "node": ">=0.8"
      }
    },
    "node_modules/xml-name-validator": {
      "version": "3.0.0",
      "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-3.0.0.tgz",

@@ -91924,11 +91829,6 @@
      "integrity": "sha512-aT6camzM4xEA54YVJYSqxz1kv4IHnQZRtThJJHhUMRExaU5spC7jX5ugSwTaTgJliIgs4VhZOk7htClvQ/LmRA==",
      "dev": true
    },
    "adler-32": {
      "version": "1.3.1",
      "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz",
      "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A=="
    },
    "agent-base": {
      "version": "6.0.2",
      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz",

@@ -94034,15 +93934,6 @@
      "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz",
      "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg=="
    },
    "cfb": {
      "version": "1.2.2",
      "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz",
      "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==",
      "requires": {
        "adler-32": "~1.3.0",
        "crc-32": "~1.2.0"
      }
    },
    "chainsaw": {
      "version": "0.1.0",
      "resolved": "https://registry.npmjs.org/chainsaw/-/chainsaw-0.1.0.tgz",

@@ -94654,11 +94545,6 @@
      "resolved": "https://registry.npmjs.org/code-point-at/-/code-point-at-1.1.0.tgz",
      "integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c="
    },
    "codepage": {
      "version": "1.15.0",
      "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz",
      "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA=="
    },
    "collect-v8-coverage": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.1.tgz",

@@ -95426,11 +95312,6 @@
        }
      }
    },
    "crc-32": {
      "version": "1.2.2",
      "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz",
      "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ=="
    },
    "create-emotion": {
      "version": "10.0.27",
      "resolved": "https://registry.npmjs.org/create-emotion/-/create-emotion-10.0.27.tgz",

@@ -101104,11 +100985,6 @@
        "fetch-blob": "^3.1.2"
      }
    },
    "frac": {
      "version": "1.1.2",
      "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz",
      "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA=="
    },
    "fragment-cache": {
      "version": "0.2.1",
      "resolved": "https://registry.npmjs.org/fragment-cache/-/fragment-cache-0.2.1.tgz",

@@ -119976,14 +119852,6 @@
      "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",
      "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw="
    },
    "ssf": {
      "version": "0.11.2",
      "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz",
      "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==",
      "requires": {
        "frac": "~1.1.2"
      }
    },
    "sshpk": {
      "version": "1.15.2",
      "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.15.2.tgz",

@@ -123149,16 +123017,6 @@
      "integrity": "sha512-JcKqAHLPxcdb9KM49dufGXn2x3ssnfjbcaQdLlfZsL9rH9wgDQjUtDxbo8NE0F6SFvydeu1VhZe7hZuHsB2/pw==",
      "dev": true
    },
    "wmf": {
      "version": "1.0.2",
      "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz",
      "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw=="
    },
    "word": {
      "version": "0.3.0",
      "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz",
      "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA=="
    },
    "wordwrap": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz",

@@ -123398,20 +123256,6 @@
        }
      }
    },
    "xlsx": {
      "version": "0.18.5",
      "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz",
      "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==",
      "requires": {
        "adler-32": "~1.3.0",
        "cfb": "~1.2.1",
        "codepage": "~1.15.0",
        "crc-32": "~1.2.1",
        "ssf": "~0.11.2",
        "wmf": "~1.0.1",
        "word": "~0.3.0"
      }
    },
    "xml-name-validator": {
      "version": "3.0.0",
      "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-3.0.0.tgz",

@@ -204,7 +204,6 @@
    "use-event-callback": "^0.1.0",
    "use-immer": "^0.9.0",
    "use-query-params": "^1.1.9",
    "xlsx": "^0.18.5",
    "yargs": "^17.7.2"
  },
  "devDependencies": {
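Note on the dependency changes above: the `xlsx` package and its transitive dependencies (adler-32, cfb, codepage, crc-32, frac, ssf, wmf, word) are removed because the browser no longer parses uploaded files to preview their columns; the new `*_metadata/` endpoints introduced later in this diff do that server-side. A minimal sketch of the resulting call pattern, assuming the endpoints shown in this PR (the helper name is illustrative, not part of the change):

```ts
import { SupersetClient } from '@superset-ui/core';

// Illustrative helper: ask the backend for file metadata instead of
// parsing the upload in the browser with the xlsx package.
async function fetchFileMetadata(
  file: File,
  type: 'csv' | 'excel' | 'columnar',
) {
  const formData = new FormData();
  formData.append('file', file);
  const response = await SupersetClient.post({
    endpoint: `/api/v1/database/${type}_metadata/`,
    body: formData,
    headers: { Accept: 'application/json' },
  });
  // Each item carries column_names (plus sheet_name for Excel files).
  return response.json.result.items;
}
```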
@@ -29,6 +29,7 @@ import { forEach } from 'lodash';
fetchMock.post('glob:*api/v1/database/1/csv_upload/', {});
fetchMock.post('glob:*api/v1/database/1/excel_upload/', {});
fetchMock.post('glob:*api/v1/database/1/columnar_upload/', {});

fetchMock.get(
  'glob:*api/v1/database/?q=(filters:!((col:allow_file_upload,opr:eq,value:!t)),page:0,page_size:100)',

@@ -68,6 +69,13 @@ const excelProps = {
  type: 'excel',
};

const columnarProps = {
  show: true,
  onHide: () => {},
  allowedExtensions: ['parquet', 'zip'],
  type: 'columnar',
};

test('CSV, renders the general information elements correctly', () => {
  render(<UploadDataModal {...csvProps} />, {
    useRedux: true,

@@ -200,6 +208,78 @@ test('Excel, renders the general information elements correctly', () => {
  });
});

test('Columnar, renders the general information elements correctly', () => {
  render(<UploadDataModal {...columnarProps} />, {
    useRedux: true,
  });

  const cancelButton = screen.getByRole('button', {
    name: 'Cancel',
  });
  const uploadButton = screen.getByRole('button', {
    name: 'Upload',
  });
  const selectButton = screen.getByRole('button', {
    name: 'Select',
  });

  const title = screen.getByRole('heading', {
    name: /columnar upload/i,
  });
  const missingTitle = screen.queryByRole('heading', {
    name: /csv upload/i,
  });
  expect(missingTitle).not.toBeInTheDocument();
  const panel1 = screen.getByRole('heading', {
    name: /General information/i,
  });
  const panel2 = screen.getByRole('heading', {
    name: /file settings/i,
  });
  const panel3 = screen.getByRole('heading', {
    name: /columns/i,
  });
  const panel4 = screen.queryByRole('heading', {
    name: /rows/i,
  });
  expect(panel4).not.toBeInTheDocument();

  const selectDatabase = screen.getByRole('combobox', {
    name: /select a database/i,
  });
  const selectDelimiter = screen.queryByRole('combobox', {
    name: /choose a delimiter/i,
  });
  expect(selectDelimiter).not.toBeInTheDocument();

  const selectSheetName = screen.queryByRole('combobox', {
    name: /choose sheet name/i,
  });
  expect(selectSheetName).not.toBeInTheDocument();
  const inputTableName = screen.getByRole('textbox', {
    name: /table name/i,
  });
  const inputSchema = screen.getByRole('combobox', {
    name: /schema/i,
  });

  const visibleComponents = [
    cancelButton,
    uploadButton,
    selectButton,
    title,
    panel1,
    panel2,
    panel3,
    selectDatabase,
    inputTableName,
    inputSchema,
  ];
  visibleComponents.forEach(component => {
    expect(component).toBeVisible();
  });
});

test('CSV, renders the file settings elements correctly', () => {
  render(<UploadDataModal {...csvProps} />, {
    useRedux: true,

@@ -282,6 +362,45 @@ test('Excel, renders the file settings elements correctly', () => {
  });
});

test('Columnar, renders the file settings elements correctly', () => {
  render(<UploadDataModal {...columnarProps} />, {
    useRedux: true,
  });

  expect(screen.queryByText('If Table Already Exists')).not.toBeInTheDocument();
  const panelHeader = screen.getByRole('heading', {
    name: /file settings/i,
  });
  userEvent.click(panelHeader);
  const selectTableAlreadyExists = screen.getByRole('combobox', {
    name: /choose already exists/i,
  });
  const inputDecimalCharacter = screen.queryByRole('textbox', {
    name: /decimal character/i,
  });
  expect(inputDecimalCharacter).not.toBeInTheDocument();
  const selectColumnsDates = screen.queryByRole('combobox', {
    name: /choose columns to be parsed as dates/i,
  });
  expect(selectColumnsDates).not.toBeInTheDocument();
  const selectNullValues = screen.queryByRole('combobox', {
    name: /null values/i,
  });
  expect(selectNullValues).not.toBeInTheDocument();

  const switchSkipInitialSpace = screen.queryByText('skipInitialSpace');
  expect(switchSkipInitialSpace).not.toBeInTheDocument();
  const switchSkipBlankLines = screen.queryByText('skipBlankLines');
  expect(switchSkipBlankLines).not.toBeInTheDocument();
  const switchDayFirst = screen.queryByText('dayFirst');
  expect(switchDayFirst).not.toBeInTheDocument();

  const visibleComponents = [selectTableAlreadyExists];
  visibleComponents.forEach(component => {
    expect(component).toBeVisible();
  });
});

test('CSV, renders the columns elements correctly', () => {
  render(<UploadDataModal {...csvProps} />, {
    useRedux: true,

@@ -291,12 +410,13 @@ test('CSV, renders the columns elements correctly', () => {
    name: /columns/i,
  });
  userEvent.click(panelHeader);
  const switchDataFrameIndex = screen.getByTestId('dataFrameIndex');
  userEvent.click(switchDataFrameIndex);
  const selectIndexColumn = screen.getByRole('combobox', {
    name: /Choose index column/i,
  });
  const switchDataFrameIndex = screen.getByTestId('dataFrameIndex');
  const inputColumnLabels = screen.getByRole('textbox', {
    name: /Column labels/i,
    name: /Index label/i,
  });
  const selectColumnsToRead = screen.getByRole('combobox', {
    name: /Choose columns to read/i,

@@ -327,12 +447,13 @@ test('Excel, renders the columns elements correctly', () => {
    name: /columns/i,
  });
  userEvent.click(panelHeader);
  const switchDataFrameIndex = screen.getByTestId('dataFrameIndex');
  userEvent.click(switchDataFrameIndex);
  const selectIndexColumn = screen.getByRole('combobox', {
    name: /Choose index column/i,
  });
  const switchDataFrameIndex = screen.getByTestId('dataFrameIndex');
  const inputColumnLabels = screen.getByRole('textbox', {
    name: /Column labels/i,
  const inputIndexLabel = screen.getByRole('textbox', {
    name: /Index label/i,
  });
  const selectColumnsToRead = screen.getByRole('combobox', {
    name: /Choose columns to read/i,

@@ -348,7 +469,45 @@ test('Excel, renders the columns elements correctly', () => {
  const visibleComponents = [
    selectIndexColumn,
    switchDataFrameIndex,
    inputColumnLabels,
    inputIndexLabel,
    selectColumnsToRead,
  ];
  visibleComponents.forEach(component => {
    expect(component).toBeVisible();
  });
});

test('Columnar, renders the columns elements correctly', () => {
  render(<UploadDataModal {...columnarProps} />, {
    useRedux: true,
  });

  const panelHeader = screen.getByRole('heading', {
    name: /columns/i,
  });
  userEvent.click(panelHeader);
  const selectIndexColumn = screen.queryByRole('combobox', {
    name: /Choose index column/i,
  });
  expect(selectIndexColumn).not.toBeInTheDocument();
  const switchDataFrameIndex = screen.getByTestId('dataFrameIndex');
  userEvent.click(switchDataFrameIndex);
  const inputIndexLabel = screen.getByRole('textbox', {
    name: /Index label/i,
  });
  const selectColumnsToRead = screen.getByRole('combobox', {
    name: /Choose columns to read/i,
  });
  userEvent.click(selectColumnsToRead);

  const columnDataTypes = screen.queryByRole('textbox', {
    name: /Column data types/i,
  });
  expect(columnDataTypes).not.toBeInTheDocument();

  const visibleComponents = [
    switchDataFrameIndex,
    inputIndexLabel,
    selectColumnsToRead,
  ];
  visibleComponents.forEach(component => {

@@ -381,6 +540,17 @@ test('renders the rows elements correctly', () => {
  });
});

test('Columnar, does not render the rows', () => {
  render(<UploadDataModal {...columnarProps} />, {
    useRedux: true,
  });

  const panelHeader = screen.queryByRole('heading', {
    name: /rows/i,
  });
  expect(panelHeader).not.toBeInTheDocument();
});

test('database and schema are correctly populated', async () => {
  render(<UploadDataModal {...csvProps} />, {
    useRedux: true,

@@ -546,6 +716,67 @@ test('Excel, form post', async () => {
  expect(fileData.name).toBe('test.xls');
});

test('Columnar, form post', async () => {
  render(<UploadDataModal {...columnarProps} />, {
    useRedux: true,
  });

  const selectButton = screen.getByRole('button', {
    name: 'Select',
  });
  userEvent.click(selectButton);

  // Select a file from the file dialog
  const file = new File(['test'], 'test.parquet', { type: 'text' });
  const inputElement = document.querySelector('input[type="file"]');

  if (inputElement) {
    userEvent.upload(inputElement, file);
  }

  const selectDatabase = screen.getByRole('combobox', {
    name: /select a database/i,
  });
  userEvent.click(selectDatabase);
  await waitFor(() => screen.getByText('database1'));
  await waitFor(() => screen.getByText('database2'));

  screen.getByText('database1').click();
  const selectSchema = screen.getByRole('combobox', {
    name: /schema/i,
  });
  userEvent.click(selectSchema);
  await waitFor(() => screen.getAllByText('public'));
  screen.getAllByText('public')[1].click();

  // Fill out form fields
  const inputTableName = screen.getByRole('textbox', {
    name: /table name/i,
  });
  userEvent.type(inputTableName, 'table1');
  const uploadButton = screen.getByRole('button', {
    name: 'Upload',
  });

  userEvent.click(uploadButton);
  await waitFor(() =>
    fetchMock.called('glob:*api/v1/database/1/columnar_upload/'),
  );

  // Get the matching fetch calls made
  const matchingCalls = fetchMock.calls(
    'glob:*api/v1/database/1/columnar_upload/',
  );
  expect(matchingCalls).toHaveLength(1);
  const [_, options] = matchingCalls[0];
  const formData = options?.body as FormData;
  expect(formData.get('table_name')).toBe('table1');
  expect(formData.get('schema')).toBe('public');
  expect(formData.get('table_name')).toBe('table1');
  const fileData = formData.get('file') as File;
  expect(fileData.name).toBe('test.parquet');
});

test('CSV, validate file extension returns false', () => {
  const invalidFileNames = ['out', 'out.exe', 'out.csv.exe', '.csv', 'out.xls'];
  forEach(invalidFileNames, fileName => {

@@ -572,6 +803,25 @@ test('Excel, validate file extension returns false', () => {
  });
});

test('Columnar, validate file extension returns false', () => {
  const invalidFileNames = [
    'out',
    'out.exe',
    'out.parquet.exe',
    '.parquet',
    'out.excel',
  ];
  forEach(invalidFileNames, fileName => {
    const file: UploadFile<any> = {
      name: fileName,
      uid: 'xp',
      size: 100,
      type: 'text/csv',
    };
    expect(validateUploadFileExtension(file, ['parquet', 'zip'])).toBe(false);
  });
});

test('CSV, validate file extension returns true', () => {
  const invalidFileNames = ['out.csv', 'out.tsv', 'out.exe.csv', 'out a.csv'];
  forEach(invalidFileNames, fileName => {

@@ -597,3 +847,21 @@ test('Excel, validate file extension returns true', () => {
    expect(validateUploadFileExtension(file, ['xls', 'xlsx'])).toBe(true);
  });
});

test('Columnar, validate file extension returns true', () => {
  const invalidFileNames = [
    'out.parquet',
    'out.zip',
    'out.exe.zip',
    'out a.parquet',
  ];
  forEach(invalidFileNames, fileName => {
    const file: UploadFile<any> = {
      name: fileName,
      uid: 'xp',
      size: 100,
      type: 'text/csv',
    };
    expect(validateUploadFileExtension(file, ['parquet', 'zip'])).toBe(true);
  });
});
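The validation tests above pin down the expected behaviour of `validateUploadFileExtension`: a file needs a non-empty base name, and its final extension must be in the allowed list ('out.exe.zip' passes, while 'out.parquet.exe' and a bare '.parquet' fail). A sketch consistent with those cases (the real implementation lives in the modal module and may differ in detail):

```ts
import { UploadFile } from 'antd/lib/upload/interface';

// Sketch: accept only files whose last extension is in the allowed list.
const validateUploadFileExtensionSketch = (
  file: UploadFile<any>,
  allowedExtensions: string[],
): boolean => {
  // Require at least one character before the final dot, so 'out'
  // (no extension) and '.parquet' (no base name) are both rejected.
  const matches = file.name.match(/.+\.([^.]+)$/);
  if (!matches) {
    return false;
  }
  return allowedExtensions.includes(matches[1].toLowerCase());
};
```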

@@ -40,7 +40,6 @@ import { Input, InputNumber } from 'src/components/Input';
import rison from 'rison';
import { UploadChangeParam, UploadFile } from 'antd/lib/upload/interface';
import withToasts from 'src/components/MessageToasts/withToasts';
import * as XLSX from 'xlsx';
import {
  antdCollapseStyles,
  antDModalNoPaddingStyles,

@@ -69,9 +68,25 @@ const CSVSpecificFields = [
  'skip_blank_lines',
  'day_first',
  'column_data_types',
  'column_dates',
  'decimal_character',
  'null_values',
  'index_column',
  'header_row',
  'rows_to_read',
  'skip_rows',
];

const ExcelSpecificFields = ['sheet_name'];
const ExcelSpecificFields = [
  'sheet_name',
  'column_dates',
  'decimal_character',
  'null_values',
  'index_column',
  'header_row',
  'rows_to_read',
  'skip_rows',
];

const ColumnarSpecificFields: string[] = [];

@@ -89,6 +104,9 @@ const UploadTypeToSpecificFields: Record<UploadType, string[]> = {
  columnar: ColumnarSpecificFields,
};

const isFieldATypeSpecificField = (field: string, type: UploadType) =>
  UploadTypeToSpecificFields[type].includes(field);

interface UploadInfo {
  table_name: string;
  schema: string;
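The per-type field lists plus the `isFieldATypeSpecificField` predicate above are what let one modal serve all three upload types: optional controls in the render tree are gated on list membership instead of hard-coded `type === 'csv'` / `type === 'excel'` branches. A tiny demonstration using the definitions from this file (a sketch, not part of the PR):

```ts
// Which optional controls render for a given type; because
// ColumnarSpecificFields is empty, columnar uploads automatically hide
// every type-specific control, including the whole Rows panel.
const visibleOptionalFields = (type: UploadType): string[] =>
  [...AllSpecificFields].filter(field =>
    isFieldATypeSpecificField(field, type),
  );

// visibleOptionalFields('columnar') === []
```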
@@ -106,11 +124,16 @@ interface UploadInfo {
  column_dates: Array<string>;
  index_column: string | null;
  dataframe_index: boolean;
  column_labels: string;
  index_label: string;
  columns_read: Array<string>;
  column_data_types: string;
}

interface SheetColumnNames {
  sheet_name: string;
  column_names: string[];
}

const defaultUploadInfo: UploadInfo = {
  table_name: '',
  schema: '',

@@ -128,7 +151,7 @@ const defaultUploadInfo: UploadInfo = {
  column_dates: [],
  index_column: null,
  dataframe_index: false,
  column_labels: '',
  index_label: '',
  columns_read: [],
  column_data_types: '',
};

@@ -136,7 +159,11 @@
// Allowed extensions to accept for file upload, users can always override this
// by selecting all file extensions on the OS file picker. Also ".txt" will
// allow all files to be selected.
const READ_HEADER_SIZE = 10000;
const allowedExtensionsToAccept = {
  csv: '.csv, .tsv',
  excel: '.xls, .xlsx',
  columnar: '.parquet, .zip',
};

export const validateUploadFileExtension = (
  file: UploadFile<any>,

@@ -183,21 +210,17 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
  const [fileList, setFileList] = useState<UploadFile[]>([]);
  const [columns, setColumns] = React.useState<string[]>([]);
  const [sheetNames, setSheetNames] = React.useState<string[]>([]);
  const [currentSheetName, setCurrentSheetName] = React.useState<
    string | undefined
  >();
  const [sheetsColumnNames, setSheetsColumnNames] = React.useState<
    SheetColumnNames[]
  >([]);
  const [delimiter, setDelimiter] = useState<string>(',');
  const [isLoading, setIsLoading] = useState<boolean>(false);
  const [currentSchema, setCurrentSchema] = useState<string | undefined>();
  const [currentDataframeIndex, setCurrentDataframeIndex] =
    useState<boolean>(false);
  const [previewUploadedFile, setPreviewUploadedFile] = useState<boolean>(true);
  const [fileLoading, setFileLoading] = useState<boolean>(false);

  const allowedExtensionsToAccept = {
    csv: '.csv, .tsv',
    excel: '.xls, .xlsx',
    columnar: '.parquet, .orc',
  };

  const createTypeToEndpointMap = (
    databaseId: number,
  ): { [key: string]: string } => ({

@@ -206,6 +229,12 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
    columnar: `/api/v1/database/${databaseId}/columnar_upload/`,
  });

  const typeToFileMetadataEndpoint = {
    csv: '/api/v1/database/csv_metadata/',
    excel: '/api/v1/database/excel_metadata/',
    columnar: '/api/v1/database/columnar_metadata/',
  };

  const nullValuesOptions = [
    {
      value: '""',
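Two endpoint maps now sit side by side: per-database upload endpoints and global metadata endpoints. Condensed restatement (helper names are illustrative, not from the PR):

```ts
type UploadType = 'csv' | 'excel' | 'columnar';

// The upload itself is scoped to a database; the metadata probe is not.
const uploadEndpoint = (databaseId: number, type: UploadType) =>
  `/api/v1/database/${databaseId}/${type}_upload/`;
const metadataEndpoint = (type: UploadType) =>
  `/api/v1/database/${type}_metadata/`;
```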
@@ -286,12 +315,12 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
    setColumns([]);
    setCurrentSchema('');
    setCurrentDatabaseId(0);
    setCurrentSheetName(undefined);
    setSheetNames([]);
    setIsLoading(false);
    setDelimiter(',');
    setPreviewUploadedFile(true);
    setFileLoading(false);
    setSheetsColumnNames([]);
    form.resetFields();
  };

@@ -343,6 +372,58 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
    [currentDatabaseId],
  );

  const loadFileMetadata = (file: File) => {
    const fields = form.getFieldsValue();
    const mergedValues = { ...defaultUploadInfo, ...fields };
    const formData = new FormData();
    formData.append('file', file);
    if (type === 'csv') {
      formData.append('delimiter', mergedValues.delimiter);
    }
    setFileLoading(true);
    return SupersetClient.post({
      endpoint: typeToFileMetadataEndpoint[type],
      body: formData,
      headers: { Accept: 'application/json' },
    })
      .then(response => {
        const { items } = response.json.result;
        if (items && type !== 'excel') {
          setColumns(items[0].column_names);
        } else {
          const { allSheetNames, sheetColumnNamesMap } = items.reduce(
            (
              acc: {
                allSheetNames: any[];
                sheetColumnNamesMap: Record<string, string[]>;
              },
              item: { sheet_name: any; column_names: any },
            ) => {
              acc.allSheetNames.push(item.sheet_name);
              acc.sheetColumnNamesMap[item.sheet_name] = item.column_names;
              return acc;
            },
            { allSheetNames: [], sheetColumnNamesMap: {} },
          );
          setColumns(items[0].column_names);
          setSheetNames(allSheetNames);
          form.setFieldsValue({ sheet_name: allSheetNames[0] });
          setSheetsColumnNames(sheetColumnNamesMap);
        }
      })
      .catch(response =>
        getClientErrorObject(response).then(error => {
          addDangerToast(error.error || 'Error');
          setColumns([]);
          form.setFieldsValue({ sheet_name: undefined });
          setSheetNames([]);
        }),
      )
      .finally(() => {
        setFileLoading(false);
      });
  };

  const getAllFieldsNotInType = (): string[] => {
    const specificFields = UploadTypeToSpecificFields[type] || [];
    return [...AllSpecificFields].filter(
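`loadFileMetadata` above expects the metadata endpoints to return one item per sheet for Excel files and a single item otherwise. The shape is inferred from the reducer here and the backend TypedDicts later in this diff; field values are illustrative:

```ts
// Inferred /api/v1/database/excel_metadata/ response shape:
const exampleMetadataResponse = {
  result: {
    items: [
      { sheet_name: 'Sheet1', column_names: ['id', 'name'] },
      { sheet_name: 'Sheet2', column_names: ['date', 'amount'] },
    ],
  },
};
// CSV and columnar files come back as a single item whose column_names
// populate the Columns panel; sheet_name is null for those.
```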
@@ -353,7 +434,13 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
  const appendFormData = (formData: FormData, data: Record<string, any>) => {
    const allFieldsNotInType = getAllFieldsNotInType();
    Object.entries(data).forEach(([key, value]) => {
      if (!(allFieldsNotInType.includes(key) || NonNullFields.includes(key))) {
      if (
        !(
          allFieldsNotInType.includes(key) ||
          (NonNullFields.includes(key) &&
            (value === undefined || value === null))
        )
      ) {
        formData.append(key, value);
      }
    });

@@ -401,13 +488,12 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
    setFileList(fileList.filter(file => file.uid !== removedFile.uid));
    setColumns([]);
    setSheetNames([]);
    setCurrentSheetName(undefined);
    form.setFieldsValue({ sheet_name: undefined });
    return false;
  };

  const onSheetNameChange = (value: string) => {
    setCurrentSheetName(value);
    setColumns(sheetsColumnNames[value] ?? []);
  };

  const columnsToOptions = () =>
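The reworked condition in `appendFormData` narrows what gets dropped: previously any key listed in `NonNullFields` was skipped outright; now it is skipped only when its value is actually null or undefined. Equivalent predicate form (names beyond those in the diff are illustrative):

```ts
const shouldAppend = (
  key: string,
  value: unknown,
  allFieldsNotInType: string[],
  nonNullFields: string[],
): boolean =>
  // drop fields that belong to a different upload type
  !allFieldsNotInType.includes(key) &&
  // drop non-nullable fields only when they carry no value
  !(nonNullFields.includes(key) && (value === undefined || value === null));
```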
@@ -422,97 +508,6 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
      label: sheetName,
    }));

  const readFileContent = (file: File) =>
    new Promise<string>((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = event => {
        if (event.target) {
          const text = event.target.result as string;
          resolve(text);
        } else {
          reject(new Error('Failed to read file content'));
        }
      };
      reader.onerror = () => {
        reject(new Error('Failed to read file content'));
      };
      reader.readAsText(file.slice(0, READ_HEADER_SIZE));
    });

  const processCSVFile = async (file: File) => {
    try {
      setFileLoading(true);
      const text = await readFileContent(file);
      const firstLine = text.split('\n')[0].trim();
      const firstRow = firstLine
        .split(delimiter)
        .map(column => column.replace(/^"(.*)"$/, '$1'));
      setColumns(firstRow);
      setFileLoading(false);
    } catch (error) {
      addDangerToast('Failed to process file content');
      setFileLoading(false);
    }
  };

  const processExcelColumns = (workbook: XLSX.WorkBook, sn: string[]) => {
    if (!workbook) {
      return;
    }
    let cSheetName = currentSheetName;
    if (!currentSheetName) {
      setCurrentSheetName(sn[0]);
      cSheetName = sn[0];
    }
    cSheetName = cSheetName || sn[0];
    form.setFieldsValue({ sheet_name: cSheetName });
    const worksheet = workbook.Sheets[cSheetName];

    const worksheetRef: string = worksheet['!ref'] ? worksheet['!ref'] : '';
    const range = XLSX.utils.decode_range(worksheetRef);
    const columnNames = Array.from({ length: range.e.c + 1 }, (_, i) => {
      const cellAddress = XLSX.utils.encode_cell({ r: 0, c: i });
      return worksheet[cellAddress]?.v;
    });
    setColumns(columnNames);
  };

  const processExcelFile = async (file: File) =>
    new Promise<string>((resolve, reject) => {
      setFileLoading(true);
      const reader = new FileReader();
      reader.readAsBinaryString(file);

      reader.onload = event => {
        if (!event.target && event.target == null) {
          reader.onerror = () => {
            reject(new Error('Failed to read file content'));
          };
          return;
        }
        // Read workbook
        const workbook = XLSX.read(event.target.result, { type: 'binary' });
        if (workbook == null) {
          reject(new Error('Failed to process file content'));
          addDangerToast('Failed to process file content');
          setFileLoading(false);
          return;
        }
        // Extract sheet names
        const tmpSheetNames = workbook.SheetNames;
        if (tmpSheetNames.length < 1) {
          reject(new Error('Failed to read file content'));
          addDangerToast('Failed to process file content');
          setFileLoading(false);
          return;
        }
        processExcelColumns(workbook, tmpSheetNames);
        setSheetNames(workbook.SheetNames);
        setFileLoading(false);
        resolve('success');
      };
    });

  const onChangeFile = async (info: UploadChangeParam<any>) => {
    setFileList([
      {
@@ -523,14 +518,7 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
    if (!previewUploadedFile) {
      return;
    }
    if (type === 'csv') {
      await processCSVFile(info.file.originFileObj);
    }
    if (type === 'excel') {
      setSheetNames([]);
      setCurrentSheetName(undefined);
      await processExcelFile(info.file.originFileObj);
    }
    await loadFileMetadata(info.file.originFileObj);
  };

  useEffect(() => {

@@ -542,25 +530,10 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
      if (!previewUploadedFile) {
        return;
      }
      processCSVFile(fileList[0].originFileObj).then(r => r);
      loadFileMetadata(fileList[0].originFileObj).then(r => r);
    }
  }, [delimiter]);

  useEffect(() => {
    (async () => {
      if (
        columns.length > 0 &&
        fileList[0].originFileObj &&
        fileList[0].originFileObj instanceof File
      ) {
        if (!previewUploadedFile) {
          return;
        }
        await processExcelFile(fileList[0].originFileObj);
      }
    })();
  }, [currentSheetName]);

  const validateUpload = (_: any, value: string) => {
    if (fileList.length === 0) {
      return Promise.reject(t('Uploading a file is required'));
@@ -734,9 +707,9 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
            </StyledFormItem>
          </Col>
        </Row>
        <Row>
          <Col span={24}>
            {type === 'csv' && (
        {isFieldATypeSpecificField('delimiter', type) && (
          <Row>
            <Col span={24}>
              <StyledFormItemWithTip
                label={t('Delimiter')}
                tip={t('Select a delimiter for this data')}

@@ -749,8 +722,12 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
                allowNewOptions
              />
            </StyledFormItemWithTip>
          )}
          {type === 'excel' && (
            </Col>
          </Row>
        )}
        {isFieldATypeSpecificField('sheet_name', type) && (
          <Row>
            <Col span={24}>
              <StyledFormItem label={t('Sheet name')} name="sheet_name">
                <Select
                  ariaLabel={t('Choose sheet name')}

@@ -762,9 +739,9 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
                )}
              />
            </StyledFormItem>
          )}
            </Col>
          </Row>
        </Col>
      </Row>
        )}
      </Collapse.Panel>
      <Collapse.Panel
        header={
@@ -794,91 +771,99 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
            </StyledFormItemWithTip>
          </Col>
        </Row>
        <Row>
          <Col span={24}>
            <StyledFormItem
              label={t('Columns To Be Parsed as Dates')}
              name="column_dates"
            >
              <Select
                ariaLabel={t('Choose columns to be parsed as dates')}
                mode="multiple"
                options={columnsToOptions()}
                allowClear
                allowNewOptions
                placeholder={t(
                  'A comma separated list of columns that should be parsed as dates',
        {isFieldATypeSpecificField('column_dates', type) && (
          <Row>
            <Col span={24}>
              <StyledFormItem
                label={t('Columns To Be Parsed as Dates')}
                name="column_dates"
              >
                <Select
                  ariaLabel={t('Choose columns to be parsed as dates')}
                  mode="multiple"
                  options={columnsToOptions()}
                  allowClear
                  allowNewOptions
                  placeholder={t(
                    'A comma separated list of columns that should be parsed as dates',
                  )}
                />
              </StyledFormItem>
            </Col>
          </Row>
        )}
        {isFieldATypeSpecificField('decimal_character', type) && (
          <Row>
            <Col span={24}>
              <StyledFormItemWithTip
                label={t('Decimal Character')}
                tip={t('Character to interpret as decimal point')}
                name="decimal_character"
              >
                <Input type="text" />
              </StyledFormItemWithTip>
            </Col>
          </Row>
        )}
        {isFieldATypeSpecificField('null_values', type) && (
          <Row>
            <Col span={24}>
              <StyledFormItemWithTip
                label={t('Null Values')}
                tip={t(
                  'Choose values that should be treated as null. Warning: Hive database supports only a single value',
                )}
              />
            </StyledFormItem>
          </Col>
        </Row>
        <Row>
          <Col span={24}>
            <StyledFormItemWithTip
              label={t('Decimal Character')}
              tip={t('Character to interpret as decimal point')}
              name="decimal_character"
            >
              <Input type="text" />
            </StyledFormItemWithTip>
          </Col>
        </Row>
        <Row>
          <Col span={24}>
            <StyledFormItemWithTip
              label={t('Null Values')}
              tip={t(
                'Choose values that should be treated as null. Warning: Hive database supports only a single value',
              )}
              name="null_values"
            >
              <Select
                mode="multiple"
                options={nullValuesOptions}
                allowClear
                allowNewOptions
              />
            </StyledFormItemWithTip>
          </Col>
        </Row>
        {type === 'csv' && (
          <>
            <Row>
              <Col span={24}>
                <StyledFormItem name="skip_initial_space">
                  <SwitchContainer
                    label={t('Skip spaces after delimiter')}
                    dataTest="skipInitialSpace"
                  />
                </StyledFormItem>
              </Col>
            </Row>
            <Row>
              <Col span={24}>
                <StyledFormItem name="skip_blank_lines">
                  <SwitchContainer
                    label={t(
                      'Skip blank lines rather than interpreting them as Not A Number values',
                    )}
                    dataTest="skipBlankLines"
                  />
                </StyledFormItem>
              </Col>
            </Row>
            <Row>
              <Col span={24}>
                <StyledFormItem name="day_first">
                  <SwitchContainer
                    label={t(
                      'DD/MM format dates, international and European format',
                    )}
                    dataTest="dayFirst"
                  />
                </StyledFormItem>
              </Col>
            </Row>
          </>
                name="null_values"
              >
                <Select
                  mode="multiple"
                  options={nullValuesOptions}
                  allowClear
                  allowNewOptions
                />
              </StyledFormItemWithTip>
            </Col>
          </Row>
        )}
        {isFieldATypeSpecificField('skip_initial_space', type) && (
          <Row>
            <Col span={24}>
              <StyledFormItem name="skip_initial_space">
                <SwitchContainer
                  label={t('Skip spaces after delimiter')}
                  dataTest="skipInitialSpace"
                />
              </StyledFormItem>
            </Col>
          </Row>
        )}
        {isFieldATypeSpecificField('skip_blank_lines', type) && (
          <Row>
            <Col span={24}>
              <StyledFormItem name="skip_blank_lines">
                <SwitchContainer
                  label={t(
                    'Skip blank lines rather than interpreting them as Not A Number values',
                  )}
                  dataTest="skipBlankLines"
                />
              </StyledFormItem>
            </Col>
          </Row>
        )}
        {isFieldATypeSpecificField('day_first', type) && (
          <Row>
            <Col span={24}>
              <StyledFormItem name="day_first">
                <SwitchContainer
                  label={t(
                    'DD/MM format dates, international and European format',
                  )}
                  dataTest="dayFirst"
                />
              </StyledFormItem>
            </Col>
          </Row>
        )}
      </Collapse.Panel>
      <Collapse.Panel
@@ -894,40 +879,6 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
          }
          key="3"
        >
          <Row>
            <Col span={24}>
              <StyledFormItemWithTip
                label={t('Index Column')}
                tip={t(
                  'Column to use as the row labels of the dataframe. Leave empty if no index column',
                )}
                name="index_column"
              >
                <Select
                  ariaLabel={t('Choose index column')}
                  options={columns.map(column => ({
                    value: column,
                    label: column,
                  }))}
                  allowClear
                  allowNewOptions
                />
              </StyledFormItemWithTip>
            </Col>
          </Row>
          <Row>
            <Col span={24}>
              <StyledFormItemWithTip
                label={t('Column Label(s)')}
                tip={t(
                  'Column label for index column(s). If None is given and Dataframe Index is checked, Index Names are used',
                )}
                name="column_labels"
              >
                <Input aria-label={t('Column labels')} type="text" />
              </StyledFormItemWithTip>
            </Col>
          </Row>
          <Row>
            <Col span={24}>
              <StyledFormItem

@@ -947,7 +898,7 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
            </StyledFormItem>
          </Col>
        </Row>
        {type === 'csv' && (
        {isFieldATypeSpecificField('column_data_types', type) && (
          <Row>
            <Col span={24}>
              <StyledFormItemWithTip
@@ -966,66 +917,114 @@ const UploadDataModal: FunctionComponent<UploadDataModalProps> = ({
          <Col span={24}>
            <StyledFormItem name="dataframe_index">
              <SwitchContainer
                label={t('Write dataframe index as a column')}
                label={t('Create dataframe index')}
                dataTest="dataFrameIndex"
                onChange={setCurrentDataframeIndex}
              />
            </StyledFormItem>
          </Col>
        </Row>
        {currentDataframeIndex &&
          isFieldATypeSpecificField('index_column', type) && (
            <Row>
              <Col span={24}>
                <StyledFormItemWithTip
                  label={t('Index Column')}
                  tip={t(
                    'Column to use as the index of the dataframe. If None is given, Index label is used.',
                  )}
                  name="index_column"
                >
                  <Select
                    ariaLabel={t('Choose index column')}
                    options={columns.map(column => ({
                      value: column,
                      label: column,
                    }))}
                    allowClear
                    allowNewOptions
                  />
                </StyledFormItemWithTip>
              </Col>
            </Row>
          )}
        {currentDataframeIndex && (
          <Row>
            <Col span={24}>
              <StyledFormItemWithTip
                label={t('Index Label')}
                tip={t(
                  "Label for the index column. Don't use an existing column name.",
                )}
                name="index_label"
              >
                <Input aria-label={t('Index label')} type="text" />
              </StyledFormItemWithTip>
            </Col>
          </Row>
        )}
      </Collapse.Panel>
      <Collapse.Panel
        header={
          <div>
            <h4>{t('Rows')}</h4>
            <p className="helper">
              {t('Set header rows and the number of rows to read or skip.')}
            </p>
          </div>
        }
        key="4"
      >
        <Row>
          <Col span={8}>
            <StyledFormItemWithTip
              label={t('Header Row')}
              tip={t(
                'Row containing the headers to use as column names (0 is first line of data).',
              )}
              name="header_row"
              rules={[
                { required: true, message: 'Header row is required' },
              ]}
            >
              <InputNumber
                aria-label={t('Header row')}
                type="text"
                min={0}
              />
            </StyledFormItemWithTip>
          </Col>
          <Col span={8}>
            <StyledFormItemWithTip
              label={t('Rows to Read')}
              tip={t(
                'Number of rows of file to read. Leave empty (default) to read all rows',
              )}
              name="rows_to_read"
            >
              <InputNumber aria-label={t('Rows to read')} min={1} />
            </StyledFormItemWithTip>
          </Col>
          <Col span={8}>
            <StyledFormItemWithTip
              label={t('Skip Rows')}
              tip={t('Number of rows to skip at start of file.')}
              name="skip_rows"
              rules={[{ required: true, message: 'Skip rows is required' }]}
            >
              <InputNumber aria-label={t('Skip rows')} min={0} />
            </StyledFormItemWithTip>
          </Col>
        </Row>
      </Collapse.Panel>
      {isFieldATypeSpecificField('header_row', type) &&
        isFieldATypeSpecificField('rows_to_read', type) &&
        isFieldATypeSpecificField('skip_rows', type) && (
          <Collapse.Panel
            header={
              <div>
                <h4>{t('Rows')}</h4>
                <p className="helper">
                  {t(
                    'Set header rows and the number of rows to read or skip.',
                  )}
                </p>
              </div>
            }
            key="4"
          >
            <Row>
              <Col span={8}>
                <StyledFormItemWithTip
                  label={t('Header Row')}
                  tip={t(
                    'Row containing the headers to use as column names (0 is first line of data).',
                  )}
                  name="header_row"
                  rules={[
                    { required: true, message: 'Header row is required' },
                  ]}
                >
                  <InputNumber
                    aria-label={t('Header row')}
                    type="text"
                    min={0}
                  />
                </StyledFormItemWithTip>
              </Col>
              <Col span={8}>
                <StyledFormItemWithTip
                  label={t('Rows to Read')}
                  tip={t(
                    'Number of rows of file to read. Leave empty (default) to read all rows',
                  )}
                  name="rows_to_read"
                >
                  <InputNumber aria-label={t('Rows to read')} min={1} />
                </StyledFormItemWithTip>
              </Col>
              <Col span={8}>
                <StyledFormItemWithTip
                  label={t('Skip Rows')}
                  tip={t('Number of rows to skip at start of file.')}
                  name="skip_rows"
                  rules={[
                    { required: true, message: 'Skip rows is required' },
                  ]}
                >
                  <InputNumber aria-label={t('Skip rows')} min={0} />
                </StyledFormItemWithTip>
              </Col>
            </Row>
          </Collapse.Panel>
        )}
    </Collapse>
  </AntdForm>
</Modal>
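Taken together with the tests earlier, the per-type form surface assembled above works out to roughly this matrix (summarized from the diff; shared fields such as table name, schema, and "already exists" always render):

```ts
// Summary of which optional controls each upload type exposes:
const formSurface = {
  csv: { delimiter: true, sheetName: false, rowsPanel: true, columnDataTypes: true },
  excel: { delimiter: false, sheetName: true, rowsPanel: true, columnDataTypes: false },
  columnar: { delimiter: false, sheetName: false, rowsPanel: false, columnDataTypes: false },
};
```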
@@ -43,19 +43,19 @@ const dropdownItems = [
  {
    label: 'Upload a CSV',
    name: 'Upload a CSV',
    url: '/csvtodatabaseview/form',
    url: '#',
    perm: true,
  },
  {
    label: 'Upload a Columnar File',
    name: 'Upload a Columnar file',
    url: '/columnartodatabaseview/form',
    url: '#',
    perm: true,
  },
  {
    label: 'Upload Excel',
    name: 'Upload Excel',
    url: '/exceltodatabaseview/form',
    url: '#',
    perm: true,
  },
],
@@ -54,13 +54,13 @@ const dropdownItems = [
  {
    label: 'Upload CSV to database',
    name: 'Upload a CSV',
    url: '/csvtodatabaseview/form',
    url: '#',
    perm: true,
  },
  {
    label: 'Upload columnar file to database',
    name: 'Upload a Columnar file',
    url: '/columnartodatabaseview/form',
    url: '#',
    perm: true,
  },
],

@@ -309,12 +309,10 @@ test('If there is a DB with allow_file_upload set as True the option should be e
  userEvent.hover(dropdown);
  const dataMenu = await screen.findByText(dropdownItems[0].label);
  userEvent.hover(dataMenu);
  expect(await screen.findByText('Upload CSV to database')).toBeInTheDocument();
  expect(
    (await screen.findByText('Upload CSV to database')).closest('a'),
  ).toHaveAttribute('href', '#');
  expect(
    (await screen.findByText('Upload Excel to database')).closest('a'),
  ).toHaveAttribute('href', '#');
    await screen.findByText('Upload Excel to database'),
  ).toBeInTheDocument();
});

test('If there is NOT a DB with allow_file_upload set as True the option should be disabled', async () => {
@@ -45,6 +45,7 @@ import {
} from 'src/types/bootstrapTypes';
import { RootState } from 'src/dashboard/types';
import DatabaseModal from 'src/features/databases/DatabaseModal';
import UploadDataModal from 'src/features/databases/UploadDataModel';
import { uploadUserPerms } from 'src/views/CRUD/utils';
import TelemetryPixel from 'src/components/TelemetryPixel';
import LanguagePicker from './LanguagePicker';

@@ -143,6 +144,11 @@ const RightMenu = ({
    HAS_GSHEETS_INSTALLED,
  } = useSelector<any, ExtensionConfigs>(state => state.common.conf);
  const [showDatabaseModal, setShowDatabaseModal] = useState<boolean>(false);
  const [showCSVUploadModal, setShowCSVUploadModal] = useState<boolean>(false);
  const [showExcelUploadModal, setShowExcelUploadModal] =
    useState<boolean>(false);
  const [showColumnarUploadModal, setShowColumnarUploadModal] =
    useState<boolean>(false);
  const [engine, setEngine] = useState<string>('');
  const canSql = findPermission('can_sqllab', 'Superset', roles);
  const canDashboard = findPermission('can_write', 'Dashboard', roles);

@@ -188,23 +194,20 @@ const RightMenu = ({
    },
    {
      label: t('Upload CSV to database'),
      name: 'Upload a CSV',
      url: '#',
      name: GlobalMenuDataOptions.CSVUpload,
      perm: canUploadCSV && showUploads,
      disable: isAdmin && !allowUploads,
    },
    {
      label: t('Upload columnar file to database'),
      name: 'Upload a Columnar file',
      url: '/columnartodatabaseview/form',
      perm: canUploadColumnar && showUploads,
      label: t('Upload Excel to database'),
      name: GlobalMenuDataOptions.ExcelUpload,
      perm: canUploadExcel && showUploads,
      disable: isAdmin && !allowUploads,
    },
    {
      label: t('Upload Excel to database'),
      name: 'Upload Excel',
      url: '#',
      perm: canUploadExcel && showUploads,
      label: t('Upload Columnar file to database'),
      name: GlobalMenuDataOptions.ColumnarUpload,
      perm: canUploadColumnar && showUploads,
      disable: isAdmin && !allowUploads,
    },
  ],

@@ -289,6 +292,12 @@ const RightMenu = ({
    } else if (itemChose.key === GlobalMenuDataOptions.GoogleSheets) {
      setShowDatabaseModal(true);
      setEngine('Google Sheets');
    } else if (itemChose.key === GlobalMenuDataOptions.CSVUpload) {
      setShowCSVUploadModal(true);
    } else if (itemChose.key === GlobalMenuDataOptions.ExcelUpload) {
      setShowExcelUploadModal(true);
    } else if (itemChose.key === GlobalMenuDataOptions.ColumnarUpload) {
      setShowColumnarUploadModal(true);
    }
  };

@@ -350,6 +359,30 @@ const RightMenu = ({
          onDatabaseAdd={handleDatabaseAdd}
        />
      )}
      {canUploadCSV && (
        <UploadDataModal
          onHide={() => setShowCSVUploadModal(false)}
          show={showCSVUploadModal}
          allowedExtensions={CSV_EXTENSIONS}
          type="csv"
        />
      )}
      {canUploadExcel && (
        <UploadDataModal
          onHide={() => setShowExcelUploadModal(false)}
          show={showExcelUploadModal}
          allowedExtensions={EXCEL_EXTENSIONS}
          type="excel"
        />
      )}
      {canUploadColumnar && (
        <UploadDataModal
          onHide={() => setShowColumnarUploadModal(false)}
          show={showColumnarUploadModal}
          allowedExtensions={COLUMNAR_EXTENSIONS}
          type="columnar"
        />
      )}
      {environmentTag?.text && (
        <Label
          css={{ borderRadius: `${theme.gridUnit * 125}px` }}
@@ -51,4 +51,7 @@ export enum GlobalMenuDataOptions {
  GoogleSheets = 'gsheets',
  DbConnection = 'dbconnection',
  DatasetCreation = 'datasetCreation',
  CSVUpload = 'csvUpload',
  ExcelUpload = 'excelUpload',
  ColumnarUpload = 'columnarUpload',
}
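The three new enum members replace the old form-view URLs as menu item keys; RightMenu's selection handler, shown earlier in this diff, branches on them to open the matching modal. Condensed from that handler:

```ts
if (itemChose.key === GlobalMenuDataOptions.CSVUpload) {
  setShowCSVUploadModal(true);
} else if (itemChose.key === GlobalMenuDataOptions.ExcelUpload) {
  setShowExcelUploadModal(true);
} else if (itemChose.key === GlobalMenuDataOptions.ColumnarUpload) {
  setShowColumnarUploadModal(true);
}
```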
@@ -140,6 +140,8 @@ function DatabaseList({
    useState<boolean>(false);
  const [excelUploadDataModalOpen, setExcelUploadDataModalOpen] =
    useState<boolean>(false);
  const [columnarUploadDataModalOpen, setColumnarUploadDataModalOpen] =
    useState<boolean>(false);

  const [allowUploads, setAllowUploads] = useState<boolean>(false);
  const isAdmin = isUserAdmin(fullUser);

@@ -257,9 +259,12 @@ function DatabaseList({
      disable: isDisabled,
    },
    {
      label: t('Upload columnar file'),
      label: t('Upload Columnar'),
      name: 'Upload columnar file',
      url: '/columnartodatabaseview/form',
      url: '#',
      onClick: () => {
        setColumnarUploadDataModalOpen(true);
      },
      perm: canUploadColumnar && showUploads,
      disable: isDisabled,
    },

@@ -577,6 +582,7 @@ function DatabaseList({
        }}
        show={csvUploadDataModalOpen}
        allowedExtensions={CSV_EXTENSIONS}
        type="csv"
      />
      <UploadDataModal
        addDangerToast={addDangerToast}

@@ -588,6 +594,16 @@ function DatabaseList({
        allowedExtensions={EXCEL_EXTENSIONS}
        type="excel"
      />
      <UploadDataModal
        addDangerToast={addDangerToast}
        addSuccessToast={addSuccessToast}
        onHide={() => {
          setColumnarUploadDataModalOpen(false);
        }}
        show={columnarUploadDataModalOpen}
        allowedExtensions={COLUMNAR_EXTENSIONS}
        type="columnar"
      />
      {databaseCurrentlyDeleting && (
        <DeleteModal
          description={
@@ -491,7 +491,7 @@ export const uploadUserPerms = (
    checkUploadExtensions(csvExt, allowedExt);
  const canUploadColumnar =
    checkUploadExtensions(colExt, allowedExt) &&
    findPermission('can_this_form_get', 'ColumnarToDatabaseView', roles);
    findPermission('can_columnar_upload', 'Database', roles);
  const canUploadExcel =
    checkUploadExtensions(excelExt, allowedExt) &&
    findPermission('can_excel_upload', 'Database', roles);
@@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
from abc import abstractmethod
from typing import Any, Optional, TypedDict
@@ -22,6 +21,7 @@ from typing import Any, Optional, TypedDict
import pandas as pd
from flask_babel import lazy_gettext as _
from sqlalchemy.exc import SQLAlchemyError
from werkzeug.datastructures import FileStorage

from superset import db
from superset.commands.base import BaseCommand
@@ -46,8 +46,17 @@ READ_CHUNK_SIZE = 1000

class ReaderOptions(TypedDict, total=False):
    already_exists: str
    column_labels: str
    index_column: str
    index_label: str
    dataframe_index: bool


class FileMetadataItem(TypedDict):
    sheet_name: Optional[str]
    column_names: list[str]


class FileMetadata(TypedDict, total=False):
    items: list[FileMetadataItem]


class BaseDataReader:
@@ -57,14 +66,21 @@ class BaseDataReader:
    to read data from multiple file types (e.g. CSV, Excel, etc.)
    """

    def __init__(self, options: dict[str, Any]) -> None:
        self._options = options
    def __init__(self, options: Optional[dict[str, Any]] = None) -> None:
        self._options = options or {}

    @abstractmethod
    def file_to_dataframe(self, file: Any) -> pd.DataFrame: ...
    def file_to_dataframe(self, file: FileStorage) -> pd.DataFrame: ...

    @abstractmethod
    def file_metadata(self, file: FileStorage) -> FileMetadata: ...

    def read(
        self, file: Any, database: Database, table_name: str, schema_name: Optional[str]
        self,
        file: FileStorage,
        database: Database,
        table_name: str,
        schema_name: Optional[str],
    ) -> None:
        self._dataframe_to_database(
            self.file_to_dataframe(file), database, table_name, schema_name
@@ -85,16 +101,20 @@ class BaseDataReader:
        """
        try:
            data_table = Table(table=table_name, schema=schema_name)
            to_sql_kwargs = {
                "chunksize": READ_CHUNK_SIZE,
                "if_exists": self._options.get("already_exists", "fail"),
                "index": self._options.get("dataframe_index", False),
            }
            if self._options.get("index_label") and self._options.get(
                "dataframe_index"
            ):
                to_sql_kwargs["index_label"] = self._options.get("index_label")
            database.db_engine_spec.df_to_sql(
                database,
                data_table,
                df,
                to_sql_kwargs={
                    "chunksize": READ_CHUNK_SIZE,
                    "if_exists": self._options.get("already_exists", "fail"),
                    "index": self._options.get("index_column"),
                    "index_label": self._options.get("column_labels"),
                },
                to_sql_kwargs=to_sql_kwargs,
            )
        except ValueError as ex:
            raise DatabaseUploadFailed(
@@ -0,0 +1,134 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
from collections.abc import Generator
from io import BytesIO
from pathlib import Path
from typing import Any, IO, Optional
from zipfile import BadZipfile, is_zipfile, ZipFile

import pandas as pd
import pyarrow.parquet as pq
from flask_babel import lazy_gettext as _
from pyarrow.lib import ArrowException
from werkzeug.datastructures import FileStorage

from superset.commands.database.exceptions import DatabaseUploadFailed
from superset.commands.database.uploaders.base import (
    BaseDataReader,
    FileMetadata,
    ReaderOptions,
)

logger = logging.getLogger(__name__)


class ColumnarReaderOptions(ReaderOptions, total=False):
    columns_read: list[str]


class ColumnarReader(BaseDataReader):
    def __init__(
        self,
        options: Optional[ColumnarReaderOptions] = None,
    ) -> None:
        options = options or {}
        super().__init__(
            options=dict(options),
        )

    def _read_buffer_to_dataframe(self, buffer: IO[bytes]) -> pd.DataFrame:
        kwargs: dict[str, Any] = {
            "path": buffer,
        }
        if self._options.get("columns_read"):
            kwargs["columns"] = self._options.get("columns_read")
        try:
            return pd.read_parquet(**kwargs)
        except (
            pd.errors.ParserError,
            pd.errors.EmptyDataError,
            UnicodeDecodeError,
            ValueError,
        ) as ex:
            raise DatabaseUploadFailed(
                message=_("Parsing error: %(error)s", error=str(ex))
            ) from ex
        except Exception as ex:
            raise DatabaseUploadFailed(_("Error reading Columnar file")) from ex

    @staticmethod
    def _yield_files(file: FileStorage) -> Generator[IO[bytes], None, None]:
        """
        Yields files from the provided file. If the file is a zip file, it yields each
        file within the zip file. If it's a single file, it yields the file itself.

        :param file: The file to yield files from.
        :return: A generator that yields files.
        """
        file_suffix = Path(file.filename).suffix
        if not file_suffix:
            raise DatabaseUploadFailed(_("Unexpected: no file extension found"))
        file_suffix = file_suffix[1:]  # remove the dot
        if file_suffix == "zip":
            if not is_zipfile(file):
                raise DatabaseUploadFailed(_("Not a valid ZIP file"))
            try:
                with ZipFile(file) as zip_file:
                    # check if all file types are of the same extension
                    file_suffixes = {Path(name).suffix for name in zip_file.namelist()}
                    if len(file_suffixes) > 1:
                        raise DatabaseUploadFailed(
                            _("ZIP file contains multiple file types")
                        )
                    for filename in zip_file.namelist():
                        with zip_file.open(filename) as file_in_zip:
                            yield BytesIO(file_in_zip.read())
            except BadZipfile as ex:
                raise DatabaseUploadFailed(_("Not a valid ZIP file")) from ex
        else:
            yield file

    def file_to_dataframe(self, file: FileStorage) -> pd.DataFrame:
        """
        Read Columnar file into a DataFrame

        :return: pandas DataFrame
        :throws DatabaseUploadFailed: if there is an error reading the file
        """
        return pd.concat(
            self._read_buffer_to_dataframe(buffer) for buffer in self._yield_files(file)
        )

    def file_metadata(self, file: FileStorage) -> FileMetadata:
        column_names = set()
        try:
            for file_item in self._yield_files(file):
                parquet_file = pq.ParquetFile(file_item)
                column_names.update(parquet_file.metadata.schema.names)  # pylint: disable=no-member
        except ArrowException as ex:
            raise DatabaseUploadFailed(
                message=_("Parsing error: %(error)s", error=str(ex))
            ) from ex
        return {
            "items": [
                {
                    "column_names": list(column_names),
                    "sheet_name": None,
                }
            ]
        }
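[Editor's note, not part of the diff: a minimal sketch of how the new ColumnarReader above could be exercised on its own. The in-memory Parquet buffer, file name, and option values below are illustrative assumptions.]

# Hypothetical sketch: exercising ColumnarReader outside the API layer.
from io import BytesIO

import pandas as pd
from werkzeug.datastructures import FileStorage

from superset.commands.database.uploaders.columnar_reader import ColumnarReader

# Stand-in for an uploaded file: a Parquet buffer wrapped in FileStorage.
buffer = BytesIO()
pd.DataFrame({"a": [1, 2], "b": ["x", "y"]}).to_parquet(buffer)
buffer.seek(0)
upload = FileStorage(stream=buffer, filename="sample.parquet")

reader = ColumnarReader({"columns_read": ["a"]})
print(reader.file_metadata(upload))  # {'items': [{'column_names': [...], 'sheet_name': None}]}
upload.stream.seek(0)  # rewind before reading the same buffer again
print(reader.file_to_dataframe(upload))  # DataFrame restricted to column "a"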
@@ -15,17 +15,23 @@
# specific language governing permissions and limitations
# under the License.
import logging
from typing import Any
from typing import Any, Optional

import pandas as pd
from flask_babel import lazy_gettext as _
from werkzeug.datastructures import FileStorage

from superset.commands.database.exceptions import DatabaseUploadFailed
from superset.commands.database.uploaders.base import BaseDataReader, ReaderOptions
from superset.commands.database.uploaders.base import (
    BaseDataReader,
    FileMetadata,
    ReaderOptions,
)

logger = logging.getLogger(__name__)

READ_CSV_CHUNK_SIZE = 1000
ROWS_TO_READ_METADATA = 2


class CSVReaderOptions(ReaderOptions, total=False):
@@ -33,7 +39,7 @@ class CSVReaderOptions(ReaderOptions, total=False):
    column_data_types: dict[str, str]
    column_dates: list[str]
    columns_read: list[str]
    dataframe_index: str
    index_column: str
    day_first: bool
    decimal_character: str
    header_row: int
@@ -47,47 +53,26 @@ class CSVReaderOptions(ReaderOptions, total=False):
class CSVReader(BaseDataReader):
    def __init__(
        self,
        options: CSVReaderOptions,
        options: Optional[CSVReaderOptions] = None,
    ) -> None:
        options = options or {}
        super().__init__(
            options=dict(options),
        )

    def file_to_dataframe(self, file: Any) -> pd.DataFrame:
        """
        Read CSV file into a DataFrame

        :return: pandas DataFrame
        :throws DatabaseUploadFailed: if there is an error reading the CSV file
        """
    @staticmethod
    def _read_csv(file: FileStorage, kwargs: dict[str, Any]) -> pd.DataFrame:
        try:
            return pd.concat(
                pd.read_csv(
                    chunksize=READ_CSV_CHUNK_SIZE,
                    encoding="utf-8",
                    filepath_or_buffer=file,
                    header=self._options.get("header_row", 0),
                    decimal=self._options.get("decimal_character", "."),
                    index_col=self._options.get("index_column"),
                    dayfirst=self._options.get("day_first", False),
                    iterator=True,
                    keep_default_na=not self._options.get("null_values"),
                    usecols=self._options.get("columns_read")
                    if self._options.get("columns_read")  # None if an empty list
                    else None,
                    na_values=self._options.get("null_values")
                    if self._options.get("null_values")  # None if an empty list
                    else None,
                    nrows=self._options.get("rows_to_read"),
                    parse_dates=self._options.get("column_dates"),
                    sep=self._options.get("delimiter", ","),
                    skip_blank_lines=self._options.get("skip_blank_lines", False),
                    skipinitialspace=self._options.get("skip_initial_space", False),
                    skiprows=self._options.get("skip_rows", 0),
                    dtype=self._options.get("column_data_types")
                    if self._options.get("column_data_types")
                    else None,
            if "chunksize" in kwargs:
                return pd.concat(
                    pd.read_csv(
                        filepath_or_buffer=file.stream,
                        **kwargs,
                    )
                )
            return pd.read_csv(
                filepath_or_buffer=file.stream,
                **kwargs,
            )
        except (
            pd.errors.ParserError,
@@ -100,3 +85,59 @@ class CSVReader(BaseDataReader):
            ) from ex
        except Exception as ex:
            raise DatabaseUploadFailed(_("Error reading CSV file")) from ex

    def file_to_dataframe(self, file: FileStorage) -> pd.DataFrame:
        """
        Read CSV file into a DataFrame

        :return: pandas DataFrame
        :throws DatabaseUploadFailed: if there is an error reading the file
        """
        kwargs = {
            "chunksize": READ_CSV_CHUNK_SIZE,
            "encoding": "utf-8",
            "header": self._options.get("header_row", 0),
            "decimal": self._options.get("decimal_character", "."),
            "index_col": self._options.get("index_column"),
            "dayfirst": self._options.get("day_first", False),
            "iterator": True,
            "keep_default_na": not self._options.get("null_values"),
            "usecols": self._options.get("columns_read")
            if self._options.get("columns_read")  # None if an empty list
            else None,
            "na_values": self._options.get("null_values")
            if self._options.get("null_values")  # None if an empty list
            else None,
            "nrows": self._options.get("rows_to_read"),
            "parse_dates": self._options.get("column_dates"),
            "sep": self._options.get("delimiter", ","),
            "skip_blank_lines": self._options.get("skip_blank_lines", False),
            "skipinitialspace": self._options.get("skip_initial_space", False),
            "skiprows": self._options.get("skip_rows", 0),
            "dtype": self._options.get("column_data_types")
            if self._options.get("column_data_types")
            else None,
        }
        return self._read_csv(file, kwargs)

    def file_metadata(self, file: FileStorage) -> FileMetadata:
        """
        Get metadata from a CSV file

        :return: FileMetadata
        :throws DatabaseUploadFailed: if there is an error reading the file
        """
        kwargs = {
            "nrows": ROWS_TO_READ_METADATA,
            "header": self._options.get("header_row", 0),
            "sep": self._options.get("delimiter", ","),
        }
        df = self._read_csv(file, kwargs)
        return {
            "items": [
                {
                    "column_names": df.columns.tolist(),
                    "sheet_name": None,
                }
            ]
        }
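[Editor's note, not part of the diff: a small sketch of the metadata path above. The sample CSV bytes and file name are assumptions.]

# Hypothetical sketch: probing an uploaded CSV for its column names.
from io import BytesIO

from werkzeug.datastructures import FileStorage

from superset.commands.database.uploaders.csv_reader import CSVReader

upload = FileStorage(
    stream=BytesIO(b"name,age\nalice,30\nbob,25\n"),
    filename="people.csv",
)
# Only ROWS_TO_READ_METADATA rows are read, so this stays cheap for big files.
metadata = CSVReader({"delimiter": ","}).file_metadata(upload)
print(metadata)  # {'items': [{'column_names': ['name', 'age'], 'sheet_name': None}]}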
@@ -15,22 +15,29 @@
# specific language governing permissions and limitations
# under the License.
import logging
from typing import Any
from typing import Optional

import pandas as pd
from flask_babel import lazy_gettext as _
from werkzeug.datastructures import FileStorage

from superset.commands.database.exceptions import DatabaseUploadFailed
from superset.commands.database.uploaders.base import BaseDataReader, ReaderOptions
from superset.commands.database.uploaders.base import (
    BaseDataReader,
    FileMetadata,
    ReaderOptions,
)

logger = logging.getLogger(__name__)

ROWS_TO_READ_METADATA = 2


class ExcelReaderOptions(ReaderOptions, total=False):
    sheet_name: str
    column_dates: list[str]
    columns_read: list[str]
    dataframe_index: str
    index_column: str
    decimal_character: str
    header_row: int
    null_values: list[str]
@@ -41,18 +48,19 @@ class ExcelReaderOptions(ReaderOptions, total=False):
class ExcelReader(BaseDataReader):
    def __init__(
        self,
        options: ExcelReaderOptions,
        options: Optional[ExcelReaderOptions] = None,
    ) -> None:
        options = options or {}
        super().__init__(
            options=dict(options),
        )

    def file_to_dataframe(self, file: Any) -> pd.DataFrame:
    def file_to_dataframe(self, file: FileStorage) -> pd.DataFrame:
        """
        Read Excel file into a DataFrame

        :return: pandas DataFrame
        :throws DatabaseUploadFailed: if there is an error reading the CSV file
        :throws DatabaseUploadFailed: if there is an error reading the file
        """

        kwargs = {
@@ -84,3 +92,25 @@ class ExcelReader(BaseDataReader):
            ) from ex
        except Exception as ex:
            raise DatabaseUploadFailed(_("Error reading Excel file")) from ex

    def file_metadata(self, file: FileStorage) -> FileMetadata:
        try:
            excel_file = pd.ExcelFile(file)
        except (ValueError, AssertionError) as ex:
            raise DatabaseUploadFailed(
                message=_("Excel file format cannot be determined")
            ) from ex

        sheet_names = excel_file.sheet_names

        result: FileMetadata = {"items": []}
        for sheet in sheet_names:
            df = excel_file.parse(sheet, nrows=ROWS_TO_READ_METADATA)
            column_names = df.columns.tolist()
            result["items"].append(
                {
                    "sheet_name": sheet,
                    "column_names": column_names,
                }
            )
        return result
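[Editor's note, not part of the diff: for symmetry with the other readers, a hedged sketch of the per-sheet Excel metadata. The in-memory workbook is an assumption and requires an Excel writer engine such as openpyxl to be installed.]

# Hypothetical sketch: per-sheet metadata from an in-memory Excel workbook.
from io import BytesIO

import pandas as pd
from werkzeug.datastructures import FileStorage

from superset.commands.database.uploaders.excel_reader import ExcelReader

buffer = BytesIO()
with pd.ExcelWriter(buffer) as writer:
    pd.DataFrame({"a": [1], "b": [2]}).to_excel(writer, sheet_name="first", index=False)
buffer.seek(0)

upload = FileStorage(stream=buffer, filename="book.xlsx")
print(ExcelReader().file_metadata(upload))
# {'items': [{'sheet_name': 'first', 'column_names': ['a', 'b']}]}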
@@ -167,6 +167,9 @@ MODEL_API_RW_METHOD_PERMISSION_MAP = {
    "delete_object": "write",
    "copy_dash": "write",
    "get_connection": "write",
    "excel_metadata": "excel_upload",
    "columnar_metadata": "columnar_upload",
    "csv_metadata": "csv_upload",
}

EXTRA_FORM_DATA_APPEND_KEYS = {
@@ -58,6 +58,7 @@ from superset.commands.database.tables import TablesDatabaseCommand
from superset.commands.database.test_connection import TestConnectionDatabaseCommand
from superset.commands.database.update import UpdateDatabaseCommand
from superset.commands.database.uploaders.base import UploadCommand
from superset.commands.database.uploaders.columnar_reader import ColumnarReader
from superset.commands.database.uploaders.csv_reader import CSVReader
from superset.commands.database.uploaders.excel_reader import ExcelReader
from superset.commands.database.validate import ValidateDatabaseParametersCommand
@@ -72,6 +73,9 @@ from superset.daos.database import DatabaseDAO, DatabaseUserOAuth2TokensDAO
from superset.databases.decorators import check_table_access
from superset.databases.filters import DatabaseFilter, DatabaseUploadEnabledFilter
from superset.databases.schemas import (
    ColumnarMetadataUploadFilePostSchema,
    ColumnarUploadPostSchema,
    CSVMetadataUploadFilePostSchema,
    CSVUploadPostSchema,
    database_schemas_query_schema,
    database_tables_query_schema,
@@ -84,6 +88,7 @@ from superset.databases.schemas import (
    DatabaseTablesResponse,
    DatabaseTestConnectionSchema,
    DatabaseValidateParametersSchema,
    ExcelMetadataUploadFilePostSchema,
    ExcelUploadPostSchema,
    get_export_ids_schema,
    OAuth2ProviderResponseSchema,
@@ -93,6 +98,7 @@ from superset.databases.schemas import (
    SelectStarResponseSchema,
    TableExtraMetadataResponseSchema,
    TableMetadataResponseSchema,
    UploadFileMetadata,
    ValidateSQLRequest,
    ValidateSQLResponse,
)
@@ -151,7 +157,11 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
        "schemas_access_for_file_upload",
        "get_connection",
        "csv_upload",
        "csv_metadata",
        "excel_upload",
        "excel_metadata",
        "columnar_upload",
        "columnar_metadata",
        "oauth2",
    }

@@ -263,6 +273,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):

    openapi_spec_tag = "Database"
    openapi_spec_component_schemas = (
        ColumnarUploadPostSchema,
        CSVUploadPostSchema,
        DatabaseConnectionSchema,
        DatabaseFunctionNamesResponse,
@@ -276,6 +287,10 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
        TableMetadataResponseSchema,
        SelectStarResponseSchema,
        SchemasResponseSchema,
        CSVMetadataUploadFilePostSchema,
        ExcelMetadataUploadFilePostSchema,
        ColumnarMetadataUploadFilePostSchema,
        UploadFileMetadata,
        ValidateSQLRequest,
        ValidateSQLResponse,
    )
@@ -1524,11 +1539,60 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
            command.run()
        return self.response(200, message="OK")

    @expose("/csv_metadata/", methods=("POST",))
    @protect()
    @statsd_metrics
    @event_logger.log_this_with_context(
        action=(
            lambda self, *args, **kwargs: f"{self.__class__.__name__}" ".csv_metadata"
        ),
        log_to_statsd=False,
    )
    @requires_form_data
    def csv_metadata(self) -> Response:
        """Upload a CSV file and return file metadata.
        ---
        post:
          summary: Upload a CSV file and return file metadata
          requestBody:
            required: true
            content:
              multipart/form-data:
                schema:
                  $ref: '#/components/schemas/CSVMetadataUploadFilePostSchema'
          responses:
            200:
              description: CSV metadata response
              content:
                application/json:
                  schema:
                    type: object
                    properties:
                      result:
                        $ref: '#/components/schemas/UploadFileMetadata'
            400:
              $ref: '#/components/responses/400'
            401:
              $ref: '#/components/responses/401'
            404:
              $ref: '#/components/responses/404'
            500:
              $ref: '#/components/responses/500'
        """
        try:
            request_form = request.form.to_dict()
            request_form["file"] = request.files.get("file")
            parameters = CSVMetadataUploadFilePostSchema().load(request_form)
        except ValidationError as error:
            return self.response_400(message=error.messages)
        metadata = CSVReader(parameters).file_metadata(parameters["file"])
        return self.response(200, result=UploadFileMetadata().dump(metadata))

    @expose("/<int:pk>/csv_upload/", methods=("POST",))
    @protect()
    @statsd_metrics
    @event_logger.log_this_with_context(
        action=lambda self, *args, **kwargs: f"{self.__class__.__name__}.import_",
        action=lambda self, *args, **kwargs: f"{self.__class__.__name__}.csv_upload",
        log_to_statsd=False,
    )
    @requires_form_data
@@ -1549,7 +1613,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
                schema:
                  $ref: '#/components/schemas/CSVUploadPostSchema'
          responses:
            200:
            201:
              description: CSV upload response
              content:
                application/json:
@@ -1582,13 +1646,62 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
            ).run()
        except ValidationError as error:
            return self.response_400(message=error.messages)
        return self.response(200, message="OK")
        return self.response(201, message="OK")

    @expose("/excel_metadata/", methods=("POST",))
    @protect()
    @statsd_metrics
    @event_logger.log_this_with_context(
        action=(
            lambda self, *args, **kwargs: f"{self.__class__.__name__}" ".excel_metadata"
        ),
        log_to_statsd=False,
    )
    @requires_form_data
    def excel_metadata(self) -> Response:
        """Upload an Excel file and return file metadata.
        ---
        post:
          summary: Upload an Excel file and return file metadata
          requestBody:
            required: true
            content:
              multipart/form-data:
                schema:
                  $ref: '#/components/schemas/ExcelMetadataUploadFilePostSchema'
          responses:
            200:
              description: Excel metadata response
              content:
                application/json:
                  schema:
                    type: object
                    properties:
                      result:
                        $ref: '#/components/schemas/UploadFileMetadata'
            400:
              $ref: '#/components/responses/400'
            401:
              $ref: '#/components/responses/401'
            404:
              $ref: '#/components/responses/404'
            500:
              $ref: '#/components/responses/500'
        """
        try:
            request_form = request.form.to_dict()
            request_form["file"] = request.files.get("file")
            parameters = ExcelMetadataUploadFilePostSchema().load(request_form)
        except ValidationError as error:
            return self.response_400(message=error.messages)
        metadata = ExcelReader().file_metadata(parameters["file"])
        return self.response(200, result=UploadFileMetadata().dump(metadata))

    @expose("/<int:pk>/excel_upload/", methods=("POST",))
    @protect()
    @statsd_metrics
    @event_logger.log_this_with_context(
        action=lambda self, *args, **kwargs: f"{self.__class__.__name__}.import_",
        action=lambda self, *args, **kwargs: f"{self.__class__.__name__}.excel_upload",
        log_to_statsd=False,
    )
    @requires_form_data
@@ -1609,7 +1722,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
                schema:
                  $ref: '#/components/schemas/ExcelUploadPostSchema'
          responses:
            200:
            201:
              description: Excel upload response
              content:
                application/json:
@@ -1642,7 +1755,117 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
            ).run()
        except ValidationError as error:
            return self.response_400(message=error.messages)
        return self.response(200, message="OK")
        return self.response(201, message="OK")

    @expose("/columnar_metadata/", methods=("POST",))
    @protect()
    @statsd_metrics
    @event_logger.log_this_with_context(
        action=lambda self, *args, **kwargs: f"{self.__class__.__name__}"
        ".columnar_metadata",
        log_to_statsd=False,
    )
    @requires_form_data
    def columnar_metadata(self) -> Response:
        """Upload a Columnar file and return file metadata.
        ---
        post:
          summary: Upload a Columnar file and return file metadata
          requestBody:
            required: true
            content:
              multipart/form-data:
                schema:
                  $ref: '#/components/schemas/ColumnarMetadataUploadFilePostSchema'
          responses:
            200:
              description: Columnar metadata response
              content:
                application/json:
                  schema:
                    type: object
                    properties:
                      result:
                        $ref: '#/components/schemas/UploadFileMetadata'
            400:
              $ref: '#/components/responses/400'
            401:
              $ref: '#/components/responses/401'
            404:
              $ref: '#/components/responses/404'
            500:
              $ref: '#/components/responses/500'
        """
        try:
            request_form = request.form.to_dict()
            request_form["file"] = request.files.get("file")
            parameters = ColumnarMetadataUploadFilePostSchema().load(request_form)
        except ValidationError as error:
            return self.response_400(message=error.messages)
        metadata = ColumnarReader().file_metadata(parameters["file"])
        return self.response(200, result=UploadFileMetadata().dump(metadata))

    @expose("/<int:pk>/columnar_upload/", methods=("POST",))
    @protect()
    @statsd_metrics
    @event_logger.log_this_with_context(
        action=lambda self,
        *args,
        **kwargs: f"{self.__class__.__name__}.columnar_upload",
        log_to_statsd=False,
    )
    @requires_form_data
    def columnar_upload(self, pk: int) -> Response:
        """Upload a Columnar file into a database.
        ---
        post:
          summary: Upload a Columnar file to a database table
          parameters:
          - in: path
            schema:
              type: integer
            name: pk
          requestBody:
            required: true
            content:
              multipart/form-data:
                schema:
                  $ref: '#/components/schemas/ColumnarUploadPostSchema'
          responses:
            201:
              description: Columnar upload response
              content:
                application/json:
                  schema:
                    type: object
                    properties:
                      message:
                        type: string
            400:
              $ref: '#/components/responses/400'
            401:
              $ref: '#/components/responses/401'
            404:
              $ref: '#/components/responses/404'
            422:
              $ref: '#/components/responses/422'
            500:
              $ref: '#/components/responses/500'
        """
        try:
            request_form = request.form.to_dict()
            request_form["file"] = request.files.get("file")
            parameters = ColumnarUploadPostSchema().load(request_form)
            UploadCommand(
                pk,
                parameters["table_name"],
                parameters["file"],
                parameters.get("schema"),
                ColumnarReader(parameters),
            ).run()
        except ValidationError as error:
            return self.response_400(message=error.messages)
        return self.response(201, message="OK")
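[Editor's note, not part of the diff: taken together, the endpoints above give a two-step client flow, probe the file, then upload it. A hedged sketch with `requests` follows; the host, database id, file name, and auth handling are assumptions.]

# Hypothetical sketch: metadata probe, then upload, against the new endpoints.
import requests

BASE = "http://localhost:8088/api/v1/database"  # assumed host
session = requests.Session()
# ... assume `session` already carries a valid Authorization header and CSRF token ...

with open("sample.parquet", "rb") as fd:
    meta = session.post(
        f"{BASE}/columnar_metadata/",
        files={"file": ("sample.parquet", fd)},
    )
print(meta.json())  # {"result": {"items": [{"column_names": [...], "sheet_name": None}]}}

with open("sample.parquet", "rb") as fd:
    resp = session.post(
        f"{BASE}/1/columnar_upload/",  # database pk 1 is an assumption
        data={"table_name": "my_table"},
        files={"file": ("sample.parquet", fd)},
    )
print(resp.status_code)  # 201 on success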
    @expose("/<int:pk>/function_names/", methods=("GET",))
    @protect()

@@ -22,7 +22,7 @@ from __future__ import annotations
import inspect
import json
import os
import re
from pathlib import Path
from typing import Any, TypedDict

from flask import current_app
@@ -1054,7 +1054,20 @@ class DelimitedListField(fields.List):
            ) from exc


class BaseUploadPostSchema(Schema):
class BaseUploadFilePostSchema(Schema):
    _extension_config_key = ""

    @validates("file")
    def validate_file_extension(self, file: FileStorage) -> None:
        allowed_extensions = current_app.config["ALLOWED_EXTENSIONS"].intersection(
            current_app.config[self._extension_config_key]
        )
        file_suffix = Path(file.filename).suffix
        if not file_suffix or file_suffix[1:] not in allowed_extensions:
            raise ValidationError([_("File extension is not allowed.")])


class BaseUploadPostSchema(BaseUploadFilePostSchema):
    already_exists = fields.String(
        load_default="fail",
        validate=OneOf(choices=("fail", "replace", "append")),
@@ -1063,6 +1076,79 @@ class BaseUploadPostSchema(Schema):
            "exists accepts: fail, replace, append"
        },
    )
    index_label = fields.String(
        metadata={"description": "Index label for index column."}
    )
    columns_read = DelimitedListField(
        fields.String(),
        metadata={"description": "A List of the column names that should be read"},
    )
    dataframe_index = fields.Boolean(
        metadata={"description": "Write dataframe index as a column."}
    )
    schema = fields.String(
        metadata={"description": "The schema to upload the data file to."}
    )
    table_name = fields.String(
        required=True,
        validate=[Length(min=1, max=10000)],
        allow_none=False,
        metadata={"description": "The name of the table to be created/appended"},
    )


class ColumnarUploadPostSchema(BaseUploadPostSchema):
    """
    Schema for Columnar Upload
    """

    _extension_config_key = "COLUMNAR_EXTENSIONS"

    file = fields.Raw(
        required=True,
        metadata={
            "description": "The Columnar file to upload",
            "type": "string",
            "format": "binary",
        },
    )


class CSVUploadPostSchema(BaseUploadPostSchema):
    """
    Schema for CSV Upload
    """

    _extension_config_key = "CSV_EXTENSIONS"

    file = fields.Raw(
        required=True,
        metadata={
            "description": "The CSV file to upload",
            "type": "string",
            "format": "text/csv",
        },
    )
    delimiter = fields.String(metadata={"description": "The delimiter of the CSV file"})
    column_data_types = fields.String(
        metadata={
            "description": "A dictionary with column names and "
            "their data types if you need to change "
            "the defaults. Example: {'user_id':'int'}. "
            "Check Python Pandas library for supported data types"
        }
    )
    day_first = fields.Boolean(
        metadata={
            "description": "DD/MM format dates, international and European format"
        }
    )
    skip_blank_lines = fields.Boolean(
        metadata={"description": "Skip blank lines in the CSV file."}
    )
    skip_initial_space = fields.Boolean(
        metadata={"description": "Skip spaces after delimiter."}
    )
    column_dates = DelimitedListField(
        fields.String(),
        metadata={
@@ -1070,23 +1156,6 @@ class BaseUploadPostSchema(Schema):
            "parsed as dates. Example: date,timestamp"
        },
    )
    column_labels = fields.String(
        metadata={
            "description": "Column label for index column(s). "
            "If None is given and Dataframe"
            "Index is checked, Index Names are used"
        }
    )
    columns_read = DelimitedListField(
        fields.String(),
        metadata={"description": "A List of the column names that should be read"},
    )
    dataframe_index = fields.String(
        metadata={
            "description": "Column to use as the row labels of the dataframe. "
            "Leave empty if no index column"
        }
    )
    decimal_character = fields.String(
        metadata={
            "description": "Character to recognize as decimal point. Default is '.'"
@@ -1120,54 +1189,10 @@ class BaseUploadPostSchema(Schema):
        allow_none=True,
        validate=Range(min=1),
    )
    schema = fields.String(
        metadata={"description": "The schema to upload the data file to."}
    )
    table_name = fields.String(
        required=True,
        validate=[Length(min=1, max=10000)],
        allow_none=False,
        metadata={"description": "The name of the table to be created/appended"},
    )
    skip_rows = fields.Integer(
        metadata={"description": "Number of rows to skip at start of file."}
    )


class CSVUploadPostSchema(BaseUploadPostSchema):
    """
    Schema for CSV Upload
    """

    file = fields.Raw(
        required=True,
        metadata={
            "description": "The CSV file to upload",
            "type": "string",
            "format": "text/csv",
        },
    )
    delimiter = fields.String(metadata={"description": "The delimiter of the CSV file"})
    column_data_types = fields.String(
        metadata={
            "description": "A dictionary with column names and "
            "their data types if you need to change "
            "the defaults. Example: {'user_id':'int'}. "
            "Check Python Pandas library for supported data types"
        }
    )
    day_first = fields.Boolean(
        metadata={
            "description": "DD/MM format dates, international and European format"
        }
    )
    skip_blank_lines = fields.Boolean(
        metadata={"description": "Skip blank lines in the CSV file."}
    )
    skip_initial_space = fields.Boolean(
        metadata={"description": "Skip spaces after delimiter."}
    )

    @post_load
    def convert_column_data_types(
        self, data: dict[str, Any], **kwargs: Any
@@ -1191,24 +1216,14 @@ class CSVUploadPostSchema(BaseUploadPostSchema):
        ):
            raise ValidationError([_("File size exceeds the maximum allowed size.")])

    @validates("file")
    def validate_file_extension(self, file: FileStorage) -> None:
        allowed_extensions = current_app.config["ALLOWED_EXTENSIONS"].intersection(
            current_app.config["CSV_EXTENSIONS"]
        )
        matches = re.match(r".+\.([^.]+)$", file.filename)
        if not matches:
            raise ValidationError([_("File extension is not allowed.")])
        extension = matches.group(1)
        if extension not in allowed_extensions:
            raise ValidationError([_("File extension is not allowed.")])


class ExcelUploadPostSchema(BaseUploadPostSchema):
    """
    Schema for Excel Upload
    """

    _extension_config_key = "EXCEL_EXTENSIONS"

    file = fields.Raw(
        required=True,
        metadata={
@@ -1223,18 +1238,129 @@ class ExcelUploadPostSchema(BaseUploadPostSchema):
            "(default is the first sheet)."
        }
    )
    column_dates = DelimitedListField(
        fields.String(),
        metadata={
            "description": "A list of column names that should be "
            "parsed as dates. Example: date,timestamp"
        },
    )
    decimal_character = fields.String(
        metadata={
            "description": "Character to recognize as decimal point. Default is '.'"
        }
    )
    header_row = fields.Integer(
        metadata={
            "description": "Row containing the headers to use as column names"
            "(0 is first line of data). Leave empty if there is no header row."
        }
    )
    index_column = fields.String(
        metadata={
            "description": "Column to use as the row labels of the dataframe. "
            "Leave empty if no index column"
        }
    )
    null_values = DelimitedListField(
        fields.String(),
        metadata={
            "description": "A list of strings that should be treated as null. "
            "Examples: '' for empty strings, 'None', 'N/A',"
            "Warning: Hive database supports only a single value"
        },
    )
    rows_to_read = fields.Integer(
        metadata={
            "description": "Number of rows to read from the file. "
            "If None, reads all rows."
        },
        allow_none=True,
        validate=Range(min=1),
    )
    skip_rows = fields.Integer(
        metadata={"description": "Number of rows to skip at start of file."}
    )

    @validates("file")
    def validate_file_extension(self, file: FileStorage) -> None:
        allowed_extensions = current_app.config["ALLOWED_EXTENSIONS"].intersection(
            current_app.config["EXCEL_EXTENSIONS"]
        )
        matches = re.match(r".+\.([^.]+)$", file.filename)
        if not matches:
            raise ValidationError([_("File extension is not allowed.")])
        extension = matches.group(1)
        if extension not in allowed_extensions:
            raise ValidationError([_("File extension is not allowed.")])


class CSVMetadataUploadFilePostSchema(BaseUploadFilePostSchema):
    """
    Schema for CSV metadata.
    """

    _extension_config_key = "CSV_EXTENSIONS"

    file = fields.Raw(
        required=True,
        metadata={
            "description": "The file to upload",
            "type": "string",
            "format": "binary",
        },
    )
    delimiter = fields.String(metadata={"description": "The delimiter of the CSV file"})
    header_row = fields.Integer(
        metadata={
            "description": "Row containing the headers to use as column names"
            "(0 is first line of data). Leave empty if there is no header row."
        }
    )


class ExcelMetadataUploadFilePostSchema(BaseUploadFilePostSchema):
    """
    Schema for Excel metadata.
    """

    _extension_config_key = "EXCEL_EXTENSIONS"

    file = fields.Raw(
        required=True,
        metadata={
            "description": "The file to upload",
            "type": "string",
            "format": "binary",
        },
    )
    header_row = fields.Integer(
        metadata={
            "description": "Row containing the headers to use as column names"
            "(0 is first line of data). Leave empty if there is no header row."
        }
    )


class ColumnarMetadataUploadFilePostSchema(BaseUploadFilePostSchema):
    """
    Schema for Columnar metadata.
    """

    _extension_config_key = "COLUMNAR_EXTENSIONS"

    file = fields.Raw(
        required=True,
        metadata={
            "description": "The file to upload",
            "type": "string",
            "format": "binary",
        },
    )


class UploadFileMetadataItemSchema(Schema):
    sheet_name = fields.String(metadata={"description": "The name of the sheet"})
    column_names = fields.List(
        fields.String(),
        metadata={"description": "A list of column names in the sheet"},
    )


class UploadFileMetadata(Schema):
    """
    Schema for upload file metadata response.
    """

    items = fields.List(fields.Nested(UploadFileMetadataItemSchema))


class OAuth2ProviderResponseSchema(Schema):
@@ -170,7 +170,7 @@ class SupersetAppInitializer:  # pylint: disable=too-many-public-methods
            DashboardModelView,
            DashboardModelViewAsync,
        )
        from superset.views.database.views import ColumnarToDatabaseView, DatabaseView
        from superset.views.database.views import DatabaseView
        from superset.views.datasource.views import DatasetEditor, Datasource
        from superset.views.dynamic_plugins import DynamicPluginsView
        from superset.views.explore import ExplorePermalinkView, ExploreView
@@ -291,7 +291,6 @@ class SupersetAppInitializer:  # pylint: disable=too-many-public-methods
        #
        appbuilder.add_view_no_menu(Api)
        appbuilder.add_view_no_menu(CssTemplateAsyncModelView)
        appbuilder.add_view_no_menu(ColumnarToDatabaseView)
        appbuilder.add_view_no_menu(Dashboard)
        appbuilder.add_view_no_menu(DashboardModelViewAsync)
        appbuilder.add_view_no_menu(Datasource)
@@ -0,0 +1,88 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""mig new columnar upload perm

Revision ID: 4a33124c18ad
Revises: 5f57af97bc3f
Create Date: 2024-04-26 12:36:07.800489

"""

# revision identifiers, used by Alembic.
revision = "4a33124c18ad"
down_revision = "5f57af97bc3f"


from alembic import op  # noqa: E402
from sqlalchemy.exc import SQLAlchemyError  # noqa: E402
from sqlalchemy.orm import Session  # noqa: E402

from superset.migrations.shared.security_converge import (  # noqa: E402
    add_pvms,
    get_reversed_new_pvms,
    get_reversed_pvm_map,
    migrate_roles,
    Pvm,
)

NEW_PVMS = {"Database": ("can_columnar_upload",)}

PVM_MAP = {
    Pvm("ColumnarToDatabaseView", "can_this_form_post"): (
        Pvm("Database", "can_columnar_upload"),
    ),
    Pvm("ColumnarToDatabaseView", "can_this_form_get"): (
        Pvm("Database", "can_columnar_upload"),
    ),
}


def do_upgrade(session: Session) -> None:
    add_pvms(session, NEW_PVMS)
    migrate_roles(session, PVM_MAP)


def do_downgrade(session: Session) -> None:
    add_pvms(session, get_reversed_new_pvms(PVM_MAP))
    migrate_roles(session, get_reversed_pvm_map(PVM_MAP))


def upgrade():
    bind = op.get_bind()
    session = Session(bind=bind)

    do_upgrade(session)

    try:
        session.commit()
    except SQLAlchemyError as ex:
        session.rollback()
        raise Exception(f"An error occurred while upgrading permissions: {ex}")


def downgrade():
    bind = op.get_bind()
    session = Session(bind=bind)

    do_downgrade(session)

    try:
        session.commit()
    except SQLAlchemyError as ex:
        print(f"An error occurred while downgrading permissions: {ex}")
        session.rollback()
        pass
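[Editor's note, not part of the diff: like other security-converge migrations in Superset, this one is applied with `superset db upgrade`. The `PVM_MAP` folds the two old `ColumnarToDatabaseView` form permissions into the single new `can_columnar_upload` permission on `Database`, and the downgrade path reverses that mapping.]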
@@ -1,174 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Contains the logic to create cohesive forms on the explore view"""

from flask_appbuilder.fields import QuerySelectField
from flask_appbuilder.fieldwidgets import BS3TextFieldWidget
from flask_appbuilder.forms import DynamicForm
from flask_babel import lazy_gettext as _
from flask_wtf.file import FileAllowed
from wtforms import BooleanField, MultipleFileField, SelectField, StringField
from wtforms.validators import DataRequired, Optional, Regexp

from superset import app, db, security_manager
from superset.forms import JsonListField
from superset.models.core import Database

config = app.config


class UploadToDatabaseForm(DynamicForm):
    @staticmethod
    def file_allowed_dbs() -> list[Database]:
        file_enabled_dbs = (
            db.session.query(Database).filter_by(allow_file_upload=True).all()
        )
        return [
            file_enabled_db
            for file_enabled_db in file_enabled_dbs
            if UploadToDatabaseForm.at_least_one_schema_is_allowed(file_enabled_db)
            and UploadToDatabaseForm.is_engine_allowed_to_file_upl(file_enabled_db)
        ]

    @staticmethod
    def at_least_one_schema_is_allowed(database: Database) -> bool:
        """
        If the user has access to the database or all datasource
        1. if schemas_allowed_for_file_upload is empty
            a) if database does not support schema
                user is able to upload csv without specifying schema name
            b) if database supports schema
                user is able to upload csv to any schema
        2. if schemas_allowed_for_file_upload is not empty
            a) if database does not support schema
                This situation is impossible and upload will fail
            b) if database supports schema
                user is able to upload to schema in schemas_allowed_for_file_upload
        elif the user does not access to the database or all datasource
        1. if schemas_allowed_for_file_upload is empty
            a) if database does not support schema
                user is unable to upload csv
            b) if database supports schema
                user is unable to upload csv
        2. if schemas_allowed_for_file_upload is not empty
            a) if database does not support schema
                This situation is impossible and user is unable to upload csv
            b) if database supports schema
                user is able to upload to schema in schemas_allowed_for_file_upload
        """
        if security_manager.can_access_database(database):
            return True
        schemas = database.get_schema_access_for_file_upload()
        if schemas and security_manager.get_schemas_accessible_by_user(
            database, schemas, False
        ):
            return True
        return False

    @staticmethod
    def is_engine_allowed_to_file_upl(database: Database) -> bool:
        """
        This method is mainly used for existing Gsheets and Clickhouse DBs
        that have allow_file_upload set as True but they are no longer valid
        DBs for file uploading.
        New GSheets and Clickhouse DBs won't have the option to set
        allow_file_upload set as True.
        """
        if database.db_engine_spec.supports_file_upload:
            return True
        return False


class ColumnarToDatabaseForm(UploadToDatabaseForm):
    name = StringField(
        _("Table Name"),
        description=_("Name of table to be created from columnar data."),
        validators=[
            DataRequired(),
            Regexp(r"^[^\.]+$", message=_("Table name cannot contain a schema")),
        ],
        widget=BS3TextFieldWidget(),
    )
    columnar_file = MultipleFileField(
        _("Columnar File"),
        description=_("Select a Columnar file to be uploaded to a database."),
        validators=[
            DataRequired(),
            FileAllowed(
                config["ALLOWED_EXTENSIONS"].intersection(
                    config["COLUMNAR_EXTENSIONS"]
                ),
                _(
                    "Only the following file extensions are allowed: "
                    "%(allowed_extensions)s",
                    allowed_extensions=", ".join(
                        config["ALLOWED_EXTENSIONS"].intersection(
                            config["COLUMNAR_EXTENSIONS"]
                        )
                    ),
                ),
            ),
        ],
    )

    database = QuerySelectField(
        _("Database"),
        query_func=UploadToDatabaseForm.file_allowed_dbs,
        get_pk_func=lambda a: a.id,
        get_label=lambda a: a.database_name,
    )
    schema = StringField(
        _("Schema"),
        description=_("Specify a schema (if database flavor supports this)."),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )
    if_exists = SelectField(
        _("Table Exists"),
        description=_(
            "If table exists do one of the following: "
            "Fail (do nothing), Replace (drop and recreate table) "
            "or Append (insert data)."
        ),
        choices=[
            ("fail", _("Fail")),
            ("replace", _("Replace")),
            ("append", _("Append")),
        ],
        validators=[DataRequired()],
    )
    usecols = JsonListField(
        _("Use Columns"),
        default=None,
        description=_(
            "Json list of the column names that should be read. "
            "If not None, only these columns will be read from the file."
        ),
        validators=[Optional()],
    )
    index = BooleanField(
        _("Dataframe Index"), description=_("Write dataframe index as a column.")
    )
    index_label = StringField(
        _("Column Label(s)"),
        description=_(
            "Column label for index column(s). If None is given "
            "and Dataframe Index is True, Index Names are used."
        ),
        validators=[Optional()],
        widget=BS3TextFieldWidget(),
    )
@@ -14,34 +14,26 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import io
import zipfile
from typing import Any, TYPE_CHECKING

import pandas as pd
from flask import flash, g, redirect
from flask import redirect
from flask_appbuilder import expose, SimpleFormView
from flask_appbuilder.models.sqla.interface import SQLAInterface
from flask_appbuilder.security.decorators import has_access
from flask_babel import lazy_gettext as _
from werkzeug.wrappers import Response
from wtforms.fields import StringField
from wtforms.validators import ValidationError

import superset.models.core as models
from superset import app, db
from superset.connectors.sqla.models import SqlaTable
from superset import app
from superset.constants import MODEL_VIEW_RW_METHOD_PERMISSION_MAP, RouteMethod
from superset.exceptions import CertificateException
from superset.extensions import event_logger
from superset.sql_parse import Table
from superset.superset_typing import FlaskResponse
from superset.utils import core as utils
from superset.views.base import DeleteMixin, SupersetModelView, YamlExportMixin

from .forms import ColumnarToDatabaseForm
from .mixins import DatabaseMixin
from .validators import schema_allows_file_upload, sqlalchemy_uri_validator
from .validators import sqlalchemy_uri_validator

if TYPE_CHECKING:
    from werkzeug.datastructures import FileStorage
@@ -148,145 +140,3 @@ class CustomFormView(SimpleFormView):
            form=form,
            appbuilder=self.appbuilder,
        )


class ColumnarToDatabaseView(SimpleFormView):
    form = ColumnarToDatabaseForm
    form_template = "superset/form_view/columnar_to_database_view/edit.html"
    form_title = _("Columnar to Database configuration")
    add_columns = ["database", "schema", "table_name"]

    def form_get(self, form: ColumnarToDatabaseForm) -> None:
        form.if_exists.data = "fail"

    def form_post(  # pylint: disable=too-many-locals
        self, form: ColumnarToDatabaseForm
    ) -> Response:
        database = form.database.data
        columnar_table = Table(table=form.name.data, schema=form.schema.data)
        files = form.columnar_file.data
        file_type = {file.filename.split(".")[-1] for file in files}

        if file_type == {"zip"}:
            zipfile_ob = zipfile.ZipFile(  # pylint: disable=consider-using-with
                form.columnar_file.data[0]
            )
            file_type = {filename.split(".")[-1] for filename in zipfile_ob.namelist()}
            files = [
                # pylint: disable=consider-using-with
                io.BytesIO((zipfile_ob.open(filename).read(), filename)[0])
                for filename in zipfile_ob.namelist()
            ]

        if len(file_type) > 1:
            message = _(
                "Multiple file extensions are not allowed for columnar uploads."
                " Please make sure all files are of the same extension.",
            )
            flash(message, "danger")
            return redirect("/columnartodatabaseview/form")

        read = pd.read_parquet
        kwargs = {
            "columns": form.usecols.data if form.usecols.data else None,
        }

        if not schema_allows_file_upload(database, columnar_table.schema):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for columnar uploads. "
                "Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=columnar_table.schema,
            )
            flash(message, "danger")
            return redirect("/columnartodatabaseview/form")

        try:
            chunks = [read(file, **kwargs) for file in files]
            df = pd.concat(chunks)

            database = (
                db.session.query(models.Database)
                .filter_by(id=form.data.get("database").data.get("id"))
                .one()
            )

            database.db_engine_spec.df_to_sql(
                database,
                columnar_table,
                df,
                to_sql_kwargs={
                    "chunksize": 1000,
                    "if_exists": form.if_exists.data,
                    "index": form.index.data,
                    "index_label": form.index_label.data,
                },
            )

            # Connect table to the database that should be used for exploration.
            # E.g. if hive was used to upload a csv, presto will be a better option
            # to explore the table.
            explore_database = database
            explore_database_id = database.explore_database_id
            if explore_database_id:
                explore_database = (
                    db.session.query(models.Database)
                    .filter_by(id=explore_database_id)
                    .one_or_none()
                    or database
                )

            sqla_table = (
                db.session.query(SqlaTable)
                .filter_by(
                    table_name=columnar_table.table,
                    schema=columnar_table.schema,
                    database_id=explore_database.id,
                )
                .one_or_none()
            )

            if sqla_table:
                sqla_table.fetch_metadata()
            if not sqla_table:
                sqla_table = SqlaTable(table_name=columnar_table.table)
                sqla_table.database = explore_database
                sqla_table.database_id = database.id
                sqla_table.owners = [g.user]
                sqla_table.schema = columnar_table.schema
                sqla_table.fetch_metadata()
                db.session.add(sqla_table)
            db.session.commit()
        except Exception as ex:  # pylint: disable=broad-except
            db.session.rollback()
            message = _(
                'Unable to upload Columnar file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=[file.filename for file in form.columnar_file.data],
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(ex),
            )

            flash(message, "danger")
            stats_logger.incr("failed_columnar_upload")
            return redirect("/columnartodatabaseview/form")

        # Go back to welcome page / splash screen
        message = _(
            'Columnar file "%(columnar_filename)s" uploaded to table "%(table_name)s" '
            'in database "%(db_name)s"',
            columnar_filename=[file.filename for file in form.columnar_file.data],
            table_name=str(columnar_table),
            db_name=sqla_table.database.database_name,
        )
        flash(message, "info")
        event_logger.log_with_context(
            action="successful_columnar_upload",
            database=form.database.data.name,
            schema=form.schema.data,
            table=form.name.data,
        )
        return redirect("/tablemodelview/list/")
@ -1,237 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# isort:skip_file
"""Unit tests for Superset CSV upload"""

import json
import logging
import os
import shutil
from typing import Optional

from unittest import mock

import pandas as pd
import pytest

import superset.utils.database
from superset.sql_parse import Table
from tests.integration_tests.conftest import ADMIN_SCHEMA_NAME  # noqa: F401
from superset import db
from superset import security_manager
from superset.models.core import Database
from superset.utils import core as utils
from tests.integration_tests.test_app import app, login
from tests.integration_tests.base_tests import get_resp, SupersetTestCase

logger = logging.getLogger(__name__)


test_client = app.test_client()

CSV_UPLOAD_DATABASE = "csv_explore_db"
EXCEL_FILENAME = "testExcel.xlsx"
PARQUET_FILENAME1 = "testZip/testParquet1.parquet"
PARQUET_FILENAME2 = "testZip/testParquet2.parquet"
ZIP_DIRNAME = "testZip"
ZIP_FILENAME = "testZip.zip"

EXCEL_UPLOAD_TABLE = "excel_upload"
CSV_UPLOAD_TABLE = "csv_upload"
PARQUET_UPLOAD_TABLE = "parquet_upload"
CSV_UPLOAD_TABLE_W_SCHEMA = "csv_upload_w_schema"
CSV_UPLOAD_TABLE_W_EXPLORE = "csv_upload_w_explore"
def _setup_csv_upload():
    upload_db = superset.utils.database.get_or_create_db(
        CSV_UPLOAD_DATABASE, app.config["SQLALCHEMY_EXAMPLES_URI"]
    )
    extra = upload_db.get_extra()
    extra["explore_database_id"] = superset.utils.database.get_example_database().id
    upload_db.extra = json.dumps(extra)
    upload_db.allow_file_upload = True
    db.session.commit()

    yield

    upload_db = get_upload_db()
    with upload_db.get_sqla_engine() as engine:
        engine.execute(f"DROP TABLE IF EXISTS {EXCEL_UPLOAD_TABLE}")
        engine.execute(f"DROP TABLE IF EXISTS {CSV_UPLOAD_TABLE}")
        engine.execute(f"DROP TABLE IF EXISTS {PARQUET_UPLOAD_TABLE}")
        engine.execute(f"DROP TABLE IF EXISTS {CSV_UPLOAD_TABLE_W_SCHEMA}")
        engine.execute(f"DROP TABLE IF EXISTS {CSV_UPLOAD_TABLE_W_EXPLORE}")
    db.session.delete(upload_db)
    db.session.commit()


@pytest.fixture(scope="module")
def setup_csv_upload(login_as_admin):
    yield from _setup_csv_upload()


@pytest.fixture(scope="module")
def setup_csv_upload_with_context():
    with app.app_context():
        login(test_client, username="admin")
        yield from _setup_csv_upload()
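Editor's note: the two module-scoped fixtures above delegate to one generator via yield from, so the code before yield runs once as setup and the code after it once as teardown. The bare pattern, with hypothetical names:

import pytest


def _setup():
    resource = {"ready": True}  # setup work
    yield resource
    resource.clear()  # teardown work, runs after the dependent tests finish


@pytest.fixture(scope="module")
def shared_resource():
    yield from _setup()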
@pytest.fixture()
def create_columnar_files():
    os.mkdir(ZIP_DIRNAME)
    pd.DataFrame({"a": ["john", "paul"], "b": [1, 2]}).to_parquet(PARQUET_FILENAME1)
    pd.DataFrame({"a": ["max", "bob"], "b": [3, 4]}).to_parquet(PARQUET_FILENAME2)
    shutil.make_archive(ZIP_DIRNAME, "zip", ZIP_DIRNAME)
    yield
    os.remove(ZIP_FILENAME)
    shutil.rmtree(ZIP_DIRNAME)
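Editor's note: shutil.make_archive(base_name, format, root_dir) zips the contents of root_dir into base_name.zip, which is why the fixture yields a testZip.zip holding both parquet files at the archive root. In isolation, with hypothetical names:

import os
import shutil

import pandas as pd

os.mkdir("demo_dir")
pd.DataFrame({"a": [1]}).to_parquet("demo_dir/part.parquet")
shutil.make_archive("demo_dir", "zip", "demo_dir")  # writes demo_dir.zip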
def get_upload_db():
    return db.session.query(Database).filter_by(database_name=CSV_UPLOAD_DATABASE).one()


def upload_columnar(
    filename: str, table_name: str, extra: Optional[dict[str, str]] = None
):
    columnar_upload_db_id = get_upload_db().id
    form_data = {
        "columnar_file": open(filename, "rb"),
        "name": table_name,
        "database": columnar_upload_db_id,
        "if_exists": "fail",
        "index_label": "test_label",
    }
    if schema := utils.get_example_default_schema():
        form_data["schema"] = schema
    if extra:
        form_data.update(extra)
    return get_resp(test_client, "/columnartodatabaseview/form", data=form_data)


def mock_upload_to_s3(filename: str, upload_prefix: str, table: Table) -> str:
    """
    HDFS is used instead of S3 for the integration tests.

    :param filename: The file to upload
    :param upload_prefix: The S3 prefix
    :param table: The table that will be created
    :returns: The HDFS path to the directory with external table files
    """
    # only needed for the hive tests
    import docker

    client = docker.from_env()  # type: ignore
    container = client.containers.get("namenode")
    # docker mounted volume that contains csv uploads
    src = os.path.join("/tmp/superset_uploads", os.path.basename(filename))
    # hdfs destination for the external tables
    dest_dir = os.path.join("/tmp/external/superset_uploads/", str(table))
    container.exec_run(f"hdfs dfs -mkdir -p {dest_dir}")
    dest = os.path.join(dest_dir, os.path.basename(filename))
    container.exec_run(f"hdfs dfs -put {src} {dest}")
    # hive external table expects a directory for the location
    return dest_dir


def escaped_double_quotes(text):
    return rf"\"{text}\""


def escaped_parquet(text):
    return escaped_double_quotes(f"['{text}']")


@pytest.mark.usefixtures("setup_csv_upload_with_context")
@pytest.mark.usefixtures("create_columnar_files")
@mock.patch("superset.db_engine_specs.hive.upload_to_s3", mock_upload_to_s3)
@mock.patch("superset.views.database.views.event_logger.log_with_context")
def test_import_parquet(mock_event_logger):
    if utils.backend() == "hive":
        pytest.skip("Hive doesn't allow parquet upload.")

    schema = utils.get_example_default_schema()
    full_table_name = (
        f"{schema}.{PARQUET_UPLOAD_TABLE}" if schema else PARQUET_UPLOAD_TABLE
    )
    test_db = get_upload_db()

    success_msg_f1 = f"Columnar file {escaped_parquet(PARQUET_FILENAME1)} uploaded to table {escaped_double_quotes(full_table_name)}"

    # initial upload with fail mode
    resp = upload_columnar(PARQUET_FILENAME1, PARQUET_UPLOAD_TABLE)
    assert success_msg_f1 in resp

    # upload again with fail mode; should fail
    fail_msg = f"Unable to upload Columnar file {escaped_parquet(PARQUET_FILENAME1)} to table {escaped_double_quotes(PARQUET_UPLOAD_TABLE)}"
    resp = upload_columnar(PARQUET_FILENAME1, PARQUET_UPLOAD_TABLE)
    assert fail_msg in resp

    if utils.backend() != "hive":
        # upload again with append mode
        resp = upload_columnar(
            PARQUET_FILENAME1, PARQUET_UPLOAD_TABLE, extra={"if_exists": "append"}
        )
        assert success_msg_f1 in resp
        mock_event_logger.assert_called_with(
            action="successful_columnar_upload",
            database=test_db.name,
            schema=schema,
            table=PARQUET_UPLOAD_TABLE,
        )

    # upload again with replace mode and specific columns
    resp = upload_columnar(
        PARQUET_FILENAME1,
        PARQUET_UPLOAD_TABLE,
        extra={"if_exists": "replace", "usecols": '["a"]'},
    )
    assert success_msg_f1 in resp

    table = SupersetTestCase.get_table(name=PARQUET_UPLOAD_TABLE, schema=None)
    # make sure only specified column name was read
    assert "b" not in table.column_names

    # ensure user is assigned as an owner
    assert security_manager.find_user("admin") in table.owners

    # upload again with replace mode
    resp = upload_columnar(
        PARQUET_FILENAME1, PARQUET_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    assert success_msg_f1 in resp

    with test_db.get_sqla_engine() as engine:
        data = engine.execute(
            f"SELECT * from {PARQUET_UPLOAD_TABLE} ORDER BY b"
        ).fetchall()
        assert data == [("john", 1), ("paul", 2)]

    # replace table with zip file
    resp = upload_columnar(
        ZIP_FILENAME, PARQUET_UPLOAD_TABLE, extra={"if_exists": "replace"}
    )
    success_msg_f2 = f"Columnar file {escaped_parquet(ZIP_FILENAME)} uploaded to table {escaped_double_quotes(full_table_name)}"
    assert success_msg_f2 in resp

    with test_db.get_sqla_engine() as engine:
        data = engine.execute(
            f"SELECT * from {PARQUET_UPLOAD_TABLE} ORDER BY b"
        ).fetchall()
        assert data == [("john", 1), ("paul", 2), ("max", 3), ("bob", 4)]
@@ -1448,6 +1448,7 @@ class TestDatabaseApi(SupersetTestCase):
        assert rv.status_code == 200
        assert set(data["permissions"]) == {
            "can_read",
+           "can_columnar_upload",
            "can_csv_upload",
            "can_excel_upload",
            "can_write",
@@ -138,6 +138,38 @@ def test_csv_upload_dataset():
    assert security_manager.find_user("admin") in dataset.owners


@pytest.mark.usefixtures("setup_csv_upload_with_context")
def test_csv_upload_with_index():
    admin_user = security_manager.find_user(username="admin")
    upload_database = get_upload_db()

    with override_user(admin_user):
        UploadCommand(
            upload_database.id,
            CSV_UPLOAD_TABLE,
            create_csv_file(CSV_FILE_1),
            None,
            CSVReader({"dataframe_index": True, "index_label": "id"}),
        ).run()
    with upload_database.get_sqla_engine() as engine:
        data = engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").fetchall()
        assert data == [
            (0, "name1", 30, "city1", "1-1-1980"),
            (1, "name2", 29, "city2", "1-1-1981"),
            (2, "name3", 28, "city3", "1-1-1982"),
        ]
        # assert column names
        assert [
            col for col in engine.execute(f"SELECT * from {CSV_UPLOAD_TABLE}").keys()
        ] == [
            "id",
            "Name",
            "Age",
            "City",
            "Birth",
        ]


@only_postgresql
@pytest.mark.usefixtures("setup_csv_upload_with_context")
def test_csv_upload_database_not_found():
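Editor's note: the dataframe_index/index_label reader options map directly onto pandas' to_sql arguments, which is where the extra leading "id" column in the assertions comes from. A minimal sketch, assuming SQLite:

import sqlite3

import pandas as pd

df = pd.DataFrame({"Name": ["name1"], "Age": [30]})
with sqlite3.connect(":memory:") as conn:
    # index=True plus index_label="id" writes the frame's index as an "id" column
    df.to_sql("csv_upload", conn, index=True, index_label="id")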
@@ -0,0 +1,253 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import io
import tempfile
from typing import Any
from zipfile import ZipFile

import numpy as np
import pytest
from werkzeug.datastructures import FileStorage

from superset.commands.database.exceptions import DatabaseUploadFailed
from superset.commands.database.uploaders.columnar_reader import (
    ColumnarReader,
    ColumnarReaderOptions,
)
from tests.unit_tests.fixtures.common import create_columnar_file

COLUMNAR_DATA: dict[str, list[Any]] = {
    "Name": ["name1", "name2", "name3"],
    "Age": [30, 25, 20],
    "City": ["city1", "city2", "city3"],
    "Birth": ["1990-02-01", "1995-02-01", "2000-02-01"],
}

COLUMNAR_WITH_NULLS: dict[str, list[Any]] = {
    "Name": ["name1", "name2", "name3"],
    "Age": [None, 25, 20],
    "City": ["city1", None, "city3"],
    "Birth": ["1990-02-01", "1995-02-01", "2000-02-01"],
}


COLUMNAR_WITH_FLOATS: dict[str, list[Any]] = {
    "Name": ["name1", "name2", "name3"],
    "Age": [30.1, 25.1, 20.1],
    "City": ["city1", "city2", "city3"],
    "Birth": ["1990-02-01", "1995-02-01", "2000-02-01"],
}
@pytest.mark.parametrize(
    "file, options, expected_cols, expected_values",
    [
        (
            create_columnar_file(COLUMNAR_DATA),
            ColumnarReaderOptions(),
            ["Name", "Age", "City", "Birth"],
            [
                ["name1", 30, "city1", "1990-02-01"],
                ["name2", 25, "city2", "1995-02-01"],
                ["name3", 20, "city3", "2000-02-01"],
            ],
        ),
        (
            create_columnar_file(COLUMNAR_DATA),
            ColumnarReaderOptions(
                columns_read=["Name", "Age"],
            ),
            ["Name", "Age"],
            [
                ["name1", 30],
                ["name2", 25],
                ["name3", 20],
            ],
        ),
        (
            create_columnar_file(COLUMNAR_DATA),
            ColumnarReaderOptions(
                columns_read=[],
            ),
            ["Name", "Age", "City", "Birth"],
            [
                ["name1", 30, "city1", "1990-02-01"],
                ["name2", 25, "city2", "1995-02-01"],
                ["name3", 20, "city3", "2000-02-01"],
            ],
        ),
        (
            create_columnar_file(COLUMNAR_WITH_NULLS),
            ColumnarReaderOptions(),
            ["Name", "Age", "City", "Birth"],
            [
                ["name1", np.nan, "city1", "1990-02-01"],
                ["name2", 25, None, "1995-02-01"],
                ["name3", 20, "city3", "2000-02-01"],
            ],
        ),
        (
            create_columnar_file(COLUMNAR_WITH_FLOATS),
            ColumnarReaderOptions(),
            ["Name", "Age", "City", "Birth"],
            [
                ["name1", 30.1, "city1", "1990-02-01"],
                ["name2", 25.1, "city2", "1995-02-01"],
                ["name3", 20.1, "city3", "2000-02-01"],
            ],
        ),
    ],
)
def test_columnar_reader_file_to_dataframe(
    file, options, expected_cols, expected_values
):
    reader = ColumnarReader(
        options=options,
    )
    df = reader.file_to_dataframe(file)
    assert df.columns.tolist() == expected_cols
    actual_values = df.values.tolist()
    for i in range(len(expected_values)):
        for j in range(len(expected_values[i])):
            expected_val = expected_values[i][j]
            actual_val = actual_values[i][j]

            # Treat NaN == NaN as equal; otherwise compare the values directly
            if isinstance(expected_val, float) and np.isnan(expected_val):
                assert np.isnan(actual_val)
            else:
                assert expected_val == actual_val
    file.close()
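Editor's note: the element-wise loop above is needed because NaN != NaN under the == operator; pandas' own test helper treats NaNs in the same position as equal, so an equivalent check could also be written as (a sketch):

import numpy as np
import pandas as pd

left = pd.DataFrame({"Age": [np.nan, 25.0]})
right = pd.DataFrame({"Age": [np.nan, 25.0]})
pd.testing.assert_frame_equal(left, right)  # NaN == NaN for comparison purposes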
def test_columnar_reader_wrong_columns_to_read():
    reader = ColumnarReader(
        options=ColumnarReaderOptions(columns_read=["xpto"]),
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
        reader.file_to_dataframe(create_columnar_file(COLUMNAR_DATA))
    assert str(ex.value) == (
        "Parsing error: No match for FieldRef.Name(xpto) in Name: string\n"
        "Age: int64\n"
        "City: string\n"
        "Birth: string\n"
        "__fragment_index: int32\n"
        "__batch_index: int32\n"
        "__last_in_fragment: bool\n"
        "__filename: string"
    )
def test_columnar_reader_invalid_file():
    reader = ColumnarReader(
        options=ColumnarReaderOptions(),
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
        reader.file_to_dataframe(FileStorage(io.BytesIO(b"c1"), "test.parquet"))
    assert str(ex.value) == (
        "Parsing error: Could not open Parquet input source '<Buffer>': Parquet file "
        "size is 2 bytes, smaller than the minimum file footer (8 bytes)"
    )


def test_columnar_reader_zip():
    reader = ColumnarReader(
        options=ColumnarReaderOptions(),
    )
    file1 = create_columnar_file(COLUMNAR_DATA, "test1.parquet")
    file2 = create_columnar_file(COLUMNAR_DATA, "test2.parquet")

    with tempfile.NamedTemporaryFile(delete=False) as tmp_file1:
        tmp_file1.write(file1.read())
        tmp_file1.seek(0)

    with tempfile.NamedTemporaryFile(delete=False) as tmp_file2:
        tmp_file2.write(file2.read())
        tmp_file2.seek(0)

    with tempfile.NamedTemporaryFile(delete=False) as tmp_zip:
        with ZipFile(tmp_zip, "w") as zip_file:
            zip_file.write(tmp_file1.name, "test1.parquet")
            zip_file.write(tmp_file2.name, "test2.parquet")
        tmp_zip.seek(0)  # Reset file pointer to beginning
        df = reader.file_to_dataframe(FileStorage(tmp_zip, "test.zip"))
        assert df.columns.tolist() == ["Name", "Age", "City", "Birth"]
        assert df.values.tolist() == [
            ["name1", 30, "city1", "1990-02-01"],
            ["name2", 25, "city2", "1995-02-01"],
            ["name3", 20, "city3", "2000-02-01"],
            ["name1", 30, "city1", "1990-02-01"],
            ["name2", 25, "city2", "1995-02-01"],
            ["name3", 20, "city3", "2000-02-01"],
        ]
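Editor's note: the zip test above implies the reader concatenates every parquet member of the archive in order. A sketch of that behaviour with the standard library and pandas (a hypothetical helper, not necessarily ColumnarReader's implementation):

import io
import zipfile

import pandas as pd


def zip_to_dataframe(zip_bytes: bytes) -> pd.DataFrame:
    # Read each archive member as parquet and stack the frames in order.
    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as archive:
        frames = [
            pd.read_parquet(io.BytesIO(archive.read(name)))
            for name in archive.namelist()
        ]
    return pd.concat(frames, ignore_index=True)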
def test_columnar_reader_bad_parquet_in_zip():
    reader = ColumnarReader(
        options=ColumnarReaderOptions(),
    )
    with tempfile.NamedTemporaryFile(delete=False) as tmp_zip:
        with ZipFile(tmp_zip, "w") as zip_file:
            zip_file.writestr("test1.parquet", b"bad parquet file")
            zip_file.writestr("test2.parquet", b"bad parquet file")
        tmp_zip.seek(0)  # Reset file pointer to beginning
        with pytest.raises(DatabaseUploadFailed) as ex:
            reader.file_to_dataframe(FileStorage(tmp_zip, "test.zip"))
        assert str(ex.value) == (
            "Parsing error: Could not open Parquet input source '<Buffer>': "
            "Parquet magic bytes not found in footer. "
            "Either the file is corrupted or this is not a parquet file."
        )


def test_columnar_reader_bad_zip():
    reader = ColumnarReader(
        options=ColumnarReaderOptions(),
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
        reader.file_to_dataframe(FileStorage(io.BytesIO(b"bad zip file"), "test.zip"))
    assert str(ex.value) == "Not a valid ZIP file"


def test_columnar_reader_metadata():
    reader = ColumnarReader(
        options=ColumnarReaderOptions(),
    )
    file = create_columnar_file(COLUMNAR_DATA)
    metadata = reader.file_metadata(file)
    column_names = sorted(metadata["items"][0]["column_names"])
    assert column_names == ["Age", "Birth", "City", "Name"]
    assert metadata["items"][0]["sheet_name"] is None


def test_columnar_reader_metadata_invalid_file():
    reader = ColumnarReader(
        options=ColumnarReaderOptions(),
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
        reader.file_metadata(FileStorage(io.BytesIO(b"c1"), "test.parquet"))
    assert str(ex.value) == (
        "Parsing error: Parquet file size is 2 bytes, "
        "smaller than the minimum file footer (8 bytes)"
    )
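Editor's note: file_metadata only needs column names, which parquet exposes in the file footer without reading any rows; with pyarrow that is a one-liner (a sketch of the likely mechanism, not necessarily the reader's code):

import pyarrow.parquet as pq

schema = pq.read_schema("test.parquet")  # hypothetical local file
column_names = schema.names  # e.g. ["Name", "Age", "City", "Birth"]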
@@ -19,6 +19,7 @@ from datetime import datetime
import numpy as np
import pytest
+from werkzeug.datastructures import FileStorage

from superset.commands.database.exceptions import DatabaseUploadFailed
from superset.commands.database.uploaders.csv_reader import CSVReader, CSVReaderOptions
@@ -265,6 +266,23 @@ def test_csv_reader_file_to_dataframe(file, options, expected_cols, expected_values):
    file.close()


def test_csv_reader_index_column():
    csv_reader = CSVReader(
        options=CSVReaderOptions(index_column="Name"),
    )
    df = csv_reader.file_to_dataframe(create_csv_file(CSV_DATA))
    assert df.index.name == "Name"


def test_csv_reader_wrong_index_column():
    csv_reader = CSVReader(
        options=CSVReaderOptions(index_column="wrong"),
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
        csv_reader.file_to_dataframe(create_csv_file(CSV_DATA))
    assert str(ex.value) == "Parsing error: Index wrong invalid"


def test_csv_reader_broken_file_no_columns():
    csv_reader = CSVReader(
        options=CSVReaderOptions(),
@@ -292,7 +310,9 @@ def test_csv_reader_invalid_file():
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
        csv_reader.file_to_dataframe(
-            io.StringIO("c1,c2,c3\na,b,c\n1,2,3,4,5,6,7\n1,2,3")
+            FileStorage(
+                io.StringIO("c1,c2,c3\na,b,c\n1,2,3,4,5,6,7\n1,2,3"), filename=""
+            )
        )
    assert str(ex.value) == (
        "Parsing error: Error tokenizing data. C error:"
@@ -306,8 +326,48 @@ def test_csv_reader_invalid_encoding():
    )
    binary_data = b"col1,col2,col3\nv1,v2,\xba\nv3,v4,v5\n"
    with pytest.raises(DatabaseUploadFailed) as ex:
-        csv_reader.file_to_dataframe(io.BytesIO(binary_data))
+        csv_reader.file_to_dataframe(FileStorage(io.BytesIO(binary_data)))
    assert str(ex.value) == (
        "Parsing error: 'utf-8' codec can't decode byte 0xba in"
        " position 21: invalid start byte"
    )


def test_csv_reader_file_metadata():
    csv_reader = CSVReader(
        options=CSVReaderOptions(),
    )
    file = create_csv_file(CSV_DATA)
    metadata = csv_reader.file_metadata(file)
    assert metadata == {
        "items": [
            {"column_names": ["Name", "Age", "City", "Birth"], "sheet_name": None}
        ]
    }
    file.close()

    file = create_csv_file(CSV_DATA, delimiter="|")
    csv_reader = CSVReader(
        options=CSVReaderOptions(delimiter="|"),
    )
    metadata = csv_reader.file_metadata(file)
    assert metadata == {
        "items": [
            {"column_names": ["Name", "Age", "City", "Birth"], "sheet_name": None}
        ]
    }
    file.close()


def test_csv_reader_file_metadata_invalid_file():
    csv_reader = CSVReader(
        options=CSVReaderOptions(),
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
        csv_reader.file_metadata(
            FileStorage(io.StringIO("c1,c2,c3\na,b,c\n1,2,3,4,5,6,7\n1,2,3"))
        )
    assert str(ex.value) == (
        "Parsing error: Error tokenizing data. C error:"
        " Expected 3 fields in line 3, saw 7\n"
    )
@@ -20,6 +20,9 @@ from typing import Any
import numpy as np
import pytest
+import xlsxwriter
+from werkzeug.datastructures import FileStorage
+from xlsxwriter.workbook import Worksheet

from superset.commands.database.exceptions import DatabaseUploadFailed
from superset.commands.database.uploaders.excel_reader import (
@@ -50,6 +53,18 @@ EXCEL_DATA_DECIMAL_CHAR = {
}


def write_data_to_worksheet(
    worksheet: Worksheet, header: list[str], data: list[list[Any]]
):
    all_data = [header] + data
    row = 0
    col = 0
    for name, age in all_data:
        worksheet.write(row, col, name)
        worksheet.write(row, col + 1, age)
        row += 1


@pytest.mark.parametrize(
    "file, options, expected_cols, expected_values",
    [
@@ -175,6 +190,23 @@ def test_excel_reader_file_to_dataframe(file, options, expected_cols, expected_values):
    file.close()


def test_excel_reader_index_column():
    excel_reader = ExcelReader(
        options=ExcelReaderOptions(index_column="Name"),
    )
    df = excel_reader.file_to_dataframe(create_excel_file(EXCEL_DATA))
    assert df.index.name == "Name"


def test_excel_reader_wrong_index_column():
    excel_reader = ExcelReader(
        options=ExcelReaderOptions(index_column="wrong"),
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
        excel_reader.file_to_dataframe(create_excel_file(EXCEL_DATA))
    assert str(ex.value) == "Parsing error: Index wrong invalid (sheet: 0)"


def test_excel_reader_wrong_columns_to_read():
    excel_reader = ExcelReader(
        options=ExcelReaderOptions(columns_read=["xpto"]),
@@ -203,7 +235,60 @@ def test_excel_reader_invalid_file():
        options=ExcelReaderOptions(),
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
-        excel_reader.file_to_dataframe(io.StringIO("c1"))
+        excel_reader.file_to_dataframe(FileStorage(io.BytesIO(b"c1")))
    assert str(ex.value) == (
        "Parsing error: Excel file format cannot be determined, you must specify an engine manually."
    )


def test_excel_reader_metadata():
    excel_reader = ExcelReader(
        options=ExcelReaderOptions(),
    )
    file = create_excel_file(EXCEL_DATA)
    metadata = excel_reader.file_metadata(file)
    assert metadata == {
        "items": [
            {"column_names": ["Name", "Age", "City", "Birth"], "sheet_name": "Sheet1"}
        ]
    }
    file.close()
def test_excel_reader_metadata_mul_sheets():
    buffer = io.BytesIO()
    workbook = xlsxwriter.Workbook(buffer)

    worksheet1 = workbook.add_worksheet("Sheet1")
    header1 = ["col11", "col12"]
    data1 = [["v11", "v12"]]
    write_data_to_worksheet(worksheet1, header1, data1)

    worksheet2 = workbook.add_worksheet("Sheet2")
    header2 = ["col21", "col22"]
    data2 = [["v21", "v22"]]
    write_data_to_worksheet(worksheet2, header2, data2)
    workbook.close()

    file = FileStorage(stream=buffer, filename="test.xls")

    excel_reader = ExcelReader(
        options=ExcelReaderOptions(),
    )
    metadata = excel_reader.file_metadata(file)
    assert metadata == {
        "items": [
            {"column_names": ["col11", "col12"], "sheet_name": "Sheet1"},
            {"column_names": ["col21", "col22"], "sheet_name": "Sheet2"},
        ]
    }
    file.close()


def test_excel_reader_file_metadata_invalid_file():
    excel_reader = ExcelReader(
        options=ExcelReaderOptions(),
    )
    with pytest.raises(DatabaseUploadFailed) as ex:
        excel_reader.file_metadata(FileStorage(io.BytesIO(b"1")))
    assert str(ex.value) == "Excel file format cannot be determined"
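Editor's note: the per-sheet metadata shape can be reproduced directly with pandas (a sketch of one plausible mechanism, not necessarily ExcelReader's code):

import pandas as pd

excel = pd.ExcelFile("workbook.xlsx")  # hypothetical path
items = [
    {"column_names": list(excel.parse(name, nrows=0).columns), "sheet_name": name}
    for name in excel.sheet_names
]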
@@ -34,13 +34,18 @@ from sqlalchemy.orm.session import Session
from superset import db
from superset.commands.database.uploaders.base import UploadCommand
+from superset.commands.database.uploaders.columnar_reader import ColumnarReader
from superset.commands.database.uploaders.csv_reader import CSVReader
from superset.commands.database.uploaders.excel_reader import ExcelReader
from superset.db_engine_specs.sqlite import SqliteEngineSpec
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import SupersetSecurityException
from superset.sql_parse import Table
-from tests.unit_tests.fixtures.common import create_csv_file, create_excel_file
+from tests.unit_tests.fixtures.common import (
+    create_columnar_file,
+    create_csv_file,
+    create_excel_file,
+)


def test_filter_by_uuid(
@@ -940,7 +945,7 @@ def test_csv_upload(
        data=payload,
        content_type="multipart/form-data",
    )
-    assert response.status_code == 200
+    assert response.status_code == 201
    assert response.json == {"message": "OK"}
    init_mock.assert_called_with(*upload_called_with)
    reader_mock.assert_called_with(*reader_called_with)
@@ -1135,7 +1140,7 @@ def test_csv_upload_file_extension_invalid(
    response = client.post(
        "/api/v1/database/1/csv_upload/",
        data={
-            "file": (create_csv_file(), filename),
+            "file": create_csv_file(filename=filename),
            "table_name": "table1",
            "delimiter": ",",
        },
@@ -1171,13 +1176,13 @@ def test_csv_upload_file_extension_valid(
    response = client.post(
        "/api/v1/database/1/csv_upload/",
        data={
-            "file": (create_csv_file(), filename),
+            "file": create_csv_file(filename=filename),
            "table_name": "table1",
            "delimiter": ",",
        },
        content_type="multipart/form-data",
    )
-    assert response.status_code == 200
+    assert response.status_code == 201


@pytest.mark.parametrize(
@@ -1282,7 +1287,7 @@ def test_excel_upload(
        data=payload,
        content_type="multipart/form-data",
    )
-    assert response.status_code == 200
+    assert response.status_code == 201
    assert response.json == {"message": "OK"}
    init_mock.assert_called_with(*upload_called_with)
    reader_mock.assert_called_with(*reader_called_with)
@@ -1406,7 +1411,7 @@ def test_excel_upload_file_extension_invalid(
    response = client.post(
        "/api/v1/database/1/excel_upload/",
        data={
-            "file": (create_excel_file(), filename),
+            "file": create_excel_file(filename=filename),
            "table_name": "table1",
        },
        content_type="multipart/form-data",
@@ -1415,6 +1420,326 @@ def test_excel_upload_file_extension_invalid(
    assert response.json == {"message": {"file": ["File extension is not allowed."]}}


@pytest.mark.parametrize(
    "payload,upload_called_with,reader_called_with",
    [
        (
            {
                "file": (create_columnar_file(), "out.parquet"),
                "table_name": "table1",
            },
            (
                1,
                "table1",
                ANY,
                None,
                ANY,
            ),
            (
                {
                    "already_exists": "fail",
                    "file": ANY,
                    "table_name": "table1",
                },
            ),
        ),
        (
            {
                "file": (create_columnar_file(), "out.parquet"),
                "table_name": "table2",
                "already_exists": "replace",
                "columns_read": "col1,col2",
                "dataframe_index": True,
                "index_label": "label",
            },
            (
                1,
                "table2",
                ANY,
                None,
                ANY,
            ),
            (
                {
                    "already_exists": "replace",
                    "columns_read": ["col1", "col2"],
                    "file": ANY,
                    "table_name": "table2",
                    "dataframe_index": True,
                    "index_label": "label",
                },
            ),
        ),
    ],
)
def test_columnar_upload(
    payload: dict[str, Any],
    upload_called_with: tuple[int, str, Any, dict[str, Any]],
    reader_called_with: dict[str, Any],
    mocker: MockFixture,
    client: Any,
    full_api_access: None,
) -> None:
    """
    Test columnar upload success.
    """
    init_mock = mocker.patch.object(UploadCommand, "__init__")
    init_mock.return_value = None
    _ = mocker.patch.object(UploadCommand, "run")
    reader_mock = mocker.patch.object(ColumnarReader, "__init__")
    reader_mock.return_value = None
    response = client.post(
        "/api/v1/database/1/columnar_upload/",
        data=payload,
        content_type="multipart/form-data",
    )
    assert response.status_code == 201
    assert response.json == {"message": "OK"}
    init_mock.assert_called_with(*upload_called_with)
    reader_mock.assert_called_with(*reader_called_with)
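Editor's note: outside the Flask test client, the new endpoint takes an ordinary multipart POST. A sketch with requests; the host and token are hypothetical:

import requests

url = "https://superset.example.com/api/v1/database/1/columnar_upload/"
headers = {"Authorization": "Bearer <access-token>"}
with open("out.parquet", "rb") as f:
    response = requests.post(
        url,
        headers=headers,
        files={"file": ("out.parquet", f)},
        data={"table_name": "table1", "already_exists": "fail"},
    )
assert response.status_code == 201  # body: {"message": "OK"}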
@pytest.mark.parametrize(
    "payload,expected_response",
    [
        (
            {
                "file": (create_columnar_file(), "out.parquet"),
                "already_exists": "fail",
            },
            {"message": {"table_name": ["Missing data for required field."]}},
        ),
        (
            {
                "file": (create_columnar_file(), "out.parquet"),
                "table_name": "",
                "already_exists": "fail",
            },
            {"message": {"table_name": ["Length must be between 1 and 10000."]}},
        ),
        (
            {"table_name": "table1", "already_exists": "fail"},
            {"message": {"file": ["Field may not be null."]}},
        ),
        (
            {
                "file": "xpto",
                "table_name": "table1",
                "already_exists": "fail",
            },
            {"message": {"file": ["Field may not be null."]}},
        ),
        (
            {
                "file": (create_columnar_file(), "out.parquet"),
                "table_name": "table1",
                "already_exists": "xpto",
            },
            {"message": {"already_exists": ["Must be one of: fail, replace, append."]}},
        ),
    ],
)
def test_columnar_upload_validation(
    payload: Any,
    expected_response: dict[str, str],
    mocker: MockFixture,
    client: Any,
    full_api_access: None,
) -> None:
    """
    Test columnar upload validation failures.
    """
    _ = mocker.patch.object(UploadCommand, "run")

    response = client.post(
        "/api/v1/database/1/columnar_upload/",
        data=payload,
        content_type="multipart/form-data",
    )
    assert response.status_code == 400
    assert response.json == expected_response
@pytest.mark.parametrize(
    "filename",
    [
        "out.parquet",
        "out.zip",
        "out.parquet.zip",
        "out something.parquet",
        "out something.zip",
    ],
)
def test_columnar_upload_file_extension_valid(
    filename: str,
    mocker: MockFixture,
    client: Any,
    full_api_access: None,
) -> None:
    """
    Test columnar upload with an allowed file extension.
    """
    _ = mocker.patch.object(UploadCommand, "run")
    response = client.post(
        "/api/v1/database/1/columnar_upload/",
        data={
            "file": (create_columnar_file(), filename),
            "table_name": "table1",
        },
        content_type="multipart/form-data",
    )
    assert response.status_code == 201
@pytest.mark.parametrize(
    "filename",
    [
        "out.xpto",
        "out.exe",
        "out",
        "out zip",
        "",
        "out.parquet.exe",
        ".parquet",
        "out.",
        ".",
        "out parquet a.exe",
    ],
)
def test_columnar_upload_file_extension_invalid(
    filename: str,
    mocker: MockFixture,
    client: Any,
    full_api_access: None,
) -> None:
    """
    Test columnar upload with a disallowed file extension.
    """
    _ = mocker.patch.object(UploadCommand, "run")
    response = client.post(
        "/api/v1/database/1/columnar_upload/",
        data={
            "file": create_columnar_file(filename=filename),
            "table_name": "table1",
        },
        content_type="multipart/form-data",
    )
    assert response.status_code == 400
    assert response.json == {"message": {"file": ["File extension is not allowed."]}}
def test_csv_metadata(mocker: MockFixture, client: Any, full_api_access: None) -> None:
    _ = mocker.patch.object(CSVReader, "file_metadata")
    response = client.post(
        "/api/v1/database/csv_metadata/",
        data={"file": create_csv_file()},
        content_type="multipart/form-data",
    )
    assert response.status_code == 200


def test_csv_metadata_bad_extension(
    mocker: MockFixture, client: Any, full_api_access: None
) -> None:
    _ = mocker.patch.object(CSVReader, "file_metadata")
    response = client.post(
        "/api/v1/database/csv_metadata/",
        data={"file": create_csv_file(filename="test.out")},
        content_type="multipart/form-data",
    )
    assert response.status_code == 400
    assert response.json == {"message": {"file": ["File extension is not allowed."]}}


def test_csv_metadata_validation(
    mocker: MockFixture, client: Any, full_api_access: None
) -> None:
    _ = mocker.patch.object(CSVReader, "file_metadata")
    response = client.post(
        "/api/v1/database/csv_metadata/",
        data={},
        content_type="multipart/form-data",
    )
    assert response.status_code == 400
    assert response.json == {"message": {"file": ["Field may not be null."]}}


def test_excel_metadata(
    mocker: MockFixture, client: Any, full_api_access: None
) -> None:
    _ = mocker.patch.object(ExcelReader, "file_metadata")
    response = client.post(
        "/api/v1/database/excel_metadata/",
        data={"file": create_excel_file()},
        content_type="multipart/form-data",
    )
    assert response.status_code == 200


def test_excel_metadata_bad_extension(
    mocker: MockFixture, client: Any, full_api_access: None
) -> None:
    _ = mocker.patch.object(ExcelReader, "file_metadata")
    response = client.post(
        "/api/v1/database/excel_metadata/",
        data={"file": create_excel_file(filename="test.out")},
        content_type="multipart/form-data",
    )
    assert response.status_code == 400
    assert response.json == {"message": {"file": ["File extension is not allowed."]}}


def test_excel_metadata_validation(
    mocker: MockFixture, client: Any, full_api_access: None
) -> None:
    _ = mocker.patch.object(ExcelReader, "file_metadata")
    response = client.post(
        "/api/v1/database/excel_metadata/",
        data={},
        content_type="multipart/form-data",
    )
    assert response.status_code == 400
    assert response.json == {"message": {"file": ["Field may not be null."]}}


def test_columnar_metadata(
    mocker: MockFixture, client: Any, full_api_access: None
) -> None:
    _ = mocker.patch.object(ColumnarReader, "file_metadata")
    response = client.post(
        "/api/v1/database/columnar_metadata/",
        data={"file": create_columnar_file()},
        content_type="multipart/form-data",
    )
    assert response.status_code == 200


def test_columnar_metadata_bad_extension(
    mocker: MockFixture, client: Any, full_api_access: None
) -> None:
    _ = mocker.patch.object(ColumnarReader, "file_metadata")
    response = client.post(
        "/api/v1/database/columnar_metadata/",
        data={"file": create_columnar_file(filename="test.out")},
        content_type="multipart/form-data",
    )
    assert response.status_code == 400
    assert response.json == {"message": {"file": ["File extension is not allowed."]}}


def test_columnar_metadata_validation(
    mocker: MockFixture, client: Any, full_api_access: None
) -> None:
    _ = mocker.patch.object(ColumnarReader, "file_metadata")
    response = client.post(
        "/api/v1/database/columnar_metadata/",
        data={},
        content_type="multipart/form-data",
    )
    assert response.status_code == 400
    assert response.json == {"message": {"file": ["Field may not be null."]}}


def test_table_metadata_happy_path(
    mocker: MockFixture,
    client: Any,
@@ -24,6 +24,7 @@ from typing import Any
import pandas as pd
import pytest
+from werkzeug.datastructures import FileStorage


@pytest.fixture
@@ -31,7 +32,9 @@ def dttm() -> datetime:
    return datetime.strptime("2019-01-02 03:04:05.678900", "%Y-%m-%d %H:%M:%S.%f")


-def create_csv_file(data: list[list[str]] | None = None, delimiter=",") -> BytesIO:
+def create_csv_file(
+    data: list[list[str]] | None = None, delimiter=",", filename="test.csv"
+) -> FileStorage:
    data = (
        [
            ["Name", "Age", "City"],
@@ -46,14 +49,27 @@ def create_csv_file(data: list[list[str]] | None = None, delimiter=",") -> BytesIO:
    for row in data:
        writer.writerow(row)
    output.seek(0)
-    bytes_buffer = BytesIO(output.getvalue().encode("utf-8"))
-    return bytes_buffer
+    buffer = BytesIO(output.getvalue().encode("utf-8"))
+    return FileStorage(stream=buffer, filename=filename)


-def create_excel_file(data: dict[str, list[Any]] | None = None) -> BytesIO:
+def create_excel_file(
+    data: dict[str, list[Any]] | None = None, filename="test.xls"
+) -> FileStorage:
    data = {"Name": ["John"], "Age": [30], "City": ["New York"]} if not data else data
-    excel_buffer = BytesIO()
+    buffer = BytesIO()
    df = pd.DataFrame(data)
-    df.to_excel(excel_buffer, index=False)
-    excel_buffer.seek(0)
-    return excel_buffer
+    df.to_excel(buffer, index=False)
+    buffer.seek(0)
+    return FileStorage(stream=buffer, filename=filename)


def create_columnar_file(
    data: dict[str, list[Any]] | None = None, filename="test.parquet"
) -> FileStorage:
    data = {"Name": ["John"], "Age": [30], "City": ["New York"]} if not data else data
    buffer = BytesIO()
    df = pd.DataFrame(data)
    df.to_parquet(buffer, index=False)
    buffer.seek(0)
    return FileStorage(stream=buffer, filename=filename)
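Editor's note: a quick sanity check of the new helper, assuming a parquet engine (pyarrow or fastparquet) is installed:

import pandas as pd

file = create_columnar_file({"a": [1, 2]})
df = pd.read_parquet(file.stream)  # FileStorage exposes the underlying buffer
assert df["a"].tolist() == [1, 2]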