auto-detect CSV delimiter

This commit is contained in:
SPRINX0\prochazka 2024-09-17 10:28:58 +02:00
parent 4065e05013
commit 0c2b25f79a
4 changed files with 49 additions and 5 deletions

View File

@ -18,11 +18,14 @@ function readFirstLine(file) {
}
if (reader.hasNextLine()) {
reader.nextLine((err, line) => {
if (err) reject(err);
resolve(line);
if (err) {
reader.close(() => reject(err)); // Ensure reader is closed on error
return;
}
reader.close(() => resolve(line)); // Ensure reader is closed after reading
});
} else {
resolve(null);
reader.close(() => resolve(null)); // Properly close if no lines are present
}
});
});

View File

@ -34,8 +34,9 @@
"devDependencies": {
"csv": "^6.3.10",
"dbgate-plugin-tools": "^1.0.7",
"line-reader": "^0.4.0",
"lodash": "^4.17.21",
"webpack": "^5.91.0",
"webpack-cli": "^5.1.4"
}
}
}

View File

@ -2,8 +2,32 @@ const zipObject = require('lodash/zipObject');
const csv = require('csv');
const fs = require('fs');
const stream = require('stream');
const lineReader = require('line-reader');
let dbgateApi;
/**
 * Reads only the first line of a file through `lineReader`.
 *
 * @param {string} file - Passed straight to `lineReader.open`.
 * @returns {Promise<string|null>} Resolves with the first line, or with
 *   `null` when the reader reports that there is no line to read. Rejects
 *   with the error reported by `lineReader.open` or by `nextLine`.
 */
function readFirstLine(file) {
  return new Promise((resolve, reject) => {
    lineReader.open(file, (openError, lineSource) => {
      if (openError) {
        // Opening failed, so there is no reader to close.
        reject(openError);
        return;
      }

      // Settle the promise only from inside the close callback, so the reader
      // is always released first. Whatever close() reports is not inspected.
      const closeThenSettle = (settle, value) => {
        lineSource.close(() => settle(value));
      };

      if (!lineSource.hasNextLine()) {
        closeThenSettle(resolve, null);
        return;
      }

      lineSource.nextLine((readError, firstLine) => {
        if (readError) {
          closeThenSettle(reject, readError);
        } else {
          closeThenSettle(resolve, firstLine);
        }
      });
    });
  });
}
class CsvPrepareStream extends stream.Transform {
constructor({ header }) {
super({ objectMode: true });
@ -46,6 +70,22 @@ class CsvPrepareStream extends stream.Transform {
async function reader({ fileName, encoding = 'utf-8', header = true, delimiter, limitRows = undefined }) {
console.log(`Reading file ${fileName}`);
const downloadedFile = await dbgateApi.download(fileName);
if (!delimiter) {
// auto detect delimiter
// read first line from downloadedFile
const firstLine = await readFirstLine(downloadedFile);
if (firstLine) {
const delimiterCounts = {
',': firstLine.replace(/[^,]/g, '').length,
';': firstLine.replace(/[^;]/g, '').length,
'|': firstLine.replace(/[^|]/g, '').length,
};
delimiter = Object.keys(delimiterCounts).reduce((a, b) => (delimiterCounts[a] > delimiterCounts[b] ? a : b), ',');
}
}
const csvStream = csv.parse({
// @ts-ignore
delimiter,
@ -53,7 +93,6 @@ async function reader({ fileName, encoding = 'utf-8', header = true, delimiter,
to_line: limitRows ? limitRows + 1 : undefined,
ltrim: true,
});
const downloadedFile = await dbgateApi.download(fileName);
const fileStream = fs.createReadStream(downloadedFile, encoding);
const csvPrepare = new CsvPrepareStream({ header });
fileStream.pipe(csvStream);

View File

@ -17,6 +17,7 @@ const fileFormat = {
name: 'delimiter',
label: 'Delimiter',
options: [
{ name: 'Auto-detect', value: '' },
{ name: 'Comma (,)', value: ',' },
{ name: 'Semicolon (;)', value: ';' },
{ name: 'Tab', value: '\t' },