[AdvPaste]Fix CSV parser supporting escape delimiter by enclosing in double quotes (#33874)

## Summary of the Pull Request
This PR fixes the CSV parser's support for escaping a delimiter by
enclosing it in double quotes.

## Detailed Description of the Pull Request / Additional comments
- This PR fixes the handling of a delimiter character that appears inside a
string value: the delimiter is not treated as a separator when it is enclosed in `"`.
This commit is contained in:
Vaibhav Sharma 2024-07-23 18:59:33 +05:30 committed by GitHub
parent 07c4972c2c
commit 3652e3627a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 13 additions and 8 deletions

View File

@ -1064,6 +1064,7 @@ numberbox
nwc
Objbase
objidl
occurrence
ocr
Ocrsettings
odbccp

View File

@ -24,6 +24,9 @@ namespace AdvancedPaste.Helpers
private static readonly char[] CsvDelimArry = [',', ';', '\t'];
private static readonly Regex CsvSepIdentifierRegex = new Regex(@"^sep=(.)$", RegexOptions.IgnoreCase);
// Split on every occurrence of the delimiter unless it is enclosed in double quotes; two consecutive double quotes are treated as an escaped double quote.
private static readonly string CsvDelimSepRegexStr = @"(?=(?:[^""]*""[^""]*"")*(?![^""]*""))";
internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
{
Logger.LogTrace();
@ -146,10 +149,11 @@ namespace AdvancedPaste.Helpers
continue;
}
// A CSV line is valid, if the delimiter occurs more or equal times in every line compared to the first data line. (More because sometimes the delimiter occurs in a data string.)
if (line.Count(x => x == delim) >= delimCount)
// A CSV line is valid if it contains the same number of (unquoted) delimiters as the first data line
// and if it contains either no quotation marks or an even number of them.
if (Regex.Count(line, delim + CsvDelimSepRegexStr) == delimCount && int.IsEvenInteger(line.Count(x => x == '"')))
{
csv.Add(line.Split(delim));
csv.Add(Regex.Split(line, delim + CsvDelimSepRegexStr, RegexOptions.IgnoreCase));
}
else
{
@ -205,7 +209,7 @@ namespace AdvancedPaste.Helpers
// We get the count from the second line, as the first one only contains the character definition and not a CSV data line.
char delimChar = matchChar.Groups[1].Value.Trim()[0];
delimiter = delimChar;
delimiterCount = csvLines[1].Count(x => x == delimChar);
delimiterCount = Regex.Count(csvLines[1], delimChar + CsvDelimSepRegexStr, RegexOptions.IgnoreCase);
}
}
@ -214,19 +218,19 @@ namespace AdvancedPaste.Helpers
// Try to select the correct delimiter based on the first two CSV lines from a list of predefined delimiters.
foreach (char c in CsvDelimArry)
{
int cntFirstLine = csvLines[0].Count(x => x == c);
int cntFirstLine = Regex.Count(csvLines[0], c + CsvDelimSepRegexStr, RegexOptions.IgnoreCase);
int cntNextLine = 0; // Default to 0 that the 'second line' check is always true.
// Additional count if we have more than one line
if (csvLines.Length >= 2)
{
cntNextLine = csvLines[1].Count(x => x == c);
cntNextLine = Regex.Count(csvLines[1], c + CsvDelimSepRegexStr, RegexOptions.IgnoreCase);
}
// The delimiter is found if the count is bigger as from the last selected delimiter
// and if the next csv line does not exist or has the same number or more occurrences of the delimiter.
// and if the next csv line does not exist or has the same number of occurrences of the delimiter.
// (We check the next line to prevent false positives.)
if (cntFirstLine > delimiterCount && (cntNextLine == 0 || cntNextLine >= cntFirstLine))
if (cntFirstLine > delimiterCount && (cntNextLine == 0 || cntNextLine == cntFirstLine))
{
delimiter = c;
delimiterCount = cntFirstLine;