mirror of
https://github.com/microsoft/PowerToys
synced 2024-11-21 15:53:19 +00:00
[AdvPaste]Fix CSV parser supporting escape delimiter by enclosing in double quotes (#33874)
## Summary of the Pull Request This PR fixes the CSV parser support for escaping delimiter by enclosing it in quotes ## Detailed Description of the Pull Request / Additional comments - This PR introduces a fix for the support of adding a delimiter to the string and supporting it by enclosing it in `"`
This commit is contained in:
parent
07c4972c2c
commit
3652e3627a
1
.github/actions/spell-check/expect.txt
vendored
1
.github/actions/spell-check/expect.txt
vendored
@ -1064,6 +1064,7 @@ numberbox
|
||||
nwc
|
||||
Objbase
|
||||
objidl
|
||||
occurrence
|
||||
ocr
|
||||
Ocrsettings
|
||||
odbccp
|
||||
|
@ -24,6 +24,9 @@ namespace AdvancedPaste.Helpers
|
||||
private static readonly char[] CsvDelimArry = [',', ';', '\t'];
|
||||
private static readonly Regex CsvSepIdentifierRegex = new Regex(@"^sep=(.)$", RegexOptions.IgnoreCase);
|
||||
|
||||
// Split on every occurrence of the delimiter except if it is enclosed by " and ignore two " as escaped "
|
||||
private static readonly string CsvDelimSepRegexStr = @"(?=(?:[^""]*""[^""]*"")*(?![^""]*""))";
|
||||
|
||||
internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
|
||||
{
|
||||
Logger.LogTrace();
|
||||
@ -146,10 +149,11 @@ namespace AdvancedPaste.Helpers
|
||||
continue;
|
||||
}
|
||||
|
||||
// A CSV line is valid, if the delimiter occurs more or equal times in every line compared to the first data line. (More because sometimes the delimiter occurs in a data string.)
|
||||
if (line.Count(x => x == delim) >= delimCount)
|
||||
// A CSV line is valid, if the delimiter occurs equal times in every line compared to the first data line
|
||||
// and if every line contains no or an even count of quotation marks.
|
||||
if (Regex.Count(line, delim + CsvDelimSepRegexStr) == delimCount && int.IsEvenInteger(line.Count(x => x == '"')))
|
||||
{
|
||||
csv.Add(line.Split(delim));
|
||||
csv.Add(Regex.Split(line, delim + CsvDelimSepRegexStr, RegexOptions.IgnoreCase));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -205,7 +209,7 @@ namespace AdvancedPaste.Helpers
|
||||
// We get the count from the second line, as the first one only contains the character definition and not a CSV data line.
|
||||
char delimChar = matchChar.Groups[1].Value.Trim()[0];
|
||||
delimiter = delimChar;
|
||||
delimiterCount = csvLines[1].Count(x => x == delimChar);
|
||||
delimiterCount = Regex.Count(csvLines[1], delimChar + CsvDelimSepRegexStr, RegexOptions.IgnoreCase);
|
||||
}
|
||||
}
|
||||
|
||||
@ -214,19 +218,19 @@ namespace AdvancedPaste.Helpers
|
||||
// Try to select the correct delimiter based on the first two CSV lines from a list of predefined delimiters.
|
||||
foreach (char c in CsvDelimArry)
|
||||
{
|
||||
int cntFirstLine = csvLines[0].Count(x => x == c);
|
||||
int cntFirstLine = Regex.Count(csvLines[0], c + CsvDelimSepRegexStr, RegexOptions.IgnoreCase);
|
||||
int cntNextLine = 0; // Default to 0 that the 'second line' check is always true.
|
||||
|
||||
// Additional count if we have more than one line
|
||||
if (csvLines.Length >= 2)
|
||||
{
|
||||
cntNextLine = csvLines[1].Count(x => x == c);
|
||||
cntNextLine = Regex.Count(csvLines[1], c + CsvDelimSepRegexStr, RegexOptions.IgnoreCase);
|
||||
}
|
||||
|
||||
// The delimiter is found if the count is bigger as from the last selected delimiter
|
||||
// and if the next csv line does not exist or has the same number or more occurrences of the delimiter.
|
||||
// and if the next csv line does not exist or has the same number of occurrences of the delimiter.
|
||||
// (We check the next line to prevent false positives.)
|
||||
if (cntFirstLine > delimiterCount && (cntNextLine == 0 || cntNextLine >= cntFirstLine))
|
||||
if (cntFirstLine > delimiterCount && (cntNextLine == 0 || cntNextLine == cntFirstLine))
|
||||
{
|
||||
delimiter = c;
|
||||
delimiterCount = cntFirstLine;
|
||||
|
Loading…
Reference in New Issue
Block a user