mirror of
https://github.com/microsoft/PowerToys
synced 2024-11-21 15:53:19 +00:00
[AdvPaste]CSV parser: Handle control characters (quotation marks) correctly (#33986)
## Summary of the Pull Request

This PR fixes the CSV parsing of quotation marks so that it follows the standard CSV rules:

- An empty data value can be written as `""`. => Both quotation marks are removed.
- Data that contains the delimiter is enclosed by a starting and an ending `"`. => The first and the last quotation mark are removed.
- A quotation mark inside the data is escaped with a second quotation mark. => Each pair of quotation marks is replaced with a single one.

### Input

```csv
A,B,,"","my ""nice"" string","""zz""","""""double quotes"""""
```

### Before this PR (Wrong result)

```json
[
  [
    "A",
    "B",
    "",
    "\"\"",
    "\"my \"\"nice\"\" string\"",
    "\"\"\"zz\"\"\"",
    "\"\"\"\"\"double quotes\"\"\"\"\""
  ]
]
```

### After this PR (Correct result)

```json
[
  [
    "A",
    "B",
    "",
    "",
    "my \"nice\" string",
    "\"zz\"",
    "\"\"double quotes\"\""
  ]
]
```
This commit is contained in:
parent
d40367a860
commit
4fee37c35a
@ -24,9 +24,14 @@ namespace AdvancedPaste.Helpers
|
|||||||
private static readonly char[] CsvDelimArry = [',', ';', '\t'];
|
private static readonly char[] CsvDelimArry = [',', ';', '\t'];
|
||||||
private static readonly Regex CsvSepIdentifierRegex = new Regex(@"^sep=(.)$", RegexOptions.IgnoreCase);
|
private static readonly Regex CsvSepIdentifierRegex = new Regex(@"^sep=(.)$", RegexOptions.IgnoreCase);
|
||||||
|
|
||||||
// Split on every occurrence of the delimiter except if it is enclosed by " and ignore two " as escaped "
|
// CSV: Split on every occurrence of the delimiter except if it is enclosed by " and ignore two " as escaped "
|
||||||
private static readonly string CsvDelimSepRegexStr = @"(?=(?:[^""]*""[^""]*"")*(?![^""]*""))";
|
private static readonly string CsvDelimSepRegexStr = @"(?=(?:[^""]*""[^""]*"")*(?![^""]*""))";
|
||||||
|
|
||||||
|
// CSV: Regex to remove/replace quotation marks
|
||||||
|
private static readonly Regex CsvRemoveSingleQuotationMarksRegex = new Regex(@"^""(?!"")|(?<!"")""$|^""""$");
|
||||||
|
private static readonly Regex CsvRemoveStartAndEndQuotationMarksRegex = new Regex(@"^""(?=(""{2})+)|(?<=(""{2})+)""$");
|
||||||
|
private static readonly Regex CsvReplaceDoubleQuotationMarksRegex = new Regex(@"""{2}");
|
||||||
|
|
||||||
internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
|
internal static string ToJsonFromXmlOrCsv(DataPackageView clipboardData)
|
||||||
{
|
{
|
||||||
Logger.LogTrace();
|
Logger.LogTrace();
|
||||||
@ -134,7 +139,7 @@ namespace AdvancedPaste.Helpers
|
|||||||
{
|
{
|
||||||
if (string.IsNullOrEmpty(jsonText))
|
if (string.IsNullOrEmpty(jsonText))
|
||||||
{
|
{
|
||||||
var csv = new List<string[]>();
|
var csv = new List<IEnumerable<string>>();
|
||||||
|
|
||||||
string[] lines = text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
|
string[] lines = text.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);
|
||||||
|
|
||||||
@ -153,7 +158,8 @@ namespace AdvancedPaste.Helpers
|
|||||||
// and if every line contains no or an even count of quotation marks.
|
// and if every line contains no or an even count of quotation marks.
|
||||||
if (Regex.Count(line, delim + CsvDelimSepRegexStr) == delimCount && int.IsEvenInteger(line.Count(x => x == '"')))
|
if (Regex.Count(line, delim + CsvDelimSepRegexStr) == delimCount && int.IsEvenInteger(line.Count(x => x == '"')))
|
||||||
{
|
{
|
||||||
csv.Add(Regex.Split(line, delim + CsvDelimSepRegexStr, RegexOptions.IgnoreCase));
|
string[] dataCells = Regex.Split(line, delim + CsvDelimSepRegexStr, RegexOptions.IgnoreCase);
|
||||||
|
csv.Add(dataCells.Select(x => ReplaceQuotationMarksInCsvData(x)));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -244,5 +250,26 @@ namespace AdvancedPaste.Helpers
|
|||||||
throw new FormatException("Invalid CSV format: Failed to detect the delimiter.");
|
throw new FormatException("Invalid CSV format: Failed to detect the delimiter.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Strips the quotation marks that act as CSV control characters from a single cell:
/// the enclosing marks around the cell data and the extra mark of every escaped ("") quotation mark.
/// </summary>
/// <param name="str">Raw text of one CSV cell, as produced by splitting a line on the delimiter.</param>
/// <returns>The cell text with enclosing quotation marks removed and escaped quotation marks collapsed.</returns>
private static string ReplaceQuotationMarksInCsvData(string str)
{
    // Pass 1: drop a leading mark that is not followed by another mark, a trailing mark that is
    // not preceded by another mark, and both marks of a cell that consists only of "" (empty value).
    string withoutSimpleEnclosure = CsvRemoveSingleQuotationMarksRegex.Replace(str, string.Empty);

    // Pass 2: drop a leading mark that is followed by at least one pair of marks and a trailing mark
    // that is preceded by at least one pair (the enclosing marks of data like /"""abc"""/).
    string withoutEnclosure = CsvRemoveStartAndEndQuotationMarksRegex.Replace(withoutSimpleEnclosure, string.Empty);

    // Pass 3: every remaining pair of marks is an escaped quotation mark; collapse it to a single one.
    return CsvReplaceDoubleQuotationMarksRegex.Replace(withoutEnclosure, "\"");
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user