fix: Don't replace tabs with spaces (#3438)

* fix: don't convert tabs to spaces

* test exact

* save nextLineWithoutTabs

* fix code
This commit is contained in:
Tony Brix 2024-09-03 18:06:32 -06:00 committed by GitHub
parent 2ff0547e87
commit 9ed6456a37
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 28 additions and 20 deletions

View File

@ -106,10 +106,6 @@ export class _Lexer {
blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
if (this.options.pedantic) {
src = src.replace(/\t/g, ' ').replace(/^ +$/gm, '');
} else {
src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
return leading + ' '.repeat(tabs.length);
});
}
let token: Tokens.Generic | undefined;

View File

@ -90,7 +90,7 @@ export class _Tokenizer {
code(src: string): Tokens.Code | undefined {
const cap = this.rules.block.code.exec(src);
if (cap) {
const text = cap[0].replace(/^ {1,4}/gm, '');
const text = cap[0].replace(/^(?: {1,4}| {0,3}\t)/gm, '');
return {
type: 'code',
raw: cap[0],
@ -294,7 +294,7 @@ export class _Tokenizer {
indent += cap[1].length;
}
if (blankLine && /^ *$/.test(nextLine)) { // Items begin with at most one blank line
if (blankLine && /^[ \t]*$/.test(nextLine)) { // Items begin with at most one blank line
raw += nextLine + '\n';
src = src.substring(nextLine.length + 1);
endEarly = true;
@ -309,11 +309,15 @@ export class _Tokenizer {
// Check if following lines should be included in List Item
while (src) {
const rawLine = src.split('\n', 1)[0];
let nextLineWithoutTabs;
nextLine = rawLine;
// Re-align to follow commonmark nesting rules
if (this.options.pedantic) {
nextLine = nextLine.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
nextLineWithoutTabs = nextLine;
} else {
nextLineWithoutTabs = nextLine.replace(/\t/g, ' ');
}
// End list item if found code fences
@ -332,12 +336,12 @@ export class _Tokenizer {
}
// Horizontal rule found
if (hrRegex.test(src)) {
if (hrRegex.test(nextLine)) {
break;
}
if (nextLine.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible
itemContents += '\n' + nextLine.slice(indent);
if (nextLineWithoutTabs.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible
itemContents += '\n' + nextLineWithoutTabs.slice(indent);
} else {
// not enough indentation
if (blankLine) {
@ -345,7 +349,7 @@ export class _Tokenizer {
}
// paragraph continuation unless last line was a different block level element
if (line.search(/[^ ]/) >= 4) { // indented code block
if (line.replace(/\t/g, ' ').search(/[^ ]/) >= 4) { // indented code block
break;
}
if (fencesBeginRegex.test(line)) {
@ -367,7 +371,7 @@ export class _Tokenizer {
raw += rawLine + '\n';
src = src.substring(rawLine.length + 1);
line = nextLine.slice(indent);
line = nextLineWithoutTabs.slice(indent);
}
}
@ -375,7 +379,7 @@ export class _Tokenizer {
// If the previous item ended with a blank line, the list is loose
if (endsWithBlankLine) {
list.loose = true;
} else if (/\n *\n *$/.test(raw)) {
} else if (/\n[ \t]*\n[ \t]*$/.test(raw)) {
endsWithBlankLine = true;
}
}

View File

@ -6,15 +6,15 @@ import {
* Block-Level Grammar
*/
const newline = /^(?: *(?:\n|$))+/;
const blockCode = /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/;
const newline = /^(?:[ \t]*(?:\n|$))+/;
const blockCode = /^((?: {4}| {0,3}\t)[^\n]+(?:\n(?:[ \t]*(?:\n|$))*)?)+/;
const fences = /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/;
const hr = /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/;
const heading = /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/;
const bullet = /(?:[*+-]|\d{1,9}[.)])/;
const lheading = edit(/^(?!bull |blockCode|fences|blockquote|heading|html)((?:.|\n(?!\s*?\n|bull |blockCode|fences|blockquote|heading|html))+?)\n {0,3}(=+|-+) *(?:\n+|$)/)
.replace(/bull/g, bullet) // lists can interrupt
.replace(/blockCode/g, / {4}/) // indented code blocks can interrupt
.replace(/blockCode/g, /(?: {4}| {0,3}\t)/) // indented code blocks can interrupt
.replace(/fences/g, / {0,3}(?:`{3,}|~{3,})/) // fenced code blocks can interrupt
.replace(/blockquote/g, / {0,3}>/) // blockquote can interrupt
.replace(/heading/g, / {0,3}#{1,6}/) // ATX heading can interrupt
@ -23,7 +23,7 @@ const lheading = edit(/^(?!bull |blockCode|fences|blockquote|heading|html)((?:.|
const _paragraph = /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/;
const blockText = /^[^\n]+/;
const _blockLabel = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
const def = edit(/^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/)
const def = edit(/^ {0,3}\[(label)\]: *(?:\n[ \t]*)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n[ \t]*)?| *\n[ \t]*)(title))? *(?:\n+|$)/)
.replace('label', _blockLabel)
.replace('title', /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/)
.getRegex();
@ -46,9 +46,9 @@ const html = edit(
+ '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
+ '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
+ '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
+ '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6)
+ '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag
+ '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
+ '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (6)
+ '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) open tag
+ '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) closing tag
+ ')', 'i')
.replace('comment', _comment)
.replace('tag', _tag)
@ -104,7 +104,7 @@ const gfmTable = edit(
.replace('hr', hr)
.replace('heading', ' {0,3}#{1,6}(?:\\s|$)')
.replace('blockquote', ' {0,3}>')
.replace('code', ' {4}[^\\n]')
.replace('code', '(?: {4}| {0,3}\t)[^\\n]')
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')

View File

@ -0,0 +1,2 @@
<pre><code> tab
</code></pre>

View File

@ -0,0 +1,6 @@
---
renderExact: true
---
```
tab
```