Skip to content

Commit 651550a

Browse files
authored
refactor: simplify no-bare-urls with ESQuery selector (#511)
* refactor: simplify `no-bare-urls` with ESQuery selector * wip: use named capture group * wip: cleanup * wip * wip: cleanup * wip: cleanup * wip: cleanup * wip: remove top level var * wip: cleanup * wip: cleanup * wip: cleanup * wip: add more test cases * wip: reduce top-level variable * wip: simplify * wip: rename variable * wip: create `reset` helper func * wip: rename helper function * wip: simplify visitor patterns * wip: simplify visitor patterns 2 * wip: cleanup * wip: cleanup * wip: add more test cases (including false positives) * fix: handle `null` case (edge case) * wip: add comments * wip
1 parent c790317 commit 651550a

File tree

3 files changed

+193
-90
lines changed

3 files changed

+193
-90
lines changed

src/rules/no-bare-urls.js

Lines changed: 99 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,33 @@
33
* @author xbinaryx
44
*/
55

6+
/*
7+
* Here's a note on how the approach (algorithm) works:
8+
*
9+
* - When entering an `Html` node that is a child of a `Heading`, `Paragraph` or `TableCell`,
10+
* we check whether it is an opening or closing tag.
11+
* If we encounter an opening tag, we store the tag name and set `lastTagName`.
12+
* (`lastTagName` serves as a state to represent whether we're between opening and closing HTML tags.)
13+
* If we encounter a closing tag, we reset the stored tag name and `tempLinkNodes`.
14+
*
15+
* - When entering a `Link` node that is a child of a `Heading`, `Paragraph` or `TableCell`,
16+
* we check whether it is between opening and closing HTML tags.
17+
* If it's between opening and closing HTML tags, we add it to `tempLinkNodes`.
18+
* If it's not between opening and closing HTML tags, we add it to `linkNodes`.
19+
*
20+
* - When exiting a `Heading`, `Paragraph` or `TableCell`, we add all `tempLinkNodes` to `linkNodes`.
21+
* If there are any remaining `tempLinkNodes`, it means they are not between opening and closing HTML tags. (ex. `<br> ... <br>`)
22+
* If there are no remaining `tempLinkNodes`, it means they are between opening and closing HTML tags.
23+
*
24+
* - When exiting a `root` node, we report all `Link` nodes for bare URLs.
25+
*/
26+
627
//-----------------------------------------------------------------------------
728
// Type Definitions
829
//-----------------------------------------------------------------------------
930

1031
/**
11-
* @import { Node, Heading, Paragraph, TableCell, Link } from "mdast";
32+
* @import { Link, Html } from "mdast";
1233
* @import { MarkdownRuleDefinition } from "../types.js";
1334
* @typedef {"bareUrl"} NoBareUrlsMessageIds
1435
* @typedef {[]} NoBareUrlsOptions
@@ -19,17 +40,17 @@
1940
// Helpers
2041
//-----------------------------------------------------------------------------
2142

22-
const htmlTagNamePattern = /^<([^!>][^/\s>]*)/u;
43+
const htmlTagNamePattern = /^<(?<tagName>[^!>][^/\s>]*)/u;
2344

2445
/**
2546
* Parses an HTML tag to extract its name and closing status
2647
* @param {string} tagText The HTML tag text to parse
27-
* @returns {{ name: string; isClosing: boolean; } | null} Object containing tag name and closing status, or null if not a valid tag
48+
* @returns {{ name: string, isClosing: boolean } | null} Object containing tag name and closing status, or null if not a valid tag
2849
*/
2950
function parseHtmlTag(tagText) {
3051
const match = tagText.match(htmlTagNamePattern);
3152
if (match) {
32-
const tagName = match[1].toLowerCase();
53+
const tagName = match.groups.tagName.toLowerCase();
3354
const isClosing = tagName.startsWith("/");
3455

3556
return {
@@ -65,105 +86,94 @@ export default {
6586

6687
create(context) {
6788
const { sourceCode } = context;
68-
/** @type {Array<Link>} */
69-
const bareUrls = [];
7089

7190
/**
72-
* Finds bare URLs in markdown nodes while handling HTML tags.
73-
* When an HTML tag is found, it looks for its closing tag and skips all nodes
74-
* between them to prevent checking for bare URLs inside HTML content.
75-
* @param {Paragraph|Heading|TableCell} node The node to process
76-
* @returns {void}
91+
* This array is used to store all `Link` nodes for the final report.
92+
* @type {Array<Link>}
7793
*/
78-
function findBareUrls(node) {
79-
/**
80-
* Recursively traverses the AST to find bare URLs, skipping over HTML blocks.
81-
* @param {Node} currentNode The current AST node being traversed.
82-
* @returns {void}
83-
*/
84-
function traverse(currentNode) {
85-
if (
86-
"children" in currentNode &&
87-
Array.isArray(currentNode.children)
88-
) {
89-
for (let i = 0; i < currentNode.children.length; i++) {
90-
const child = currentNode.children[i];
91-
92-
if (child.type === "html") {
93-
const tagInfo = parseHtmlTag(
94-
sourceCode.getText(child),
95-
);
96-
97-
if (tagInfo && !tagInfo.isClosing) {
98-
for (
99-
let j = i + 1;
100-
j < currentNode.children.length;
101-
j++
102-
) {
103-
const nextChild = currentNode.children[j];
104-
if (nextChild.type === "html") {
105-
const closingTagInfo = parseHtmlTag(
106-
sourceCode.getText(nextChild),
107-
);
108-
if (
109-
closingTagInfo?.name ===
110-
tagInfo.name &&
111-
closingTagInfo?.isClosing
112-
) {
113-
i = j;
114-
break;
115-
}
116-
}
117-
}
118-
continue;
119-
}
120-
}
121-
122-
if (child.type === "link") {
123-
const text = sourceCode.getText(child);
124-
const { url } = child;
125-
126-
if (
127-
text === url ||
128-
url === `http://${text}` ||
129-
url === `mailto:${text}`
130-
) {
131-
bareUrls.push(child);
132-
}
133-
}
134-
135-
traverse(child);
136-
}
137-
}
138-
}
94+
const linkNodes = [];
13995

140-
traverse(node);
96+
/**
97+
* This array is used to store `Link` nodes that are estimated to be between opening and closing HTML tags.
98+
* @type {Array<Link>}
99+
*/
100+
const tempLinkNodes = [];
101+
102+
/** @type {string | null} */
103+
let lastTagName = null;
104+
105+
/**
106+
* Resets `tempLinkNodes` and `lastTagName`
107+
* @returns {void}
108+
*/
109+
function reset() {
110+
tempLinkNodes.length = 0;
111+
lastTagName = null;
141112
}
142113

143114
return {
144-
"root:exit"() {
145-
for (const bareUrl of bareUrls) {
146-
context.report({
147-
node: bareUrl,
148-
messageId: "bareUrl",
149-
fix(fixer) {
150-
const text = sourceCode.getText(bareUrl);
151-
return fixer.replaceText(bareUrl, `<${text}>`);
152-
},
153-
});
115+
":matches(heading, paragraph, tableCell) html"(
116+
/** @type {Html} */ node,
117+
) {
118+
const tagInfo = parseHtmlTag(node.value);
119+
120+
if (!tagInfo) {
121+
return;
122+
}
123+
124+
if (!tagInfo.isClosing && lastTagName === null) {
125+
lastTagName = tagInfo.name;
126+
}
127+
128+
if (tagInfo.isClosing && lastTagName === tagInfo.name) {
129+
reset();
154130
}
155131
},
156132

157-
paragraph(node) {
158-
findBareUrls(node);
133+
":matches(heading, paragraph, tableCell) link"(
134+
/** @type {Link} */ node,
135+
) {
136+
if (lastTagName !== null) {
137+
tempLinkNodes.push(node);
138+
} else {
139+
linkNodes.push(node);
140+
}
159141
},
160142

161-
heading(node) {
162-
findBareUrls(node);
143+
"heading:exit"() {
144+
linkNodes.push(...tempLinkNodes);
145+
reset();
163146
},
164147

165-
tableCell(node) {
166-
findBareUrls(node);
148+
"paragraph:exit"() {
149+
linkNodes.push(...tempLinkNodes);
150+
reset();
151+
},
152+
153+
"tableCell:exit"() {
154+
linkNodes.push(...tempLinkNodes);
155+
reset();
156+
},
157+
158+
"root:exit"() {
159+
for (const linkNode of linkNodes) {
160+
const text = sourceCode.getText(linkNode);
161+
const { url } = linkNode;
162+
163+
if (
164+
url === text ||
165+
url === `http://${text}` ||
166+
url === `mailto:${text}`
167+
) {
168+
context.report({
169+
node: linkNode,
170+
messageId: "bareUrl",
171+
fix(fixer) {
172+
return fixer.replaceText(linkNode, `<${text}>`);
173+
},
174+
});
175+
}
176+
}
167177
},
168178
};
169179
},

src/util.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
* @author Nicholas C. Zakas
44
*/
55

6-
/*
6+
//-----------------------------------------------------------------------------
7+
// Regex Patterns
8+
//-----------------------------------------------------------------------------
9+
10+
/**
711
* CommonMark does not allow any white space between the brackets in a reference link.
812
* If that pattern is detected, then it's treated as text and not as a link. This pattern
913
* is used to detect that situation.
@@ -15,6 +19,10 @@ export const illegalShorthandTailPattern = /\]\[\s+\]$/u;
1519
*/
1620
export const htmlCommentPattern = /<!--[\s\S]*?-->/gu;
1721

22+
//-----------------------------------------------------------------------------
23+
// Helpers
24+
//-----------------------------------------------------------------------------
25+
1826
/**
1927
* Finds the line and column offsets for a given offset in a string.
2028
* @param {string} text The text to search.

tests/rules/no-bare-urls.test.js

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,32 @@ ruleTester.run("no-bare-urls", rule, {
287287
},
288288
],
289289
},
290+
{
291+
code: "<br> Another violation: https://example.com. <br />",
292+
output: "<br> Another violation: <https://example.com>. <br />",
293+
errors: [
294+
{
295+
messageId: "bareUrl",
296+
line: 1,
297+
column: 25,
298+
endLine: 1,
299+
endColumn: 44,
300+
},
301+
],
302+
},
303+
{
304+
code: "<br /> Another violation: https://example.com. <br />",
305+
output: "<br /> Another violation: <https://example.com>. <br />",
306+
errors: [
307+
{
308+
messageId: "bareUrl",
309+
line: 1,
310+
column: 27,
311+
endLine: 1,
312+
endColumn: 46,
313+
},
314+
],
315+
},
290316
{
291317
code: dedent`
292318
<div>
@@ -344,5 +370,64 @@ ruleTester.run("no-bare-urls", rule, {
344370
},
345371
],
346372
},
373+
{
374+
code: "text <>https://example.com</> https://example.com", // Empty tag is not recognized as an HTML node.
375+
output: "text <><https://example.com></> <https://example.com>",
376+
errors: [
377+
{
378+
messageId: "bareUrl",
379+
line: 1,
380+
column: 8,
381+
endLine: 1,
382+
endColumn: 27,
383+
},
384+
{
385+
messageId: "bareUrl",
386+
line: 1,
387+
column: 31,
388+
endLine: 1,
389+
endColumn: 50,
390+
},
391+
],
392+
},
393+
{
394+
code: "<!DOCTYPE html>\nhttps://example.com",
395+
output: "<!DOCTYPE html>\n<https://example.com>",
396+
errors: [
397+
{
398+
messageId: "bareUrl",
399+
line: 2,
400+
column: 1,
401+
endLine: 2,
402+
endColumn: 20,
403+
},
404+
],
405+
},
406+
{
407+
code: "hi <!-- comment --> https://example.com <!-- comment -->",
408+
output: "hi <!-- comment --> <https://example.com> <!-- comment -->",
409+
errors: [
410+
{
411+
messageId: "bareUrl",
412+
line: 1,
413+
column: 21,
414+
endLine: 1,
415+
endColumn: 40,
416+
},
417+
],
418+
},
419+
{
420+
code: "hi <!-- comment --> https://example.com <!-- comment --> <a>https://example.com</a>",
421+
output: "hi <!-- comment --> <https://example.com> <!-- comment --> <a>https://example.com</a>",
422+
errors: [
423+
{
424+
messageId: "bareUrl",
425+
line: 1,
426+
column: 21,
427+
endLine: 1,
428+
endColumn: 40,
429+
},
430+
],
431+
},
347432
],
348433
});

0 commit comments

Comments
 (0)