Skip to content
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 92 additions & 89 deletions src/rules/no-bare-urls.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,33 @@
* @author xbinaryx
*/

/*
* Here's a note on how the approach (algorithm) works:
*
* - When entering an `Html` node that is a child of a `Heading`, `Paragraph` or `TableCell`,
* we check whether it is an opening or closing tag.
* If we encounter an opening tag and no tag name is currently stored, we store its name in `lastTagName`.
* (`lastTagName` serves as state that represents whether we're between opening and closing HTML tags.)
* If we encounter a closing tag whose name matches `lastTagName`, we reset `lastTagName` and clear `tempLinkNodes`.
*
* - When entering a `Link` node that is a child of a `Heading`, `Paragraph` or `TableCell`,
* we check whether it is between opening and closing HTML tags.
* If it's between opening and closing HTML tags, we add it to `tempLinkNodes`.
* If it's not between opening and closing HTML tags, we add it to `linkNodes`.
*
* - When exiting a `Heading`, `Paragraph` or `TableCell`, we add all remaining `tempLinkNodes` to `linkNodes`.
* Any `tempLinkNodes` still remaining at this point followed an opening tag that was never closed (e.g. `<br> ... <br>`), so they are not actually between opening and closing HTML tags and must still be checked.
* If no `tempLinkNodes` remain, every such link was enclosed by a matching closing tag and was already discarded when that closing tag was encountered.
*
* - When exiting a `root` node, we check every collected `Link` node and report those that are bare URLs.
*/

//-----------------------------------------------------------------------------
// Type Definitions
//-----------------------------------------------------------------------------

/**
* @import { Node, Heading, Paragraph, TableCell, Link } from "mdast";
* @import { Link, Html } from "mdast";
* @import { MarkdownRuleDefinition } from "../types.js";
* @typedef {"bareUrl"} NoBareUrlsMessageIds
* @typedef {[]} NoBareUrlsOptions
Expand All @@ -19,17 +40,17 @@
// Helpers
//-----------------------------------------------------------------------------

const htmlTagNamePattern = /^<([^!>][^/\s>]*)/u;
const htmlTagNamePattern = /^<(?<tagName>[^!>][^/\s>]*)/u;

/**
* Parses an HTML tag to extract its name and closing status
* @param {string} tagText The HTML tag text to parse
* @returns {{ name: string; isClosing: boolean; } | null} Object containing tag name and closing status, or null if not a valid tag
* @returns {{ name: string, isClosing: boolean } | null} Object containing tag name and closing status, or null if not a valid tag
*/
function parseHtmlTag(tagText) {
const match = tagText.match(htmlTagNamePattern);
if (match) {
const tagName = match[1].toLowerCase();
const tagName = match.groups.tagName.toLowerCase();
const isClosing = tagName.startsWith("/");

return {
Expand Down Expand Up @@ -65,105 +86,87 @@ export default {

create(context) {
const { sourceCode } = context;
/** @type {Array<Link>} */
const bareUrls = [];

/** This array is used to store all `Link` nodes for the final report. @type {Array<Link>} */
const linkNodes = [];
/** This array is used to store `Link` nodes that are estimated to be between opening and closing HTML tags. @type {Array<Link>} */
const tempLinkNodes = [];

/** @type {string | null} */
let lastTagName = null;

/**
* Finds bare URLs in markdown nodes while handling HTML tags.
* When an HTML tag is found, it looks for its closing tag and skips all nodes
* between them to prevent checking for bare URLs inside HTML content.
* @param {Paragraph|Heading|TableCell} node The node to process
* Resets `tempLinkNodes` and `lastTagName`
* @returns {void}
*/
function findBareUrls(node) {
/**
* Recursively traverses the AST to find bare URLs, skipping over HTML blocks.
* @param {Node} currentNode The current AST node being traversed.
* @returns {void}
*/
function traverse(currentNode) {
if (
"children" in currentNode &&
Array.isArray(currentNode.children)
) {
for (let i = 0; i < currentNode.children.length; i++) {
const child = currentNode.children[i];

if (child.type === "html") {
const tagInfo = parseHtmlTag(
sourceCode.getText(child),
);

if (tagInfo && !tagInfo.isClosing) {
for (
let j = i + 1;
j < currentNode.children.length;
j++
) {
const nextChild = currentNode.children[j];
if (nextChild.type === "html") {
const closingTagInfo = parseHtmlTag(
sourceCode.getText(nextChild),
);
if (
closingTagInfo?.name ===
tagInfo.name &&
closingTagInfo?.isClosing
) {
i = j;
break;
}
}
}
continue;
}
}

if (child.type === "link") {
const text = sourceCode.getText(child);
const { url } = child;

if (
text === url ||
url === `http://${text}` ||
url === `mailto:${text}`
) {
bareUrls.push(child);
}
}

traverse(child);
}
}
}

traverse(node);
function reset() {
tempLinkNodes.length = 0;
lastTagName = null;
}

return {
"root:exit"() {
for (const bareUrl of bareUrls) {
context.report({
node: bareUrl,
messageId: "bareUrl",
fix(fixer) {
const text = sourceCode.getText(bareUrl);
return fixer.replaceText(bareUrl, `<${text}>`);
},
});
":matches(heading, paragraph, tableCell) html"(
/** @type {Html} */ node,
) {
const tagInfo = parseHtmlTag(node.value);

if (!tagInfo) {
return;
}

if (!tagInfo.isClosing && lastTagName === null) {
lastTagName = tagInfo.name;
}

if (tagInfo.isClosing && lastTagName === tagInfo.name) {
reset();
}
},

":matches(heading, paragraph, tableCell) link"(
/** @type {Link} */ node,
) {
if (lastTagName !== null) {
tempLinkNodes.push(node);
} else {
linkNodes.push(node);
}
},

paragraph(node) {
findBareUrls(node);
"heading:exit"() {
linkNodes.push(...tempLinkNodes);
reset();
},

heading(node) {
findBareUrls(node);
"paragraph:exit"() {
linkNodes.push(...tempLinkNodes);
reset();
},

tableCell(node) {
findBareUrls(node);
"tableCell:exit"() {
linkNodes.push(...tempLinkNodes);
reset();
},

"root:exit"() {
for (const linkNode of linkNodes) {
const text = sourceCode.getText(linkNode);
const { url } = linkNode;

if (
url === text ||
url === `http://${text}` ||
url === `mailto:${text}`
) {
context.report({
node: linkNode,
messageId: "bareUrl",
fix(fixer) {
return fixer.replaceText(linkNode, `<${text}>`);
},
});
}
}
},
};
},
Expand Down
10 changes: 9 additions & 1 deletion src/util.js
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’ve just added some markers to clarify which category each util belongs to.

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
* @author Nicholas C. Zakas
*/

/*
//-----------------------------------------------------------------------------
// Regex Patterns
//-----------------------------------------------------------------------------

/**
* CommonMark does not allow any white space between the brackets in a reference link.
* If that pattern is detected, then it's treated as text and not as a link. This pattern
* is used to detect that situation.
Expand All @@ -15,6 +19,10 @@ export const illegalShorthandTailPattern = /\]\[\s+\]$/u;
*/
export const htmlCommentPattern = /<!--[\s\S]*?-->/gu;

//-----------------------------------------------------------------------------
// Helpers
//-----------------------------------------------------------------------------

/**
* Finds the line and column offsets for a given offset in a string.
* @param {string} text The text to search.
Expand Down
85 changes: 85 additions & 0 deletions tests/rules/no-bare-urls.test.js
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added additional test cases to more thoroughly verify the changes before and after.

I've confirmed that these tests pass in the previous codebase.

Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,32 @@ ruleTester.run("no-bare-urls", rule, {
},
],
},
{
code: "<br> Another violation: https://example.com. <br />",
output: "<br> Another violation: <https://example.com>. <br />",
errors: [
{
messageId: "bareUrl",
line: 1,
column: 25,
endLine: 1,
endColumn: 44,
},
],
},
{
code: "<br /> Another violation: https://example.com. <br />",
output: "<br /> Another violation: <https://example.com>. <br />",
errors: [
{
messageId: "bareUrl",
line: 1,
column: 27,
endLine: 1,
endColumn: 46,
},
],
},
{
code: dedent`
<div>
Expand Down Expand Up @@ -344,5 +370,64 @@ ruleTester.run("no-bare-urls", rule, {
},
],
},
{
code: "text <>https://example.com</> https://example.com", // Empty tag is not recognized as an HTML node.
output: "text <><https://example.com></> <https://example.com>",
errors: [
{
messageId: "bareUrl",
line: 1,
column: 8,
endLine: 1,
endColumn: 27,
},
{
messageId: "bareUrl",
line: 1,
column: 31,
endLine: 1,
endColumn: 50,
},
],
},
{
code: "<!DOCTYPE html>\nhttps://example.com",
output: "<!DOCTYPE html>\n<https://example.com>",
errors: [
{
messageId: "bareUrl",
line: 2,
column: 1,
endLine: 2,
endColumn: 20,
},
],
},
{
code: "hi <!-- comment --> https://example.com <!-- comment -->",
output: "hi <!-- comment --> <https://example.com> <!-- comment -->",
errors: [
{
messageId: "bareUrl",
line: 1,
column: 21,
endLine: 1,
endColumn: 40,
},
],
},
{
code: "hi <!-- comment --> https://example.com <!-- comment --> <a>https://example.com</a>",
output: "hi <!-- comment --> <https://example.com> <!-- comment --> <a>https://example.com</a>",
errors: [
{
messageId: "bareUrl",
line: 1,
column: 21,
endLine: 1,
endColumn: 40,
},
],
},
],
});
Loading