Skip to content

Commit

Permalink
Return brackets in autolinks behavior back to cmark-gfm
Browse files Browse the repository at this point in the history
When `relaxed-autolinks` is specified, allow only balanced
brackets/curly braces in autolink.
  • Loading branch information
digitalmoksha authored and kivikakk committed Apr 29, 2024
1 parent 00ffbfe commit a842fe9
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 10 deletions.
35 changes: 25 additions & 10 deletions src/parser/autolink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@ pub(crate) fn process_autolinks<'a>(
}
}
b'w' => {
post_org = www_match(arena, contents, i);
post_org = www_match(arena, contents, i, relaxed_autolinks);
if post_org.is_some() {
break;
}
}
b'@' => {
post_org = email_match(arena, contents, i);
post_org = email_match(arena, contents, i, relaxed_autolinks);
if post_org.is_some() {
break;
}
Expand Down Expand Up @@ -85,6 +85,7 @@ fn www_match<'a>(
arena: &'a Arena<AstNode<'a>>,
contents: &[u8],
i: usize,
relaxed_autolinks: bool,
) -> Option<(&'a AstNode<'a>, usize, usize)> {
static WWW_DELIMS: Lazy<[bool; 256]> = Lazy::new(|| {
let mut sc = [false; 256];
Expand All @@ -111,7 +112,7 @@ fn www_match<'a>(
link_end += 1;
}

link_end = autolink_delim(&contents[i..], link_end);
link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks);

let mut url = "http://".to_string();
url.push_str(str::from_utf8(&contents[i..link_end + i]).unwrap());
Expand Down Expand Up @@ -170,12 +171,10 @@ fn is_valid_hostchar(ch: char) -> bool {
!ch.is_whitespace() && !ch.is_punctuation()
}

fn autolink_delim(data: &[u8], mut link_end: usize) -> usize {
fn autolink_delim(data: &[u8], mut link_end: usize, relaxed_autolinks: bool) -> usize {
static LINK_END_ASSORTMENT: Lazy<[bool; 256]> = Lazy::new(|| {
let mut sc = [false; 256];
for c in &[
b'?', b'!', b'.', b',', b':', b'*', b'_', b'~', b'\'', b'"', b'[', b']',
] {
for c in &[b'?', b'!', b'.', b',', b':', b'*', b'_', b'~', b'\'', b'"'] {
sc[*c as usize] = true;
}
sc
Expand All @@ -191,7 +190,22 @@ fn autolink_delim(data: &[u8], mut link_end: usize) -> usize {
while link_end > 0 {
let cclose = data[link_end - 1];

let copen = if cclose == b')' { Some(b'(') } else { None };
// Allow any number of matching parentheses (as recognised in copen/cclose)
// at the end of the URL. If there is a greater number of closing
// parentheses than opening ones, we remove one character from the end of
// the link.
let mut copen = if cclose == b')' { Some(b'(') } else { None };

if relaxed_autolinks && copen.is_none() {
// allow balancing of `[]` and `{}` just like `()`
copen = if cclose == b']' {
Some(b'[')
} else if cclose == b'}' {
Some(b'{')
} else {
None
};
}

if LINK_END_ASSORTMENT[cclose as usize] {
link_end -= 1;
Expand Down Expand Up @@ -266,7 +280,7 @@ fn url_match<'a>(
link_end += 1;
}

link_end = autolink_delim(&contents[i..], link_end);
link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks);

let url = str::from_utf8(&contents[i - rewind..i + link_end])
.unwrap()
Expand All @@ -292,6 +306,7 @@ fn email_match<'a>(
arena: &'a Arena<AstNode<'a>>,
contents: &[u8],
i: usize,
relaxed_autolinks: bool,
) -> Option<(&'a AstNode<'a>, usize, usize)> {
static EMAIL_OK_SET: Lazy<[bool; 256]> = Lazy::new(|| {
let mut sc = [false; 256];
Expand Down Expand Up @@ -365,7 +380,7 @@ fn email_match<'a>(
return None;
}

link_end = autolink_delim(&contents[i..], link_end);
link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks);
if link_end == 0 {
return None;
}
Expand Down
67 changes: 67 additions & 0 deletions src/tests/autolink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,51 @@ fn autolink_no_link_bad() {
);
}

/// A trailing `)` stays inside an autolinked URL only when it is matched by an
/// earlier `(`; a surplus closing parenthesis is rendered after the link.
/// Every case is expected to render identically with and without
/// `relaxed_autolinks`.
#[test]
fn autolink_parentheses_balanced() {
    // (markdown input, expected HTML output) pairs.
    let examples = [
        (
            "http://www.pokemon.com/Pikachu_(Electric)",
            "<p><a href=\"http://www.pokemon.com/Pikachu_(Electric)\">http://www.pokemon.com/Pikachu_(Electric)</a></p>\n",
        ),
        (
            "http://www.pokemon.com/Pikachu_((Electric)",
            "<p><a href=\"http://www.pokemon.com/Pikachu_((Electric)\">http://www.pokemon.com/Pikachu_((Electric)</a></p>\n",
        ),
        (
            "http://www.pokemon.com/Pikachu_(Electric))",
            "<p><a href=\"http://www.pokemon.com/Pikachu_(Electric)\">http://www.pokemon.com/Pikachu_(Electric)</a>)</p>\n",
        ),
        (
            "http://www.pokemon.com/Pikachu_((Electric))",
            "<p><a href=\"http://www.pokemon.com/Pikachu_((Electric))\">http://www.pokemon.com/Pikachu_((Electric))</a></p>\n",
        ),
    ];

    // First pass: autolink extension only.
    for &(markdown, html) in examples.iter() {
        html_opts!([extension.autolink], markdown, html);
    }

    // Second pass: the same cases with relaxed autolinks switched on.
    for &(markdown, html) in examples.iter() {
        html_opts!([extension.autolink, parse.relaxed_autolinks], markdown, html);
    }
}

/// Without `relaxed_autolinks`, square brackets are not balanced: the surplus
/// `]` remains part of the link, while the trailing `...` is left outside it.
#[test]
fn autolink_brackets_unbalanced() {
    let markdown = "http://example.com/[abc]]...\n";
    let html =
        "<p><a href=\"http://example.com/%5Babc%5D%5D\">http://example.com/[abc]]</a>...</p>\n";

    html_opts!([extension.autolink], markdown, html);
}

#[test]
fn autolink_ignore_links_in_brackets() {
let examples = [
Expand Down Expand Up @@ -102,6 +147,28 @@ fn autolink_relaxed_links_in_brackets() {
}
}

/// With `relaxed_autolinks`, square brackets are balanced like parentheses:
/// the matched `[abc]` stays in the link and the surplus `]` is rendered
/// after it, followed by the trailing `...`.
#[test]
fn autolink_relaxed_links_brackets_balanced() {
    let markdown = "http://example.com/[abc]]...\n";
    let html =
        "<p><a href=\"http://example.com/%5Babc%5D\">http://example.com/[abc]</a>]...</p>\n";

    html_opts!([extension.autolink, parse.relaxed_autolinks], markdown, html);
}

/// With `relaxed_autolinks`, curly braces are balanced like parentheses:
/// the matched `{abc}` stays in the link and the surplus `}` is rendered
/// after it, followed by the trailing `...`.
#[test]
fn autolink_relaxed_links_curly_braces_balanced() {
    let markdown = "http://example.com/{abc}}...\n";
    let html =
        "<p><a href=\"http://example.com/%7Babc%7D\">http://example.com/{abc}</a>}...</p>\n";

    html_opts!([extension.autolink, parse.relaxed_autolinks], markdown, html);
}

#[test]
fn autolink_relaxed_links_schemes() {
let examples = [
Expand Down

0 comments on commit a842fe9

Please sign in to comment.