From a842fe9d1a2ed7e1d97a4690c55c2e937fe3df31 Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Sun, 28 Apr 2024 12:47:50 -0500 Subject: [PATCH] Return brackets in autolinks behavior back to cmark-gfm When `relaxed-autolinks` is specified, allow only balanced brackets/curly braces in autolink. --- src/parser/autolink.rs | 35 +++++++++++++++------- src/tests/autolink.rs | 67 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 10 deletions(-) diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index a66c321f..287dc64d 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -47,13 +47,13 @@ pub(crate) fn process_autolinks<'a>( } } b'w' => { - post_org = www_match(arena, contents, i); + post_org = www_match(arena, contents, i, relaxed_autolinks); if post_org.is_some() { break; } } b'@' => { - post_org = email_match(arena, contents, i); + post_org = email_match(arena, contents, i, relaxed_autolinks); if post_org.is_some() { break; } @@ -85,6 +85,7 @@ fn www_match<'a>( arena: &'a Arena>, contents: &[u8], i: usize, + relaxed_autolinks: bool, ) -> Option<(&'a AstNode<'a>, usize, usize)> { static WWW_DELIMS: Lazy<[bool; 256]> = Lazy::new(|| { let mut sc = [false; 256]; @@ -111,7 +112,7 @@ fn www_match<'a>( link_end += 1; } - link_end = autolink_delim(&contents[i..], link_end); + link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks); let mut url = "http://".to_string(); url.push_str(str::from_utf8(&contents[i..link_end + i]).unwrap()); @@ -170,12 +171,10 @@ fn is_valid_hostchar(ch: char) -> bool { !ch.is_whitespace() && !ch.is_punctuation() } -fn autolink_delim(data: &[u8], mut link_end: usize) -> usize { +fn autolink_delim(data: &[u8], mut link_end: usize, relaxed_autolinks: bool) -> usize { static LINK_END_ASSORTMENT: Lazy<[bool; 256]> = Lazy::new(|| { let mut sc = [false; 256]; - for c in &[ - b'?', b'!', b'.', b',', b':', b'*', b'_', b'~', b'\'', b'"', b'[', b']', - ] { + for c in &[b'?', b'!', b'.', b',', b':', b'*', b'_', b'~', b'\'', b'"'] { sc[*c as usize] = true; } sc @@ -191,7 +190,22 @@ fn autolink_delim(data: &[u8], mut link_end: usize) -> usize { while link_end > 0 { let cclose = data[link_end - 1]; - let copen = if cclose == b')' { Some(b'(') } else { None }; + // Allow any number of matching parentheses (as recognised in copen/cclose) + // at the end of the URL. If there is a greater number of closing + // parentheses than opening ones, we remove one character from the end of + // the link. + let mut copen = if cclose == b')' { Some(b'(') } else { None }; + + if relaxed_autolinks && copen.is_none() { + // allow balancing of `[]` and `{}` just like `()` + copen = if cclose == b']' { + Some(b'[') + } else if cclose == b'}' { + Some(b'{') + } else { + None + }; + } if LINK_END_ASSORTMENT[cclose as usize] { link_end -= 1; @@ -266,7 +280,7 @@ fn url_match<'a>( link_end += 1; } - link_end = autolink_delim(&contents[i..], link_end); + link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks); let url = str::from_utf8(&contents[i - rewind..i + link_end]) .unwrap() @@ -292,6 +306,7 @@ fn email_match<'a>( arena: &'a Arena>, contents: &[u8], i: usize, + relaxed_autolinks: bool, ) -> Option<(&'a AstNode<'a>, usize, usize)> { static EMAIL_OK_SET: Lazy<[bool; 256]> = Lazy::new(|| { let mut sc = [false; 256]; @@ -365,7 +380,7 @@ fn email_match<'a>( return None; } - link_end = autolink_delim(&contents[i..], link_end); + link_end = autolink_delim(&contents[i..], link_end, relaxed_autolinks); if link_end == 0 { return None; } diff --git a/src/tests/autolink.rs b/src/tests/autolink.rs index a8b4f2c1..7dd2db33 100644 --- a/src/tests/autolink.rs +++ b/src/tests/autolink.rs @@ -52,6 +52,51 @@ fn autolink_no_link_bad() { ); } +#[test] +fn autolink_parentheses_balanced() { + let examples = [ + [ + "http://www.pokemon.com/Pikachu_(Electric)", + "

http://www.pokemon.com/Pikachu_(Electric)

\n", + ], + [ + "http://www.pokemon.com/Pikachu_((Electric)", + "

http://www.pokemon.com/Pikachu_((Electric)

\n", + ], + [ + "http://www.pokemon.com/Pikachu_(Electric))", + "

http://www.pokemon.com/Pikachu_(Electric))

\n", + ], + [ + "http://www.pokemon.com/Pikachu_((Electric))", + "

http://www.pokemon.com/Pikachu_((Electric))

\n", + ], + ]; + + for example in examples { + html_opts!([extension.autolink], example[0], example[1]); + } + + for example in examples { + html_opts!( + [extension.autolink, parse.relaxed_autolinks], + example[0], + example[1] + ); + } +} + +#[test] +fn autolink_brackets_unbalanced() { + html_opts!( + [extension.autolink], + concat!("http://example.com/[abc]]...\n"), + concat!( + "

http://example.com/[abc]]...

\n" + ), + ); +} + #[test] fn autolink_ignore_links_in_brackets() { let examples = [ @@ -102,6 +147,28 @@ fn autolink_relaxed_links_in_brackets() { } } +#[test] +fn autolink_relaxed_links_brackets_balanced() { + html_opts!( + [extension.autolink, parse.relaxed_autolinks], + concat!("http://example.com/[abc]]...\n"), + concat!( + "

http://example.com/[abc]]...

\n" + ), + ); +} + +#[test] +fn autolink_relaxed_links_curly_braces_balanced() { + html_opts!( + [extension.autolink, parse.relaxed_autolinks], + concat!("http://example.com/{abc}}...\n"), + concat!( + "

http://example.com/{abc}}...

\n" + ), + ); +} + #[test] fn autolink_relaxed_links_schemes() { let examples = [