From 6bc19f1785bd1b814c8be1a93f8632dccf977c83 Mon Sep 17 00:00:00 2001 From: Luna Date: Wed, 30 Oct 2024 10:54:23 -0400 Subject: [PATCH] Add support for subscript extension --- src/cm.rs | 5 +++++ src/html.rs | 12 ++++++++++++ src/main.rs | 2 ++ src/nodes.rs | 7 +++++++ src/parser/inlines.rs | 24 ++++++++++++++++-------- src/parser/mod.rs | 17 +++++++++++++++++ src/tests.rs | 1 + src/tests/api.rs | 2 ++ src/tests/subscript.rs | 10 ++++++++++ src/xml.rs | 1 + 10 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 src/tests/subscript.rs diff --git a/src/cm.rs b/src/cm.rs index 9d46d387..33a983be 100644 --- a/src/cm.rs +++ b/src/cm.rs @@ -396,6 +396,7 @@ impl<'a, 'o, 'c> CommonMarkFormatter<'a, 'o, 'c> { } NodeValue::Math(ref math) => self.format_math(math, allow_wrap, entering), NodeValue::WikiLink(ref nl) => return self.format_wikilink(nl, entering), + NodeValue::Subscript => self.format_subscript(), NodeValue::Underline => self.format_underline(), NodeValue::SpoileredText => self.format_spoiler(), NodeValue::EscapedTag(ref net) => self.format_escaped_tag(net), @@ -711,6 +712,10 @@ impl<'a, 'o, 'c> CommonMarkFormatter<'a, 'o, 'c> { write!(self, "^").unwrap(); } + fn format_subscript(&mut self) { + write!(self, "%").unwrap(); + } + fn format_underline(&mut self) { write!(self, "__").unwrap(); } diff --git a/src/html.rs b/src/html.rs index 421458f5..9b41ea3b 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1123,6 +1123,18 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { self.output.write_all(b"")?; } } + NodeValue::Subscript => { + // Unreliable sourcepos. + if entering { + self.output.write_all(b"")?; + } else { + self.output.write_all(b"")?; + } + } NodeValue::Underline => { // Unreliable sourcepos. 
if entering { diff --git a/src/main.rs b/src/main.rs index ac59dab3..480a5bd4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -183,6 +183,7 @@ enum Extension { MathCode, WikilinksTitleAfterPipe, WikilinksTitleBeforePipe, + Subscript, Underline, Spoiler, Greentext, @@ -266,6 +267,7 @@ fn main() -> Result<(), Box> { .math_code(exts.contains(&Extension::MathCode)) .wikilinks_title_after_pipe(exts.contains(&Extension::WikilinksTitleAfterPipe)) .wikilinks_title_before_pipe(exts.contains(&Extension::WikilinksTitleBeforePipe)) + .subscript(exts.contains(&Extension::Subscript)) .underline(exts.contains(&Extension::Underline)) .spoiler(exts.contains(&Extension::Spoiler)) .greentext(exts.contains(&Extension::Greentext)) diff --git a/src/nodes.rs b/src/nodes.rs index 2051badd..38b16b80 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -186,6 +186,9 @@ pub enum NodeValue { /// **Inline**. A wikilink to some URL. WikiLink(NodeWikiLink), + /// **Inline**. Subscript. Enabled with `subscript` option. + Subscript, + /// **Inline**. Underline. Enabled with `underline` option. Underline, @@ -513,6 +516,7 @@ impl NodeValue { NodeValue::Escaped => "escaped", NodeValue::Math(..) => "math", NodeValue::WikiLink(..) => "wikilink", + NodeValue::Subscript => "subscript", NodeValue::Underline => "underline", NodeValue::SpoileredText => "spoiler", NodeValue::EscapedTag(_) => "escaped_tag", @@ -762,6 +766,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool { | NodeValue::WikiLink(..) | NodeValue::Strikethrough | NodeValue::Superscript + | NodeValue::Subscript | NodeValue::SpoileredText | NodeValue::Underline // XXX: this is quite a hack: the EscapedTag _contains_ whatever was @@ -789,6 +794,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool { | NodeValue::WikiLink(..) | NodeValue::FootnoteReference(..) 
| NodeValue::Superscript + | NodeValue::Subscript | NodeValue::SpoileredText | NodeValue::Underline ), @@ -808,6 +814,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool { | NodeValue::WikiLink(..) | NodeValue::FootnoteReference(..) | NodeValue::Superscript + | NodeValue::Subscript | NodeValue::SpoileredText | NodeValue::Underline | NodeValue::ShortCode(..) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index acea0ea0..700efd9e 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -160,6 +160,9 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { if options.extension.shortcodes { s.special_chars[b':' as usize] = true; } + if options.extension.subscript { + s.special_chars[b'%' as usize] = true; + } if options.extension.underline { s.special_chars[b'_' as usize] = true; } @@ -286,6 +289,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { Some(self.handle_delim(b'^')) } '$' => Some(self.handle_dollars()), + '%' if self.options.extension.subscript => Some(self.handle_delim(b'%')), '|' if self.options.extension.spoiler => Some(self.handle_delim(b'|')), _ => { let endpos = self.find_special_char(); @@ -384,7 +388,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { // This array is an important optimization that prevents searching down // the stack for openers we've previously searched for and know don't // exist, preventing exponential blowup on pathological cases. - let mut openers_bottom: [usize; 12] = [stack_bottom; 12]; + let mut openers_bottom: [usize; 13] = [stack_bottom; 13]; // This is traversing the stack from the top to the bottom, setting `closer` to // the delimiter directly above `stack_bottom`. 
In the case where we are processing @@ -408,13 +412,14 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { let mut mod_three_rule_invoked = false; let ix = match c.delim_char { - b'|' => 0, - b'~' => 1, - b'^' => 2, - b'"' => 3, - b'\'' => 4, - b'_' => 5, - b'*' => 6 + (if c.can_open { 3 } else { 0 }) + (c.length % 3), + b'%' => 0, + b'|' => 1, + b'~' => 2, + b'^' => 3, + b'"' => 4, + b'\'' => 5, + b'_' => 6, + b'*' => 7 + (if c.can_open { 3 } else { 0 }) + (c.length % 3), _ => unreachable!(), }; @@ -463,6 +468,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { || c.delim_char == b'_' || (self.options.extension.strikethrough && c.delim_char == b'~') || (self.options.extension.superscript && c.delim_char == b'^') + || (self.options.extension.subscript && c.delim_char == b'%') || (self.options.extension.spoiler && c.delim_char == b'|') { if opener_found { @@ -1105,6 +1111,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { NodeValue::Strikethrough } else if self.options.extension.superscript && opener_char == b'^' { NodeValue::Superscript + } else if self.options.extension.subscript && opener_char == b'%' { + NodeValue::Subscript } else if self.options.extension.spoiler && opener_char == b'|' { if use_delims == 2 { NodeValue::SpoileredText diff --git a/src/parser/mod.rs b/src/parser/mod.rs index cf74cbd8..8d55b175 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -458,6 +458,23 @@ pub struct ExtensionOptions { #[builder(default)] pub wikilinks_title_before_pipe: bool, + /// Enables subscripts using percent signs + /// + /// ```md + /// H%2%O + /// ``` + /// + /// ``` + /// # use comrak::{markdown_to_html, Options}; + /// let mut options = Options::default(); + /// options.extension.subscript = true; + /// + /// assert_eq!(markdown_to_html("H%2%O", &options), + /// "

<p>H<sub>2</sub>O</p>

\n"); + /// ``` + #[builder(default)] + pub subscript: bool, + /// Enables underlines using double underscores /// /// ```md diff --git a/src/tests.rs b/src/tests.rs index 64ffee1a..a8e73002 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -24,6 +24,7 @@ mod regressions; mod shortcodes; mod spoiler; mod strikethrough; +mod subscript; mod superscript; mod table; mod tagfilter; diff --git a/src/tests/api.rs b/src/tests/api.rs index b06e2a24..229b9171 100644 --- a/src/tests/api.rs +++ b/src/tests/api.rs @@ -70,6 +70,7 @@ fn exercise_full_api() { let _extension = extension .wikilinks_title_after_pipe(true) .wikilinks_title_before_pipe(true) + .subscript(true) .underline(true) .spoiler(true) .greentext(true); @@ -270,6 +271,7 @@ fn exercise_full_api() { nodes::NodeValue::WikiLink(nl) => { let _: String = nl.url; } + nodes::NodeValue::Subscript => {} nodes::NodeValue::Underline => {} nodes::NodeValue::SpoileredText => {} nodes::NodeValue::EscapedTag(data) => { diff --git a/src/tests/subscript.rs b/src/tests/subscript.rs new file mode 100644 index 00000000..d5766203 --- /dev/null +++ b/src/tests/subscript.rs @@ -0,0 +1,10 @@ +use super::*; + +#[test] +fn subscript() { + html_opts!( + [extension.subscript], + concat!("H%2%O\n"), + concat!("

<p>H<sub>2</sub>O</p>

\n"), + ); +} diff --git a/src/xml.rs b/src/xml.rs index 1afa5cf8..db3aa3a2 100644 --- a/src/xml.rs +++ b/src/xml.rs @@ -284,6 +284,7 @@ impl<'o, 'c> XmlFormatter<'o, 'c> { self.escape(nl.url.as_bytes())?; self.output.write_all(b"\"")?; } + NodeValue::Subscript => {} NodeValue::Underline => {} NodeValue::SpoileredText => {} NodeValue::EscapedTag(ref data) => {