diff --git a/src/html.rs b/src/html.rs index 55bf5bc0..6c521720 100644 --- a/src/html.rs +++ b/src/html.rs @@ -725,33 +725,31 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Text(ref literal) => { + // No sourcepos. if entering { self.escape(literal.as_bytes())?; } } NodeValue::LineBreak => { + // No sourcepos. if entering { - self.output.write_all(b"\n")?; + self.output.write_all(b"
\n")?; } } NodeValue::SoftBreak => { + // No sourcepos. if entering { if self.options.render.hardbreaks { - self.output.write_all(b"\n")?; + self.output.write_all(b"
\n")?; } else { self.output.write_all(b"\n")?; } } } NodeValue::Code(NodeCode { ref literal, .. }) => { + // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; self.escape(literal.as_bytes())?; self.output.write_all(b"")?; } @@ -773,52 +771,47 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::Strong => { + // No sourcepos. let parent_node = node.parent(); if !self.options.render.gfm_quirks || (parent_node.is_none() || !matches!(parent_node.unwrap().data.borrow().value, NodeValue::Strong)) { if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } } NodeValue::Emph => { + // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Strikethrough => { + // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Superscript => { + // No sourcepos. if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } } NodeValue::Link(ref nl) => { + // No sourcepos. if entering { - self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Image(ref nl) => { + // No sourcepos. if entering { - self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::FootnoteDefinition(ref nfd) => { + // No sourcepos. if entering { if self.footnote_ix == 0 { - self.output.write_all(b"\n
    \n")?; + .write_all(b"
    \n
      \n")?; } self.footnote_ix += 1; - self.output.write_all(b"")?; } else { @@ -970,18 +959,14 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::FootnoteReference(ref nfr) => { + // No sourcepos. if entering { let mut ref_id = format!("fnref-{}", nfr.name); - - self.output.write_all(b" 1 { ref_id = format!("{}-{}", ref_id, nfr.ref_num); } - self.output - .write_all(b" class=\"footnote-ref\"> HtmlFormatter<'o, 'c> { } } NodeValue::Escaped => { + // No sourcepos. if self.options.render.escaped_char_spans { if entering { - self.output.write_all(b"")?; + self.output.write_all(b"")?; } else { self.output.write_all(b"")?; } @@ -1040,10 +1024,9 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::WikiLink(ref nl) => { + // No sourcepos. if entering { - self.output.write_all(b" HtmlFormatter<'o, 'c> { } } NodeValue::Underline => { + // No sourcepos. if entering { self.output.write_all(b"")?; } else { @@ -1062,6 +1046,7 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::SpoileredText => { + // No sourcepos. if entering { self.output.write_all(b"")?; } else { @@ -1069,6 +1054,7 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> { } } NodeValue::EscapedTag(ref net) => { + // No sourcepos. self.output.write_all(net.as_bytes())?; } } diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index a22a4874..33407e4e 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1648,13 +1648,12 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { self.pos, self.pos, ); - inl.data.borrow_mut().sourcepos.start.column = self.brackets[brackets_len - 1] + inl.data.borrow_mut().sourcepos.start = self.brackets[brackets_len - 1] .inl_text .data .borrow() .sourcepos - .start - .column; + .start; inl.data.borrow_mut().sourcepos.end.column = usize::try_from(self.pos as isize + self.column_offset + self.block_offset as isize) .unwrap(); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0993dadf..b6987a97 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -755,9 +755,14 @@ pub struct RenderOptions { /// ``` pub list_style: ListStyleType, - /// Include source position attributes in XML output. + /// Include source position attributes in HTML and XML output. /// - /// Not yet compatible with extension.description_lists. + /// Sourcepos information is reliable for all core block items, and most + /// extensions. The description lists extension still has issues; see + /// . + /// + /// Sourcepos information is **not** reliable for inlines. See + /// for a discussion. /// /// ```rust /// # use comrak::{markdown_to_commonmark_xml, Options}; diff --git a/src/tests/core.rs b/src/tests/core.rs index 3b5adefa..0ed9419b 100644 --- a/src/tests/core.rs +++ b/src/tests/core.rs @@ -495,3 +495,109 @@ fn case_insensitive_safety() { "\n", ); } + +#[test] +fn link_sourcepos_baseline() { + assert_ast_match!( + [], + "[ABCD](/)\n", + (document (1:1-1:9) [ + (paragraph (1:1-1:9) [ + (link (1:1-1:9) [ + (text (1:2-1:5) "ABCD") + ]) + ]) + ]) + ); +} + +// https://github.com/kivikakk/comrak/issues/301 +#[test] +fn link_sourcepos_newline() { + assert_ast_match!( + [], + "[AB\nCD](/)\n", + (document (1:1-2:6) [ + (paragraph (1:1-2:6) [ + (link (1:1-2:6) [ + (text (1:2-1:3) "AB") + (softbreak (1:4-1:4)) + (text (2:1-2:2) "CD") + ]) + ]) + ]) + ); +} + +// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960. +#[ignore] +#[test] +fn link_sourcepos_truffle() { + assert_ast_match!( + [], + "- A\n[![B](/B.png)](/B)\n", + (document (1:1-2:18) [ + (list (1:1-2:18) [ + (item (1:1-2:18) [ + (paragraph (1:3-2:18) [ + (text (1:3-1:3) "A") + (softbreak (1:4-1:4)) + (link (2:1-2:18) [ + (image (2:2-2:13) [ + (text (2:4-2:4) "B") + ]) + ]) + ]) + ]) + ]) + ]) + ); +} + +#[test] +fn link_sourcepos_truffle_twist() { + assert_ast_match!( + [], + "- A\n [![B](/B.png)](/B)\n", + (document (1:1-2:20) [ + (list (1:1-2:20) [ + (item (1:1-2:20) [ + (paragraph (1:3-2:20) [ + (text (1:3-1:3) "A") + (softbreak (1:4-1:4)) + (link (2:3-2:20) [ + (image (2:4-2:15) [ + (text (2:6-2:6) "B") + ]) + ]) + ]) + ]) + ]) + ]) + ); +} + +// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960. +#[ignore] +#[test] +fn link_sourcepos_truffle_bergamot() { + assert_ast_match!( + [], + "- A\n [![B](/B.png)](/B)\n", + (document (1:1-2:21) [ + (list (1:1-2:21) [ + (item (1:1-2:21) [ + (paragraph (1:3-2:21) [ + (text (1:3-1:3) "A") + (softbreak (1:4-1:4)) + (link (2:4-2:21) [ + (image (2:5-2:16) [ + (text (2:7-2:7) "B") + ]) + ]) + ]) + ]) + ]) + ]) + ); +} diff --git a/src/tests/escaped_char_spans.rs b/src/tests/escaped_char_spans.rs index 7e2d747b..5b8bbf33 100644 --- a/src/tests/escaped_char_spans.rs +++ b/src/tests/escaped_char_spans.rs @@ -1,17 +1,10 @@ use super::*; use ntest::test_case; -// html_opts! does a roundtrip check unless sourcepos is set. -// These cases don't work roundtrip, because converting to commonmark -// automatically escapes certain characters. -#[test_case("\\@user", "

      @user

      \n")] -#[test_case("This\\@that", "

      This@that

      \n")] +#[test_case("\\@user", "

      @user

      \n")] +#[test_case("This\\@that", "

      This@that

      \n")] fn escaped_char_spans(markdown: &str, html: &str) { - html_opts!( - [render.escaped_char_spans, render.sourcepos], - markdown, - html - ); + html_opts!([render.escaped_char_spans], markdown, html, no_roundtrip); } #[test_case("\\@user", "

      @user

      \n")] diff --git a/src/tests/fuzz.rs b/src/tests/fuzz.rs index 47cd1fb6..21223729 100644 --- a/src/tests/fuzz.rs +++ b/src/tests/fuzz.rs @@ -47,7 +47,7 @@ fn footnote_def() { render.hardbreaks ], "\u{15}\u{b}\r[^ ]:", - "

      \u{15}\u{b}
      \n[^ ]:

      \n", + "

      \u{15}\u{b}
      \n[^ ]:

      \n", ); } diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index 19b14445..afcaa07e 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -1,20 +1,19 @@ use super::*; -// html_opts! does a roundtrip check unless sourcepos is set. -// These cases don't work roundtrip, because converting to commonmark -// automatically escapes certain characters. #[test] fn wikilinks_does_not_unescape_html_entities_in_link_label() { html_opts!( - [extension.wikilinks_title_after_pipe, render.sourcepos], + [extension.wikilinks_title_after_pipe], concat!("This is [[<script>alert(0)</script>|a <link]]",), - concat!("

      This is a <link

      \n"), + concat!("

      This is a <link

      \n"), + no_roundtrip, ); html_opts!( - [extension.wikilinks_title_before_pipe, render.sourcepos], + [extension.wikilinks_title_before_pipe], concat!("This is [[a <link|<script>alert(0)</script>]]",), - concat!("

      This is a <link

      \n"), + concat!("

      This is a <link

      \n"), + no_roundtrip, ); }

      a b c d e f g