Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inline sourcepos fixes. #439

Merged
merged 8 commits into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 31 additions & 45 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -725,33 +725,31 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Text(ref literal) => {
// No sourcepos.
if entering {
self.escape(literal.as_bytes())?;
}
}
NodeValue::LineBreak => {
// No sourcepos.
if entering {
self.output.write_all(b"<br")?;
self.render_sourcepos(node)?;
self.output.write_all(b" />\n")?;
self.output.write_all(b"<br />\n")?;
}
}
NodeValue::SoftBreak => {
// No sourcepos.
if entering {
if self.options.render.hardbreaks {
self.output.write_all(b"<br")?;
self.render_sourcepos(node)?;
self.output.write_all(b" />\n")?;
self.output.write_all(b"<br />\n")?;
} else {
self.output.write_all(b"\n")?;
}
}
}
NodeValue::Code(NodeCode { ref literal, .. }) => {
// No sourcepos.
if entering {
self.output.write_all(b"<code")?;
self.render_sourcepos(node)?;
self.output.write_all(b">")?;
self.output.write_all(b"<code>")?;
self.escape(literal.as_bytes())?;
self.output.write_all(b"</code>")?;
}
Expand All @@ -773,52 +771,47 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Strong => {
// No sourcepos.
let parent_node = node.parent();
if !self.options.render.gfm_quirks
|| (parent_node.is_none()
|| !matches!(parent_node.unwrap().data.borrow().value, NodeValue::Strong))
{
if entering {
self.output.write_all(b"<strong")?;
self.render_sourcepos(node)?;
self.output.write_all(b">")?;
self.output.write_all(b"<strong>")?;
} else {
self.output.write_all(b"</strong>")?;
}
}
}
NodeValue::Emph => {
// No sourcepos.
if entering {
self.output.write_all(b"<em")?;
self.render_sourcepos(node)?;
self.output.write_all(b">")?;
self.output.write_all(b"<em>")?;
} else {
self.output.write_all(b"</em>")?;
}
}
NodeValue::Strikethrough => {
// No sourcepos.
if entering {
self.output.write_all(b"<del")?;
self.render_sourcepos(node)?;
self.output.write_all(b">")?;
self.output.write_all(b"<del>")?;
} else {
self.output.write_all(b"</del>")?;
}
}
NodeValue::Superscript => {
// No sourcepos.
if entering {
self.output.write_all(b"<sup")?;
self.render_sourcepos(node)?;
self.output.write_all(b">")?;
self.output.write_all(b"<sup>")?;
} else {
self.output.write_all(b"</sup>")?;
}
}
NodeValue::Link(ref nl) => {
// No sourcepos.
if entering {
self.output.write_all(b"<a")?;
self.render_sourcepos(node)?;
self.output.write_all(b" href=\"")?;
self.output.write_all(b"<a href=\"")?;
let url = nl.url.as_bytes();
if self.options.render.unsafe_ || !dangerous_url(url) {
self.escape_href(url)?;
Expand All @@ -833,10 +826,9 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Image(ref nl) => {
// No sourcepos.
if entering {
self.output.write_all(b"<img")?;
self.render_sourcepos(node)?;
self.output.write_all(b" src=\"")?;
self.output.write_all(b"<img src=\"")?;
let url = nl.url.as_bytes();
if self.options.render.unsafe_ || !dangerous_url(url) {
self.escape_href(url)?;
Expand Down Expand Up @@ -949,17 +941,14 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::FootnoteDefinition(ref nfd) => {
// No sourcepos.
if entering {
if self.footnote_ix == 0 {
self.output.write_all(b"<section")?;
self.render_sourcepos(node)?;
self.output
.write_all(b" class=\"footnotes\" data-footnotes>\n<ol>\n")?;
.write_all(b"<section class=\"footnotes\" data-footnotes>\n<ol>\n")?;
}
self.footnote_ix += 1;
self.output.write_all(b"<li")?;
self.render_sourcepos(node)?;
self.output.write_all(b" id=\"fn-")?;
self.output.write_all(b"<li id=\"fn-")?;
self.escape_href(nfd.name.as_bytes())?;
self.output.write_all(b"\">")?;
} else {
Expand All @@ -970,18 +959,14 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::FootnoteReference(ref nfr) => {
// No sourcepos.
if entering {
let mut ref_id = format!("fnref-{}", nfr.name);

self.output.write_all(b"<sup")?;
self.render_sourcepos(node)?;

if nfr.ref_num > 1 {
ref_id = format!("{}-{}", ref_id, nfr.ref_num);
}

self.output
.write_all(b" class=\"footnote-ref\"><a href=\"#fn-")?;
.write_all(b"<sup class=\"footnote-ref\"><a href=\"#fn-")?;
self.escape_href(nfr.name.as_bytes())?;
self.output.write_all(b"\" id=\"")?;
self.escape_href(ref_id.as_bytes())?;
Expand Down Expand Up @@ -1019,11 +1004,10 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Escaped => {
// No sourcepos.
if self.options.render.escaped_char_spans {
if entering {
self.output.write_all(b"<span data-escaped-char")?;
self.render_sourcepos(node)?;
self.output.write_all(b">")?;
self.output.write_all(b"<span data-escaped-char>")?;
} else {
self.output.write_all(b"</span>")?;
}
Expand All @@ -1040,10 +1024,9 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::WikiLink(ref nl) => {
// No sourcepos.
if entering {
self.output.write_all(b"<a")?;
self.render_sourcepos(node)?;
self.output.write_all(b" href=\"")?;
self.output.write_all(b"<a href=\"")?;
let url = nl.url.as_bytes();
if self.options.render.unsafe_ || !dangerous_url(url) {
self.escape_href(url)?;
Expand All @@ -1055,20 +1038,23 @@ impl<'o, 'c: 'o> HtmlFormatter<'o, 'c> {
}
}
NodeValue::Underline => {
// No sourcepos.
if entering {
self.output.write_all(b"<u>")?;
} else {
self.output.write_all(b"</u>")?;
}
}
NodeValue::SpoileredText => {
// No sourcepos.
if entering {
self.output.write_all(b"<span class=\"spoiler\">")?;
} else {
self.output.write_all(b"</span>")?;
}
}
NodeValue::EscapedTag(ref net) => {
// No sourcepos.
self.output.write_all(net.as_bytes())?;
}
}
Expand Down
5 changes: 2 additions & 3 deletions src/parser/inlines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1648,13 +1648,12 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
self.pos,
self.pos,
);
inl.data.borrow_mut().sourcepos.start.column = self.brackets[brackets_len - 1]
inl.data.borrow_mut().sourcepos.start = self.brackets[brackets_len - 1]
.inl_text
.data
.borrow()
.sourcepos
.start
.column;
.start;
inl.data.borrow_mut().sourcepos.end.column =
usize::try_from(self.pos as isize + self.column_offset + self.block_offset as isize)
.unwrap();
Expand Down
9 changes: 7 additions & 2 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -755,9 +755,14 @@ pub struct RenderOptions {
/// ```
pub list_style: ListStyleType,

/// Include source position attributes in XML output.
/// Include source position attributes in HTML and XML output.
///
/// Not yet compatible with extension.description_lists.
/// Sourcepos information is reliable for all core block items, and most
/// extensions. The description lists extension still has issues; see
/// <https://github.com/kivikakk/comrak/blob/3bb6d4ce/src/tests/description_lists.rs#L60-L125>.
///
/// Sourcepos information is **not** reliable for inlines. See
/// <https://github.com/kivikakk/comrak/pull/439> for a discussion.
///
/// ```rust
/// # use comrak::{markdown_to_commonmark_xml, Options};
Expand Down
106 changes: 106 additions & 0 deletions src/tests/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -495,3 +495,109 @@ fn case_insensitive_safety() {
"<p><a href=\"\">a</a> <a href=\"\">b</a> <a href=\"\">c</a> <a href=\"\">d</a> <a href=\"\">e</a> <a href=\"\">f</a> <a href=\"\">g</a></p>\n",
);
}

#[test]
fn link_sourcepos_baseline() {
assert_ast_match!(
[],
"[ABCD](/)\n",
(document (1:1-1:9) [
(paragraph (1:1-1:9) [
(link (1:1-1:9) [
(text (1:2-1:5) "ABCD")
])
])
])
);
}

// https://github.com/kivikakk/comrak/issues/301
#[test]
fn link_sourcepos_newline() {
assert_ast_match!(
[],
"[AB\nCD](/)\n",
(document (1:1-2:6) [
(paragraph (1:1-2:6) [
(link (1:1-2:6) [
(text (1:2-1:3) "AB")
(softbreak (1:4-1:4))
(text (2:1-2:2) "CD")
])
])
])
);
}

// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960.
#[ignore]
#[test]
fn link_sourcepos_truffle() {
assert_ast_match!(
[],
"- A\n[![B](/B.png)](/B)\n",
(document (1:1-2:18) [
(list (1:1-2:18) [
(item (1:1-2:18) [
(paragraph (1:3-2:18) [
(text (1:3-1:3) "A")
(softbreak (1:4-1:4))
(link (2:1-2:18) [
(image (2:2-2:13) [
(text (2:4-2:4) "B")
])
])
])
])
])
])
);
}

#[test]
fn link_sourcepos_truffle_twist() {
assert_ast_match!(
[],
"- A\n [![B](/B.png)](/B)\n",
(document (1:1-2:20) [
(list (1:1-2:20) [
(item (1:1-2:20) [
(paragraph (1:3-2:20) [
(text (1:3-1:3) "A")
(softbreak (1:4-1:4))
(link (2:3-2:20) [
(image (2:4-2:15) [
(text (2:6-2:6) "B")
])
])
])
])
])
])
);
}

// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960.
#[ignore]
#[test]
fn link_sourcepos_truffle_bergamot() {
assert_ast_match!(
[],
"- A\n [![B](/B.png)](/B)\n",
(document (1:1-2:21) [
(list (1:1-2:21) [
(item (1:1-2:21) [
(paragraph (1:3-2:21) [
(text (1:3-1:3) "A")
(softbreak (1:4-1:4))
(link (2:4-2:21) [
(image (2:5-2:16) [
(text (2:7-2:7) "B")
])
])
])
])
])
])
);
}
13 changes: 3 additions & 10 deletions src/tests/escaped_char_spans.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
use super::*;
use ntest::test_case;

// html_opts! does a roundtrip check unless sourcepos is set.
// These cases don't work roundtrip, because converting to commonmark
// automatically escapes certain characters.
#[test_case("\\@user", "<p data-sourcepos=\"1:1-1:6\"><span data-escaped-char data-sourcepos=\"1:1-1:2\">@</span>user</p>\n")]
#[test_case("This\\@that", "<p data-sourcepos=\"1:1-1:10\">This<span data-escaped-char data-sourcepos=\"1:5-1:6\">@</span>that</p>\n")]
#[test_case("\\@user", "<p><span data-escaped-char>@</span>user</p>\n")]
#[test_case("This\\@that", "<p>This<span data-escaped-char>@</span>that</p>\n")]
fn escaped_char_spans(markdown: &str, html: &str) {
html_opts!(
[render.escaped_char_spans, render.sourcepos],
markdown,
html
);
html_opts!([render.escaped_char_spans], markdown, html, no_roundtrip);
}

#[test_case("\\@user", "<p>@user</p>\n")]
Expand Down
2 changes: 1 addition & 1 deletion src/tests/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fn footnote_def() {
render.hardbreaks
],
"\u{15}\u{b}\r[^ ]:",
"<p data-sourcepos=\"1:1-2:5\">\u{15}\u{b}<br data-sourcepos=\"1:3-1:3\" />\n[^ ]:</p>\n",
"<p data-sourcepos=\"1:1-2:5\">\u{15}\u{b}<br />\n[^ ]:</p>\n",
);
}

Expand Down
Loading