Skip to content

Commit da3d6bb

Browse files
committed
👌 IMPROVE: Allow for heading anchor links in docutils
This aligns the treatment of `[](#target)` style links for docutils with sphinx, such that they are linked to a heading slug. The core behaviour for sphinx is not changed, except that failed reference resolution now emits a `myst.xref_missing` warning (as opposed to a `std.ref` one), with a clearer warning message. Also, on failure the reference is still created, for people who wish to suppress the warning (see e.g. #677). This is another step towards jupyter-book/myst-enhancement-proposals#10.
1 parent 797af5f commit da3d6bb

21 files changed

+274
-216
lines changed

docs/intro.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ To parse single documents, see the [](docutils.md) section
4646
## Write a CommonMark document
4747

4848
MyST is an extension of [CommonMark Markdown](https://commonmark.org/),
49-
that includes [additional syntax](../syntax/syntax.md) for technical authoring,
49+
that includes [additional syntax](syntax/syntax.md) for technical authoring,
5050
which integrates with Docutils and Sphinx.
5151

5252
To start off, create an empty file called `myfile.md` and give it a markdown title and text.

docs/syntax/syntax.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,8 @@ By default, MyST will resolve link destinations according to the following rules
255255
3. Destinations which point to a local file path are treated as links to that file.
256256
- The path must be relative and in [POSIX format](https://en.wikipedia.org/wiki/Path_(computing)#POSIX_and_Unix_paths) (i.e. `/` separators).
257257
- If the path is to another source file in the project (e.g. a `.md` or `.rst` file),
258-
then the link will be to the initial heading in that file.
258+
then the link will be to the initial heading in that file or,
259+
if the path is followed by a `#target`, to the heading "slug" in that file.
259260
- If the path is to a non-source file (e.g. a `.png` or `.pdf` file),
260261
then the link will be to the file itself, e.g. to download it.
261262

@@ -290,10 +291,14 @@ Here are some examples:
290291
- `[Non-source file](example.txt)`
291292
- [Non-source file](example.txt)
292293

293-
* - Internal heading
294+
* - Local heading
294295
- `[Heading](#markdown-links-and-referencing)`
295296
- [Heading](#markdown-links-and-referencing)
296297

298+
* - Heading in another file
299+
- `[Heading](optional.md#auto-generated-header-anchors)`
300+
- [Heading](optional.md#auto-generated-header-anchors)
301+
297302
:::
298303

299304
### Customising destination resolution

myst_parser/config/main.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,6 @@ class MdParserConfig:
170170
metadata={
171171
"validator": optional(in_([1, 2, 3, 4, 5, 6, 7])),
172172
"help": "Heading level depth to assign HTML anchors",
173-
"sphinx_only": True,
174173
},
175174
)
176175

@@ -180,7 +179,7 @@ class MdParserConfig:
180179
"validator": optional(is_callable),
181180
"help": "Function for creating heading anchors",
182181
"global_only": True,
183-
"sphinx_only": True,
182+
"sphinx_only": True, # TODO docutils config doesn't handle callables
184183
},
185184
)
186185

myst_parser/mdit_to_docutils/base.py

Lines changed: 84 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ def setup_render(
136136
self._level_to_elem: dict[int, nodes.document | nodes.section] = {
137137
0: self.document
138138
}
139+
# mapping of section slug to section node
140+
self._slug_to_section: dict[str, nodes.section] = {}
139141

140142
@property
141143
def sphinx_env(self) -> BuildEnvironment | None:
@@ -236,6 +238,37 @@ def _render_initialise(self) -> None:
236238
def _render_finalise(self) -> None:
237239
"""Finalise the render of the document."""
238240

241+
# attempt to replace id_link references with internal links
242+
for refnode in findall(self.document)(nodes.reference):
243+
if not refnode.get("id_link"):
244+
continue
245+
target = refnode["refuri"][1:]
246+
if target in self._slug_to_section:
247+
section_node = self._slug_to_section[target]
248+
refnode["refid"] = section_node["ids"][0]
249+
250+
if not refnode.children:
251+
implicit_text = clean_astext(section_node[0])
252+
refnode += nodes.inline(
253+
implicit_text, implicit_text, classes=["std", "std-ref"]
254+
)
255+
else:
256+
self.create_warning(
257+
f"local id not found: {refnode['refuri']!r}",
258+
MystWarnings.XREF_MISSING,
259+
line=refnode.line,
260+
append_to=refnode,
261+
)
262+
refnode["refid"] = target
263+
del refnode["refuri"]
264+
265+
if self._slug_to_section and self.sphinx_env:
266+
# save for later reference resolution
267+
self.sphinx_env.metadata[self.sphinx_env.docname]["myst_slugs"] = {
268+
slug: (snode["ids"][0], clean_astext(snode[0]))
269+
for slug, snode in self._slug_to_section.items()
270+
}
271+
239272
# log warnings for duplicate reference definitions
240273
# "duplicate_refs": [{"href": "ijk", "label": "B", "map": [4, 5], "title": ""}],
241274
for dup_ref in self.md_env.get("duplicate_refs", []):
@@ -713,11 +746,29 @@ def render_heading(self, token: SyntaxTreeNode) -> None:
713746
with self.current_node_context(title_node):
714747
self.render_children(token)
715748

716-
# create a target reference for the section, based on the heading text
749+
# create a target reference for the section, based on the heading text.
750+
# Note, this is an implicit target, meaning that it is not prioritised,
751+
# and is not stored by sphinx for ref resolution
717752
name = nodes.fully_normalize_name(title_node.astext())
718753
new_section["names"].append(name)
719754
self.document.note_implicit_target(new_section, new_section)
720755

756+
# add possible reference slug, this may be different to the standard name above,
757+
# and does not have to be normalised, so we treat it separately
758+
if "id" in token.attrs:
759+
slug = str(token.attrs["id"])
760+
new_section["slug"] = slug
761+
if slug in self._slug_to_section:
762+
other_node = self._slug_to_section[slug]
763+
self.create_warning(
764+
f"duplicate heading slug {slug!r}, other at line {other_node.line}",
765+
MystWarnings.ANCHOR_DUPE,
766+
line=new_section.line,
767+
)
768+
else:
769+
# we store this for later processing on finalise
770+
self._slug_to_section[slug] = new_section
771+
721772
# set the section as the current node for subsequent rendering
722773
self.current_node = new_section
723774

@@ -736,19 +787,19 @@ def render_link(self, token: SyntaxTreeNode) -> None:
736787
or self.md_config.gfm_only
737788
or self.md_config.all_links_external
738789
):
739-
if token.info == "auto": # handles both autolink and linkify
740-
return self.render_autolink(token)
741-
else:
742-
return self.render_external_url(token)
790+
return self.render_external_url(token)
743791

744792
href = cast(str, token.attrGet("href") or "")
745793

794+
if href.startswith("#"):
795+
return self.render_id_link(token)
796+
746797
# TODO ideally whether inv_link is enabled could be precomputed
747798
if "inv_link" in self.md_config.enable_extensions and href.startswith("inv:"):
748799
return self.create_inventory_link(token)
749800

750801
if token.info == "auto": # handles both autolink and linkify
751-
return self.render_autolink(token)
802+
return self.render_external_url(token)
752803

753804
# Check for external URL
754805
url_scheme = urlparse(href).scheme
@@ -761,20 +812,27 @@ def render_link(self, token: SyntaxTreeNode) -> None:
761812
return self.render_internal_link(token)
762813

763814
def render_external_url(self, token: SyntaxTreeNode) -> None:
764-
"""Render link token `[text](link "title")`,
765-
where the link has been identified as an external URL::
766-
767-
<reference refuri="link" title="title">
768-
text
769-
770-
`text` can contain nested syntax, e.g. `[**bold**](url "title")`.
815+
"""Render link token (including autolink and linkify),
816+
where the link has been identified as an external URL.
771817
"""
772818
ref_node = nodes.reference()
773819
self.add_line_and_source_path(ref_node, token)
774820
self.copy_attributes(
775821
token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"}
776822
)
777-
ref_node["refuri"] = cast(str, token.attrGet("href") or "")
823+
ref_node["refuri"] = escapeHtml(token.attrGet("href") or "") # type: ignore[arg-type]
824+
with self.current_node_context(ref_node, append=True):
825+
self.render_children(token)
826+
827+
def render_id_link(self, token: SyntaxTreeNode) -> None:
828+
"""Render link token like `[text](#id)`, to a local target."""
829+
ref_node = nodes.reference()
830+
self.add_line_and_source_path(ref_node, token)
831+
ref_node["id_link"] = True
832+
ref_node["refuri"] = token.attrGet("href") or ""
833+
self.copy_attributes(
834+
token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"}
835+
)
778836
with self.current_node_context(ref_node, append=True):
779837
self.render_children(token)
780838

@@ -799,17 +857,6 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None:
799857
with self.current_node_context(ref_node, append=True):
800858
self.render_children(token)
801859

802-
def render_autolink(self, token: SyntaxTreeNode) -> None:
803-
refuri = escapeHtml(token.attrGet("href") or "") # type: ignore[arg-type]
804-
ref_node = nodes.reference()
805-
self.copy_attributes(
806-
token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"}
807-
)
808-
ref_node["refuri"] = refuri
809-
self.add_line_and_source_path(ref_node, token)
810-
with self.current_node_context(ref_node, append=True):
811-
self.render_children(token)
812-
813860
def create_inventory_link(self, token: SyntaxTreeNode) -> None:
814861
r"""Create a link to an inventory object.
815862
@@ -1641,3 +1688,15 @@ def html_meta_to_nodes(
16411688
output.append(pending)
16421689

16431690
return output
1691+
1692+
1693+
def clean_astext(node: nodes.Element) -> str:
1694+
"""Like node.astext(), but ignore images.
1695+
Copied from sphinx.
1696+
"""
1697+
node = node.deepcopy()
1698+
for img in node.findall(nodes.image):
1699+
img["alt"] = ""
1700+
for raw in list(node.findall(nodes.raw)):
1701+
raw.parent.remove(raw)
1702+
return node.astext()

myst_parser/mdit_to_docutils/sphinx_.py

Lines changed: 29 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,12 @@
1111
from markdown_it.tree import SyntaxTreeNode
1212
from sphinx import addnodes
1313
from sphinx.domains.math import MathDomain
14-
from sphinx.domains.std import StandardDomain
1514
from sphinx.environment import BuildEnvironment
1615
from sphinx.ext.intersphinx import InventoryAdapter
1716
from sphinx.util import logging
18-
from sphinx.util.nodes import clean_astext
1917

2018
from myst_parser import inventory
2119
from myst_parser.mdit_to_docutils.base import DocutilsRenderer
22-
from myst_parser.warnings_ import MystWarnings
2320

2421
LOGGER = logging.getLogger(__name__)
2522

@@ -49,38 +46,42 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None:
4946
destination = os.path.relpath(
5047
os.path.join(include_dir, os.path.normpath(destination)), source_dir
5148
)
52-
49+
kwargs = {
50+
"refdoc": self.sphinx_env.docname,
51+
"reftype": "myst",
52+
"refexplicit": len(token.children or []) > 0,
53+
}
54+
path_dest, *_path_ids = destination.split("#", maxsplit=1)
55+
path_id = _path_ids[0] if _path_ids else None
5356
potential_path = (
54-
Path(self.sphinx_env.doc2path(self.sphinx_env.docname)).parent / destination
57+
Path(self.sphinx_env.doc2path(self.sphinx_env.docname)).parent / path_dest
5558
if self.sphinx_env.srcdir # not set in some test situations
5659
else None
5760
)
58-
if (
59-
potential_path
60-
and potential_path.is_file()
61-
and not any(
62-
destination.endswith(suffix)
63-
for suffix in self.sphinx_env.config.source_suffix
64-
)
65-
):
66-
wrap_node = addnodes.download_reference(
67-
refdoc=self.sphinx_env.docname,
68-
reftarget=destination,
69-
reftype="myst",
70-
refdomain=None, # Added to enable cross-linking
71-
refexplicit=len(token.children or []) > 0,
72-
refwarn=False,
61+
if path_dest == "./":
62+
# this is a special case, where we want to reference the current document
63+
potential_path = (
64+
Path(self.sphinx_env.doc2path(self.sphinx_env.docname))
65+
if self.sphinx_env.srcdir
66+
else None
7367
)
74-
classes = ["xref", "download", "myst"]
75-
text = destination if not token.children else ""
68+
if potential_path and potential_path.is_file():
69+
docname = self.sphinx_env.path2doc(str(potential_path))
70+
if docname:
71+
wrap_node = addnodes.pending_xref(
72+
refdomain="doc", reftarget=docname, reftargetid=path_id, **kwargs
73+
)
74+
classes = ["xref", "myst"]
75+
text = ""
76+
else:
77+
wrap_node = addnodes.download_reference(
78+
refdomain=None, reftarget=path_dest, refwarn=False, **kwargs
79+
)
80+
classes = ["xref", "download", "myst"]
81+
text = destination if not token.children else ""
7682
else:
7783
wrap_node = addnodes.pending_xref(
78-
refdoc=self.sphinx_env.docname,
79-
reftarget=destination,
80-
reftype="myst",
81-
refdomain=None, # Added to enable cross-linking
82-
refexplicit=len(token.children or []) > 0,
83-
refwarn=True,
84+
refdomain=None, reftarget=destination, refwarn=True, **kwargs
8485
)
8586
classes = ["xref", "myst"]
8687
text = ""
@@ -112,48 +113,6 @@ def get_inventory_matches(
112113
)
113114
)
114115

115-
def render_heading(self, token: SyntaxTreeNode) -> None:
116-
"""This extends the docutils method, to allow for the addition of heading ids.
117-
These ids are computed by the ``markdown-it-py`` ``anchors_plugin``
118-
as "slugs" which are unique to a document.
119-
120-
The approach is similar to ``sphinx.ext.autosectionlabel``
121-
"""
122-
super().render_heading(token)
123-
124-
if not isinstance(self.current_node, nodes.section):
125-
return
126-
127-
# create the slug string
128-
slug = cast(str, token.attrGet("id"))
129-
if slug is None:
130-
return
131-
132-
section = self.current_node
133-
doc_slug = (
134-
self.sphinx_env.doc2path(self.sphinx_env.docname, base=False) + "#" + slug
135-
)
136-
137-
# save the reference in the standard domain, so that it can be handled properly
138-
domain = cast(StandardDomain, self.sphinx_env.get_domain("std"))
139-
if doc_slug in domain.labels:
140-
other_doc = self.sphinx_env.doc2path(domain.labels[doc_slug][0])
141-
self.create_warning(
142-
f"duplicate label {doc_slug}, other instance in {other_doc}",
143-
MystWarnings.ANCHOR_DUPE,
144-
line=section.line,
145-
)
146-
labelid = section["ids"][0]
147-
domain.anonlabels[doc_slug] = self.sphinx_env.docname, labelid
148-
domain.labels[doc_slug] = (
149-
self.sphinx_env.docname,
150-
labelid,
151-
clean_astext(section[0]),
152-
)
153-
154-
self.sphinx_env.metadata[self.sphinx_env.docname]["myst_anchors"] = True
155-
section["myst-anchor"] = doc_slug
156-
157116
def render_math_block_label(self, token: SyntaxTreeNode) -> None:
158117
"""Render math with referencable labels, e.g. ``$a=1$ (label)``."""
159118
label = token.info

0 commit comments

Comments
 (0)