executablebooks · chrisjsewell · Jan 11, 2023 · Jan 10, 2023 · Jan 10, 2023 · choldgraf
diff --git a/docs/intro.md b/docs/intro.md
@@ -46,7 +46,7 @@ To parse single documents, see the [](docutils.md) section
 ## Write a CommonMark document
 
 MyST is an extension of [CommonMark Markdown](https://commonmark.org/),
-that includes [additional syntax](../syntax/syntax.md) for technical authoring,
+that includes [additional syntax](syntax/syntax.md) for technical authoring,
 which integrates with Docutils and Sphinx.
 
 To start off, create an empty file called `myfile.md` and give it a markdown title and text.

diff --git a/docs/syntax/syntax.md b/docs/syntax/syntax.md
@@ -255,7 +255,8 @@ By default, MyST will resolve link destinations according to the following rules
 3. Destinations which point to a local file path are treated as links to that file.
    - The path must be relative and in [POSIX format](https://en.wikipedia.org/wiki/Path_(computing)#POSIX_and_Unix_paths) (i.e. `/` separators).
    - If the path is to another source file in the project (e.g. a `.md` or `.rst` file),
-    then the link will be to the initial heading in that file.
+    then the link will be to the initial heading in that file or,
+    if the path is followed by a `#target`, to the heading with that "slug" in that file.
 # add possible reference slug, this may be different to the standard name above, 
 # and does not have to be normalised, so we treat it separately 
 if "id" in token.attrs: 
 def _render_finalise(self) -> None: 
     """Finalise the render of the document.""" 
     # attempt to replace id_link references with internal links 
     for refnode in findall(self.document)(nodes.reference): 
     def resolve_myst_ref_doc(self, node: pending_xref): 
         """Resolve a reference, from a markdown link, to another document, 
         optionally with a target id within that document. 
 # add possible reference slug, this may be different to the standard name above, 
 # and does not have to be normalised, so we treat it separately 
 if "id" in token.attrs: 
 def _render_finalise(self) -> None: 
     """Finalise the render of the document.""" 
  
     # attempt to replace id_link references with internal links 
     for refnode in findall(self.document)(nodes.reference): 
     def resolve_myst_ref_doc(self, node: pending_xref): 
         """Resolve a reference, from a markdown link, to another document, 
         optionally with a target id within that document. 
    - If the path is to a non-source file (e.g. a `.png` or `.pdf` file),
     then the link will be to the file itself, e.g. to download it.
 
@@ -290,10 +291,14 @@ Here are some examples:
   - `[Non-source file](example.txt)`
   - [Non-source file](example.txt)
 
-* - Internal heading
+* - Local heading
   - `[Heading](#markdown-links-and-referencing)`
   - [Heading](#markdown-links-and-referencing)
 
+* - Heading in another file
+  - `[Heading](optional.md#auto-generated-header-anchors)`
+  - [Heading](optional.md#auto-generated-header-anchors)
+
 :::
 
 ### Customising destination resolution

diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py
@@ -170,7 +170,6 @@ class MdParserConfig:
         metadata={
             "validator": optional(in_([1, 2, 3, 4, 5, 6, 7])),
             "help": "Heading level depth to assign HTML anchors",
-            "sphinx_only": True,
         },
     )
 
@@ -180,7 +179,7 @@ class MdParserConfig:
             "validator": optional(is_callable),
             "help": "Function for creating heading anchors",
             "global_only": True,
-            "sphinx_only": True,
+            "sphinx_only": True,  # TODO docutils config doesn't handle callables
         },
     )
 

diff --git a/myst_parser/mdit_to_docutils/base.py b/myst_parser/mdit_to_docutils/base.py
@@ -136,6 +136,8 @@ def setup_render(
         self._level_to_elem: dict[int, nodes.document | nodes.section] = {
             0: self.document
         }
+        # mapping of section slug to section node
+        self._slug_to_section: dict[str, nodes.section] = {}
 
     @property
     def sphinx_env(self) -> BuildEnvironment | None:
@@ -236,6 +238,37 @@ def _render_initialise(self) -> None:
     def _render_finalise(self) -> None:
         """Finalise the render of the document."""
 
+        # attempt to replace id_link references with internal links
+        for refnode in findall(self.document)(nodes.reference):
+            if not refnode.get("id_link"):
+                continue
+            target = refnode["refuri"][1:]
+            if target in self._slug_to_section:
+                section_node = self._slug_to_section[target]
+                refnode["refid"] = section_node["ids"][0]
+
+                if not refnode.children:
+                    implicit_text = clean_astext(section_node[0])
+                    refnode += nodes.inline(
+                        implicit_text, implicit_text, classes=["std", "std-ref"]
+                    )
+            else:
+                self.create_warning(
+                    f"local id not found: {refnode['refuri']!r}",
+                    MystWarnings.XREF_MISSING,
+                    line=refnode.line,
+                    append_to=refnode,
+                )
+                refnode["refid"] = target
+            del refnode["refuri"]
+
+        if self._slug_to_section and self.sphinx_env:
+            # save for later reference resolution
+            self.sphinx_env.metadata[self.sphinx_env.docname]["myst_slugs"] = {
+                slug: (snode["ids"][0], clean_astext(snode[0]))
+                for slug, snode in self._slug_to_section.items()
+            }
+
         # log warnings for duplicate reference definitions
         # "duplicate_refs": [{"href": "ijk", "label": "B", "map": [4, 5], "title": ""}],
         for dup_ref in self.md_env.get("duplicate_refs", []):
@@ -713,11 +746,29 @@ def render_heading(self, token: SyntaxTreeNode) -> None:
         with self.current_node_context(title_node):
             self.render_children(token)
 
-        # create a target reference for the section, based on the heading text
+        # create a target reference for the section, based on the heading text.
+        # Note, this is an implicit target, meaning that it is not prioritised,
+        # and is not stored by sphinx for ref resolution
         name = nodes.fully_normalize_name(title_node.astext())
         new_section["names"].append(name)
         self.document.note_implicit_target(new_section, new_section)
 
+        # add possible reference slug, this may be different to the standard name above,
+        # and does not have to be normalised, so we treat it separately
+        if "id" in token.attrs:
+            slug = str(token.attrs["id"])
+            new_section["slug"] = slug
+            if slug in self._slug_to_section:
+                other_node = self._slug_to_section[slug]
+                self.create_warning(
+                    f"duplicate heading slug {slug!r}, other at line {other_node.line}",
+                    MystWarnings.ANCHOR_DUPE,
+                    line=new_section.line,
+                )
+            else:
+                # we store this for later processing on finalise
+                self._slug_to_section[slug] = new_section
+
         # set the section as the current node for subsequent rendering
         self.current_node = new_section
 
@@ -736,19 +787,19 @@ def render_link(self, token: SyntaxTreeNode) -> None:
             or self.md_config.gfm_only
             or self.md_config.all_links_external
         ):
-            if token.info == "auto":  # handles both autolink and linkify
-                return self.render_autolink(token)
-            else:
-                return self.render_external_url(token)
+            return self.render_external_url(token)
 
         href = cast(str, token.attrGet("href") or "")
 
+        if href.startswith("#"):
+            return self.render_id_link(token)
+
         # TODO ideally whether inv_link is enabled could be precomputed
         if "inv_link" in self.md_config.enable_extensions and href.startswith("inv:"):
             return self.create_inventory_link(token)
 
         if token.info == "auto":  # handles both autolink and linkify
-            return self.render_autolink(token)
+            return self.render_external_url(token)
 
         # Check for external URL
         url_scheme = urlparse(href).scheme
@@ -761,20 +812,27 @@ def render_link(self, token: SyntaxTreeNode) -> None:
         return self.render_internal_link(token)
 
     def render_external_url(self, token: SyntaxTreeNode) -> None:
-        """Render link token `[text](link "title")`,
-        where the link has been identified as an external URL::
-
-            <reference refuri="link" title="title">
-                text
-
-        `text` can contain nested syntax, e.g. `[**bold**](url "title")`.
+        """Render link token (including autolink and linkify),
+        where the link has been identified as an external URL.
         """
         ref_node = nodes.reference()
         self.add_line_and_source_path(ref_node, token)
         self.copy_attributes(
             token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"}
         )
-        ref_node["refuri"] = cast(str, token.attrGet("href") or "")
+        ref_node["refuri"] = escapeHtml(token.attrGet("href") or "")  # type: ignore[arg-type]
+        with self.current_node_context(ref_node, append=True):
+            self.render_children(token)
+
+    def render_id_link(self, token: SyntaxTreeNode) -> None:
+        """Render link token like `[text](#id)`, to a local target."""
+        ref_node = nodes.reference()
+        self.add_line_and_source_path(ref_node, token)
+        ref_node["id_link"] = True
+        ref_node["refuri"] = token.attrGet("href") or ""
+        self.copy_attributes(
+            token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"}
+        )
         with self.current_node_context(ref_node, append=True):
             self.render_children(token)
 
@@ -799,17 +857,6 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None:
         with self.current_node_context(ref_node, append=True):
             self.render_children(token)
 
-    def render_autolink(self, token: SyntaxTreeNode) -> None:
-        refuri = escapeHtml(token.attrGet("href") or "")  # type: ignore[arg-type]
-        ref_node = nodes.reference()
-        self.copy_attributes(
-            token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"}
-        )
-        ref_node["refuri"] = refuri
-        self.add_line_and_source_path(ref_node, token)
-        with self.current_node_context(ref_node, append=True):
-            self.render_children(token)
-
     def create_inventory_link(self, token: SyntaxTreeNode) -> None:
         r"""Create a link to an inventory object.
 
@@ -1641,3 +1688,15 @@ def html_meta_to_nodes(
         output.append(pending)
 
     return output
+
+
+def clean_astext(node: nodes.Element) -> str:
+    """Like node.astext(), but ignore images and raw nodes.
+    Copied from sphinx.
+    """
+    node = node.deepcopy()
+    for img in findall(node)(nodes.image):
+        img["alt"] = ""
+    for raw in list(findall(node)(nodes.raw)):
+        raw.parent.remove(raw)
+    return node.astext()
diff --git a/myst_parser/mdit_to_docutils/sphinx_.py b/myst_parser/mdit_to_docutils/sphinx_.py
@@ -11,15 +11,12 @@
 from markdown_it.tree import SyntaxTreeNode
 from sphinx import addnodes
 from sphinx.domains.math import MathDomain
-from sphinx.domains.std import StandardDomain
 from sphinx.environment import BuildEnvironment
 from sphinx.ext.intersphinx import InventoryAdapter
 from sphinx.util import logging
-from sphinx.util.nodes import clean_astext
 
 from myst_parser import inventory
 from myst_parser.mdit_to_docutils.base import DocutilsRenderer
-from myst_parser.warnings_ import MystWarnings
 
 LOGGER = logging.getLogger(__name__)
 
@@ -49,38 +46,42 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None:
             destination = os.path.relpath(
                 os.path.join(include_dir, os.path.normpath(destination)), source_dir
             )
-
+        kwargs = {
+            "refdoc": self.sphinx_env.docname,
+            "reftype": "myst",
+            "refexplicit": len(token.children or []) > 0,
+        }
+        path_dest, *_path_ids = destination.split("#", maxsplit=1)
+        path_id = _path_ids[0] if _path_ids else None
         potential_path = (
-            Path(self.sphinx_env.doc2path(self.sphinx_env.docname)).parent / destination
+            Path(self.sphinx_env.doc2path(self.sphinx_env.docname)).parent / path_dest
             if self.sphinx_env.srcdir  # not set in some test situations
             else None
         )
-        if (
-            potential_path
-            and potential_path.is_file()
-            and not any(
-                destination.endswith(suffix)
-                for suffix in self.sphinx_env.config.source_suffix
-            )
-        ):
-            wrap_node = addnodes.download_reference(
-                refdoc=self.sphinx_env.docname,
-                reftarget=destination,
-                reftype="myst",
-                refdomain=None,  # Added to enable cross-linking
-                refexplicit=len(token.children or []) > 0,
-                refwarn=False,
+        if path_dest == "./":
+            # this is a special case, where we want to reference the current document
+            potential_path = (
+                Path(self.sphinx_env.doc2path(self.sphinx_env.docname))
+                if self.sphinx_env.srcdir
+                else None
             )
-            classes = ["xref", "download", "myst"]
-            text = destination if not token.children else ""
+        if potential_path and potential_path.is_file():
+            docname = self.sphinx_env.path2doc(str(potential_path))
+            if docname:
+                wrap_node = addnodes.pending_xref(
+                    refdomain="doc", reftarget=docname, reftargetid=path_id, **kwargs
+                )
+                classes = ["xref", "myst"]
+                text = ""
+            else:
+                wrap_node = addnodes.download_reference(
+                    refdomain=None, reftarget=path_dest, refwarn=False, **kwargs
+                )
+                classes = ["xref", "download", "myst"]
+                text = destination if not token.children else ""
         else:
             wrap_node = addnodes.pending_xref(
-                refdoc=self.sphinx_env.docname,
-                reftarget=destination,
-                reftype="myst",
-                refdomain=None,  # Added to enable cross-linking
-                refexplicit=len(token.children or []) > 0,
-                refwarn=True,
+                refdomain=None, reftarget=destination, refwarn=True, **kwargs
             )
             classes = ["xref", "myst"]
             text = ""
@@ -112,48 +113,6 @@ def get_inventory_matches(
             )
         )
 
-    def render_heading(self, token: SyntaxTreeNode) -> None:
-        """This extends the docutils method, to allow for the addition of heading ids.
-        These ids are computed by the ``markdown-it-py`` ``anchors_plugin``
-        as "slugs" which are unique to a document.
-
-        The approach is similar to ``sphinx.ext.autosectionlabel``
-        """
-        super().render_heading(token)
-
-        if not isinstance(self.current_node, nodes.section):
-            return
-
-        # create the slug string
-        slug = cast(str, token.attrGet("id"))
-        if slug is None:
-            return
-
-        section = self.current_node
-        doc_slug = (
-            self.sphinx_env.doc2path(self.sphinx_env.docname, base=False) + "#" + slug
-        )
-
-        # save the reference in the standard domain, so that it can be handled properly
-        domain = cast(StandardDomain, self.sphinx_env.get_domain("std"))
-        if doc_slug in domain.labels:
-            other_doc = self.sphinx_env.doc2path(domain.labels[doc_slug][0])
-            self.create_warning(
-                f"duplicate label {doc_slug}, other instance in {other_doc}",
-                MystWarnings.ANCHOR_DUPE,
-                line=section.line,
-            )
-        labelid = section["ids"][0]
-        domain.anonlabels[doc_slug] = self.sphinx_env.docname, labelid
-        domain.labels[doc_slug] = (
-            self.sphinx_env.docname,
-            labelid,
-            clean_astext(section[0]),
-        )
-
-        self.sphinx_env.metadata[self.sphinx_env.docname]["myst_anchors"] = True
-        section["myst-anchor"] = doc_slug
-
     def render_math_block_label(self, token: SyntaxTreeNode) -> None:
         """Render math with referencable labels, e.g. ``$a=1$ (label)``."""
         label = token.info