From 4904a4fbca75efb584bfa327595c646bf892cc42 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Mon, 9 Jan 2023 20:47:06 +0100 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20NEW:=20Add=20`inv=5Flink`=20extensi?= =?UTF-8?q?on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 4 +- docs/conf.py | 1 + docs/configuration.md | 8 +- docs/docutils.md | 2 +- docs/faq/index.md | 8 +- docs/intro.md | 4 +- docs/syntax/optional.md | 29 +-- docs/syntax/roles-and-directives.md | 6 +- docs/syntax/syntax.md | 219 ++++++++++++++++-- myst_parser/config/main.py | 29 +++ myst_parser/mdit_to_docutils/base.py | 151 +++++++++++- myst_parser/mdit_to_docutils/sphinx_.py | 20 ++ myst_parser/warnings_.py | 6 + tests/test_inventory.py | 16 ++ tests/test_renderers/fixtures/myst-config.txt | 73 ++++++ tests/test_renderers/test_myst_config.py | 6 +- 16 files changed, 523 insertions(+), 59 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37a50b77..70bd6e94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -363,7 +363,7 @@ In particular for users, this update alters the parsing of tables to be consiste ### New Features ✨ -- **Task lists** utilise the [markdown-it-py tasklists plugin](markdown_it:md/plugins), and are applied to Markdown list items starting with `[ ]` or `[x]`. +- **Task lists** utilise the [markdown-it-py tasklists plugin](inv:markdown_it#md/plugins), and are applied to Markdown list items starting with `[ ]` or `[x]`. 
```markdown - [ ] An item that needs doing @@ -541,7 +541,7 @@ substitutions: {{ key1 }} ``` -The substitutions are assessed as [jinja2 expressions](http://jinja.palletsprojects.com/) and includes the [Sphinx Environment](https://www.sphinx-doc.org/en/master/extdev/envapi.html) as `env`, so you can do powerful thinks like: +The substitutions are assessed as [jinja2 expressions](http://jinja.palletsprojects.com/) and includes the [Sphinx Environment](inv:sphinx#extdev/envapi) as `env`, so you can do powerful thinks like: ``` {{ [key1, env.docname] | join('/') }} diff --git a/docs/conf.py b/docs/conf.py index 301fa1f1..09726ff9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -97,6 +97,7 @@ "substitution", "tasklist", "attrs_inline", + "inv_link", ] myst_number_code_blocks = ["typescript"] myst_heading_anchors = 2 diff --git a/docs/configuration.md b/docs/configuration.md index 286a612a..8125d9a5 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -69,6 +69,9 @@ Full details in the [](syntax/extensions) section. 
amsmath : enable direct parsing of [amsmath](https://ctan.org/pkg/amsmath) LaTeX equations +attrs_inline +: Enable inline attribute parsing, [see here](syntax/attributes) for details + colon_fence : Enable code fences using `:::` delimiters, [see here](syntax/colon_fence) for details @@ -87,6 +90,9 @@ html_admonition html_image : Convert HTML `<img>` elements to sphinx image nodes, [see here](syntax/images) for details +inv_link +: Enable the `inv:` schema for Markdown link destinations, [see here](syntax/inv_links) for details + linkify : Automatically identify "bare" web URLs and add hyperlinks @@ -117,7 +123,7 @@ WARNING: Non-consecutive header level increase; H1 to H3 [myst.header] **In general, if your build logs any warnings, you should either fix them or [raise an Issue](https://github.com/executablebooks/MyST-Parser/issues/new/choose) if you think the warning is erroneous.** -However, in some circumstances if you wish to suppress the warning you can use the [`suppress_warnings`](https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-suppress_warnings) configuration option, e.g. +However, in some circumstances if you wish to suppress the warning you can use the <inv:sphinx#suppress_warnings> configuration option, e.g. ```python suppress_warnings = ["myst.header"] diff --git a/docs/docutils.md b/docs/docutils.md index 10ea237e..9ec4aa15 100644 --- a/docs/docutils.md +++ b/docs/docutils.md @@ -36,7 +36,7 @@ The commands are based on the [Docutils Front-End Tools](https://docutils.source ::: :::{versionadded} 0.19.0 -`myst-suppress-warnings` replicates the functionality of sphinx's [`suppress_warnings`](https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-suppress_warnings) for `myst.` warnings in the `docutils` CLI. +`myst-suppress-warnings` replicates the functionality of sphinx's <inv:sphinx#suppress_warnings> for `myst.` warnings in the `docutils` CLI.
::: The CLI commands can also utilise the [`docutils.conf` configuration file](https://docutils.sourceforge.io/docs/user/config.html) to configure the behaviour of the CLI commands. For example: diff --git a/docs/faq/index.md b/docs/faq/index.md index 3f0c27c8..902dde6b 100644 --- a/docs/faq/index.md +++ b/docs/faq/index.md @@ -102,7 +102,7 @@ If you encounter any issues with this feature, please don't hesitate to report i (howto/autodoc)= ### Use `sphinx.ext.autodoc` in Markdown files -The [Sphinx extension `autodoc`](sphinx:sphinx.ext.autodoc), which pulls in code documentation from docstrings, is currently hard-coded to parse reStructuredText. +The [Sphinx extension `autodoc`](inv:sphinx#sphinx.ext.autodoc), which pulls in code documentation from docstrings, is currently hard-coded to parse reStructuredText. It is therefore incompatible with MyST's Markdown parser. However, the special [`eval-rst` directive](syntax/directives/parsing) can be used to "wrap" `autodoc` directives: @@ -142,7 +142,7 @@ See the [](syntax/header-anchors) section of extended syntaxes. ::: If you'd like to *automatically* generate targets for each of your section headers, -check out the {external+sphinx:std:doc}`autosectionlabel ` +check out the [autosectionlabel](inv:sphinx#usage/*/autosectionlabel) sphinx feature. You can activate it in your Sphinx site by adding the following to your `conf.py` file: @@ -179,7 +179,7 @@ Moved to [](myst-warnings) ### Sphinx-specific page front matter Sphinx intercepts front matter and stores them within the global environment -(as discussed in the {external+sphinx:std:doc}`sphinx documentation `. +(as discussed in the [sphinx documentation](inv:sphinx#usage/*/field-lists)). There are certain front-matter keys (or their translations) that are also recognised specifically by docutils and parsed to inline Markdown: - `author` @@ -228,7 +228,7 @@ emphasis syntax will now be disabled. 
For example, the following will be rendere *emphasis is now disabled* ``` -For a list of all the syntax elements you can disable, see the [markdown-it parser guide](markdown_it:using). +For a list of all the syntax elements you can disable, see the [markdown-it parser guide](inv:markdown_it#using). ## Common errors and questions diff --git a/docs/intro.md b/docs/intro.md index f34725fb..671e4c5d 100644 --- a/docs/intro.md +++ b/docs/intro.md @@ -28,7 +28,7 @@ conda install -c conda-forge myst-parser (intro/sphinx)= ## Enable MyST in Sphinx -To get started with Sphinx, see their {external+sphinx:std:doc}`quick-start guide `. +To get started with Sphinx, see their [quick-start guide](inv:sphinx#usage/quickstart). To use the MyST parser in Sphinx, simply add the following to your `conf.py` file: @@ -80,7 +80,7 @@ $ myst-docutils-html5 --stylesheet= myfile.md ``` To include this document within a Sphinx project, -include `myfile.md` in a [`toctree` directive](sphinx:toctree-directive) on an index page. +include `myfile.md` in a [`toctree` directive](inv:sphinx#toctree-directive) on an index page. ## Extend CommonMark with roles and directives diff --git a/docs/syntax/optional.md b/docs/syntax/optional.md index 4ad5073a..f9efca74 100644 --- a/docs/syntax/optional.md +++ b/docs/syntax/optional.md @@ -13,16 +13,16 @@ myst: :width: 200px ``` key4: example - confpy: sphinx `conf.py` {external+sphinx:std:doc}`configuration file ` + confpy: sphinx `conf.py` [configuration file](inv:sphinx#usage/configuration) --- (syntax/extensions)= # Syntax Extensions -MyST-Parser is highly configurable, utilising the inherent "plugability" of the [markdown-it-py](markdown_it:index) parser. +MyST-Parser is highly configurable, utilising the inherent "plugability" of the [markdown-it-py](inv:markdown_it#index) parser. The following syntaxes are optional (disabled by default) and can be enabled *via* the sphinx `conf.py` (see also [](sphinx/config-options)). 
-Their goal is generally to add more *Markdown friendly* syntaxes; often enabling and rendering [markdown-it-py plugins](markdown_it:md/plugins) that extend the [CommonMark specification](https://commonmark.org/). +Their goal is generally to add more *Markdown friendly* syntaxes; often enabling and rendering [markdown-it-py plugins](inv:markdown_it#md/plugins) that extend the [CommonMark specification](https://commonmark.org/). To enable all the syntaxes explained below: @@ -36,6 +36,7 @@ myst_enable_extensions = [ "fieldlist", "html_admonition", "html_image", + "inv_link", "linkify", "replacements", "smartquotes", @@ -101,7 +102,7 @@ Math is parsed by adding to the `myst_enable_extensions` list option, in the {{ - `"dollarmath"` for parsing of dollar `$` and `$$` encapsulated math. - `"amsmath"` for direct parsing of [amsmath LaTeX environments](https://ctan.org/pkg/amsmath). -These options enable their respective Markdown parser plugins, as detailed in the [markdown-it plugin guide](markdown_it:md/plugins). +These options enable their respective Markdown parser plugins, as detailed in the [markdown-it plugin guide](inv:markdown_it#md/plugins). :::{versionchanged} 0.13.0 `myst_dmath_enable=True` and `myst_amsmath_enable=True` are deprecated, and replaced by `myst_enable_extensions = ["dollarmath", "amsmath"]` @@ -231,7 +232,7 @@ See [the extended syntax option](syntax/amsmath). 
(syntax/mathjax)= ### Mathjax and math parsing -When building HTML using the {external+sphinx:mod}`sphinx.ext.mathjax ` extension (enabled by default), +When building HTML using the extension (enabled by default), If `dollarmath` is enabled, Myst-Parser injects the `tex2jax_ignore` (MathJax v2) and `mathjax_ignore` (MathJax v3) classes in to the top-level section of each MyST document, and adds the following default MathJax configuration: MathJax version 2 (see [the tex2jax preprocessor](https://docs.mathjax.org/en/v2.7-latest/options/preprocessors/tex2jax.html#configure-tex2jax): @@ -353,7 +354,7 @@ This may lead to unexpected outcomes. ::: -Substitution references are assessed as [Jinja2 expressions](http://jinja.palletsprojects.com) which can use [filters](https://jinja.palletsprojects.com/en/2.11.x/templates/#list-of-builtin-filters), and also contains the {external+sphinx:std:doc}`Sphinx Environment ` in the context (as `env`). +Substitution references are assessed as [Jinja2 expressions](http://jinja.palletsprojects.com) which can use [filters](https://jinja.palletsprojects.com/en/2.11.x/templates/#list-of-builtin-filters), and also contains the [Sphinx Environment](inv:sphinx#extdev/envapi) in the context (as `env`). Therefore you can do things like: ```md @@ -540,7 +541,7 @@ $ myst-anchors -l 2 docs/syntax/optional.md By adding `"deflist"` to `myst_enable_extensions` (in the {{ confpy }}), you will be able to utilise definition lists. -Definition lists utilise the [markdown-it-py deflist plugin](markdown_it:md/plugins), which itself is based on the [Pandoc definition list specification](http://johnmacfarlane.net/pandoc/README.html#definition-lists). +Definition lists utilise the [markdown-it-py deflist plugin](inv:markdown_it#md/plugins), which itself is based on the [Pandoc definition list specification](http://johnmacfarlane.net/pandoc/README.html#definition-lists). 
This syntax can be useful, for example, as an alternative to nested bullet-lists: @@ -619,7 +620,7 @@ Term 3 By adding `"tasklist"` to `myst_enable_extensions` (in the {{ confpy }}), you will be able to utilise task lists. -Task lists utilise the [markdown-it-py tasklists plugin](markdown_it:md/plugins), +Task lists utilise the [markdown-it-py tasklists plugin](inv:markdown_it#md/plugins), and are applied to markdown list items starting with `[ ]` or `[x]`: ```markdown @@ -691,7 +692,7 @@ based on the [reStructureText syntax](https://docutils.sourceforge.io/docs/ref/r print("Hello, world!") ``` -A prominent use case of field lists is for use in API docstrings, as used in [Sphinx's docstring renderers](sphinx:python-domain): +A prominent use case of field lists is for use in API docstrings, as used in [Sphinx's docstring renderers](inv:sphinx#python-domain): ````md ```{py:function} send_message(sender, priority) @@ -726,16 +727,16 @@ Currently `sphinx.ext.autodoc` does not support MyST, see [](howto/autodoc). (syntax/attributes)= ## Inline attributes +:::{versionadded} 0.19 +This feature is in *beta*, and may change in future versions. +It replaces the previous `attrs_image` extension, which is now deprecated. +::: + By adding `"attrs_inline"` to `myst_enable_extensions` (in the {{ confpy }}), you can enable parsing of inline attributes after certain inline syntaxes. This is adapted from [djot inline attributes](https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#inline-attributes), and also related to [pandoc bracketed spans](https://pandoc.org/MANUAL.html#extension-bracketed_spans). -:::{important} -This feature is in *beta*, and may change in future versions. -It replace the previous `attrs_image` extension, which is now deprecated. -::: - Attributes are specified in curly braces after the inline syntax.
Inside the curly braces, the following syntax is recognised: diff --git a/docs/syntax/roles-and-directives.md b/docs/syntax/roles-and-directives.md index 3017b527..df760679 100644 --- a/docs/syntax/roles-and-directives.md +++ b/docs/syntax/roles-and-directives.md @@ -8,8 +8,8 @@ by interpreting a chuck of text as a specific type of markup, according to its n Mostly all [docutils roles](https://docutils.sourceforge.io/docs/ref/rst/roles.html), [docutils directives](https://docutils.sourceforge.io/docs/ref/rst/directives.html), -{external+sphinx:std:doc}`Sphinx roles <usage/restructuredtext/roles>`, or -{external+sphinx:std:doc}`Sphinx directives <usage/restructuredtext/directives>` +[Sphinx roles](inv:sphinx#usage/*/roles), or +[Sphinx directives](inv:sphinx#usage/*/directives) can be used in MyST. ## Syntax @@ -421,6 +421,6 @@ For example: > {sub-ref}`today` | {sub-ref}`wordcount-words` words | {sub-ref}`wordcount-minutes` min read -`today` is replaced by either the date on which the document is parsed, with the format set by [`today_fmt`](https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-today_fmt), or the `today` variable if set in the configuration file. +`today` is replaced by either the date on which the document is parsed, with the format set by <inv:sphinx#today_fmt>, or the `today` variable if set in the configuration file. The reading speed is computed using the `myst_words_per_minute` configuration (see the [Sphinx configuration options](sphinx/config-options)). diff --git a/docs/syntax/syntax.md b/docs/syntax/syntax.md index 0062a551..5d3ac61d 100644 --- a/docs/syntax/syntax.md +++ b/docs/syntax/syntax.md @@ -85,7 +85,7 @@ would be equivalent to: ### Setting HTML Metadata The front-matter can contain the special key `html_meta`; a dict with data to add to the generated HTML as [`<meta>` elements](https://developer.mozilla.org/en-US/docs/Web/HTML/Element/meta). -This is equivalent to using the {external+sphinx:ref}`meta directive <html-meta>`. +This is equivalent to using the [meta directive](inv:sphinx#html-meta).
HTML metadata can also be added globally in the `conf.py` *via* the `myst_html_meta` variable, in which case it will be added to all MyST documents. For each document, the `myst_html_meta` dict will be updated by the document level front-matter `html_meta`, with the front-matter taking precedence. @@ -207,34 +207,203 @@ Is below, but it won't be parsed into the document. ## Markdown Links and Referencing -Markdown links are of the form: `[text](link)`. +### CommonMark link format -If you set the configuration `myst_all_links_external = True` (`False` by default), -then all links will be treated simply as "external" links. -For example, in HTML outputs, `[text](link)` will be rendered as `text`. +CommonMark links come in three forms ([see the spec](https://spec.commonmark.org/0.30/#links)): -Otherwise, links will only be treated as "external" links if they are prefixed with a scheme, -configured with `myst_url_schemes` (by default, `http`, `https`, `ftp`, or `mailto`). -For example, `[example.com](https://example.com)` becomes [example.com](https://example.com). +*Autolinks* are [URIs][uri] surrounded by `<` and `>`, which must always have a scheme: -:::{note} -The `text` will be parsed as nested Markdown, for example `[here's some *emphasised text*](https://example.com)` will be parsed as [here's some *emphasised text*](https://example.com). 
+```md + +``` + +*Inline links* allow for optional explicit text and titles (in HTML titles are rendered as tooltips): + +```md +[Explicit *Markdown* text](destination "optional explicit title") +``` + +or, if the destination contains spaces, + +```md +[Explicit *Markdown* text]( "optional explicit title") +``` + +*Reference links* define the destination separately in the document, and can be used multiple times: + +```md +[Explicit *Markdown* text][label] +[Another link][label] + +[label]: destination "optional explicit title" +``` + +[uri]: https://en.wikipedia.org/wiki/Uniform_Resource_Identifier +[url]: https://en.wikipedia.org/wiki/URL + +### Default destination resolution + +The destination of a link can resolve to either an **external** target, such as a [URL] to another website, +or an **internal** target, such as a file, heading or figure within the same project. + +By default, MyST will resolve link destinations according to the following rules: + +1. All autolinks will be treated as external [URL] links. + +2. Destinations beginning with `http:`, `https:`, `ftp:`, or `mailto:` will be treated as external [URL] links. + +3. Destinations which point to a local file path are treated as links to that file. + - The path must be relative and in [POSIX format](https://en.wikipedia.org/wiki/Path_(computing)#POSIX_and_Unix_paths) (i.e. `/` separators). + - If the path is to another source file in the project (e.g. a `.md` or `.rst` file), + then the link will be to the initial heading in that file. + - If the path is to a non-source file (e.g. a `.png` or `.pdf` file), + then the link will be to the file itself, e.g. to download it. + +4. Destinations beginning with `#` will be treated as a link to a heading "slug" in the same file. + - This requires the `myst_heading_anchors` configuration be set. + - For more details see [](syntax/header-anchors). + +5. 
All other destinations are treated as internal references, which can link to any type of target within the project (see [](syntax/targets)). + +Here are some examples: + +:::{list-table} +:header-rows: 1 + +* - Type + - Syntax + - Rendered + +* - Autolink + - `` + - + +* - External URL + - `[example.com](https://example.com)` + - [example.com](https://example.com) + +* - Internal source file + - `[Source file](syntax.md)` + - [Source file](syntax.md) + +* - Internal non-source file + - `[Non-source file](example.txt)` + - [Non-source file](example.txt) + +* - Internal heading + - `[Heading](#markdown-links-and-referencing)` + - [Heading](#markdown-links-and-referencing) + +::: + +### Customising destination resolution + +You can customise the default destination resolution rules by setting the following [configuration options](../configuration.md): + +`myst_all_links_external` (default: `False`) +: If `True`, then all links will be treated as external links. + +`myst_url_schemes` (default: `["http", "https", "ftp", "mailto"]`) +: A list of [URL] schemes which will be treated as external links. + +`myst_ref_domains` (default: `[]`) +: A list of [sphinx domains](inv:sphinx#domain) which will be allowed for internal links. + For example, `myst_ref_domains = ("std", "py")` will only allow cross-references to `std` and `py` domains. + If the list is empty, then all domains will be allowed. + +(syntax/inv_links)= +### Cross-project (inventory) links + +:::{versionadded} 0.19 +This functionality is currently in *beta*. +It is intended that eventually it will be part of the core syntax. ::: -For "internal" links, myst-parser in Sphinx will attempt to resolve the reference to either a relative document path, or a cross-reference to a target (see [](syntax/targets)): +Each Sphinx HTML build creates a file named `objects.inv` that contains a mapping from referenceable objects to [URIs][uri] relative to the HTML set’s root. 
+Each object is uniquely identified by a `domain`, `type`, and `name`. +As well as the relative location, the object can also include implicit `text` for the reference (like the text for a heading). + +You can use the `myst-inv` command line tool (installed with `myst_parser`) to visualise and filter any remote URL or local file path to this inventory file (or its parent): + +```yaml +# $ myst-inv https://www.sphinx-doc.org/en/master -n index +name: Sphinx +version: 6.2.0 +base_url: https://www.sphinx-doc.org/en/master +objects: + rst: + role: + index: + loc: usage/restructuredtext/directives.html#role-index + text: null + std: + doc: + index: + loc: index.html + text: Welcome +``` + +To load external inventories into your Sphinx project, you must load the [`sphinx.ext.intersphinx` extension](inv:sphinx#usage/*/intersphinx), and set the `intersphinx_mapping` configuration option. +Then also enable the `inv_link` MyST extension e.g.: + +```python +extensions = ["myst_parser", "sphinx.ext.intersphinx"] +intersphinx_mapping = { + "sphinx": ("https://www.sphinx-doc.org/en/master", None), +} +myst_enable_extensions = ["inv_link"] +``` + +:::{dropdown} Docutils configuration -- `[this doc](syntax.md)` will link to a rendered source document: [this doc](syntax.md) - - This is similar to `` {doc}`this doc ` ``; {doc}`this doc `, but allows for document extensions, and parses nested Markdown text. -- `[example text](example.txt)` will link to a non-source (downloadable) file: [example text](example.txt) - - The linked document itself will be copied to the build directory. - - This is similar to `` {download}`example text ` ``; {download}`example text `, but parses nested Markdown text. -- `[reference](syntax/referencing)` will link to an internal cross-reference: [reference](syntax/referencing) - - This is similar to `` {any}`reference ` ``; {any}`reference `, but parses nested Markdown text. 
- You can limit the scope of the cross-reference to specific [sphinx domains](sphinx:domain), by using the `myst_ref_domains` configuration. - For example, `myst_ref_domains = ("std", "py")` will only allow cross-references to `std` and `py` domains. +Use the `docutils.conf` configuration file, for more details see [](myst-docutils). -Additionally, only if [](syntax/header-anchors) are enabled, then internal links to document headers can be used. -For example `[a header](syntax.md#markdown-links-and-referencing)` will link to a header anchor: [a header](syntax.md#markdown-links-and-referencing). +```ini +[general] +myst-inventories: + sphinx: ["https://www.sphinx-doc.org/en/master", null] +myst-enable-extensions: inv_link +``` + +::: + +You can then reference inventory objects by prefixing the `inv` schema to the destination [URI]: `inv:key:domain:type#name`. + +`key`, `domain` and `type` are optional, e.g. for `inv:#name`, all inventories, domains and types will be searched, with a [warning emitted](myst-warnings) if multiple matches are found. + +Additionally, `*` is a wildcard which matches zero or more characters, e.g. `inv:*:std:doc#a*` will match all `std:doc` objects in all inventories, with a `name` beginning with `a`. +Note, to match a literal `*` use `\*`.
+ +Here are some examples: + +:::{list-table} +:header-rows: 1 + +* - Type + - Syntax + - Rendered + +* - Autolink, full + - `` + - + +* - Link, full + - `[Sphinx](inv:sphinx:std:doc#index)` + - [Sphinx](inv:sphinx:std:doc#index) + +* - Autolink, no type + - `` + - + +* - Autolink, no domain + - `` + - + +* - Autolink, only name + - `` + - + +::: (syntax/targets)= @@ -258,7 +427,7 @@ Target headers are defined with this syntax: ``` They can then be referred to with the -{external+sphinx:ref}`ref inline role `: +[`ref` inline role](inv:sphinx#ref-role): ```md {ref}`header_target` @@ -278,7 +447,7 @@ Alternatively using the markdown syntax: [my text](header_target) ``` -is equivalent to using the {external+sphinx:ref}`any inline role `: +is equivalent to using the [`any` inline role](inv:sphinx#any-role): ```md {any}`my text ` @@ -314,7 +483,7 @@ c = "string" ``` You can create and register your own lexer, using the [`pygments.lexers` entry point](https://pygments.org/docs/plugins/#register-plugins), -or within a sphinx extension, with the [`app.add_lexer` method](sphinx:sphinx.application.Sphinx.add_lexer). +or within a sphinx extension, with the [`app.add_lexer` method](inv:sphinx#*.Sphinx.add_lexer). Using the `myst_number_code_blocks` configuration option, you can also control whether code blocks are numbered by line. 
For example, using `myst_number_code_blocks = ["typescript"]`: diff --git a/myst_parser/config/main.py b/myst_parser/config/main.py index 5ca5ec44..7b36b7fc 100644 --- a/myst_parser/config/main.py +++ b/myst_parser/config/main.py @@ -41,6 +41,7 @@ def check_extensions(_, field: dc.Field, value: Any): "fieldlist", "html_admonition", "html_image", + "inv_link", "linkify", "replacements", "smartquotes", @@ -64,6 +65,23 @@ def check_sub_delimiters(_, field: dc.Field, value: Any): ) +def check_inventories(_, field: dc.Field, value: Any): + """Check that the inventories are a dict of {str: (str, Optional[str])}""" + if not isinstance(value, dict): + raise TypeError(f"'{field.name}' is not a dictionary: {value!r}") + for key, val in value.items(): + if not isinstance(key, str): + raise TypeError(f"'{field.name}' key is not a string: {key!r}") + if not isinstance(val, (tuple, list)) or len(val) != 2: + raise TypeError( + f"'{field.name}[{key}]' value is not a 2-item list: {val!r}" + ) + if not isinstance(val[0], str): + raise TypeError(f"'{field.name}[{key}][0]' is not a string: {val[0]}") + if not (val[1] is None or isinstance(val[1], str)): + raise TypeError(f"'{field.name}[{key}][1]' is not a null/string: {val[1]}") + + @dc.dataclass() class MdParserConfig: """Configuration options for the Markdown Parser. 
@@ -304,6 +322,17 @@ class MdParserConfig: }, ) + inventories: Dict[str, Tuple[str, Optional[str]]] = dc.field( + default_factory=dict, + repr=False, + metadata={ + "validator": check_inventories, + "help": "Mapping of key to (url, inv file), for intra-project referencing", + "docutils_only": True, + "global_only": True, + }, + ) + def __post_init__(self): validate_fields(self) diff --git a/myst_parser/mdit_to_docutils/base.py b/myst_parser/mdit_to_docutils/base.py index 2e8096fc..b72ef5f3 100644 --- a/myst_parser/mdit_to_docutils/base.py +++ b/myst_parser/mdit_to_docutils/base.py @@ -4,9 +4,10 @@ import inspect import json import os +import posixpath import re from collections import OrderedDict -from contextlib import contextmanager +from contextlib import contextmanager, suppress from datetime import date, datetime from types import ModuleType from typing import ( @@ -40,6 +41,7 @@ from markdown_it.token import Token from markdown_it.tree import SyntaxTreeNode +from myst_parser import inventory from myst_parser._compat import findall from myst_parser.config.main import MdParserConfig from myst_parser.mocking import ( @@ -93,6 +95,8 @@ def __init__(self, parser: MarkdownIt) -> None: for k, v in inspect.getmembers(self, predicate=inspect.ismethod) if k.startswith("render_") and k != "render_children" } + # these are lazy loaded, when needed + self._inventories: None | dict[str, inventory.InventoryType] = None def __getattr__(self, name: str): """Warn when the renderer has not been setup yet.""" @@ -727,18 +731,27 @@ def render_link(self, token: SyntaxTreeNode) -> None: or any scheme if `myst_url_schemes` is None. 
- Otherwise, forward to `render_internal_link` """ - if token.info == "auto": # handles both autolink and linkify - return self.render_autolink(token) - if ( self.md_config.commonmark_only or self.md_config.gfm_only or self.md_config.all_links_external ): - return self.render_external_url(token) + if token.info == "auto": # handles both autolink and linkify + return self.render_autolink(token) + else: + return self.render_external_url(token) + + href = cast(str, token.attrGet("href") or "") + + # TODO ideally whether inv_link is enabled could be precomputed + if "inv_link" in self.md_config.enable_extensions and href.startswith("inv:"): + return self.create_inventory_link(token) + + if token.info == "auto": # handles both autolink and linkify + return self.render_autolink(token) # Check for external URL - url_scheme = urlparse(cast(str, token.attrGet("href") or "")).scheme + url_scheme = urlparse(href).scheme allowed_url_schemes = self.md_config.url_schemes if (allowed_url_schemes is None and url_scheme) or ( allowed_url_schemes is not None and url_scheme in allowed_url_schemes @@ -797,6 +810,132 @@ def render_autolink(self, token: SyntaxTreeNode) -> None: with self.current_node_context(ref_node, append=True): self.render_children(token) + def create_inventory_link(self, token: SyntaxTreeNode) -> None: + r"""Create a link to an inventory object. + + This assumes the href is of the form `:#`. + The path is of the form `::`, + where each of the parts is optional, hence `:#` is also valid. + Each of the path parts can contain the `*` wildcard, for example: + `:key:*:obj#targe*`. + `\*` is treated as a plain `*`. 
+ """ + + # account for autolinks + if token.info == "auto": + # autolinks escape the HTML, which we don't want + href = token.children[0].content + explicit = False + else: + href = cast(str, token.attrGet("href") or "") + explicit = bool(token.children) + + # split the href up into parts + uri_parts = urlparse(href) + target = uri_parts.fragment + invs, domains, otypes = None, None, None + if uri_parts.path: + path_parts = uri_parts.path.split(":") + with suppress(IndexError): + invs = path_parts[0] + domains = path_parts[1] + otypes = path_parts[2] + + # find the matches + matches = self.get_inventory_matches( + target=target, invs=invs, domains=domains, otypes=otypes + ) + + # warn for 0 or >1 matches + if not matches: + filter_str = inventory.filter_string(invs, domains, otypes, target) + self.create_warning( + f"No matches for {filter_str!r}", + MystWarnings.IREF_MISSING, + line=token_line(token, default=0), + append_to=self.current_node, + ) + return + if len(matches) > 1: + show_num = 3 + filter_str = inventory.filter_string(invs, domains, otypes, target) + matches_str = ", ".join( + [ + inventory.filter_string(m.inv, m.domain, m.otype, m.name) + for m in matches[:show_num] + ] + ) + if len(matches) > show_num: + matches_str += ", ..." 
+ self.create_warning( + f"Multiple matches for {filter_str!r}: {matches_str}", + MystWarnings.IREF_AMBIGUOUS, + line=token_line(token, default=0), + append_to=self.current_node, + ) + + # create the docutils node + match = matches[0] + ref_node = nodes.reference() + ref_node["internal"] = False + ref_node["inv_match"] = inventory.filter_string( + match.inv, match.domain, match.otype, match.name + ) + self.add_line_and_source_path(ref_node, token) + self.copy_attributes( + token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"} + ) + ref_node["refuri"] = ( + posixpath.join(match.base_url, match.loc) if match.base_url else match.loc + ) + if "reftitle" not in ref_node: + ref_node["reftitle"] = f"{match.project} {match.version}".strip() + self.current_node.append(ref_node) + if explicit: + with self.current_node_context(ref_node): + self.render_children(token) + elif match.text: + ref_node.append(nodes.Text(match.text)) + else: + ref_node.append(nodes.Text(match.name)) + + def get_inventory_matches( + self, + *, + invs: str | None, + domains: str | None, + otypes: str | None, + target: str | None, + ) -> list[inventory.InvMatch]: + """Return inventory matches. + + This will be overridden for sphinx, to use intersphinx config. 
+ """ + if self._inventories is None: + self._inventories = {} + for key, (uri, path) in self.md_config.inventories.items(): + load_path = posixpath.join(uri, "objects.inv") if path is None else path + self.reporter.info(f"Loading inventory {key!r}: {load_path}") + try: + inv = inventory.fetch_inventory(load_path, base_url=uri) + except Exception as exc: + self.create_warning( + f"Failed to load inventory {key!r}: {exc}", + MystWarnings.INV_LOAD, + ) + else: + self._inventories[key] = inv + + return list( + inventory.filter_inventories( + self._inventories, + invs=invs, + domains=domains, + otypes=otypes, + targets=target, + ) + ) + def render_html_inline(self, token: SyntaxTreeNode) -> None: self.render_html_block(token) diff --git a/myst_parser/mdit_to_docutils/sphinx_.py b/myst_parser/mdit_to_docutils/sphinx_.py index 3fed1224..c1989edb 100644 --- a/myst_parser/mdit_to_docutils/sphinx_.py +++ b/myst_parser/mdit_to_docutils/sphinx_.py @@ -13,9 +13,11 @@ from sphinx.domains.math import MathDomain from sphinx.domains.std import StandardDomain from sphinx.environment import BuildEnvironment +from sphinx.ext.intersphinx import InventoryAdapter from sphinx.util import logging from sphinx.util.nodes import clean_astext +from myst_parser import inventory from myst_parser.mdit_to_docutils.base import DocutilsRenderer from myst_parser.warnings_ import MystWarnings @@ -92,6 +94,24 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None: with self.current_node_context(inner_node): self.render_children(token) + def get_inventory_matches( + self, + *, + invs: str | None, + domains: str | None, + otypes: str | None, + target: str | None, + ) -> list[inventory.InvMatch]: + return list( + inventory.filter_sphinx_inventories( + InventoryAdapter(self.sphinx_env).named_inventory, + invs=invs, + domains=domains, + otypes=otypes, + targets=target, + ) + ) + def render_heading(self, token: SyntaxTreeNode) -> None: """This extends the docutils method, to allow for the addition 
of heading ids. These ids are computed by the ``markdown-it-py`` ``anchors_plugin`` diff --git a/myst_parser/warnings_.py b/myst_parser/warnings_.py index 78b94914..6325c4af 100644 --- a/myst_parser/warnings_.py +++ b/myst_parser/warnings_.py @@ -32,6 +32,12 @@ class MystWarnings(Enum): # cross-reference resolution XREF_AMBIGUOUS = "xref_ambiguous" """Multiple targets were found for a cross-reference.""" + INV_LOAD = "inv_retrieval" + """Failure to retrieve or load an inventory.""" + IREF_MISSING = "iref_missing" + """A target was not found for an inventory reference.""" + IREF_AMBIGUOUS = "iref_ambiguous" + """Multiple targets were found for an inventory reference.""" LEGACY_DOMAIN = "domains" """A legacy domain found, which does not support `resolve_any_xref`.""" diff --git a/tests/test_inventory.py b/tests/test_inventory.py index 795a1144..825bcccb 100644 --- a/tests/test_inventory.py +++ b/tests/test_inventory.py @@ -3,6 +3,7 @@ import pytest +from myst_parser.config.main import MdParserConfig from myst_parser.inventory import ( filter_inventories, from_sphinx, @@ -14,6 +15,21 @@ STATIC = Path(__file__).parent.absolute() / "static" +@pytest.mark.parametrize( + "value", + [ + None, + {1: 2}, + {"key": 1}, + {"key": [1, 2]}, + {"key": ["a", 1]}, + ], +) +def test_docutils_config_invalid(value): + with pytest.raises((TypeError, ValueError)): + MdParserConfig(inventories=value) + + def test_convert_roundtrip(): with (STATIC / "objects_v2.inv").open("rb") as f: inv = load(f) diff --git a/tests/test_renderers/fixtures/myst-config.txt b/tests/test_renderers/fixtures/myst-config.txt index 562f99b1..0f898cb5 100644 --- a/tests/test_renderers/fixtures/myst-config.txt +++ b/tests/test_renderers/fixtures/myst-config.txt @@ -256,3 +256,76 @@ text :1: (WARNING/2) Invalid 'height' attribute value: '2x' [myst.attribute] :1: (WARNING/2) Invalid 'align' attribute value: 'other' [myst.attribute] . + +[inv_link] --myst-enable-extensions=inv_link +. 
+ +[](inv:#index) +[*explicit*](inv:#index) + +[](inv:key#index) + +[](inv:key:std:label#search) + +[](inv:#in*) + +[](inv:key:*:doc#index) +. + + + + Title + + + Title + + + + explicit + + + Title + + + Title + + + Search Page + + + Search Page + + + Title + + + Title + + + Title + + + Title +. + +[inv_link_error] --myst-enable-extensions=inv_link +. + + + +. + + + + + No matches for '*:*:*:other' [myst.iref_missing] + + + + Multiple matches for '*:*:*:*index': key:std:label:genindex, key:std:label:modindex, key:std:label:py-modindex, ... [myst.iref_ambiguous] + + Index + +:1: (WARNING/2) No matches for '*:*:*:other' [myst.iref_missing] +:3: (WARNING/2) Multiple matches for '*:*:*:*index': key:std:label:genindex, key:std:label:modindex, key:std:label:py-modindex, ... [myst.iref_ambiguous] +. diff --git a/tests/test_renderers/test_myst_config.py b/tests/test_renderers/test_myst_config.py index 0640238a..ae8d9519 100644 --- a/tests/test_renderers/test_myst_config.py +++ b/tests/test_renderers/test_myst_config.py @@ -5,14 +5,16 @@ import pytest from docutils.core import Publisher, publish_string +from pytest_param_files import ParamTestData from myst_parser.parsers.docutils_ import Parser FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures") +INV_PATH = Path(__file__).parent.parent.absolute() / "static" / "objects_v2.inv" @pytest.mark.param_file(FIXTURE_PATH / "myst-config.txt") -def test_cmdline(file_params): +def test_cmdline(file_params: ParamTestData): """The description is parsed as a docutils commandline""" pub = Publisher(parser=Parser()) option_parser = pub.setup_option_parser() @@ -27,6 +29,8 @@ def test_cmdline(file_params): report_stream = StringIO() settings["output_encoding"] = "unicode" settings["warning_stream"] = report_stream + if "inv_" in file_params.title: + settings["myst_inventories"] = {"key": ["https://example.com", str(INV_PATH)]} output = publish_string( file_params.content, parser=Parser(),