diff --git a/.gitmodules b/.gitmodules index d487c0f3..42012c57 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,12 @@ [submodule "vendor/cmark-gfm"] path = vendor/cmark-gfm - url = https://github.com/github/cmark-gfm.git + url = https://github.com/kivikakk/cmark-gfm.git [submodule "vendor/pulldown-cmark"] path = vendor/pulldown-cmark url = https://github.com/raphlinus/pulldown-cmark.git [submodule "vendor/markdown-it"] path = vendor/markdown-it url = https://github.com/rlidwka/markdown-it.rs.git +[submodule "vendor/commonmark-spec"] + path = vendor/commonmark-spec + url = https://github.com/commonmark/commonmark-spec diff --git a/Cargo.lock b/Cargo.lock index bdcfc456..afc18d1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -68,6 +68,16 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "caseless" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808dab3318747be122cb31d36de18d4d1c81277a76f8332a02b81a3d73463d7f" +dependencies = [ + "regex", + "unicode-normalization", +] + [[package]] name = "cc" version = "1.0.78" @@ -123,6 +133,7 @@ name = "comrak" version = "0.24.1" dependencies = [ "arbitrary", + "caseless", "clap", "derive_builder", "emojis", @@ -809,6 +820,21 @@ dependencies = [ "time-core", ] +[[package]] +name = "tinyvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "toml" version = "0.5.10" @@ -864,6 +890,15 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +[[package]] +name = "unicode-normalization" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode_categories" version = "0.1.1" diff --git a/Cargo.toml b/Cargo.toml index 13cab088..9366fce4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ slug = "0.1.4" emojis = { version = "0.6.2", optional = true } arbitrary = { version = "1", optional = true, features = ["derive"] } derive_builder = "0.20.0" +caseless = "0.2.1" [dev-dependencies] ntest = "0.9" diff --git a/README.md b/README.md index 359c7c1f..7c9fa346 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,15 @@ # [Comrak](https://github.com/kivikakk/comrak) -[![Build Status](https://github.com/kivikakk/comrak/actions/workflows/rust.yml/badge.svg)](https://github.com/kivikakk/comrak/actions/workflows/rust.yml) ![Spec -Status: 671/671](https://img.shields.io/badge/specs-671%2F671-brightgreen.svg) +[![Build status](https://github.com/kivikakk/comrak/actions/workflows/rust.yml/badge.svg)](https://github.com/kivikakk/comrak/actions/workflows/rust.yml) +[![CommonMark: 652/652](https://img.shields.io/badge/commonmark-652%2F652-brightgreen.svg)](https://github.com/commonmark/commonmark-spec/blob/9103e341a973013013bb1a80e13567007c5cef6f/spec.txt) +[![GFM: 670/670](https://img.shields.io/badge/gfm-670%2F670-brightgreen.svg)](https://github.com/kivikakk/cmark-gfm/blob/2f13eeedfe9906c72a1843b03552550af7bee29a/test/spec.txt) [![crates.io version](https://img.shields.io/crates/v/comrak.svg)](https://crates.io/crates/comrak) [![docs.rs](https://docs.rs/comrak/badge.svg)](https://docs.rs/comrak) -Rust port of [github's `cmark-gfm`](https://github.com/github/cmark). *Currently synced with release `0.29.0.gfm.13`*. 
+Rust port of [github's `cmark-gfm`](https://github.com/github/cmark-gfm). -- [Installation](#installation) -- [Usage](#usage) -- [Security](#security) -- [Extensions](#extensions) -- [Related projects](#related-projects) -- [Contributing](#contributing) -- [Legal](#legal) +Compliant with [CommonMark 0.31.2](https://spec.commonmark.org/0.31.2/) in default mode. +GFM support synced with release `0.29.0.gfm.13`. ## Installation @@ -26,22 +22,31 @@ comrak = "0.24" Comrak's library supports Rust 1.62.1+. -### Mac & Linux Binaries +### CLI -``` bash -curl https://webinstall.dev/comrak | bash -``` +- Anywhere with a Rust toolchain: + - `cargo install comrak` +- Many Unix distributions: + - `pacman -S comrak` + - `brew install comrak` + - `dnf install comrak` + - `nix run nixpkgs#comrak` -### Windows 10 Binaries - -``` powershell -curl.exe -A "MS" https://webinstall.dev/comrak | powershell -``` +You can also find builds I've published in [GitHub Releases](https://github.com/kivikakk/comrak/releases), but they're limited to machines I have access to at the time of making them\! [webinstall.dev](https://webinstall.dev/comrak/) offers `curl | shell`-style installation of the latest of these for your OS. ## Usage +
+ +Click to expand the CLI --help output. + ``` console $ comrak --help +``` + + + +``` A 100% CommonMark-compatible GitHub Flavored Markdown parser and formatter Usage: comrak [OPTIONS] [FILE]... @@ -54,7 +59,7 @@ Options: -c, --config-file Path to config file containing command-line arguments, or 'none' - [default: /home/runner/.config/comrak/config] + [default: /Users/kivikakk/.config/comrak/config] -i, --inplace To perform an in-place formatting @@ -73,7 +78,11 @@ Options: --gfm Enable GitHub-flavored markdown extensions: strikethrough, tagfilter, table, autolink, and - tasklist. Also enables --github-pre-lang + tasklist. Also enables --github-pre-lang and --gfm-quirks + + --gfm-quirks + Enables GFM-style quirks in output HTML, such as not nesting tags, which + otherwise breaks CommonMark compatibility --relaxed-tasklist-character Enable relaxing which character is allowed in a tasklists @@ -104,7 +113,7 @@ Options: [possible values: strikethrough, tagfilter, table, autolink, tasklist, superscript, footnotes, description-lists, multiline-block-quotes, math-dollars, math-code, - wikilinks-title-after-pipe, wikilinks-title-before-pipe] + wikilinks-title-after-pipe, wikilinks-title-before-pipe, underline, spoiler, greentext] -t, --to Specify output format @@ -140,6 +149,12 @@ Options: --sourcepos Include source position attribute in HTML and XML output + --ignore-setext + Ignore setext headers + + --ignore-empty-links + Ignore empty links + -h, --help Print help information (use `-h` for a summary) @@ -151,6 +166,8 @@ By default, Comrak will attempt to read command-line options from a config file the file does not exist. ``` +
+ And there's a Rust interface. You can use `comrak::markdown_to_html` directly: ``` rust @@ -162,7 +179,6 @@ assert_eq!(markdown_to_html("Hello, **世界**!", &Options::default()), Or you can parse the input into an AST yourself, manipulate it, and then use your desired formatter: ``` rust -extern crate comrak; use comrak::nodes::NodeValue; use comrak::{format_html, parse_document, Arena, Options}; @@ -194,42 +210,24 @@ fn main() { let html = replace_text(&doc, &orig, &repl); println!("{}", html); + // Output: + // + //

<p>This is your input.</p> + // <ol> + // <li>Also <a href="#">your</a> input.</li> + // <li>Certainly <em>your</em> input.</li> + // </ol>
} ``` -## Benchmarking - -For running benchmarks, you will need to [install hyperfine](https://github.com/sharkdp/hyperfine#installation) and optionally cmake. - -If you want to just run the benchmark for `comrak`, with the current state of the repo, you can simply run - -``` bash -make bench-comrak -``` - -This will build comrak in release mode, and run benchmark on it. You will see the time measurements as reported by hyperfine in the console. - -Makefile also provides a way to run benchmarks for `comrak` current state (with your changes), `comrak` main branch, [`cmark-gfm`](https://github.com/github/cmark-gfm), [`pulldown-cmark`](https://github.com/raphlinus/pulldown-cmark) and [`markdown-it.rs`](https://github.com/rlidwka/markdown-it.rs). For this you will need to install `cmake`. After that make sure that you have set-up the git submodules. In case you have not installed submodules when cloning, you can do it by running - -``` bash -git submodule update --init -``` - -After this is done, you can run - -``` bash -make bench-all -``` - -which will run benchmarks across all, and report the time take by each as well as relative time. - -Apart from this, CI is also setup for running benchmarks when a pull request is first opened. It will add a comment with the results on the pull request in a tabular format comparing the 5 versions. After that you can manually trigger this CI by commenting `/run-bench` on the PR, this will update the existing comment with new results. Note benchmarks won't be automatically run on each push. +For a slightly more real-world example, see how I [generate my GitHub user README](https://github.com/kivikakk/kivikakk) from a base document with embedded YAML, which itself has embedded Markdown, or +[check out some of Comrak's dependents on crates.io](https://crates.io/crates/comrak/reverse_dependencies) or [on GitHub](https://github.com/kivikakk/comrak/network/dependents). 
## Security As with [`cmark`](https://github.com/commonmark/cmark) and [`cmark-gfm`](https://github.com/github/cmark-gfm#security), Comrak will scrub raw HTML and potentially dangerous links. This change was introduced in Comrak 0.4.0 in support of a -safe-by-default posture. +safe-by-default posture, and later adopted by our contemporaries. :) To allow these, use the `unsafe_` option (or `--unsafe` with the command line program). If doing so, we recommend the use of a sanitisation library like [`ammonia`](https://github.com/notriddle/ammonia) configured specific to your needs. @@ -252,29 +250,33 @@ Comrak additionally supports its own extensions, which are yet to be specced out - Footnotes - Description lists - Front matter -- Shortcodes +- Multi-line blockquotes - Math -- Multiline Blockquotes +- Emoji shortcodes +- Wikilinks +- Underline +- Spoiler text +- "Greentext" By default none are enabled; they are individually enabled with each parse by setting the appropriate values in the -[`ComrakExtensionOptions` struct](https://docs.rs/comrak/newest/comrak/type.ComrakExtensionOptions.html). +[`ExtensionOptions` struct](https://docs.rs/comrak/latest/comrak/struct.ExtensionOptions.html). ## Plugins -### Codefence syntax highlighter +### Fenced code block syntax highlighting -At the moment syntax highlighting of codefence blocks is the only feature that can be enhanced with plugins. +You can provide your own syntax highlighting engine. Create an implementation of the `SyntaxHighlighterAdapter` trait, and then provide an instance of such adapter to -`Plugins.render.codefence_syntax_highlighter`. For formatting a markdown document with plugins, use the -`markdown_to_html_with_plugins` function, which accepts your plugin as a parameter. +`Plugins.render.codefence_syntax_highlighter`. For formatting a Markdown document with plugins, use the +`markdown_to_html_with_plugins` function, which accepts your plugins object as a parameter. 
See the `syntax_highlighter.rs` and `syntect.rs` examples for more details. #### Syntect [`syntect`](https://github.com/trishume/syntect) is a syntax highlighting library for Rust. By default, `comrak` offers -a plugin for it. In order to utilize it, create an instance of `plugins::syntect::SyntectAdapter` and use it as your +a plugin for it. In order to utilize it, create an instance of `plugins::syntect::SyntectAdapter` and use it in your `Plugins` option. ## Related projects @@ -284,8 +286,8 @@ in terms of code structure. The upside of this is that a change in `cmark-gfm` h Likewise, any bug in `cmark-gfm` is likely to be reproduced in Comrak. This could be considered a pro or a con, depending on your use case. -The downside, of course, is that the code is not what I'd call idiomatic Rust (*so many `RefCell`s*), and while -contributors and I have made it as fast as possible, it simply won't be as fast as some other CommonMark parsers +The downside, of course, is that the code often diverges from idiomatic Rust, especially in the AST's extensive use of `RefCell`, and while +contributors have made it as fast as possible, it simply won't be as fast as some other CommonMark parsers depending on your use-case. Here are some other projects to consider: - [Raph Levien](https://github.com/raphlinus)'s [`pulldown-cmark`](https://github.com/google/pulldown-cmark). It's @@ -295,20 +297,43 @@ depending on your use-case. Here are some other projects to consider: - Know of another library? Please open a PR to add it\! As far as I know, Comrak is the only library to implement all of the [GitHub Flavored Markdown -extensions](https://github.github.com/gfm) to the spec, but this tends to only be important if you want to reproduce -GitHub's Markdown rendering exactly, e.g. in a GitHub client app. +extensions](https://github.github.com/gfm) rigorously. 
+ +## Benchmarking + +You'll need to [install hyperfine](https://github.com/sharkdp/hyperfine#installation), and CMake if you want to compare against `cmark-gfm`. + +If you want to just run the benchmark for the `comrak` binary itself, run: + +``` bash +make bench-comrak +``` + +This will build Comrak in release mode, and run benchmark on it. You will see the time measurements as reported by hyperfine in the console. + +The `Makefile` also provides a way to run benchmarks for `comrak` current state (with your changes), `comrak` main branch, [`cmark-gfm`](https://github.com/github/cmark-gfm), [`pulldown-cmark`](https://github.com/raphlinus/pulldown-cmark) and [`markdown-it.rs`](https://github.com/rlidwka/markdown-it.rs). You'll need CMake, and ensure [submodules are prepared](https://stackoverflow.com/a/10168693/499609). + +``` bash +make bench-all +``` + +This will build and run benchmarks across all, and report the time taken by each as well as relative time. + + + + ## Contributing -Contributions are highly encouraged; where possible I practice [Optimistic Merging](http://hintjens.com/blog:106) as -described by Peter Hintjens. Please keep the [code of conduct](CODE_OF_CONDUCT.md) in mind when interacting with this -project. +Contributions are **highly encouraged**; if you'd like to assist, consider checking out the [`good first issue` label](https://github.com/kivikakk/comrak/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)\! I'm happy to help provide direction and guidance throughout, even if (especially if\!) you're new to Rust or open source. + +Where possible I practice [Optimistic Merging](http://hintjens.com/blog:106) as described by Peter Hintjens. Please keep the [code of conduct](CODE_OF_CONDUCT.md) in mind too. Thank you to Comrak's many contributors for PRs and issues opened\! 
### Code Contributors - +[![Small chart showing Comrak contributors.](https://opencollective.com/comrak/contributors.svg?width=890&button=false)](https://github.com/kivikakk/comrak/graphs/contributors) ### Financial Contributors diff --git a/examples/sample.rs b/examples/sample.rs index fb7b8148..d3e5141f 100644 --- a/examples/sample.rs +++ b/examples/sample.rs @@ -10,46 +10,47 @@ fn small() { } fn large() { - use comrak::nodes::{AstNode, NodeValue}; + use comrak::nodes::NodeValue; use comrak::{format_html, parse_document, Arena, Options}; - // The returned nodes are created in the supplied Arena, and are bound by its lifetime. - let arena = Arena::new(); + fn replace_text(document: &str, orig_string: &str, replacement: &str) -> String { + // The returned nodes are created in the supplied Arena, and are bound by its lifetime. + let arena = Arena::new(); - let root = parse_document( - &arena, - "This is my input.\n\n1. Also my input.\n2. Certainly my input.\n", - &Options::default(), - ); + // Parse the document into a root `AstNode` + let root = parse_document(&arena, document, &Options::default()); - fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F) - where - F: Fn(&'a AstNode<'a>), - { - f(node); - for c in node.children() { - iter_nodes(c, f); + // Iterate over all the descendants of root. + for node in root.descendants() { + if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value { + // If the node is a text node, perform the string replacement. + *text = text.replace(orig_string, replacement) + } } - } - iter_nodes(root, &|node| { - if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value { - let orig = std::mem::take(text); - *text = orig.replace("my", "your"); - } - }); + let mut html = vec![]; + format_html(root, &Options::default(), &mut html).unwrap(); + + String::from_utf8(html).unwrap() + } - let mut html = vec![]; - format_html(root, &Options::default(), &mut html).unwrap(); + fn main() { + let doc = "This is my input.\n\n1. 
Also [my](#) input.\n2. Certainly *my* input.\n"; + let orig = "my"; + let repl = "your"; + let html = replace_text(&doc, &orig, &repl); + + println!("{}", html); + // Output: + // + //

<p>This is your input.</p> + // <ol> + // <li>Also <a href="#">your</a> input.</li> + // <li>Certainly <em>your</em> input.</li> + // </ol>
+ } - assert_eq!( - String::from_utf8(html).unwrap(), - "

<p>This is your input.</p>\n\ - <ol>\n\ - <li>Also your input.</li>\n\ - <li>Certainly your input.</li>\n\ - </ol>
\n" - ); + main() } fn main() { diff --git a/examples/update-readme.rs b/examples/update-readme.rs index fab08d70..c683a8f3 100644 --- a/examples/update-readme.rs +++ b/examples/update-readme.rs @@ -10,6 +10,8 @@ use comrak::{format_commonmark, parse_document, Arena, Options}; const DEPENDENCIES: &str = "[dependencies]\ncomrak = "; const HELP: &str = "$ comrak --help\n"; +const HELP_START: &str = + "A 100% CommonMark-compatible GitHub Flavored Markdown parser and formatter\n"; fn main() -> Result<(), Box> { let arena = Arena::new(); @@ -23,6 +25,8 @@ fn main() -> Result<(), Box> { .unwrap(); let mut in_msrv = false; + let mut next_block_is_help_body = false; + for node in doc.descendants() { match node.data.borrow_mut().value { NodeValue::CodeBlock(ref mut ncb) => { @@ -33,12 +37,20 @@ fn main() -> Result<(), Box> { version_parts.pop(); write!(content, "\"{}\"", version_parts.join(".")).unwrap(); ncb.literal = content; + continue; } // Look for a console code block whose contents starts with the HELP string. - // Replace its contents with the same string and the actual command output. + // The *next* code block contains our help, minus the starting string. 
if ncb.info == "console" && ncb.literal.starts_with(HELP) { - let mut content = HELP.to_string(); + next_block_is_help_body = true; + continue; + } + + if next_block_is_help_body { + next_block_is_help_body = false; + assert!(ncb.info == "" && ncb.literal.starts_with(HELP_START)); + let mut content = String::new(); let mut cmd = std::process::Command::new("cargo"); content.push_str( str::from_utf8( @@ -50,6 +62,7 @@ fn main() -> Result<(), Box> { .unwrap(), ); ncb.literal = content; + continue; } } NodeValue::HtmlInline(ref mut s) => { @@ -68,8 +81,11 @@ fn main() -> Result<(), Box> { } } + let mut options = Options::default(); + options.render.prefer_fenced = true; + let mut out = vec![]; - format_commonmark(doc, &Options::default(), &mut out)?; + format_commonmark(doc, &options, &mut out)?; std::fs::write("README.md", &out)?; Ok(()) diff --git a/script/cibuild b/script/cibuild index 2df91085..794441cc 100755 --- a/script/cibuild +++ b/script/cibuild @@ -16,7 +16,11 @@ PROGRAM_ARG="--program=../../../target/release/comrak --syntax-highlighting none set +e -python3 spec_tests.py --no-normalize --spec spec.txt "$PROGRAM_ARG" \ +# Upstream CommonMark specs. 
+python3 spec_tests.py --no-normalize --spec ../../commonmark-spec/spec.txt "$PROGRAM_ARG" \ + || failed=1 + +python3 spec_tests.py --no-normalize --spec spec.txt "$PROGRAM_ARG --gfm-quirks" \ || failed=1 python3 pathological_tests.py "$PROGRAM_ARG" \ || failed=1 diff --git a/src/cm.rs b/src/cm.rs index 309634c2..e0a51dfa 100644 --- a/src/cm.rs +++ b/src/cm.rs @@ -531,6 +531,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { && !(isspace(literal[literal.len() - 1]) && isspace(literal[literal.len() - 2]))) && !first_in_list_item + && !self.options.render.prefer_fenced { write!(self, " ").unwrap(); write!(self.prefix, " ").unwrap(); diff --git a/src/entity.rs b/src/entity.rs index 7b7a5e02..95f74bfa 100644 --- a/src/entity.rs +++ b/src/entity.rs @@ -36,7 +36,11 @@ pub fn unescape(text: &[u8]) -> Option<(Vec, usize)> { 0 }; - if (1..=8).contains(&num_digits) && i < text.len() && text[i] == b';' { + if i < text.len() + && text[i] == b';' + && (((text[1] == b'x' || text[1] == b'X') && (1..=6).contains(&num_digits)) + || (1..=7).contains(&num_digits)) + { if codepoint == 0 || (0xD800..=0xE000).contains(&codepoint) || codepoint >= 0x110000 { codepoint = 0xFFFD; } diff --git a/src/html.rs b/src/html.rs index 6037d6cf..94ac4698 100644 --- a/src/html.rs +++ b/src/html.rs @@ -289,7 +289,7 @@ pub fn escape(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> { /// The inclusion of characters like "%" in those which are not escaped is /// explained somewhat here: /// -/// https://github.com/github/cmark-gfm/blob/c32ef78bae851cb83b7ad52d0fbff880acdcd44a/src/houdini_href_e.c#L7-L31 +/// /// /// In other words, if a CommonMark user enters: /// @@ -774,8 +774,9 @@ impl<'o> HtmlFormatter<'o> { } NodeValue::Strong => { let parent_node = node.parent(); - if parent_node.is_none() - || !matches!(parent_node.unwrap().data.borrow().value, NodeValue::Strong) + if !self.options.render.gfm_quirks + || (parent_node.is_none() + || !matches!(parent_node.unwrap().data.borrow().value, 
NodeValue::Strong)) { if entering { self.output.write_all(b" String { } /// Render Markdown to CommonMark XML. -/// See https://github.com/commonmark/commonmark-spec/blob/master/CommonMark.dtd. +/// See . pub fn markdown_to_commonmark_xml(md: &str, options: &Options) -> String { markdown_to_commonmark_xml_with_plugins(md, options, &Plugins::default()) } /// Render Markdown to CommonMark XML using plugins. -/// See https://github.com/commonmark/commonmark-spec/blob/master/CommonMark.dtd. +/// See . pub fn markdown_to_commonmark_xml_with_plugins( md: &str, options: &Options, diff --git a/src/main.rs b/src/main.rs index af2a550f..22df9deb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,11 +56,17 @@ struct Cli { #[arg(long)] full_info_string: bool, - /// Enable GitHub-flavored markdown extensions: strikethrough, tagfilter, table, autolink, and tasklist. - /// Also enables --github-pre-lang. + /// Enable GitHub-flavored markdown extensions: strikethrough, tagfilter, + /// table, autolink, and tasklist. Also enables --github-pre-lang and + /// --gfm-quirks. #[arg(long)] gfm: bool, + /// Enables GFM-style quirks in output HTML, such as not nesting + /// tags, which otherwise breaks CommonMark compatibility. + #[arg(long)] + gfm_quirks: bool, + /// Enable relaxing which character is allowed in a tasklists. 
#[arg(long)] relaxed_tasklist_character: bool, @@ -284,6 +290,7 @@ fn main() -> Result<(), Box> { .escaped_char_spans(cli.escaped_char_spans) .ignore_setext(cli.ignore_setext) .ignore_empty_links(cli.ignore_empty_links) + .gfm_quirks(cli.gfm_quirks || cli.gfm) .build()?; let options = Options { diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 4d619db5..a4361da6 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -161,7 +161,7 @@ fn check_domain(data: &[u8], allow_short: bool) -> Option { } fn is_valid_hostchar(ch: char) -> bool { - !ch.is_whitespace() && !ch.is_punctuation() + !ch.is_whitespace() && !(ch.is_punctuation() || ch.is_symbol()) } fn autolink_delim(data: &[u8], mut link_end: usize, relaxed_autolinks: bool) -> usize { diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 281fd601..f288ec50 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -998,20 +998,22 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { let left_flanking = numdelims > 0 && !after_char.is_whitespace() - && !(after_char.is_punctuation() + && !((after_char.is_punctuation() || after_char.is_symbol()) && !before_char.is_whitespace() - && !before_char.is_punctuation()); + && !(before_char.is_punctuation() || before_char.is_symbol())); let right_flanking = numdelims > 0 && !before_char.is_whitespace() - && !(before_char.is_punctuation() + && !((before_char.is_punctuation() || before_char.is_symbol()) && !after_char.is_whitespace() - && !after_char.is_punctuation()); + && !(after_char.is_punctuation() || after_char.is_symbol())); if c == b'_' { ( numdelims, - left_flanking && (!right_flanking || before_char.is_punctuation()), - right_flanking && (!left_flanking || after_char.is_punctuation()), + left_flanking + && (!right_flanking || before_char.is_punctuation() || before_char.is_symbol()), + right_flanking + && (!left_flanking || after_char.is_punctuation() || after_char.is_symbol()), ) } else if c == 
b'\'' || c == b'"' { ( @@ -1528,7 +1530,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } // Need to normalize both to lookup in refmap and to call callback - let lab = strings::normalize_label(&lab, Case::DontPreserve); + let lab = strings::normalize_label(&lab, Case::Fold); let mut reff = if found_label { self.refmap.lookup(&lab) } else { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 09c55b0f..e9d68a3c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -766,6 +766,43 @@ pub struct RenderOptions { /// assert_eq!(markdown_to_html(input, &options), "

<p>[]()</p>

\n"); /// ``` pub ignore_empty_links: bool, + + /// Enables GFM quirks in HTML output which break CommonMark compatibility. + /// + /// ```rust + /// # use comrak::{markdown_to_html, Options}; + /// let mut options = Options::default(); + /// let input = "****abcd**** *_foo_*"; + /// + /// assert_eq!(markdown_to_html(input, &options), + /// "

<p><strong><strong>abcd</strong></strong> <em><em>foo</em></em></p>

\n"); + /// + /// options.render.gfm_quirks = true; + /// assert_eq!(markdown_to_html(input, &options), + /// "

<p><strong>abcd</strong> <em><em>foo</em></em></p>

\n"); + /// ``` + pub gfm_quirks: bool, + + /// Prefer fenced code blocks when outputting CommonMark. + /// + /// ```rust + /// # use std::str; + /// # use comrak::{Arena, Options, format_commonmark, parse_document}; + /// let arena = Arena::new(); + /// let mut options = Options::default(); + /// let input = "```\nhello\n```\n"; + /// let root = parse_document(&arena, input, &options); + /// + /// let mut buf = Vec::new(); + /// format_commonmark(&root, &options, &mut buf); + /// assert_eq!(str::from_utf8(&buf).unwrap(), " hello\n"); + /// + /// buf.clear(); + /// options.render.prefer_fenced = true; + /// format_commonmark(&root, &options, &mut buf); + /// assert_eq!(str::from_utf8(&buf).unwrap(), "```\nhello\n```\n"); + /// ``` + pub prefer_fenced: bool, } #[non_exhaustive] @@ -2131,7 +2168,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { match node.data.borrow().value { NodeValue::FootnoteDefinition(ref nfd) => { map.insert( - strings::normalize_label(&nfd.name, Case::DontPreserve), + strings::normalize_label(&nfd.name, Case::Fold), FootnoteDefinition { ix: None, node, @@ -2157,7 +2194,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { let mut replace = None; match ast.value { NodeValue::FootnoteReference(ref mut nfr) => { - let normalized = strings::normalize_label(&nfr.name, Case::DontPreserve); + let normalized = strings::normalize_label(&nfr.name, Case::Fold); if let Some(ref mut footnote) = map.get_mut(&normalized) { let ix = match footnote.ix { Some(ix) => ix, @@ -2389,7 +2426,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { } } - lab = strings::normalize_label(&lab, Case::DontPreserve); + lab = strings::normalize_label(&lab, Case::Fold); if !lab.is_empty() { subj.refmap.map.entry(lab).or_insert(Reference { url: String::from_utf8(strings::clean_url(url)).unwrap(), diff --git a/src/scanners.re b/src/scanners.re index 6c5799fc..35bbc4f6 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -24,7 +24,7 @@ tagname = [A-Za-z][A-Za-z0-9-]*; - blocktagname = 
'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul'; + blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'search'|'section'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul'; attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*; diff --git a/src/scanners.rs b/src/scanners.rs index 967c2df7..52b2a747 100644 --- a/src/scanners.rs +++ b/src/scanners.rs @@ -6178,19 +6178,14 @@ pub fn html_block_start(s: &[u8]) -> Option { yystate = 59; continue 'yyl; } - 0x4F | 0x6F => { - cursor += 1; - yystate = 60; - continue 'yyl; - } 0x54 | 0x74 => { cursor += 1; - yystate = 61; + yystate = 60; continue 'yyl; } 0x55 | 0x75 => { cursor += 1; - yystate = 62; + yystate = 61; continue 'yyl; } _ => { @@ -6210,12 +6205,12 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 63; + yystate = 62; continue 'yyl; } 0x42 | 0x62 => { cursor += 1; - yystate = 64; + yystate = 63; continue 'yyl; } 0x44 | 0x64 => { @@ -6225,27 +6220,27 @@ pub fn html_block_start(s: &[u8]) -> Option { } 0x45 | 0x65 => { cursor += 1; - yystate = 65; + yystate = 64; continue 'yyl; } 0x46 | 0x66 => { cursor += 1; - yystate = 66; + yystate = 65; continue 'yyl; } 0x48 | 0x68 
=> { cursor += 1; - yystate = 67; + yystate = 66; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; - yystate = 68; + yystate = 67; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; - yystate = 69; + yystate = 68; continue 'yyl; } _ => { @@ -6285,7 +6280,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x2D => { cursor += 1; - yystate = 70; + yystate = 69; continue 'yyl; } _ => { @@ -6308,7 +6303,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x43 | 0x63 => { cursor += 1; - yystate = 71; + yystate = 70; continue 'yyl; } _ => { @@ -6361,14 +6356,9 @@ pub fn html_block_start(s: &[u8]) -> Option { yystate = 59; continue 'yyl; } - 0x4F | 0x6F => { - cursor += 1; - yystate = 60; - continue 'yyl; - } 0x55 | 0x75 => { cursor += 1; - yystate = 62; + yystate = 61; continue 'yyl; } _ => { @@ -6388,12 +6378,12 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 63; + yystate = 62; continue 'yyl; } 0x42 | 0x62 => { cursor += 1; - yystate = 64; + yystate = 63; continue 'yyl; } 0x44 | 0x64 => { @@ -6403,22 +6393,22 @@ pub fn html_block_start(s: &[u8]) -> Option { } 0x46 | 0x66 => { cursor += 1; - yystate = 66; + yystate = 65; continue 'yyl; } 0x48 | 0x68 => { cursor += 1; - yystate = 67; + yystate = 66; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; - yystate = 68; + yystate = 67; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; - yystate = 69; + yystate = 68; continue 'yyl; } _ => { @@ -6438,7 +6428,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x44 | 0x64 => { cursor += 1; - yystate = 72; + yystate = 71; continue 'yyl; } _ => { @@ -6458,7 +6448,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x54 | 0x74 => { cursor += 1; - yystate = 73; + yystate = 72; continue 'yyl; } _ => { @@ -6478,7 +6468,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x49 | 0x69 => { cursor += 1; - yystate = 74; + yystate = 73; continue 'yyl; } _ => { @@ -6498,7 +6488,7 @@ pub fn 
html_block_start(s: &[u8]) -> Option { match yych { 0x53 | 0x73 => { cursor += 1; - yystate = 75; + yystate = 74; continue 'yyl; } _ => { @@ -6518,7 +6508,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4F | 0x6F => { cursor += 1; - yystate = 76; + yystate = 75; continue 'yyl; } _ => { @@ -6538,7 +6528,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x44 | 0x64 => { cursor += 1; - yystate = 77; + yystate = 76; continue 'yyl; } _ => { @@ -6558,7 +6548,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x50 | 0x70 => { cursor += 1; - yystate = 78; + yystate = 77; continue 'yyl; } _ => { @@ -6578,7 +6568,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4E | 0x6E => { cursor += 1; - yystate = 79; + yystate = 78; continue 'yyl; } _ => { @@ -6598,7 +6588,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4C | 0x6C => { cursor += 1; - yystate = 80; + yystate = 79; continue 'yyl; } _ => { @@ -6643,7 +6633,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x54 | 0x74 => { cursor += 1; - yystate = 81; + yystate = 80; continue 'yyl; } _ => { @@ -6663,7 +6653,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 82; + yystate = 81; continue 'yyl; } 0x52 | 0x56 | 0x72 | 0x76 => { @@ -6688,12 +6678,12 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x45 | 0x65 => { cursor += 1; - yystate = 83; + yystate = 82; continue 'yyl; } 0x47 | 0x67 => { cursor += 1; - yystate = 84; + yystate = 83; continue 'yyl; } _ => { @@ -6713,12 +6703,12 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4F | 0x6F => { cursor += 1; - yystate = 79; + yystate = 78; continue 'yyl; } 0x52 | 0x72 => { cursor += 1; - yystate = 85; + yystate = 84; continue 'yyl; } _ => { @@ -6738,7 +6728,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 86; + yystate = 85; continue 'yyl; } _ => { @@ 
-6758,7 +6748,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 87; + yystate = 86; continue 'yyl; } _ => { @@ -6798,7 +6788,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x52 | 0x72 => { cursor += 1; - yystate = 88; + yystate = 87; continue 'yyl; } _ => { @@ -6818,7 +6808,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x47 | 0x67 => { cursor += 1; - yystate = 89; + yystate = 88; continue 'yyl; } _ => { @@ -6848,7 +6838,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } 0x4E | 0x6E => { cursor += 1; - yystate = 90; + yystate = 89; continue 'yyl; } _ => { @@ -6868,7 +6858,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x49 | 0x69 => { cursor += 1; - yystate = 91; + yystate = 90; continue 'yyl; } _ => { @@ -6888,7 +6878,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4E | 0x6E => { cursor += 1; - yystate = 92; + yystate = 91; continue 'yyl; } _ => { @@ -6928,7 +6918,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x46 | 0x66 => { cursor += 1; - yystate = 93; + yystate = 92; continue 'yyl; } _ => { @@ -6948,7 +6938,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x54 | 0x74 => { cursor += 1; - yystate = 94; + yystate = 93; continue 'yyl; } _ => { @@ -6991,7 +6981,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x52 | 0x72 => { cursor += 1; - yystate = 95; + yystate = 94; continue 'yyl; } _ => { @@ -7011,7 +7001,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x45 | 0x65 => { cursor += 1; - yystate = 96; + yystate = 95; continue 'yyl; } _ => { @@ -7031,7 +7021,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x52 | 0x72 => { cursor += 1; - yystate = 97; + yystate = 96; continue 'yyl; } _ => { @@ -7049,9 +7039,14 @@ pub fn html_block_start(s: &[u8]) -> Option { } }; match yych { + 0x41 | 0x61 => { + cursor += 1; + yystate = 97; + continue 'yyl; + } 0x43 | 0x63 => { 
cursor += 1; - yystate = 78; + yystate = 77; continue 'yyl; } _ => { @@ -7069,7 +7064,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } }; match yych { - 0x55 | 0x75 => { + 0x59 | 0x79 => { cursor += 1; yystate = 98; continue 'yyl; @@ -7089,7 +7084,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } }; match yych { - 0x59 | 0x79 => { + 0x4D | 0x6D => { cursor += 1; yystate = 99; continue 'yyl; @@ -7109,7 +7104,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } }; match yych { - 0x4D | 0x6D => { + 0x42 | 0x62 => { cursor += 1; yystate = 100; continue 'yyl; @@ -7121,26 +7116,6 @@ pub fn html_block_start(s: &[u8]) -> Option { } } 63 => { - yych = unsafe { - if cursor < len { - *s.get_unchecked(cursor) - } else { - 0 - } - }; - match yych { - 0x42 | 0x62 => { - cursor += 1; - yystate = 101; - continue 'yyl; - } - _ => { - yystate = 5; - continue 'yyl; - } - } - } - 64 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7160,7 +7135,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 65 => { + 64 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7171,7 +7146,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x58 | 0x78 => { cursor += 1; - yystate = 102; + yystate = 101; continue 'yyl; } _ => { @@ -7180,7 +7155,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 66 => { + 65 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7191,7 +7166,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4F | 0x6F => { cursor += 1; - yystate = 103; + yystate = 102; continue 'yyl; } _ => { @@ -7200,7 +7175,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 67 => { + 66 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7221,7 +7196,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } 0x45 | 0x65 => { cursor += 1; - yystate = 104; + yystate = 103; continue 'yyl; } _ => { @@ -7230,7 +7205,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 68 => { + 67 => { 
yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7241,7 +7216,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x54 | 0x74 => { cursor += 1; - yystate = 101; + yystate = 100; continue 'yyl; } _ => { @@ -7250,7 +7225,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 69 => { + 68 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7271,7 +7246,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } 0x41 | 0x61 => { cursor += 1; - yystate = 105; + yystate = 104; continue 'yyl; } _ => { @@ -7280,10 +7255,10 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 70 => { + 69 => { return Some(2); } - 71 => { + 70 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7294,7 +7269,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x44 | 0x64 => { cursor += 1; - yystate = 106; + yystate = 105; continue 'yyl; } _ => { @@ -7303,7 +7278,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 72 => { + 71 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7314,7 +7289,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x52 | 0x72 => { cursor += 1; - yystate = 107; + yystate = 106; continue 'yyl; } _ => { @@ -7323,7 +7298,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 73 => { + 72 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7334,7 +7309,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x49 | 0x69 => { cursor += 1; - yystate = 108; + yystate = 107; continue 'yyl; } _ => { @@ -7343,7 +7318,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 74 => { + 73 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7354,7 +7329,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x44 | 0x64 => { cursor += 1; - yystate = 109; + yystate = 108; continue 'yyl; } _ => { @@ -7363,7 +7338,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 75 => { + 74 => { yych = unsafe { if cursor < len { 
*s.get_unchecked(cursor) @@ -7374,7 +7349,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x45 | 0x65 => { cursor += 1; - yystate = 110; + yystate = 109; continue 'yyl; } _ => { @@ -7383,7 +7358,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 76 => { + 75 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7394,7 +7369,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x43 | 0x63 => { cursor += 1; - yystate = 111; + yystate = 110; continue 'yyl; } _ => { @@ -7403,7 +7378,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 77 => { + 76 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7423,7 +7398,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 78 => { + 77 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7434,7 +7409,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x54 | 0x74 => { cursor += 1; - yystate = 112; + yystate = 111; continue 'yyl; } _ => { @@ -7443,7 +7418,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 79 => { + 78 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7454,7 +7429,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x54 | 0x74 => { cursor += 1; - yystate = 113; + yystate = 112; continue 'yyl; } _ => { @@ -7463,7 +7438,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 80 => { + 79 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7484,7 +7459,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } 0x47 | 0x67 => { cursor += 1; - yystate = 114; + yystate = 113; continue 'yyl; } _ => { @@ -7493,7 +7468,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 81 => { + 80 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7504,7 +7479,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 115; + yystate = 114; continue 'yyl; } _ => { @@ -7513,7 +7488,7 @@ pub fn html_block_start(s: 
&[u8]) -> Option { } } } - 82 => { + 81 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7524,7 +7499,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4C | 0x6C => { cursor += 1; - yystate = 116; + yystate = 115; continue 'yyl; } _ => { @@ -7533,7 +7508,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 83 => { + 82 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7544,7 +7519,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4C | 0x6C => { cursor += 1; - yystate = 117; + yystate = 116; continue 'yyl; } _ => { @@ -7553,7 +7528,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 84 => { + 83 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7564,12 +7539,12 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x43 | 0x63 => { cursor += 1; - yystate = 118; + yystate = 117; continue 'yyl; } 0x55 | 0x75 => { cursor += 1; - yystate = 119; + yystate = 118; continue 'yyl; } _ => { @@ -7578,7 +7553,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 85 => { + 84 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7598,7 +7573,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 86 => { + 85 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7609,7 +7584,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4D | 0x6D => { cursor += 1; - yystate = 120; + yystate = 119; continue 'yyl; } _ => { @@ -7618,7 +7593,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 87 => { + 86 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7629,7 +7604,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x44 | 0x64 => { cursor += 1; - yystate = 121; + yystate = 120; continue 'yyl; } _ => { @@ -7638,7 +7613,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 88 => { + 87 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7649,7 +7624,7 @@ pub fn html_block_start(s: 
&[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 122; + yystate = 121; continue 'yyl; } _ => { @@ -7658,7 +7633,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 89 => { + 88 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7669,7 +7644,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x45 | 0x65 => { cursor += 1; - yystate = 123; + yystate = 122; continue 'yyl; } _ => { @@ -7678,7 +7653,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 90 => { + 89 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7698,7 +7673,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 91 => { + 90 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7718,7 +7693,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 92 => { + 91 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7729,7 +7704,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x55 | 0x75 => { cursor += 1; - yystate = 124; + yystate = 123; continue 'yyl; } _ => { @@ -7738,7 +7713,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 93 => { + 92 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7749,7 +7724,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x52 | 0x72 => { cursor += 1; - yystate = 125; + yystate = 124; continue 'yyl; } _ => { @@ -7758,7 +7733,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 94 => { + 93 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7769,12 +7744,12 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x47 | 0x67 => { cursor += 1; - yystate = 114; + yystate = 113; continue 'yyl; } 0x49 | 0x69 => { cursor += 1; - yystate = 126; + yystate = 125; continue 'yyl; } _ => { @@ -7783,7 +7758,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 95 => { + 94 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7794,7 +7769,7 @@ pub fn html_block_start(s: 
&[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 85; + yystate = 84; continue 'yyl; } _ => { @@ -7803,7 +7778,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 96 => { + 95 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7814,7 +7789,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x09..=0x0D | 0x20 | 0x3E => { cursor += 1; - yystate = 127; + yystate = 126; continue 'yyl; } _ => { @@ -7823,7 +7798,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 97 => { + 96 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7834,7 +7809,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x49 | 0x69 => { cursor += 1; - yystate = 128; + yystate = 127; continue 'yyl; } _ => { @@ -7843,7 +7818,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 98 => { + 97 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7854,7 +7829,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x52 | 0x72 => { cursor += 1; - yystate = 129; + yystate = 128; continue 'yyl; } _ => { @@ -7863,7 +7838,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 99 => { + 98 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7883,7 +7858,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 100 => { + 99 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7894,7 +7869,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4D | 0x6D => { cursor += 1; - yystate = 130; + yystate = 129; continue 'yyl; } _ => { @@ -7903,7 +7878,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 101 => { + 100 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7914,7 +7889,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4C | 0x6C => { cursor += 1; - yystate = 109; + yystate = 108; continue 'yyl; } _ => { @@ -7923,7 +7898,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 102 => { + 101 => { yych 
= unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7934,7 +7909,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x54 | 0x74 => { cursor += 1; - yystate = 131; + yystate = 130; continue 'yyl; } _ => { @@ -7943,7 +7918,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 103 => { + 102 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7954,7 +7929,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4F | 0x6F => { cursor += 1; - yystate = 132; + yystate = 131; continue 'yyl; } _ => { @@ -7963,7 +7938,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 104 => { + 103 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7974,7 +7949,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 133; + yystate = 132; continue 'yyl; } _ => { @@ -7983,7 +7958,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 105 => { + 104 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -7994,7 +7969,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x43 | 0x63 => { cursor += 1; - yystate = 90; + yystate = 89; continue 'yyl; } _ => { @@ -8003,7 +7978,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 106 => { + 105 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8014,7 +7989,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 134; + yystate = 133; continue 'yyl; } _ => { @@ -8023,7 +7998,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 107 => { + 106 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8034,7 +8009,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x45 | 0x65 => { cursor += 1; - yystate = 135; + yystate = 134; continue 'yyl; } _ => { @@ -8043,7 +8018,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 108 => { + 107 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ 
-8054,7 +8029,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x43 | 0x63 => { cursor += 1; - yystate = 101; + yystate = 100; continue 'yyl; } _ => { @@ -8063,7 +8038,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 109 => { + 108 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8083,7 +8058,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 110 => { + 109 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8104,7 +8079,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } 0x46 | 0x66 => { cursor += 1; - yystate = 136; + yystate = 135; continue 'yyl; } _ => { @@ -8113,7 +8088,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 111 => { + 110 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8124,7 +8099,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4B | 0x6B => { cursor += 1; - yystate = 137; + yystate = 136; continue 'yyl; } _ => { @@ -8133,7 +8108,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 112 => { + 111 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8144,7 +8119,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x49 | 0x69 => { cursor += 1; - yystate = 126; + yystate = 125; continue 'yyl; } _ => { @@ -8153,7 +8128,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 113 => { + 112 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8164,7 +8139,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x45 | 0x65 => { cursor += 1; - yystate = 138; + yystate = 137; continue 'yyl; } _ => { @@ -8173,7 +8148,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 114 => { + 113 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8184,7 +8159,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x52 | 0x72 => { cursor += 1; - yystate = 139; + yystate = 138; continue 'yyl; } _ => { @@ -8193,7 +8168,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } 
} } - 115 => { + 114 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8204,7 +8179,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x49 | 0x69 => { cursor += 1; - yystate = 140; + yystate = 139; continue 'yyl; } _ => { @@ -8213,7 +8188,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 116 => { + 115 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8224,7 +8199,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4F | 0x6F => { cursor += 1; - yystate = 141; + yystate = 140; continue 'yyl; } _ => { @@ -8233,7 +8208,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 117 => { + 116 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8244,7 +8219,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x44 | 0x64 => { cursor += 1; - yystate = 142; + yystate = 141; continue 'yyl; } _ => { @@ -8253,7 +8228,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 118 => { + 117 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8273,7 +8248,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 119 => { + 118 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8284,7 +8259,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x52 | 0x72 => { cursor += 1; - yystate = 109; + yystate = 108; continue 'yyl; } _ => { @@ -8293,7 +8268,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 120 => { + 119 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8304,7 +8279,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x45 | 0x65 => { cursor += 1; - yystate = 143; + yystate = 142; continue 'yyl; } _ => { @@ -8313,7 +8288,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 121 => { + 120 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8334,7 +8309,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } 0x45 | 0x65 => { cursor += 1; - yystate = 138; + yystate = 137; 
continue 'yyl; } _ => { @@ -8343,7 +8318,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 122 => { + 121 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8354,7 +8329,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4D | 0x6D => { cursor += 1; - yystate = 109; + yystate = 108; continue 'yyl; } _ => { @@ -8363,7 +8338,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 123 => { + 122 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8374,7 +8349,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4E | 0x6E => { cursor += 1; - yystate = 133; + yystate = 132; continue 'yyl; } _ => { @@ -8383,7 +8358,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 124 => { + 123 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8404,7 +8379,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } 0x49 | 0x69 => { cursor += 1; - yystate = 144; + yystate = 143; continue 'yyl; } _ => { @@ -8413,7 +8388,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 125 => { + 124 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8424,7 +8399,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 145; + yystate = 144; continue 'yyl; } _ => { @@ -8433,7 +8408,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 126 => { + 125 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8444,7 +8419,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4F | 0x6F => { cursor += 1; - yystate = 91; + yystate = 90; continue 'yyl; } _ => { @@ -8453,10 +8428,10 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 127 => { + 126 => { return Some(1); } - 128 => { + 127 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8467,7 +8442,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x50 | 0x70 => { cursor += 1; - yystate = 146; + yystate = 145; continue 'yyl; } _ => { @@ 
-8476,7 +8451,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 129 => { + 128 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8487,7 +8462,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x43 | 0x63 => { cursor += 1; - yystate = 109; + yystate = 146; continue 'yyl; } _ => { @@ -8496,7 +8471,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 130 => { + 129 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8516,7 +8491,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 131 => { + 130 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8536,7 +8511,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 132 => { + 131 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8556,7 +8531,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 133 => { + 132 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8576,7 +8551,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 134 => { + 133 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8596,7 +8571,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 135 => { + 134 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8616,7 +8591,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 136 => { + 135 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8636,7 +8611,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 137 => { + 136 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8656,7 +8631,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 138 => { + 137 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8676,7 +8651,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 139 => { + 138 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8696,7 +8671,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 140 => { + 139 => { yych = unsafe { if cursor < len 
{ *s.get_unchecked(cursor) @@ -8716,7 +8691,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 141 => { + 140 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8736,7 +8711,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 142 => { + 141 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8756,7 +8731,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 143 => { + 142 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8786,7 +8761,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 144 => { + 143 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8806,7 +8781,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 145 => { + 144 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8826,7 +8801,7 @@ pub fn html_block_start(s: &[u8]) -> Option { } } } - 146 => { + 145 => { yych = unsafe { if cursor < len { *s.get_unchecked(cursor) @@ -8837,7 +8812,27 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x54 | 0x74 => { cursor += 1; - yystate = 96; + yystate = 95; + continue 'yyl; + } + _ => { + yystate = 5; + continue 'yyl; + } + } + } + 146 => { + yych = unsafe { + if cursor < len { + *s.get_unchecked(cursor) + } else { + 0 + } + }; + match yych { + 0x48 | 0x68 => { + cursor += 1; + yystate = 38; continue 'yyl; } _ => { @@ -8857,7 +8852,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x52 | 0x72 => { cursor += 1; - yystate = 77; + yystate = 76; continue 'yyl; } _ => { @@ -8937,7 +8932,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x4E | 0x6E => { cursor += 1; - yystate = 132; + yystate = 131; continue 'yyl; } _ => { @@ -8997,7 +8992,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x45 | 0x65 => { cursor += 1; - yystate = 132; + yystate = 131; continue 'yyl; } _ => { @@ -9017,7 +9012,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x45 | 0x65 => { cursor += 1; - yystate = 85; + 
yystate = 84; continue 'yyl; } _ => { @@ -9137,7 +9132,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x41 | 0x61 => { cursor += 1; - yystate = 96; + yystate = 95; continue 'yyl; } _ => { @@ -9160,7 +9155,7 @@ pub fn html_block_start(s: &[u8]) -> Option { match yych { 0x54 | 0x74 => { cursor += 1; - yystate = 109; + yystate = 108; continue 'yyl; } _ => { diff --git a/src/strings.rs b/src/strings.rs index 0403ee89..42206bc3 100644 --- a/src/strings.rs +++ b/src/strings.rs @@ -7,7 +7,7 @@ use std::str; #[derive(PartialEq, Eq)] pub enum Case { Preserve, - DontPreserve, + Fold, } pub fn unescape(v: &mut Vec) { @@ -262,13 +262,23 @@ pub fn normalize_label(i: &str, casing: Case) -> String { } } else { last_was_whitespace = false; - match casing { - Case::Preserve => v.push(c), - Case::DontPreserve => v.push_str(&c.to_lowercase().to_string()), - } + v.push(c); } } - v + + if casing == Case::Fold { + caseless::default_case_fold_str(&v) + } else { + v + } +} + +#[test] +fn normalize_label_fold_test() { + assert_eq!(normalize_label("Abc \t\ndef", Case::Preserve), "Abc def"); + assert_eq!(normalize_label("Abc \t\ndef", Case::Fold), "abc def"); + assert_eq!(normalize_label("Straẞe", Case::Preserve), "Straẞe"); + assert_eq!(normalize_label("Straẞe", Case::Fold), "strasse"); } pub fn split_off_front_matter<'s>(mut s: &'s str, delimiter: &str) -> Option<(&'s str, &'s str)> { @@ -356,14 +366,8 @@ pub mod tests { #[test] fn normalize_label_lowercase() { - assert_eq!( - normalize_label(" Foo\u{A0}BAR ", Case::DontPreserve), - "foo bar" - ); - assert_eq!( - normalize_label(" FooİBAR ", Case::DontPreserve), - "fooi\u{307}bar" - ); + assert_eq!(normalize_label(" Foo\u{A0}BAR ", Case::Fold), "foo bar"); + assert_eq!(normalize_label(" FooİBAR ", Case::Fold), "fooi\u{307}bar"); } #[test] diff --git a/src/tests/core.rs b/src/tests/core.rs index 30061490..3b5adefa 100644 --- a/src/tests/core.rs +++ b/src/tests/core.rs @@ -423,6 +423,14 @@ fn reference_links() { ); } 
+#[test] +fn reference_links_casefold() { + html( + concat!("[ẞ]\n", "\n", "[SS]: /url \n",), + "

\n", + ); +} + #[test] fn safety() { html( diff --git a/vendor/commonmark-spec b/vendor/commonmark-spec new file mode 160000 index 00000000..9103e341 --- /dev/null +++ b/vendor/commonmark-spec @@ -0,0 +1 @@ +Subproject commit 9103e341a973013013bb1a80e13567007c5cef6f