Skip to content

Commit

Permalink
Merge pull request #490 from liamwhite/lazy
Browse files Browse the repository at this point in the history
Add macro for character tables
  • Loading branch information
kivikakk authored Nov 22, 2024
2 parents fc4292c + eebca85 commit 26b6ca5
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 78 deletions.
18 changes: 18 additions & 0 deletions src/character_set.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/// Builds a 256-entry byte membership table from byte-string literals.
///
/// Expands to a `[bool; 256]` in which entry `b` is `true` exactly when the
/// byte `b` occurs in at least one of the supplied literals. Bytes that are
/// repeated, within one literal or across several, have no additional effect.
///
/// The expansion uses only a `while` loop and array indexing, so it can be
/// used to initialise a `const` item.
///
/// Accepted forms:
///
/// * `character_set!()` — an all-`false` table;
/// * `character_set!(b"abc")` — a single literal;
/// * `character_set!(b"abc", b"xyz")` — the union of several literals;
/// * any of the above with a trailing comma, which keeps multi-line
///   invocations tidy.
macro_rules! character_set {
    // No literals: nothing is a member.
    () => {{
        [false; 256]
    }};

    // One or more literals, optionally followed by a trailing comma.
    ($($bytes:literal),+ $(,)?) => {{
        let mut table = [false; 256];
        $(
            {
                // Binding to a `&[u8]` const coerces the `&[u8; N]` literal to
                // a slice, so `len()` and indexing work the same for every
                // length.
                const BYTES: &[u8] = $bytes;
                // `for` loops are not permitted in const contexts; iterate by
                // index with `while` instead.
                let mut index = 0;
                while index < BYTES.len() {
                    table[BYTES[index] as usize] = true;
                    index += 1;
                }
            }
        )+
        table
    }};
}

pub(crate) use character_set;
56 changes: 9 additions & 47 deletions src/html.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//! The HTML renderer for the CommonMark AST, as well as helper functions.
use crate::character_set::character_set;
use crate::ctype::isspace;
use crate::nodes::{
AstNode, ListType, NodeCode, NodeFootnoteDefinition, NodeMath, NodeTable, NodeValue,
Expand Down Expand Up @@ -135,42 +136,6 @@ struct HtmlFormatter<'o> {
plugins: &'o Plugins<'o>,
}

#[rustfmt::skip]
const NEEDS_ESCAPED : [bool; 256] = [
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, true, false, false, false, true, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, true, false, true, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
false, false, false, false, false, false, false, false,
];

fn tagfilter(literal: &[u8]) -> bool {
static TAGFILTER_BLACKLIST: [&str; 9] = [
"title",
Expand Down Expand Up @@ -254,9 +219,11 @@ fn dangerous_url(input: &[u8]) -> bool {
/// Note that this is appropriate and sufficient for free text, but not for
/// URLs in attributes. See escape_href.
pub fn escape(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> {
const HTML_UNSAFE: [bool; 256] = character_set!(b"&<>\"");

let mut offset = 0;
for (i, &byte) in buffer.iter().enumerate() {
if NEEDS_ESCAPED[byte as usize] {
if HTML_UNSAFE[byte as usize] {
let esc: &[u8] = match byte {
b'"' => b"&quot;",
b'&' => b"&amp;",
Expand Down Expand Up @@ -297,16 +264,11 @@ pub fn escape(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> {
/// the string "a b", rather than "?q=a%2520b", a search for the literal
/// string "a%20b".
pub fn escape_href(output: &mut dyn Write, buffer: &[u8]) -> io::Result<()> {
static HREF_SAFE: Lazy<[bool; 256]> = Lazy::new(|| {
let mut a = [false; 256];
for &c in b"-_.+!*(),%#@?=;:/,+$~abcdefghijklmnopqrstuvwxyz".iter() {
a[c as usize] = true;
}
for &c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".iter() {
a[c as usize] = true;
}
a
});
const HREF_SAFE: [bool; 256] = character_set!(
b"-_.+!*(),%#@?=;:/,+$~",
b"abcdefghijklmnopqrstuvwxyz",
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
);

let size = buffer.len();
let mut i = 0;
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ use std::io::BufWriter;

pub mod adapters;
pub mod arena_tree;
mod character_set;
mod cm;
mod ctype;
mod entity;
Expand Down
26 changes: 4 additions & 22 deletions src/parser/autolink.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::character_set::character_set;
use crate::ctype::{isalnum, isalpha, isspace};
use crate::nodes::{AstNode, NodeLink, NodeValue};
use crate::parser::inlines::make_inline;
use once_cell::sync::Lazy;
use std::str;
use typed_arena::Arena;
use unicode_categories::UnicodeCategories;
Expand Down Expand Up @@ -74,13 +74,7 @@ pub fn www_match<'a>(
i: usize,
relaxed_autolinks: bool,
) -> Option<(&'a AstNode<'a>, usize, usize)> {
static WWW_DELIMS: Lazy<[bool; 256]> = Lazy::new(|| {
let mut sc = [false; 256];
for c in &[b'*', b'_', b'~', b'(', b'['] {
sc[*c as usize] = true;
}
sc
});
const WWW_DELIMS: [bool; 256] = character_set!(b"*_~([");

if i > 0 && !isspace(contents[i - 1]) && !WWW_DELIMS[contents[i - 1] as usize] {
return None;
Expand Down Expand Up @@ -167,13 +161,7 @@ fn is_valid_hostchar(ch: char) -> bool {
}

fn autolink_delim(data: &[u8], mut link_end: usize, relaxed_autolinks: bool) -> usize {
static LINK_END_ASSORTMENT: Lazy<[bool; 256]> = Lazy::new(|| {
let mut sc = [false; 256];
for c in &[b'?', b'!', b'.', b',', b':', b'*', b'_', b'~', b'\'', b'"'] {
sc[*c as usize] = true;
}
sc
});
const LINK_END_ASSORTMENT: [bool; 256] = character_set!(b"?!.,:*_~'\"");

for (i, &b) in data.iter().enumerate().take(link_end) {
if b == b'<' {
Expand Down Expand Up @@ -311,13 +299,7 @@ fn email_match<'a>(
i: usize,
relaxed_autolinks: bool,
) -> Option<(&'a AstNode<'a>, usize, usize)> {
static EMAIL_OK_SET: Lazy<[bool; 256]> = Lazy::new(|| {
let mut sc = [false; 256];
for c in &[b'.', b'+', b'-', b'_'] {
sc[*c as usize] = true;
}
sc
});
const EMAIL_OK_SET: [bool; 256] = character_set!(b".+-_");

let size = contents.len();

Expand Down
12 changes: 3 additions & 9 deletions src/xml.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::character_set::character_set;
use crate::nodes::{AstNode, ListType, NodeCode, NodeMath, NodeTable, NodeValue};
use crate::parser::{Options, Plugins};
use once_cell::sync::Lazy;
use std::cmp;
use std::io::{self, Write};

Expand Down Expand Up @@ -48,17 +48,11 @@ impl<'o> XmlFormatter<'o> {
}

fn escape(&mut self, buffer: &[u8]) -> io::Result<()> {
static XML_SAFE: Lazy<[bool; 256]> = Lazy::new(|| {
let mut a = [true; 256];
for &c in b"&<>\"".iter() {
a[c as usize] = false;
}
a
});
const XML_UNSAFE: [bool; 256] = character_set!(b"&<>\"");

let mut offset = 0;
for (i, &byte) in buffer.iter().enumerate() {
if !XML_SAFE[byte as usize] {
if XML_UNSAFE[byte as usize] {
let esc: &[u8] = match byte {
b'"' => b"&quot;",
b'&' => b"&amp;",
Expand Down

0 comments on commit 26b6ca5

Please sign in to comment.