diff --git a/CMark.hsc b/CMarkGFM.hsc similarity index 69% rename from CMark.hsc rename to CMarkGFM.hsc index 6314c59..edfd245 100644 --- a/CMark.hsc +++ b/CMarkGFM.hsc @@ -1,22 +1,26 @@ {-# LANGUAGE CPP, ForeignFunctionInterface, GeneralizedNewtypeDeriving, DeriveGeneric, DeriveDataTypeable, FlexibleContexts #-} -module CMark ( +module CMarkGFM ( commonmarkToHtml , commonmarkToXml , commonmarkToMan , commonmarkToLaTeX , commonmarkToNode + , registerPlugins , nodeToHtml , nodeToXml , nodeToMan , nodeToLaTeX , nodeToCommonmark , optSourcePos - , optNormalize , optHardBreaks , optSmart , optSafe + , extStrikethrough + , extTable + , extAutolink + , extTagfilter , Node(..) , NodeType(..) , PosInfo(..) @@ -27,15 +31,18 @@ module CMark ( , Title , Level , Info + , TableCellAlignment(..) , CMarkOption + , CMarkExtension ) where import Foreign import Foreign.C.Types -import Foreign.C.String (CString) +import Foreign.C.String (CString, withCString) import qualified System.IO.Unsafe as Unsafe import Data.Maybe (fromMaybe) import GHC.Generics (Generic) +import Data.Maybe (catMaybes) import Data.Data (Data) import Data.Typeable (Typeable) import Data.Text (Text, empty) @@ -45,41 +52,82 @@ import Data.Text.Encoding (encodeUtf8) import Control.Applicative ((<$>), (<*>)) #include +#include + +-- | Register core extensions. This should be done once at program start. +registerPlugins :: IO () +registerPlugins = c_cmark_register_plugin c_core_extensions_registration + +-- | Frees a cmark linked list, produced by extsToLlist. +freeLlist :: LlistPtr a -> IO () +freeLlist = c_cmark_llist_free c_CMARK_DEFAULT_MEM_ALLOCATOR + +-- | Converts a list of resolved extension pointers to a single cmark +-- linked list, which can be passed to functions requiring a list of +-- extensions. 
+extsToLlist :: [ExtensionPtr] -> IO (LlistPtr ExtensionPtr)
+extsToLlist [] = return nullPtr  -- an empty list is the null pointer
+extsToLlist (h:t) = do
+  t' <- extsToLlist t  -- build the list for the tail first
+  c_cmark_llist_append c_CMARK_DEFAULT_MEM_ALLOCATOR t' (castPtr h)  -- NOTE(review): h is added after its tail; confirm list order is irrelevant to cmark
+
+-- | Resolves a CMarkExtension to its pointer; 'Nothing' if the lookup yields a null pointer.
+resolveExt :: CMarkExtension -> IO (Maybe ExtensionPtr)
+resolveExt e = do
+  p <- withCString (unCMarkExtension e) c_cmark_find_syntax_extension
+  return (if p == nullPtr then Nothing else Just p)
 
 -- | Convert CommonMark formatted text to Html, using cmark's
 -- built-in renderer.
-commonmarkToHtml :: [CMarkOption] -> Text -> Text
-commonmarkToHtml opts = commonmarkToX render_html opts Nothing
-  where render_html n o _ = c_cmark_render_html n o
+commonmarkToHtml :: [CMarkOption] -> [CMarkExtension] -> Text -> Text
+commonmarkToHtml opts exts =
+  commonmarkToX render_html opts exts Nothing
+  where render_html n o _ = do  -- plain IO: the renderer callback already runs in IO
+          exts' <- fmap catMaybes $ mapM resolveExt exts  -- unresolvable extensions are dropped
+          llist <- extsToLlist exts'
+          r <- c_cmark_render_html n o llist  -- must run before llist is freed
+          freeLlist llist
+          return r
 
 -- | Convert CommonMark formatted text to CommonMark XML, using cmark's
 -- built-in renderer.
-commonmarkToXml :: [CMarkOption] -> Text -> Text
-commonmarkToXml opts = commonmarkToX render_xml opts Nothing
+commonmarkToXml :: [CMarkOption] -> [CMarkExtension] -> Text -> Text
+commonmarkToXml opts exts = commonmarkToX render_xml opts exts Nothing
   where render_xml n o _ = c_cmark_render_xml n o
 
 -- | Convert CommonMark formatted text to groff man, using cmark's
 -- built-in renderer.
-commonmarkToMan :: [CMarkOption] -> Maybe Int -> Text -> Text
+commonmarkToMan :: [CMarkOption] -> [CMarkExtension] -> Maybe Int -> Text -> Text
 commonmarkToMan = commonmarkToX c_cmark_render_man
 
 -- | Convert CommonMark formatted text to latex, using cmark's
 -- built-in renderer.
-commonmarkToLaTeX :: [CMarkOption] -> Maybe Int -> Text -> Text
+commonmarkToLaTeX :: [CMarkOption] -> [CMarkExtension] -> Maybe Int -> Text -> Text
 commonmarkToLaTeX = commonmarkToX c_cmark_render_latex
 
 -- | Convert CommonMark formatted text to a structured 'Node' tree,
 -- which can be transformed or rendered using Haskell code.
-commonmarkToNode :: [CMarkOption] -> Text -> Node
-commonmarkToNode opts s = Unsafe.unsafePerformIO $ do
-  nptr <- TF.withCStringLen s $! \(ptr, len) ->
-             c_cmark_parse_document ptr len (combineOptions opts)
+commonmarkToNode :: [CMarkOption] -> [CMarkExtension] -> Text -> Node
+commonmarkToNode opts exts s = Unsafe.unsafePerformIO $ do
+  exts' <- fmap catMaybes $ mapM resolveExt exts
+  parser <- c_cmark_parser_new (combineOptions opts)
+  mapM_ (c_cmark_parser_attach_syntax_extension parser) exts'
+  TF.withCStringLen s $! \(ptr, len) ->
+             c_cmark_parser_feed parser ptr len
+  nptr <- c_cmark_parser_finish parser
+  c_cmark_parser_free parser
   fptr <- newForeignPtr c_cmark_node_free nptr
   withForeignPtr fptr toNode
 
-nodeToHtml :: [CMarkOption] -> Node -> Text
-nodeToHtml opts = nodeToX render_html opts Nothing
-  where render_html n o _ = c_cmark_render_html n o
+nodeToHtml :: [CMarkOption] -> [CMarkExtension] -> Node -> Text
+nodeToHtml opts exts =
+  nodeToX render_html opts Nothing
+  where render_html n o _ = do  -- plain IO: the renderer callback already runs in IO
+          exts' <- fmap catMaybes $ mapM resolveExt exts  -- unresolvable extensions are dropped
+          llist <- extsToLlist exts'
+          r <- c_cmark_render_html n o llist  -- must run before llist is freed
+          freeLlist llist
+          return r
 
 nodeToXml :: [CMarkOption] -> Node -> Text
 nodeToXml opts = nodeToX render_xml opts Nothing
@@ -106,20 +154,42 @@ nodeToX renderer opts mbWidth node = Unsafe.unsafePerformIO $ do
 
 commonmarkToX :: Renderer
               -> [CMarkOption]
+              -> [CMarkExtension]
               -> Maybe Int
               -> Text
               -> Text
-commonmarkToX renderer opts mbWidth s = Unsafe.unsafePerformIO $
+commonmarkToX renderer opts exts mbWidth s = Unsafe.unsafePerformIO $
   TF.withCStringLen s $ \(ptr, len) -> do
     let opts' = combineOptions opts
-    nptr <- c_cmark_parse_document ptr len opts'
+    exts' <- fmap catMaybes $ mapM resolveExt exts
+    parser <- c_cmark_parser_new opts'
+    mapM_ (c_cmark_parser_attach_syntax_extension parser) exts'
+    c_cmark_parser_feed parser ptr len
+    nptr <- c_cmark_parser_finish parser
+    c_cmark_parser_free parser
     fptr <- newForeignPtr c_cmark_node_free nptr
     withForeignPtr fptr $ \p -> do
       str <- renderer p opts' (fromMaybe 0 mbWidth)
       t <- TF.peekCStringLen $! (str, c_strlen str)
       return t
 
-type NodePtr = Ptr ()
+data ParserPhantom
+type ParserPtr = Ptr ParserPhantom
+
+data NodePhantom
+type NodePtr = Ptr NodePhantom
+
+data LlistPhantom a
+type LlistPtr a = Ptr (LlistPhantom a)
+
+data MemPhantom
+type MemPtr = Ptr MemPhantom
+
+data PluginPhantom
+type PluginPtr = Ptr PluginPhantom
+
+data ExtensionPhantom
+type ExtensionPtr = Ptr ExtensionPhantom
 
 data Node = Node (Maybe PosInfo) NodeType [Node]
      deriving (Show, Read, Eq, Ord, Typeable, Data, Generic)
@@ -153,6 +223,9 @@ type OnEnter = Text
 
 type OnExit = Text
 
+data TableCellAlignment = None | Left | Center | Right
+  deriving (Show, Read, Eq, Ord, Typeable, Data, Generic)
+
 data NodeType =
     DOCUMENT
   | THEMATIC_BREAK
@@ -174,6 +247,10 @@ data NodeType =
   | STRONG
   | LINK Url Title
   | IMAGE Url Title
+  | STRIKETHROUGH
+  | TABLE [TableCellAlignment]
+  | TABLE_ROW
+  | TABLE_CELL
   deriving (Show, Read, Eq, Ord, Typeable, Data, Generic)
 
 data PosInfo = PosInfo{ startLine :: Int
@@ -197,10 +274,6 @@ optSourcePos = CMarkOption #const CMARK_OPT_SOURCEPOS
 optHardBreaks :: CMarkOption
 optHardBreaks = CMarkOption #const CMARK_OPT_HARDBREAKS
 
--- | Normalize the document by consolidating adjacent text nodes.
-optNormalize :: CMarkOption
-optNormalize = CMarkOption #const CMARK_OPT_NORMALIZE
-
 -- | Convert straight quotes to curly, @---@ to em-dash, @--@ to en-dash.
optSmart :: CMarkOption optSmart = CMarkOption #const CMARK_OPT_SMART @@ -210,6 +283,20 @@ optSmart = CMarkOption #const CMARK_OPT_SMART optSafe :: CMarkOption optSafe = CMarkOption #const CMARK_OPT_SAFE +newtype CMarkExtension = CMarkExtension { unCMarkExtension :: String } + +extStrikethrough :: CMarkExtension +extStrikethrough = CMarkExtension "strikethrough" + +extTable :: CMarkExtension +extTable = CMarkExtension "table" + +extAutolink :: CMarkExtension +extAutolink = CMarkExtension "autolink" + +extTagfilter :: CMarkExtension +extTagfilter = CMarkExtension "tagfilter" + ptrToNodeType :: NodePtr -> IO NodeType ptrToNodeType ptr = do nodeType <- c_cmark_node_get_type ptr @@ -255,7 +342,16 @@ ptrToNodeType ptr = do -> return SOFTBREAK #const CMARK_NODE_LINEBREAK -> return LINEBREAK - _ -> error "Unknown node type" + _ -> if nodeType == fromIntegral (Unsafe.unsafePerformIO $ peek c_CMARK_NODE_STRIKETHROUGH) then + return STRIKETHROUGH + else if nodeType == fromIntegral (Unsafe.unsafePerformIO $ peek c_CMARK_NODE_TABLE) then + TABLE <$> alignments + else if nodeType == fromIntegral (Unsafe.unsafePerformIO $ peek c_CMARK_NODE_TABLE_ROW) then + return TABLE_ROW + else if nodeType == fromIntegral (Unsafe.unsafePerformIO $ peek c_CMARK_NODE_TABLE_CELL) then + return TABLE_CELL + else + error $ "Unknown node type " ++ (show nodeType) where literal = c_cmark_node_get_literal ptr >>= totext level = c_cmark_node_get_heading_level ptr onEnter = c_cmark_node_get_on_enter ptr >>= totext @@ -280,6 +376,14 @@ ptrToNodeType ptr = do url = c_cmark_node_get_url ptr >>= totext title = c_cmark_node_get_title ptr >>= totext info = c_cmark_node_get_fence_info ptr >>= totext + alignments = do + ncols <- c_cmarkextensions_get_table_columns ptr + cols <- c_cmarkextensions_get_table_alignments ptr + mapM (fmap ucharToAlignment . 
peekElemOff cols) [0..(fromIntegral ncols) - 1] + ucharToAlignment (CUChar 108) = CMarkGFM.Left + ucharToAlignment (CUChar 99) = CMarkGFM.Center + ucharToAlignment (CUChar 114) = CMarkGFM.Right + ucharToAlignment _ = None getPosInfo :: NodePtr -> IO (Maybe PosInfo) getPosInfo ptr = do @@ -376,6 +480,10 @@ fromNode (Node _ nodeType children) = do return n SOFTBREAK -> c_cmark_node_new (#const CMARK_NODE_SOFTBREAK) LINEBREAK -> c_cmark_node_new (#const CMARK_NODE_LINEBREAK) + STRIKETHROUGH -> c_cmark_node_new (fromIntegral . Unsafe.unsafePerformIO $ peek c_CMARK_NODE_STRIKETHROUGH) + TABLE _ -> error "constructing table not supported" + TABLE_ROW -> error "constructing table row not supported" + TABLE_CELL -> error "constructing table cell not supported" mapM_ (\child -> fromNode child >>= c_cmark_node_append_child node) children return node @@ -394,7 +502,7 @@ foreign import ccall "cmark.h cmark_node_new" c_cmark_node_new :: Int -> IO NodePtr foreign import ccall "cmark.h cmark_render_html" - c_cmark_render_html :: NodePtr -> CInt -> IO CString + c_cmark_render_html :: NodePtr -> CInt -> LlistPtr ExtensionPtr -> IO CString foreign import ccall "cmark.h cmark_render_xml" c_cmark_render_xml :: NodePtr -> CInt -> IO CString @@ -408,8 +516,17 @@ foreign import ccall "cmark.h cmark_render_latex" foreign import ccall "cmark.h cmark_render_commonmark" c_cmark_render_commonmark :: NodePtr -> CInt -> Int -> IO CString -foreign import ccall "cmark.h cmark_parse_document" - c_cmark_parse_document :: CString -> Int -> CInt -> IO NodePtr +foreign import ccall "cmark.h cmark_parser_new" + c_cmark_parser_new :: CInt -> IO ParserPtr + +foreign import ccall "cmark.h cmark_parser_feed" + c_cmark_parser_feed :: ParserPtr -> CString -> Int -> IO () + +foreign import ccall "cmark.h cmark_parser_finish" + c_cmark_parser_finish :: ParserPtr -> IO NodePtr + +foreign import ccall "cmark.h cmark_parser_free" + c_cmark_parser_free :: ParserPtr -> IO () foreign import ccall "cmark.h 
cmark_node_get_type" c_cmark_node_get_type :: NodePtr -> IO Int @@ -503,3 +620,42 @@ foreign import ccall "cmark.h cmark_node_set_on_exit" foreign import ccall "cmark.h &cmark_node_free" c_cmark_node_free :: FunPtr (NodePtr -> IO ()) + +foreign import ccall "registry.h cmark_register_plugin" + c_cmark_register_plugin :: FunPtr (PluginPtr -> IO Int) -> IO () + +foreign import ccall "core-extensions.h &core_extensions_registration" + c_core_extensions_registration :: FunPtr (PluginPtr -> IO Int) + +foreign import ccall "cmark_extension_api.h cmark_find_syntax_extension" + c_cmark_find_syntax_extension :: CString -> IO ExtensionPtr + +foreign import ccall "cmark.h cmark_llist_append" + c_cmark_llist_append :: MemPtr -> LlistPtr a -> Ptr () -> IO (LlistPtr a) + +foreign import ccall "cmark.h cmark_llist_free" + c_cmark_llist_free :: MemPtr -> LlistPtr a -> IO () + +foreign import ccall "cmark.h &CMARK_DEFAULT_MEM_ALLOCATOR" + c_CMARK_DEFAULT_MEM_ALLOCATOR :: MemPtr + +foreign import ccall "cmark_extension_api.h cmark_parser_attach_syntax_extension" + c_cmark_parser_attach_syntax_extension :: ParserPtr -> ExtensionPtr -> IO () + +foreign import ccall "strikethrough.h &CMARK_NODE_STRIKETHROUGH" + c_CMARK_NODE_STRIKETHROUGH :: Ptr CUShort + +foreign import ccall "table.h &CMARK_NODE_TABLE" + c_CMARK_NODE_TABLE :: Ptr CUShort + +foreign import ccall "table.h &CMARK_NODE_TABLE_ROW" + c_CMARK_NODE_TABLE_ROW :: Ptr CUShort + +foreign import ccall "table.h &CMARK_NODE_TABLE_CELL" + c_CMARK_NODE_TABLE_CELL :: Ptr CUShort + +foreign import ccall "core-extensions.h cmarkextensions_get_table_columns" + c_cmarkextensions_get_table_columns :: NodePtr -> IO CUShort + +foreign import ccall "core-extensions.h cmarkextensions_get_table_alignments" + c_cmarkextensions_get_table_alignments :: NodePtr -> IO (Ptr CUChar) diff --git a/README.md b/README.md index d4c879f..287dfe5 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -cmark-hs -======== +cmark-gfm-hs +============ -This 
package provides Haskell bindings for [libcmark], the reference -parser for [CommonMark], a fully specified variant of Markdown. -It includes sources for [libcmark] and does not require prior +This package provides Haskell bindings for [libcmark-gfm], the reference +parser for [GitHub Flavored Markdown], a fully specified variant of Markdown. +It includes sources for [libcmark-gfm] and does not require prior installation of the C library. cmark provides the following advantages over existing Markdown @@ -26,7 +26,7 @@ libraries for Haskell: without the exponential blowups in parsing time that sometimes afflict other libraries. (The input `bench/full-sample.md`, for example, causes both [pandoc] and [markdown] to grind to a - halt.) [libcmark] has been extensively fuzz-tested. + halt.) [libcmark-gfm] has been extensively fuzz-tested. - **Accuracy:** cmark passes the CommonMark spec's suite of over 600 conformance tests. @@ -44,7 +44,7 @@ libraries for Haskell: - **Ease of installation:** cmark is portable and has minimal dependencies. -cmark does not provide Haskell versions of the whole [libcmark] +cmark does not provide Haskell versions of the whole [libcmark-gfm] API, which is built around mutable `cmark_node` objects. Instead, it provides functions for converting CommonMark to HTML (and other formats), and a function for converting CommonMark to a `Node` @@ -58,8 +58,8 @@ dangerous URLs. **A note on stability:** There is a good chance the API will change significantly after this early release. 
-[CommonMark]: http://commonmark.org -[libcmark]: http://github.com/jgm/cmark +[GitHub Flavored Markdown]: https://github.github.com/gfm/ +[libcmark-gfm]: http://github.com/github/cmark [benchmarks]: https://github.com/jgm/cmark/blob/master/benchmarks.md [cheapskate]: https://hackage.haskell.org/package/cheapskate [pandoc]: https://hackage.haskell.org/package/pandoc diff --git a/appveyor.yml b/appveyor.yml index 9e06a67..87075b9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -21,5 +21,5 @@ test_script: # descriptor - echo "" | stack clean - echo "" | stack --no-terminal test - - echo "" | stack --local-bin-path . install cmark + - echo "" | stack --local-bin-path . install cmark-gfm diff --git a/bench/bench-cmark.hs b/bench/bench-cmark.hs index acb6983..d120d3d 100644 --- a/bench/bench-cmark.hs +++ b/bench/bench-cmark.hs @@ -1,6 +1,6 @@ import qualified Cheapskate as Cheapskate import qualified Cheapskate.Html as CheapskateHtml -import qualified CMark as CMark +import qualified CMarkGFM as CMarkGFM import qualified Text.Sundown.Html.Text as Sundown import qualified Text.Discount as Discount import qualified Text.Blaze.Html.Renderer.Text as Blaze @@ -20,7 +20,7 @@ main = do mkBench "cheapskate" (T.concat . toChunks . Blaze.renderHtml . CheapskateHtml.renderDoc . Cheapskate.markdown Cheapskate.def) sample , mkBench "discount" (Discount.parseMarkdownUtf8 []) sample , mkBench "markdown" (T.concat . toChunks . Blaze.renderHtml . Markdown.markdown Markdown.def . fromChunks . 
(:[])) sample - , mkBench "cmark" (CMark.commonmarkToHtml []) sample + , mkBench "cmark" (CMarkGFM.commonmarkToHtml []) sample ] -- Note: when full-sample.md rather than sample.md is used markdown diff --git a/cbits/arena.c b/cbits/arena.c new file mode 100644 index 0000000..801fb3c --- /dev/null +++ b/cbits/arena.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include "cmark.h" +#include "cmark_extension_api.h" + +static struct arena_chunk { + size_t sz, used; + uint8_t push_point; + void *ptr; + struct arena_chunk *prev; +} *A = NULL; + +static struct arena_chunk *alloc_arena_chunk(size_t sz, struct arena_chunk *prev) { + struct arena_chunk *c = (struct arena_chunk *)calloc(1, sizeof(*c)); + if (!c) + abort(); + c->sz = sz; + c->ptr = calloc(1, sz); + if (!c->ptr) + abort(); + c->prev = prev; + return c; +} + +void cmark_arena_push(void) { + if (!A) + return; + A->push_point = 1; + A = alloc_arena_chunk(10240, A); +} + +int cmark_arena_pop(void) { + if (!A) + return 0; + while (A && !A->push_point) { + free(A->ptr); + struct arena_chunk *n = A->prev; + free(A); + A = n; + } + if (A) + A->push_point = 0; + return 1; +} + +static void init_arena(void) { + A = alloc_arena_chunk(4 * 1048576, NULL); +} + +void cmark_arena_reset(void) { + while (A) { + free(A->ptr); + struct arena_chunk *n = A->prev; + free(A); + A = n; + } +} + +static void *arena_calloc(size_t nmem, size_t size) { + if (!A) + init_arena(); + + size_t sz = nmem * size + sizeof(size_t); + + // Round allocation sizes to largest integer size to + // ensure returned memory is correctly aligned + const size_t align = sizeof(size_t) - 1; + sz = (sz + align) & ~align; + + if (sz > A->sz) { + A->prev = alloc_arena_chunk(sz, A->prev); + return (uint8_t *) A->prev->ptr + sizeof(size_t); + } + if (sz > A->sz - A->used) { + A = alloc_arena_chunk(A->sz + A->sz / 2, A); + } + void *ptr = (uint8_t *) A->ptr + A->used; + A->used += sz; + *((size_t *) ptr) = sz - sizeof(size_t); + return (uint8_t *) ptr + 
sizeof(size_t); +} + +static void *arena_realloc(void *ptr, size_t size) { + if (!A) + init_arena(); + + void *new_ptr = arena_calloc(1, size); + if (ptr) + memcpy(new_ptr, ptr, ((size_t *) ptr)[-1]); + return new_ptr; +} + +static void arena_free(void *ptr) { + (void) ptr; + /* no-op */ +} + +cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free}; + +cmark_mem *cmark_get_arena_mem_allocator() { + return &CMARK_ARENA_MEM_ALLOCATOR; +} diff --git a/cbits/autolink.c b/cbits/autolink.c new file mode 100644 index 0000000..3d2a185 --- /dev/null +++ b/cbits/autolink.c @@ -0,0 +1,413 @@ +#include "autolink.h" +#include +#include +#include + +#if defined(_WIN32) +#define strncasecmp _strnicmp +#else +#include +#endif + +static int is_valid_hostchar(const uint8_t *link, size_t link_len) { + int32_t ch; + int r = cmark_utf8proc_iterate(link, (bufsize_t)link_len, &ch); + if (r < 0) + return 0; + return !cmark_utf8proc_is_space(ch) && !cmark_utf8proc_is_punctuation(ch); +} + +static int sd_autolink_issafe(const uint8_t *link, size_t link_len) { + static const size_t valid_uris_count = 3; + static const char *valid_uris[] = {"http://", "https://", "ftp://"}; + + size_t i; + + for (i = 0; i < valid_uris_count; ++i) { + size_t len = strlen(valid_uris[i]); + + if (link_len > len && strncasecmp((char *)link, valid_uris[i], len) == 0 && + is_valid_hostchar(link + len, link_len - len)) + return 1; + } + + return 0; +} + +static size_t autolink_delim(uint8_t *data, size_t link_end) { + uint8_t cclose, copen; + size_t i; + + for (i = 0; i < link_end; ++i) + if (data[i] == '<') { + link_end = i; + break; + } + + while (link_end > 0) { + cclose = data[link_end - 1]; + + switch (cclose) { + case ')': + copen = '('; + break; + default: + copen = 0; + } + + if (strchr("?!.,:*_~'\"", data[link_end - 1]) != NULL) + link_end--; + + else if (data[link_end - 1] == ';') { + size_t new_end = link_end - 2; + + while (new_end > 0 && cmark_isalpha(data[new_end])) + new_end--; 
+ + if (new_end < link_end - 2 && data[new_end] == '&') + link_end = new_end; + else + link_end--; + } else if (copen != 0) { + size_t closing = 0; + size_t opening = 0; + i = 0; + + /* Allow any number of matching brackets (as recognised in copen/cclose) + * at the end of the URL. If there is a greater number of closing + * brackets than opening ones, we remove one character from the end of + * the link. + * + * Examples (input text => output linked portion): + * + * http://www.pokemon.com/Pikachu_(Electric) + * => http://www.pokemon.com/Pikachu_(Electric) + * + * http://www.pokemon.com/Pikachu_((Electric) + * => http://www.pokemon.com/Pikachu_((Electric) + * + * http://www.pokemon.com/Pikachu_(Electric)) + * => http://www.pokemon.com/Pikachu_(Electric) + * + * http://www.pokemon.com/Pikachu_((Electric)) + * => http://www.pokemon.com/Pikachu_((Electric)) + */ + + while (i < link_end) { + if (data[i] == copen) + opening++; + else if (data[i] == cclose) + closing++; + + i++; + } + + if (closing <= opening) + break; + + link_end--; + } else + break; + } + + return link_end; +} + +static size_t check_domain(uint8_t *data, size_t size, int allow_short) { + size_t i, np = 0, uscore1 = 0, uscore2 = 0; + + for (i = 1; i < size - 1; i++) { + if (data[i] == '_') + uscore2++; + else if (data[i] == '.') { + uscore1 = uscore2; + uscore2 = 0; + np++; + } else if (!is_valid_hostchar(data + i, size - i) && data[i] != '-') + break; + } + + if (uscore1 > 0 || uscore2 > 0) + return 0; + + if (allow_short) { + /* We don't need a valid domain in the strict sense (with + * least one dot; so just make sure it's composed of valid + * domain characters and return the length of the the valid + * sequence. */ + return i; + } else { + /* a valid domain needs to have at least a dot. + * that's as far as we get */ + return np ? 
i : 0; + } +} + +static cmark_node *www_match(cmark_parser *parser, cmark_node *parent, + cmark_inline_parser *inline_parser) { + cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); + size_t max_rewind = cmark_inline_parser_get_offset(inline_parser); + uint8_t *data = chunk->data + max_rewind; + size_t size = chunk->len - max_rewind; + + size_t link_end; + + if (max_rewind > 0 && strchr("*_~(", data[-1]) == NULL && + !cmark_isspace(data[-1])) + return 0; + + if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) + return 0; + + link_end = check_domain(data, size, 0); + + if (link_end == 0) + return NULL; + + while (link_end < size && !cmark_isspace(data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end); + + if (link_end == 0) + return NULL; + + cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end)); + + cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); + + cmark_strbuf buf; + cmark_strbuf_init(parser->mem, &buf, 10); + cmark_strbuf_puts(&buf, "http://"); + cmark_strbuf_put(&buf, data, (bufsize_t)link_end); + node->as.link.url = cmark_chunk_buf_detach(&buf); + + cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + text->as.literal = + cmark_chunk_dup(chunk, (bufsize_t)max_rewind, (bufsize_t)link_end); + cmark_node_append_child(node, text); + + return node; +} + +static cmark_node *url_match(cmark_parser *parser, cmark_node *parent, + cmark_inline_parser *inline_parser) { + size_t link_end, domain_len; + int rewind = 0; + + cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); + int max_rewind = cmark_inline_parser_get_offset(inline_parser); + uint8_t *data = chunk->data + max_rewind; + size_t size = chunk->len - max_rewind; + + if (size < 4 || data[1] != '/' || data[2] != '/') + return 0; + + while (rewind < max_rewind && cmark_isalpha(data[-rewind - 1])) + rewind++; + + if (!sd_autolink_issafe(data - rewind, size + rewind)) + return 0; + + 
link_end = strlen("://"); + + domain_len = check_domain(data + link_end, size - link_end, 1); + + if (domain_len == 0) + return 0; + + link_end += domain_len; + while (link_end < size && !cmark_isspace(data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end); + + if (link_end == 0) + return NULL; + + cmark_inline_parser_set_offset(inline_parser, (int)(max_rewind + link_end)); + cmark_node_unput(parent, rewind); + + cmark_node *node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); + + cmark_chunk url = cmark_chunk_dup(chunk, max_rewind - rewind, + (bufsize_t)(link_end + rewind)); + node->as.link.url = url; + + cmark_node *text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + text->as.literal = url; + cmark_node_append_child(node, text); + + return node; +} + +static cmark_node *match(cmark_syntax_extension *ext, cmark_parser *parser, + cmark_node *parent, unsigned char c, + cmark_inline_parser *inline_parser) { + if (cmark_inline_parser_in_bracket(inline_parser, false) || + cmark_inline_parser_in_bracket(inline_parser, true)) + return NULL; + + if (c == ':') + return url_match(parser, parent, inline_parser); + + if (c == 'w') + return www_match(parser, parent, inline_parser); + + return NULL; + + // note that we could end up re-consuming something already a + // part of an inline, because we don't track when the last + // inline was finished in inlines.c. 
+} + +static void postprocess_text(cmark_parser *parser, cmark_node *text, int offset) { + size_t link_end; + uint8_t *data = text->as.literal.data, + *at; + size_t size = text->as.literal.len; + int rewind, max_rewind, + nb = 0, np = 0, ns = 0; + + if (offset < 0 || (size_t)offset >= size) + return; + + data += offset; + size -= offset; + + at = (uint8_t *)memchr(data, '@', size); + if (!at) + return; + + max_rewind = (int)(at - data); + data += max_rewind; + size -= max_rewind; + + for (rewind = 0; rewind < max_rewind; ++rewind) { + uint8_t c = data[-rewind - 1]; + + if (cmark_isalnum(c)) + continue; + + if (strchr(".+-_", c) != NULL) + continue; + + if (c == '/') + ns++; + + break; + } + + if (rewind == 0 || ns > 0) { + postprocess_text(parser, text, max_rewind + 1 + offset); + return; + } + + for (link_end = 0; link_end < size; ++link_end) { + uint8_t c = data[link_end]; + + if (cmark_isalnum(c)) + continue; + + if (c == '@') + nb++; + else if (c == '.' && link_end < size - 1) + np++; + else if (c != '-' && c != '_') + break; + } + + if (link_end < 2 || nb != 1 || np == 0 || + (!cmark_isalpha(data[link_end - 1]) && data[link_end - 1] != '.')) { + postprocess_text(parser, text, max_rewind + 1 + offset); + return; + } + + link_end = autolink_delim(data, link_end); + + if (link_end == 0) { + postprocess_text(parser, text, max_rewind + 1 + offset); + return; + } + + cmark_chunk_to_cstr(parser->mem, &text->as.literal); + + cmark_node *link_node = cmark_node_new_with_mem(CMARK_NODE_LINK, parser->mem); + cmark_strbuf buf; + cmark_strbuf_init(parser->mem, &buf, 10); + cmark_strbuf_puts(&buf, "mailto:"); + cmark_strbuf_put(&buf, data - rewind, (bufsize_t)(link_end + rewind)); + link_node->as.link.url = cmark_chunk_buf_detach(&buf); + + cmark_node *link_text = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_chunk email = cmark_chunk_dup( + &text->as.literal, + offset + max_rewind - rewind, + (bufsize_t)(link_end + rewind)); + 
cmark_chunk_to_cstr(parser->mem, &email); + link_text->as.literal = email; + cmark_node_append_child(link_node, link_text); + + cmark_node_insert_after(text, link_node); + + cmark_node *post = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + post->as.literal = cmark_chunk_dup(&text->as.literal, + (bufsize_t)(offset + max_rewind + link_end), + (bufsize_t)(size - link_end)); + cmark_chunk_to_cstr(parser->mem, &post->as.literal); + + cmark_node_insert_after(link_node, post); + + text->as.literal.len = offset + max_rewind - rewind; + text->as.literal.data[text->as.literal.len] = 0; + + postprocess_text(parser, post, 0); +} + +static cmark_node *postprocess(cmark_syntax_extension *ext, cmark_parser *parser, cmark_node *root) { + cmark_iter *iter; + cmark_event_type ev; + cmark_node *node; + bool in_link = false; + + cmark_consolidate_text_nodes(root); + iter = cmark_iter_new(root); + + while ((ev = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + node = cmark_iter_get_node(iter); + if (in_link) { + if (ev == CMARK_EVENT_EXIT && node->type == CMARK_NODE_LINK) { + in_link = false; + } + continue; + } + + if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_LINK) { + in_link = true; + continue; + } + + if (ev == CMARK_EVENT_ENTER && node->type == CMARK_NODE_TEXT) { + postprocess_text(parser, node, 0); + } + } + + cmark_iter_free(iter); + + return root; +} + +cmark_syntax_extension *create_autolink_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("autolink"); + cmark_llist *special_chars = NULL; + + cmark_syntax_extension_set_match_inline_func(ext, match); + cmark_syntax_extension_set_postprocess_func(ext, postprocess); + + cmark_mem *mem = cmark_get_default_mem_allocator(); + special_chars = cmark_llist_append(mem, special_chars, (void *)':'); + special_chars = cmark_llist_append(mem, special_chars, (void *)'w'); + cmark_syntax_extension_set_special_inline_chars(ext, special_chars); + + return ext; +} diff --git a/cbits/autolink.h 
b/cbits/autolink.h new file mode 100644 index 0000000..ee2ea2f --- /dev/null +++ b/cbits/autolink.h @@ -0,0 +1,8 @@ +#ifndef AUTOLINK_H +#define AUTOLINK_H + +#include "core-extensions.h" + +cmark_syntax_extension *create_autolink_extension(void); + +#endif diff --git a/cbits/blocks.c b/cbits/blocks.c index 5a293b2..0c22220 100644 --- a/cbits/blocks.c +++ b/cbits/blocks.c @@ -10,6 +10,7 @@ #include #include "cmark_ctype.h" +#include "syntax_extension.h" #include "config.h" #include "parser.h" #include "cmark.h" @@ -80,43 +81,72 @@ static cmark_node *make_document(cmark_mem *mem) { return e; } -cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { - cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser)); - parser->mem = mem; +int cmark_parser_attach_syntax_extension(cmark_parser *parser, + cmark_syntax_extension *extension) { + parser->syntax_extensions = cmark_llist_append(parser->mem, parser->syntax_extensions, extension); + if (extension->match_inline || extension->insert_inline_from_delim) { + parser->inline_syntax_extensions = cmark_llist_append( + parser->mem, parser->inline_syntax_extensions, extension); + } + + return 1; +} + +static void cmark_parser_dispose(cmark_parser *parser) { + if (parser->root) + cmark_node_free(parser->root); - cmark_node *document = make_document(mem); + if (parser->refmap) + cmark_reference_map_free(parser->refmap); +} + +static void cmark_parser_reset(cmark_parser *parser) { + cmark_llist *saved_exts = parser->syntax_extensions; + cmark_llist *saved_inline_exts = parser->inline_syntax_extensions; + int saved_options = parser->options; + cmark_mem *saved_mem = parser->mem; + + cmark_parser_dispose(parser); + + memset(parser, 0, sizeof(cmark_parser)); + parser->mem = saved_mem; - cmark_strbuf_init(mem, &parser->curline, 256); - cmark_strbuf_init(mem, &parser->linebuf, 0); + cmark_strbuf_init(parser->mem, &parser->curline, 256); + cmark_strbuf_init(parser->mem, &parser->linebuf, 0); - 
parser->refmap = cmark_reference_map_new(mem); + cmark_node *document = make_document(parser->mem); + + parser->refmap = cmark_reference_map_new(parser->mem); parser->root = document; parser->current = document; - parser->line_number = 0; - parser->offset = 0; - parser->column = 0; - parser->first_nonspace = 0; - parser->first_nonspace_column = 0; - parser->indent = 0; - parser->blank = false; - parser->partially_consumed_tab = false; - parser->last_line_length = 0; - parser->options = options; + parser->last_buffer_ended_with_cr = false; + parser->syntax_extensions = saved_exts; + parser->inline_syntax_extensions = saved_inline_exts; + parser->options = saved_options; +} + +cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { + cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser)); + parser->mem = mem; + parser->options = options; + cmark_parser_reset(parser); return parser; } cmark_parser *cmark_parser_new(int options) { - extern cmark_mem DEFAULT_MEM_ALLOCATOR; - return cmark_parser_new_with_mem(options, &DEFAULT_MEM_ALLOCATOR); + extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; + return cmark_parser_new_with_mem(options, &CMARK_DEFAULT_MEM_ALLOCATOR); } void cmark_parser_free(cmark_parser *parser) { cmark_mem *mem = parser->mem; + cmark_parser_dispose(parser); cmark_strbuf_free(&parser->curline); cmark_strbuf_free(&parser->linebuf); - cmark_reference_map_free(parser->refmap); + cmark_llist_free(parser->mem, parser->syntax_extensions); + cmark_llist_free(parser->mem, parser->inline_syntax_extensions); mem->free(parser); } @@ -143,23 +173,19 @@ static bool is_blank(cmark_strbuf *s, bufsize_t offset) { return true; } -static CMARK_INLINE bool can_contain(cmark_node_type parent_type, - cmark_node_type child_type) { - return (parent_type == CMARK_NODE_DOCUMENT || - parent_type == CMARK_NODE_BLOCK_QUOTE || - parent_type == CMARK_NODE_ITEM || - (parent_type == CMARK_NODE_LIST && child_type == CMARK_NODE_ITEM)); -} - static 
CMARK_INLINE bool accepts_lines(cmark_node_type block_type) { return (block_type == CMARK_NODE_PARAGRAPH || block_type == CMARK_NODE_HEADING || block_type == CMARK_NODE_CODE_BLOCK); } -static CMARK_INLINE bool contains_inlines(cmark_node_type block_type) { - return (block_type == CMARK_NODE_PARAGRAPH || - block_type == CMARK_NODE_HEADING); +static CMARK_INLINE bool contains_inlines(cmark_node *node) { + if (node->extension && node->extension->contains_inlines_func) { + return node->extension->contains_inlines_func(node->extension, node) != 0; + } + + return (node->type == CMARK_NODE_PARAGRAPH || + node->type == CMARK_NODE_HEADING); } static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) { @@ -340,7 +366,7 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, // if 'parent' isn't the kind of node that can accept this child, // then back up til we hit a node that can. - while (!can_contain(S_type(parent), block_type)) { + while (!cmark_node_can_contain_type(parent, block_type)) { parent = finalize(parser, parent); } @@ -359,23 +385,43 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, return child; } +void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add) { + cmark_llist *tmp_ext; + + for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data; + cmark_llist *tmp_char; + for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { + unsigned char c = (unsigned char)(size_t)tmp_char->data; + if (add) + cmark_inlines_add_special_character(c); + else + cmark_inlines_remove_special_character(c); + } + } +} + // Walk through node and all children, recursively, parsing // string content into inline content where appropriate. 
-static void process_inlines(cmark_mem *mem, cmark_node *root, +static void process_inlines(cmark_parser *parser, cmark_reference_map *refmap, int options) { - cmark_iter *iter = cmark_iter_new(root); + cmark_iter *iter = cmark_iter_new(parser->root); cmark_node *cur; cmark_event_type ev_type; + cmark_manage_extensions_special_characters(parser, true); + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER) { - if (contains_inlines(S_type(cur))) { - cmark_parse_inlines(mem, cur, refmap, options); + if (contains_inlines(cur)) { + cmark_parse_inlines(parser, cur, refmap, options); } } } + cmark_manage_extensions_special_characters(parser, false); + cmark_iter_free(iter); } @@ -482,7 +528,7 @@ static cmark_node *finalize_document(cmark_parser *parser) { } finalize(parser, parser->root); - process_inlines(parser->mem, parser->root, parser->refmap, parser->options); + process_inlines(parser, parser->refmap, parser->options); return parser->root; } @@ -521,6 +567,19 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) { S_parser_feed(parser, (const unsigned char *)buffer, len, false); } +void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len) { + cmark_strbuf saved_linebuf; + + cmark_strbuf_init(parser->mem, &saved_linebuf, 0); + cmark_strbuf_puts(&saved_linebuf, cmark_strbuf_cstr(&parser->linebuf)); + cmark_strbuf_clear(&parser->linebuf); + + S_parser_feed(parser, (const unsigned char *)buffer, len, true); + + cmark_strbuf_sets(&parser->linebuf, cmark_strbuf_cstr(&saved_linebuf)); + cmark_strbuf_free(&saved_linebuf); +} + static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof) { const unsigned char *end = buffer + len; @@ -548,7 +607,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, process = true; } - chunk_len = (eol - buffer); + chunk_len = (bufsize_t)(eol - 
buffer); if (process) { if (parser->linebuf.size > 0) { cmark_strbuf_put(&parser->linebuf, buffer, chunk_len); @@ -784,6 +843,21 @@ static bool parse_html_block_prefix(cmark_parser *parser, return res; } +static bool parse_extension_block(cmark_parser *parser, + cmark_node *container, + cmark_chunk *input) +{ + bool res = false; + + if (container->extension->last_block_matches) { + if (container->extension->last_block_matches( + container->extension, parser, input->data, input->len, container)) + res = true; + } + + return res; +} + /** * For each containing node, try to parse the associated line start. * @@ -805,6 +879,12 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input, S_find_first_nonspace(parser, input); + if (container->extension) { + if (!parse_extension_block(parser, container, input)) + goto done; + continue; + } + switch (cont_type) { case CMARK_NODE_BLOCK_QUOTE: if (!parse_block_quote_prefix(parser, input)) @@ -907,7 +987,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, parser->first_nonspace + 1); (*container)->as.code.fenced = true; (*container)->as.code.fence_char = peek_at(input, parser->first_nonspace); - (*container)->as.code.fence_length = (matched > 255) ? 255 : matched; + (*container)->as.code.fence_length = (matched > 255) ? 
255 : (uint8_t)matched; (*container)->as.code.fence_offset = (int8_t)(parser->first_nonspace - parser->offset); (*container)->as.code.info = cmark_chunk_literal(""); @@ -1005,9 +1085,27 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.code.fence_length = 0; (*container)->as.code.fence_offset = 0; (*container)->as.code.info = cmark_chunk_literal(""); - } else { - break; + cmark_llist *tmp; + cmark_node *new_container = NULL; + + for (tmp = parser->syntax_extensions; tmp; tmp=tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + + if (ext->try_opening_block) { + new_container = ext->try_opening_block( + ext, indented, parser, *container, input->data, input->len); + + if (new_container) { + *container = new_container; + break; + } + } + } + + if (!new_container) { + break; + } } if (accepts_lines(S_type(*container))) { @@ -1140,6 +1238,9 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, bool all_matched = true; cmark_node *container; cmark_chunk input; + cmark_node *current; + + cmark_strbuf_clear(&parser->curline); if (parser->options & CMARK_OPT_VALIDATE_UTF8) cmark_utf8proc_check(&parser->curline, buffer, bytes); @@ -1161,6 +1262,12 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, input.len = parser->curline.size; input.alloc = 0; + // Skip UTF-8 BOM. 
+ if (parser->line_number == 0 && + input.len >= 3 && + memcmp(input.data, "\xef\xbb\xbf", 3) == 0) + parser->offset += 3; + parser->line_number++; last_matched_container = check_open_blocks(parser, &input, &all_matched); @@ -1170,8 +1277,12 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, container = last_matched_container; + current = parser->current; + open_new_blocks(parser, &container, &input, all_matched); + /* parser->current might have changed if feed_reentrant was called */ + if (current == parser->current) add_text_to_container(parser, container, last_matched_container, &input); finished: @@ -1187,6 +1298,13 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, } cmark_node *cmark_parser_finish(cmark_parser *parser) { + cmark_node *res; + cmark_llist *extensions; + + /* Parser was already finished once */ + if (parser->root == NULL) + return NULL; + if (parser->linebuf.size) { S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); cmark_strbuf_clear(&parser->linebuf); @@ -1197,11 +1315,88 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { cmark_consolidate_text_nodes(parser->root); cmark_strbuf_free(&parser->curline); + cmark_strbuf_free(&parser->linebuf); #if CMARK_DEBUG_NODES if (cmark_node_check(parser->root, stderr)) { abort(); } #endif - return parser->root; + + for (extensions = parser->syntax_extensions; extensions; extensions = extensions->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) extensions->data; + if (ext->postprocess_func) { + cmark_node *processed = ext->postprocess_func(ext, parser, parser->root); + if (processed) + parser->root = processed; + } + } + + res = parser->root; + parser->root = NULL; + + cmark_parser_reset(parser); + + return res; +} + +int cmark_parser_get_line_number(cmark_parser *parser) { + return parser->line_number; +} + +bufsize_t cmark_parser_get_offset(cmark_parser *parser) { + return parser->offset; +} + +bufsize_t 
cmark_parser_get_column(cmark_parser *parser) { + return parser->column; +} + +int cmark_parser_get_first_nonspace(cmark_parser *parser) { + return parser->first_nonspace; +} + +int cmark_parser_get_first_nonspace_column(cmark_parser *parser) { + return parser->first_nonspace_column; +} + +int cmark_parser_get_indent(cmark_parser *parser) { + return parser->indent; +} + +int cmark_parser_is_blank(cmark_parser *parser) { + return parser->blank; +} + +int cmark_parser_has_partially_consumed_tab(cmark_parser *parser) { + return parser->partially_consumed_tab; +} + +int cmark_parser_get_last_line_length(cmark_parser *parser) { + return parser->last_line_length; +} + +cmark_node *cmark_parser_add_child(cmark_parser *parser, + cmark_node *parent, + cmark_node_type block_type, + int start_column) { + return add_child(parser, parent, block_type, start_column); +} + +void cmark_parser_advance_offset(cmark_parser *parser, + const char *input, + int count, + int columns) { + cmark_chunk input_chunk = cmark_chunk_literal(input); + + S_advance_offset(parser, &input_chunk, count, columns != 0); +} + +void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser, + cmark_ispunct_func func) { + parser->backslash_ispunct = func; +} + +cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser) { + return parser->syntax_extensions; } diff --git a/cbits/buffer.c b/cbits/buffer.c index a237b11..75733d4 100644 --- a/cbits/buffer.c +++ b/cbits/buffer.c @@ -96,7 +96,7 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { cmark_strbuf_set(buf, (const unsigned char *)string, - string ? strlen(string) : 0); + string ? 
(bufsize_t)strlen(string) : 0); } void cmark_strbuf_putc(cmark_strbuf *buf, int c) { @@ -117,7 +117,7 @@ void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, } void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { - cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); + cmark_strbuf_put(buf, (const unsigned char *)string, (bufsize_t)strlen(string)); } void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, diff --git a/cbits/buffer.h b/cbits/buffer.h index e878075..90fa7df 100644 --- a/cbits/buffer.h +++ b/cbits/buffer.h @@ -32,22 +32,32 @@ extern unsigned char cmark_strbuf__initbuf[]; * For the cases where CMARK_BUF_INIT cannot be used to do static * initialization. */ +CMARK_EXPORT void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf, bufsize_t initial_size); /** * Grow the buffer to hold at least `target_size` bytes. */ +CMARK_EXPORT void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); +CMARK_EXPORT void cmark_strbuf_free(cmark_strbuf *buf); + +CMARK_EXPORT void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); +CMARK_EXPORT bufsize_t cmark_strbuf_len(const cmark_strbuf *buf); +CMARK_EXPORT int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); +CMARK_EXPORT unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); + +CMARK_EXPORT void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf); @@ -57,22 +67,48 @@ static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) +CMARK_EXPORT void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); + +CMARK_EXPORT void cmark_strbuf_sets(cmark_strbuf *buf, const char *string); + +CMARK_EXPORT void cmark_strbuf_putc(cmark_strbuf *buf, int c); + +CMARK_EXPORT void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); + +CMARK_EXPORT void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); + 
+CMARK_EXPORT void cmark_strbuf_clear(cmark_strbuf *buf); +CMARK_EXPORT bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos); + +CMARK_EXPORT bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos); + +CMARK_EXPORT void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); + +CMARK_EXPORT void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); + +CMARK_EXPORT void cmark_strbuf_rtrim(cmark_strbuf *buf); + +CMARK_EXPORT void cmark_strbuf_trim(cmark_strbuf *buf); + +CMARK_EXPORT void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); + +CMARK_EXPORT void cmark_strbuf_unescape(cmark_strbuf *s); #ifdef __cplusplus diff --git a/cbits/chunk.h b/cbits/chunk.h index f198be3..f0a1c6c 100644 --- a/cbits/chunk.h +++ b/cbits/chunk.h @@ -117,4 +117,20 @@ static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) { return c; } +/* trim_new variants are to be used when the source chunk may or may not be + * allocated; forces a newly allocated chunk. 
*/ +static CMARK_INLINE cmark_chunk cmark_chunk_ltrim_new(cmark_mem *mem, cmark_chunk *c) { + cmark_chunk r = cmark_chunk_dup(c, 0, c->len); + cmark_chunk_ltrim(&r); + cmark_chunk_to_cstr(mem, &r); + return r; +} + +static CMARK_INLINE cmark_chunk cmark_chunk_rtrim_new(cmark_mem *mem, cmark_chunk *c) { + cmark_chunk r = cmark_chunk_dup(c, 0, c->len); + cmark_chunk_rtrim(&r); + cmark_chunk_to_cstr(mem, &r); + return r; +} + #endif diff --git a/cbits/cmark.c b/cbits/cmark.c index d64237f..dd013d2 100644 --- a/cbits/cmark.c +++ b/cbits/cmark.c @@ -1,11 +1,15 @@ #include #include #include +#include "registry.h" #include "node.h" #include "houdini.h" #include "cmark.h" #include "buffer.h" +cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK; +cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE; + int cmark_version() { return CMARK_VERSION; } const char *cmark_version_string() { return CMARK_VERSION_STRING; } @@ -28,7 +32,15 @@ static void *xrealloc(void *ptr, size_t size) { return new_ptr; } -cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free}; +static void xfree(void *ptr) { + free(ptr); +} + +cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, xfree}; + +cmark_mem *cmark_get_default_mem_allocator() { + return &CMARK_DEFAULT_MEM_ALLOCATOR; +} char *cmark_markdown_to_html(const char *text, size_t len, int options) { cmark_node *doc; @@ -36,7 +48,7 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) { doc = cmark_parse_document(text, len, options); - result = cmark_render_html(doc, options); + result = cmark_render_html(doc, options, NULL); cmark_node_free(doc); return result; diff --git a/cbits/cmark.h b/cbits/cmark.h index d1a65aa..d98e00e 100644 --- a/cbits/cmark.h +++ b/cbits/cmark.h @@ -1,9 +1,9 @@ -#ifndef CMARK_H -#define CMARK_H +#ifndef CMARK_CMARK_H +#define CMARK_CMARK_H #include -#include -#include +#include "cmark_export.h" +#include "cmark_version.h" #ifdef __cplusplus extern "C" { @@ -11,7 +11,7 @@ 
extern "C" { /** # NAME * - * **cmark** - CommonMark parsing, manipulating, and rendering + * **cmark-gfm** - CommonMark parsing, manipulating, and rendering */ /** # DESCRIPTION @@ -30,41 +30,44 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options); /** ## Node Structure */ +#define CMARK_NODE_TYPE_PRESENT (0x8000) +#define CMARK_NODE_TYPE_BLOCK (CMARK_NODE_TYPE_PRESENT | 0x0000) +#define CMARK_NODE_TYPE_INLINE (CMARK_NODE_TYPE_PRESENT | 0x4000) +#define CMARK_NODE_TYPE_MASK (0xc000) +#define CMARK_NODE_VALUE_MASK (0x3fff) + typedef enum { /* Error status */ - CMARK_NODE_NONE, + CMARK_NODE_NONE = 0x0000, /* Block */ - CMARK_NODE_DOCUMENT, - CMARK_NODE_BLOCK_QUOTE, - CMARK_NODE_LIST, - CMARK_NODE_ITEM, - CMARK_NODE_CODE_BLOCK, - CMARK_NODE_HTML_BLOCK, - CMARK_NODE_CUSTOM_BLOCK, - CMARK_NODE_PARAGRAPH, - CMARK_NODE_HEADING, - CMARK_NODE_THEMATIC_BREAK, - - CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT, - CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK, + CMARK_NODE_DOCUMENT = CMARK_NODE_TYPE_BLOCK | 0x0001, + CMARK_NODE_BLOCK_QUOTE = CMARK_NODE_TYPE_BLOCK | 0x0002, + CMARK_NODE_LIST = CMARK_NODE_TYPE_BLOCK | 0x0003, + CMARK_NODE_ITEM = CMARK_NODE_TYPE_BLOCK | 0x0004, + CMARK_NODE_CODE_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0005, + CMARK_NODE_HTML_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0006, + CMARK_NODE_CUSTOM_BLOCK = CMARK_NODE_TYPE_BLOCK | 0x0007, + CMARK_NODE_PARAGRAPH = CMARK_NODE_TYPE_BLOCK | 0x0008, + CMARK_NODE_HEADING = CMARK_NODE_TYPE_BLOCK | 0x0009, + CMARK_NODE_THEMATIC_BREAK = CMARK_NODE_TYPE_BLOCK | 0x000a, /* Inline */ - CMARK_NODE_TEXT, - CMARK_NODE_SOFTBREAK, - CMARK_NODE_LINEBREAK, - CMARK_NODE_CODE, - CMARK_NODE_HTML_INLINE, - CMARK_NODE_CUSTOM_INLINE, - CMARK_NODE_EMPH, - CMARK_NODE_STRONG, - CMARK_NODE_LINK, - CMARK_NODE_IMAGE, - - CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT, - CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE, + CMARK_NODE_TEXT = CMARK_NODE_TYPE_INLINE | 0x0001, + CMARK_NODE_SOFTBREAK = CMARK_NODE_TYPE_INLINE | 0x0002, + 
CMARK_NODE_LINEBREAK = CMARK_NODE_TYPE_INLINE | 0x0003, + CMARK_NODE_CODE = CMARK_NODE_TYPE_INLINE | 0x0004, + CMARK_NODE_HTML_INLINE = CMARK_NODE_TYPE_INLINE | 0x0005, + CMARK_NODE_CUSTOM_INLINE = CMARK_NODE_TYPE_INLINE | 0x0006, + CMARK_NODE_EMPH = CMARK_NODE_TYPE_INLINE | 0x0007, + CMARK_NODE_STRONG = CMARK_NODE_TYPE_INLINE | 0x0008, + CMARK_NODE_LINK = CMARK_NODE_TYPE_INLINE | 0x0009, + CMARK_NODE_IMAGE = CMARK_NODE_TYPE_INLINE | 0x000a, } cmark_node_type; +extern cmark_node_type CMARK_NODE_LAST_BLOCK; +extern cmark_node_type CMARK_NODE_LAST_INLINE; + /* For backwards compatibility: */ #define CMARK_NODE_HEADER CMARK_NODE_HEADING #define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK @@ -100,6 +103,70 @@ typedef struct cmark_mem { void (*free)(void *); } cmark_mem; +/** The default memory allocator; uses the system's calloc, + * realloc and free. + */ +CMARK_EXPORT +cmark_mem *cmark_get_default_mem_allocator(); + +/** An arena allocator; uses system calloc to allocate large + * slabs of memory. Memory in these slabs is not reused at all. + */ +CMARK_EXPORT +cmark_mem *cmark_get_arena_mem_allocator(); + +/** Resets the arena allocator, quickly returning all used memory + * to the operating system. + */ +CMARK_EXPORT +void cmark_arena_reset(void); + +/** Callback for freeing user data with a 'cmark_mem' context. + */ +typedef void (*cmark_free_func) (cmark_mem *mem, void *user_data); + + +/* + * ## Basic data structures + * + * To keep dependencies to the strict minimum, libcmark implements + * its own versions of "classic" data structures. + */ + +/** + * ### Linked list + */ + +/** A generic singly linked list. + */ +typedef struct _cmark_llist +{ + struct _cmark_llist *next; + void *data; +} cmark_llist; + +/** Append an element to the linked list, return the possibly modified + * head of the list. 
+ */ +CMARK_EXPORT +cmark_llist * cmark_llist_append (cmark_mem * mem, + cmark_llist * head, + void * data); + +/** Free the list starting with 'head', calling 'free_func' with the + * data pointer of each of its elements + */ +CMARK_EXPORT +void cmark_llist_free_full (cmark_mem * mem, + cmark_llist * head, + cmark_free_func free_func); + +/** Free the list starting with 'head' + */ +CMARK_EXPORT +void cmark_llist_free (cmark_mem * mem, + cmark_llist * head); + /** * ## Creating and Destroying Nodes */ @@ -254,6 +321,11 @@ CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node); */ CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data); +/** Set free function for user data */ +CMARK_EXPORT +int cmark_node_set_user_data_free_func(cmark_node *node, + cmark_free_func free_func); + /** Returns the type of 'node', or `CMARK_NODE_NONE` on error. */ CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node); @@ -334,6 +406,15 @@ CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node); */ CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info); +/** Sets code blocks fencing details + */ +CMARK_EXPORT int cmark_node_set_fenced(cmark_node * node, int fenced, + int length, int offset, char character); + +/** Returns code blocks fencing details + */ +CMARK_EXPORT int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character); + /** Returns the URL of a link or image 'node', or an empty string if no URL is set. Returns NULL if called on a node that is not a link or image. @@ -435,6 +516,10 @@ CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child); */ CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root); +/** Ensures a node and all its children own their own chunk memory. 
+ */ +CMARK_EXPORT void cmark_node_own(cmark_node *root); + /** * ## Parsing * @@ -507,12 +592,24 @@ cmark_node *cmark_parse_file(FILE *f, int options); CMARK_EXPORT char *cmark_render_xml(cmark_node *root, int options); +/** As for 'cmark_render_xml', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem); + /** Render a 'node' tree as an HTML fragment. It is up to the user * to add an appropriate header and footer. It is the caller's * responsibility to free the returned buffer. */ CMARK_EXPORT -char *cmark_render_html(cmark_node *root, int options); +char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions); + +/** As for 'cmark_render_html', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem); /** Render a 'node' tree as a groff man page, without the header. * It is the caller's responsibility to free the returned buffer. @@ -520,18 +617,48 @@ char *cmark_render_html(cmark_node *root, int options); CMARK_EXPORT char *cmark_render_man(cmark_node *root, int options, int width); +/** As for 'cmark_render_man', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** Render a 'node' tree as a commonmark document. * It is the caller's responsibility to free the returned buffer. */ CMARK_EXPORT char *cmark_render_commonmark(cmark_node *root, int options, int width); +/** As for 'cmark_render_commonmark', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + +/** Render a 'node' tree as a plain text document. 
+ * It is the caller's responsibility to free the returned buffer. + */ +CMARK_EXPORT +char *cmark_render_plaintext(cmark_node *root, int options, int width); + +/** As for 'cmark_render_plaintext', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** Render a 'node' tree as a LaTeX document. * It is the caller's responsibility to free the returned buffer. */ CMARK_EXPORT char *cmark_render_latex(cmark_node *root, int options, int width); +/** As for 'cmark_render_latex', but specifying the allocator to use for + * the resulting string. + */ +CMARK_EXPORT +char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem); + /** * ## Options */ @@ -581,6 +708,15 @@ char *cmark_render_latex(cmark_node *root, int options, int width); */ #define CMARK_OPT_SMART (1 << 10) +/** Use GitHub-style
 <pre lang="x"> tags for code blocks instead of <pre><code
+ * class="language-x">.
+ */
+#define CMARK_OPT_GITHUB_PRE_LANG (1 << 11)
+
+/** Be liberal in interpreting inline HTML tags.
+ */
+#define CMARK_OPT_LIBERAL_HTML_TAG (1 << 12)
+
 /**
  * ## Version information
  */
diff --git a/cbits/cmark_ctype.h b/cbits/cmark_ctype.h
index 9a07618..4b90940 100644
--- a/cbits/cmark_ctype.h
+++ b/cbits/cmark_ctype.h
@@ -5,18 +5,25 @@
 extern "C" {
 #endif
 
+#include "cmark_export.h"
+
 /** Locale-independent versions of functions from ctype.h.
  * We want cmark to behave the same no matter what the system locale.
  */
 
+CMARK_EXPORT
 int cmark_isspace(char c);
 
+CMARK_EXPORT
 int cmark_ispunct(char c);
 
+CMARK_EXPORT
 int cmark_isalnum(char c);
 
+CMARK_EXPORT
 int cmark_isdigit(char c);
 
+CMARK_EXPORT
 int cmark_isalpha(char c);
 
 #ifdef __cplusplus
diff --git a/cbits/cmark_extension_api.h b/cbits/cmark_extension_api.h
new file mode 100644
index 0000000..8a273b5
--- /dev/null
+++ b/cbits/cmark_extension_api.h
@@ -0,0 +1,708 @@
+#ifndef CMARK_CMARK_EXTENSION_API_H
+#define CMARK_CMARK_EXTENSION_API_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <cmark.h>
+#include <render.h>
+#include <buffer.h>
+
+/**
+ * ## Extension Support
+ *
+ * While the "core" of libcmark is strictly compliant with the
+ * specification, an API is provided for extension writers to
+ * hook into the parsing process.
+ *
+ * It should be noted that the cmark_node API already offers
+ * room for customization, with methods offered to traverse and
+ * modify the AST, and even define custom blocks.
+ * When the desired customization is achievable in an error-proof
+ * way using that API, it should be the preferred method.
+ *
+ * The following API requires a more in-depth understanding
+ * of libcmark's parsing strategy, which is exposed
+ * [here](http://spec.commonmark.org/0.24/#appendix-a-parsing-strategy).
+ *
+ * It should be used when "a posteriori" modification of the AST
+ * proves to be too difficult / impossible to implement correctly.
+ *
+ * It can also serve as an intermediary step before extending
+ * the specification, as an extension implemented using this API
+ * will be trivially integrated in the core if it proves to be
+ * desirable.
+ */
+
+typedef struct cmark_plugin cmark_plugin;
+
+/** A syntax extension that can be attached to a cmark_parser
+ * with cmark_parser_attach_syntax_extension().
+ *
+ * Extension writers should assign functions matching
+ * the signature of the following 'virtual methods' to
+ * implement new functionality.
+ *
+ * Their calling order and expected behaviour match the procedure outlined
+ * at <http://spec.commonmark.org/0.24/#phase-1-block-structure>:
+ *
+ * During step 1, cmark will call the function provided through
+ * 'cmark_syntax_extension_set_match_block_func' when it
+ * iterates over an open block created by this extension,
+ * to determine  whether it could contain the new line.
+ * If no function was provided, cmark will close the block.
+ *
+ * During step 2, if and only if the new line doesn't match any
+ * of the standard syntax rules, cmark will call the function
+ * provided through 'cmark_syntax_extension_set_open_block_func'
+ * to let the extension determine whether that new line matches
+ * one of its syntax rules.
+ * It is the responsibility of the parser to create and add the
+ * new block with cmark_parser_make_block and cmark_parser_add_child.
+ * If no function was provided, the extension will have
+ * no effect at all on the final block structure of the AST.
+ *
+ * #### Inline parsing phase hooks
+ *
+ * For each character provided by the extension through
+ * 'cmark_syntax_extension_set_special_inline_chars',
+ * the function provided by the extension through
+ * 'cmark_syntax_extension_set_match_inline_func'
+ * will get called; it is the responsibility of the extension
+ * to scan the characters located at the current inline parsing offset
+ * with the cmark_inline_parser API.
+ *
+ * Depending on the type of the extension, it can either:
+ *
+ * * Scan forward, determine that the syntax matches and return
+ *   a newly-created inline node with the appropriate type.
+ *   This is the technique that would be used if inline code
+ *   (with backticks) was implemented as an extension.
+ * * Scan only the character(s) that its syntax rules require
+ *   for opening and closing nodes, push a delimiter on the
+ *   delimiter stack, and return a simple text node with its
+ *   contents set to the character(s) consumed.
+ *   This is the technique that would be used if emphasis
+ *   inlines were implemented as an extension.
+ *
+ * When an extension has pushed delimiters on the stack,
+ * the function provided through
+ * 'cmark_syntax_extension_set_inline_from_delim_func'
+ * will get called in a later phase,
+ * when the inline parser has matched opener and closer delimiters
+ * created by the extension together.
+ *
+ * It is then the responsibility of the extension to modify
+ * and populate the opener inline text node, and to remove
+ * the necessary delimiters from the delimiter stack.
+ *
+ * Finally, the extension should return NULL if its scan didn't
+ * match its syntax rules.
+ *
+ * The extension can store whatever private data it might need
+ * with 'cmark_syntax_extension_set_private',
+ * and optionally define a free function for this data.
+ */
+typedef struct cmark_syntax_extension cmark_syntax_extension;
+
+typedef struct subject cmark_inline_parser;
+
+/** Exposed raw for now */
+
+typedef struct delimiter {
+  struct delimiter *previous;
+  struct delimiter *next;
+  cmark_node *inl_text;
+  bufsize_t length;
+  unsigned char delim_char;
+  int can_open;
+  int can_close;
+} delimiter;
+
+/**
+ * ### Plugin API.
+ *
+ * Extensions should be distributed as dynamic libraries,
+ * with a single exported function named after the distributed
+ * filename.
+ *
+ * When discovering extensions (see cmark_init), cmark will
+ * try to load a symbol named "init_{{filename}}" in all the
+ * dynamic libraries it encounters.
+ *
+ * For example, given a dynamic library named myextension.so
+ * (or myextension.dll), cmark will try to load the symbol
+ * named "init_myextension". This means that the filename
+ * must lend itself to forming a valid C identifier, with
+ * the notable exception of dashes, which will be translated
+ * to underscores, which means cmark will look for a function
+ * named "init_my_extension" if it encounters a dynamic library
+ * named "my-extension.so".
+ *
+ * See the 'cmark_plugin_init_func' typedef for the exact prototype
+ * this function should follow.
+ *
+ * For now the extensibility of cmark is not complete, as
+ * it only offers an API to hook into the block parsing phase
+ * (<http://spec.commonmark.org/0.24/#phase-1-block-structure>).
+ *
+ * See 'cmark_plugin_register_syntax_extension' for more information.
+ */
+
+/** The prototype plugins' init function should follow.
+ */
+typedef int (*cmark_plugin_init_func)(cmark_plugin *plugin);
+
+/** Register a syntax 'extension' with the 'plugin'; it will be made
+ * available as an extension and, if attached to a cmark_parser
+ * with 'cmark_parser_attach_syntax_extension', it will contribute
+ * to the block parsing process.
+ *
+ * See the documentation for 'cmark_syntax_extension' for information
+ * on how to implement one.
+ *
+ * This function will typically be called from the init function
+ * of external modules.
+ *
+ * This takes ownership of 'extension', one should not call
+ * 'cmark_syntax_extension_free' on a registered extension.
+ */
+CMARK_EXPORT
+int cmark_plugin_register_syntax_extension(cmark_plugin *plugin,
+                                            cmark_syntax_extension *extension);
+
+/** This will search for the syntax extension named 'name' among the
+ *  registered syntax extensions.
+ *
+ *  It can then be attached to a cmark_parser
+ *  with the cmark_parser_attach_syntax_extension method.
+ */
+CMARK_EXPORT
+cmark_syntax_extension *cmark_find_syntax_extension(const char *name);
+
+/** Should create and add a new open block to 'parent_container' if
+ * 'input' matches a syntax rule for that block type. It is allowed
+ * to modify the type of 'parent_container'.
+ *
+ * Should return the newly created block if there is one, or
+ * 'parent_container' if its type was modified, or NULL.
+ */
+typedef cmark_node * (*cmark_open_block_func) (cmark_syntax_extension *extension,
+                                       int indented,
+                                       cmark_parser *parser,
+                                       cmark_node *parent_container,
+                                       unsigned char *input,
+                                       int len);
+
+typedef cmark_node *(*cmark_match_inline_func)(cmark_syntax_extension *extension,
+                                       cmark_parser *parser,
+                                       cmark_node *parent,
+                                       unsigned char character,
+                                       cmark_inline_parser *inline_parser);
+
+typedef delimiter *(*cmark_inline_from_delim_func)(cmark_syntax_extension *extension,
+                                           cmark_parser *parser,
+                                           cmark_inline_parser *inline_parser,
+                                           delimiter *opener,
+                                           delimiter *closer);
+
+/** Should return 'true' if 'input' can be contained in 'container',
+ *  'false' otherwise.
+ */
+typedef int (*cmark_match_block_func)        (cmark_syntax_extension *extension,
+                                       cmark_parser *parser,
+                                       unsigned char *input,
+                                       int len,
+                                       cmark_node *container);
+
+typedef const char *(*cmark_get_type_string_func) (cmark_syntax_extension *extension,
+                                                   cmark_node *node);
+
+typedef int (*cmark_can_contain_func) (cmark_syntax_extension *extension,
+                                       cmark_node *node,
+                                       cmark_node_type child);
+
+typedef int (*cmark_contains_inlines_func) (cmark_syntax_extension *extension,
+                                            cmark_node *node);
+
+typedef void (*cmark_common_render_func) (cmark_syntax_extension *extension,
+                                          cmark_renderer *renderer,
+                                          cmark_node *node,
+                                          cmark_event_type ev_type,
+                                          int options);
+
+typedef int (*cmark_commonmark_escape_func) (cmark_syntax_extension *extension,
+                                              cmark_node *node,
+                                              int c);
+
+typedef void (*cmark_html_render_func) (cmark_syntax_extension *extension,
+                                        cmark_html_renderer *renderer,
+                                        cmark_node *node,
+                                        cmark_event_type ev_type,
+                                        int options);
+
+typedef int (*cmark_html_filter_func) (cmark_syntax_extension *extension,
+                                       const unsigned char *tag,
+                                       size_t tag_len);
+
+typedef cmark_node *(*cmark_postprocess_func) (cmark_syntax_extension *extension,
+                                               cmark_parser *parser,
+                                               cmark_node *root);
+
+typedef int (*cmark_ispunct_func) (char c);
+
+typedef void (*cmark_opaque_free_func) (cmark_syntax_extension *extension,
+                                        cmark_mem *mem,
+                                        cmark_node *node);
+
+/** Free a cmark_syntax_extension.
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_free               (cmark_mem *mem, cmark_syntax_extension *extension);
+
+/** Return a newly-constructed cmark_syntax_extension, named 'name'.
+ */
+CMARK_EXPORT
+cmark_syntax_extension *cmark_syntax_extension_new (const char *name);
+
+CMARK_EXPORT
+cmark_node_type cmark_syntax_extension_add_node(int is_inline);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension,
+                                                cmark_open_block_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension,
+                                                 cmark_match_block_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension,
+                                                  cmark_match_inline_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension,
+                                                       cmark_inline_from_delim_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension,
+                                                     cmark_llist *special_chars);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension,
+                                                     cmark_get_type_string_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension,
+                                                 cmark_can_contain_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension,
+                                                      cmark_contains_inlines_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension,
+                                                       cmark_common_render_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension,
+                                                      cmark_common_render_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension,
+                                                  cmark_common_render_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension,
+                                                cmark_common_render_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension,
+                                                 cmark_html_render_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension,
+                                                 cmark_html_filter_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension,
+                                                       cmark_commonmark_escape_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_private(cmark_syntax_extension *extension,
+                                        void *priv,
+                                        cmark_free_func free_func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension,
+                                                 cmark_postprocess_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension,
+                                                 cmark_opaque_free_func func);
+
+/** See the documentation for 'cmark_syntax_extension'
+ */
+CMARK_EXPORT
+void cmark_parser_set_backslash_ispunct_func(cmark_parser *parser,
+                                             cmark_ispunct_func func);
+
+/** Return the index of the line currently being parsed, starting with 1.
+ */
+CMARK_EXPORT
+int cmark_parser_get_line_number(cmark_parser *parser);
+
+/** Return the offset in bytes in the line being processed.
+ *
+ * Example:
+ *
+ * ### foo
+ *
+ * Here, offset will first be 0, then 5 (the index of the 'f' character).
+ */
+CMARK_EXPORT
+int cmark_parser_get_offset(cmark_parser *parser);
+
+/**
+ * Return the offset in 'columns' in the line being processed.
+ *
+ * This value may differ from the value returned by
+ * cmark_parser_get_offset() in that it accounts for tabs,
+ * and as such should not be used as an index in the current line's
+ * buffer.
+ *
+ * Example:
+ *
+ * cmark_parser_advance_offset() can be called to advance the
+ * offset by a number of columns, instead of a number of bytes.
+ *
+ * In that case, if offset falls "in the middle" of a tab
+ * character, 'column' and offset will differ.
+ *
+ * ```
+ * foo                 \t bar
+ * ^                   ^^
+ * offset (0)          20
+ * ```
+ *
+ * If cmark_parser_advance_offset is called here with 'columns'
+ * set to 'true' and 'count' set to 22, cmark_parser_get_offset()
+ * will return 20, whereas cmark_parser_get_column() will return
+ * 22.
+ *
+ * Additionally, as tabs expand to the next multiple of 4 columns,
+ * cmark_parser_has_partially_consumed_tab() will now return
+ * 'true'.
+ */
+CMARK_EXPORT
+int cmark_parser_get_column(cmark_parser *parser);
+
+/** Return the absolute index in bytes of the first nonspace
+ * character coming after the offset as returned by
+ * cmark_parser_get_offset() in the line currently being processed.
+ *
+ * Example:
+ *
+ * ```
+ *   foo        bar            baz  \n
+ * ^               ^           ^
+ * 0            offset (16) first_nonspace (28)
+ * ```
+ */
+CMARK_EXPORT
+int cmark_parser_get_first_nonspace(cmark_parser *parser);
+
+/** Return the absolute index of the first nonspace column coming after 'offset'
+ * in the line currently being processed, counting tabs as multiple
+ * columns as appropriate.
+ *
+ * See the documentation for cmark_parser_get_first_nonspace() and
+ * cmark_parser_get_column() for more information.
+ */
+CMARK_EXPORT
+int cmark_parser_get_first_nonspace_column(cmark_parser *parser);
+
+/** Return the difference between the values returned by
+ * cmark_parser_get_first_nonspace_column() and
+ * cmark_parser_get_column().
+ *
+ * This is not a byte offset, as it can count one tab as multiple
+ * characters.
+ */
+CMARK_EXPORT
+int cmark_parser_get_indent(cmark_parser *parser);
+
+/** Return 'true' if the line currently being processed has been entirely
+ * consumed, 'false' otherwise.
+ *
+ * Example:
+ *
+ * ```
+ *   foo        bar            baz  \n
+ * ^
+ * offset
+ * ```
+ *
+ * This function will return 'false' here.
+ *
+ * ```
+ *   foo        bar            baz  \n
+ *                 ^
+ *              offset
+ * ```
+ * This function will still return 'false'.
+ *
+ * ```
+ *   foo        bar            baz  \n
+ *                                ^
+ *                             offset
+ * ```
+ *
+ * At this point, this function will now return 'true'.
+ */
+CMARK_EXPORT
+int cmark_parser_is_blank(cmark_parser *parser);
+
+/** Return 'true' if the value returned by cmark_parser_get_offset()
+ * is 'inside' an expanded tab.
+ *
+ * See the documentation for cmark_parser_get_column() for more
+ * information.
+ */
+CMARK_EXPORT
+int cmark_parser_has_partially_consumed_tab(cmark_parser *parser);
+
+/** Return the length in bytes of the previously processed line, excluding potential
+ * newline (\n) and carriage return (\r) trailing characters.
+ */
+CMARK_EXPORT
+int cmark_parser_get_last_line_length(cmark_parser *parser);
+
+/** Add a child to 'parent' during the parsing process.
+ *
+ * If 'parent' isn't the kind of node that can accept this child,
+ * this function will back up till it hits a node that can, closing
+ * blocks as appropriate.
+ */
+CMARK_EXPORT
+cmark_node*cmark_parser_add_child(cmark_parser *parser,
+                                  cmark_node *parent,
+                                  cmark_node_type block_type,
+                                  int start_column);
+
+/** Advance the 'offset' of the parser in the current line.
+ *
+ * See the documentation of cmark_parser_get_offset() and
+ * cmark_parser_get_column() for more information.
+ */
+CMARK_EXPORT
+void cmark_parser_advance_offset(cmark_parser *parser,
+                                 const char *input,
+                                 int count,
+                                 int columns);
+
+
+CMARK_EXPORT
+void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len);
+
+/** Attach the syntax 'extension' to the 'parser', to provide extra syntax
+ *  rules.
+ *  See the documentation for cmark_syntax_extension for more information.
+ *
+ *  Returns 'true' if the 'extension' was successfully attached,
+ *  'false' otherwise.
+ */
+CMARK_EXPORT
+int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension);
+
+/** Change the type of 'node'.
+ *
+ * Return 1 if the type could be changed, 0 otherwise.
+ */
+CMARK_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type);
+
+/** Return the string content for all types of 'node'.
+ *  The pointer stays valid as long as 'node' isn't freed.
+ */
+CMARK_EXPORT const char *cmark_node_get_string_content(cmark_node *node);
+
+/** Set the string 'content' for all types of 'node'.
+ *  Copies 'content'.
+ */
+CMARK_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content);
+
+/** Get the syntax extension responsible for the creation of 'node'.
+ *  Return NULL if 'node' was created because it matched standard syntax rules.
+ */
+CMARK_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node);
+
+/** Set the syntax extension responsible for creating 'node'.
+ */
+CMARK_EXPORT int cmark_node_set_syntax_extension(cmark_node *node,
+                                                  cmark_syntax_extension *extension);
+
+/**
+ * ## Inline syntax extension helpers
+ *
+ * The inline parsing process is described in detail at
+ * <http://spec.commonmark.org/0.24/#phase-2-inline-structure>
+ */
+
+/** Should return 'true' if the predicate matches 'c', 'false' otherwise
+ */
+typedef int (*cmark_inline_predicate)(int c);
+
+/** Advance the current inline parsing offset */
+CMARK_EXPORT
+void cmark_inline_parser_advance_offset(cmark_inline_parser *parser);
+
+/** Get the current inline parsing offset */
+CMARK_EXPORT
+int cmark_inline_parser_get_offset(cmark_inline_parser *parser);
+
+/** Set the offset in bytes in the chunk being processed by the given inline parser.
+ */
+CMARK_EXPORT
+void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset);
+
+/** Gets the cmark_chunk being operated on by the given inline parser.
+ * Use cmark_inline_parser_get_offset to get our current position in the chunk.
+ */
+CMARK_EXPORT
+cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser);
+
+/** Returns 1 if the inline parser is currently in a bracket; pass 1 for 'image'
+ * if you want to know about an image-type bracket, 0 for link-type. */
+CMARK_EXPORT
+int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image);
+
+/** Remove the last n characters from the last child of the given node.
+ * This only works where all n characters are in the single last child, and the last
+ * child is CMARK_NODE_TEXT.
+ */
+CMARK_EXPORT
+void cmark_node_unput(cmark_node *node, int n);
+
+
+/** Get the character located at the current inline parsing offset
+ */
+CMARK_EXPORT
+unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser);
+
+/** Get the character located at byte offset 'pos' in the current line.
+ */
+CMARK_EXPORT
+unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, int pos);
+
+/** Whether the inline parser has reached the end of the current line
+ */
+CMARK_EXPORT
+int cmark_inline_parser_is_eof(cmark_inline_parser *parser);
+
+/** Get the characters located after the current inline parsing offset
+ * while 'pred' matches. The caller must free the returned string after use.
+ */
+CMARK_EXPORT
+char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred);
+
+/** Push a delimiter on the delimiter stack.
+ * See <http://spec.commonmark.org/0.24/#phase-2-inline-structure> for
+ * more information on the parameters
+ */
+CMARK_EXPORT
+void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser,
+                                  unsigned char c,
+                                  int can_open,
+                                  int can_close,
+                                  cmark_node *inl_text);
+
+/** Remove 'delim' from the delimiter stack
+ */
+CMARK_EXPORT
+void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim);
+
+CMARK_EXPORT
+delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser);
+
+/** Convenience function to scan a given delimiter.
+ *
+ * 'left_flanking' and 'right_flanking' will be set to true if they
+ * respectively precede and follow a non-space, non-punctuation
+ * character.
+ *
+ * Additionally, 'punct_before' and 'punct_after' will respectively be set
+ * if the preceding or following character is a punctuation character.
+ *
+ * Note that 'left_flanking' and 'right_flanking' can both be 'true'.
+ *
+ * Returns the number of delimiters encountered, up to a maximum
+ * of 'max_delims', and advances the inline parsing offset.
+ */
+CMARK_EXPORT
+int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser,
+                                  int max_delims,
+                                  unsigned char c,
+                                  int *left_flanking,
+                                  int *right_flanking,
+                                  int *punct_before,
+                                  int *punct_after);
+
+CMARK_EXPORT
+void cmark_manage_extensions_special_characters(cmark_parser *parser, bool add);
+
+CMARK_EXPORT
+cmark_llist *cmark_parser_get_syntax_extensions(cmark_parser *parser);
+
+CMARK_EXPORT
+void cmark_arena_push(void);
+
+CMARK_EXPORT
+int cmark_arena_pop(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/cbits/cmarkextensions_export.h b/cbits/cmarkextensions_export.h
new file mode 100644
index 0000000..b1a7b7c
--- /dev/null
+++ b/cbits/cmarkextensions_export.h
@@ -0,0 +1,41 @@
+
+#ifndef CMARKEXTENSIONS_EXPORT_H
+#define CMARKEXTENSIONS_EXPORT_H
+
+#ifdef CMARKEXTENSIONS_STATIC_DEFINE
+#  define CMARKEXTENSIONS_EXPORT
+#  define CMARKEXTENSIONS_NO_EXPORT
+#else
+#  ifndef CMARKEXTENSIONS_EXPORT
+#    ifdef libcmark_gfmextensions_EXPORTS
+        /* We are building this library */
+#      define CMARKEXTENSIONS_EXPORT __attribute__((visibility("default")))
+#    else
+        /* We are using this library */
+#      define CMARKEXTENSIONS_EXPORT __attribute__((visibility("default")))
+#    endif
+#  endif
+
+#  ifndef CMARKEXTENSIONS_NO_EXPORT
+#    define CMARKEXTENSIONS_NO_EXPORT __attribute__((visibility("hidden")))
+#  endif
+#endif
+
+#ifndef CMARKEXTENSIONS_DEPRECATED
+#  define CMARKEXTENSIONS_DEPRECATED __attribute__ ((__deprecated__))
+#endif
+
+#ifndef CMARKEXTENSIONS_DEPRECATED_EXPORT
+#  define CMARKEXTENSIONS_DEPRECATED_EXPORT CMARKEXTENSIONS_EXPORT CMARKEXTENSIONS_DEPRECATED
+#endif
+
+#ifndef CMARKEXTENSIONS_DEPRECATED_NO_EXPORT
+#  define CMARKEXTENSIONS_DEPRECATED_NO_EXPORT CMARKEXTENSIONS_NO_EXPORT CMARKEXTENSIONS_DEPRECATED
+#endif
+
+#define DEFINE_NO_DEPRECATED 0
+#if DEFINE_NO_DEPRECATED
+# define CMARKEXTENSIONS_NO_DEPRECATED
+#endif
+
+#endif
diff --git a/cbits/commonmark.c b/cbits/commonmark.c
index 95a1ae5..8063acb 100644
--- a/cbits/commonmark.c
+++ b/cbits/commonmark.c
@@ -11,9 +11,10 @@
 #include "utf8.h"
 #include "scanners.h"
 #include "render.h"
+#include "syntax_extension.h"
 
-#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
-#define LIT(s) renderer->out(renderer, s, false, LITERAL)
+#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping)
+#define LIT(s) renderer->out(renderer, node, s, false, LITERAL)
 #define CR() renderer->cr(renderer)
 #define BLANKLINE() renderer->blankline(renderer)
 #define ENCODED_SIZE 20
@@ -21,7 +22,8 @@
 
 // Functions to convert cmark_nodes to commonmark strings.
 
-static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
+static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, 
+                              cmark_escaping escape,
                               int32_t c, unsigned char nextc) {
   bool needs_escaping = false;
   bool follows_digit =
@@ -42,13 +44,13 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
          (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
           (nextc == 0 || cmark_isspace(nextc))))) ||
        (escape == URL &&
-        (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' ||
+        (c == '`' || c == '<' || c == '>' || cmark_isspace((char)c) || c == '\\' ||
          c == ')' || c == '(')) ||
        (escape == TITLE &&
         (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));
 
   if (needs_escaping) {
-    if (cmark_isspace(c)) {
+    if (cmark_isspace((char)c)) {
       // use percent encoding for spaces
       snprintf(encoded, ENCODED_SIZE, "%%%2x", c);
       cmark_strbuf_puts(renderer->buffer, encoded);
@@ -151,8 +153,7 @@ static bool is_autolink(cmark_node *node) {
 // if there is no block-level ancestor, returns NULL.
 static cmark_node *get_containing_block(cmark_node *node) {
   while (node) {
-    if (node->type >= CMARK_NODE_FIRST_BLOCK &&
-        node->type <= CMARK_NODE_LAST_BLOCK) {
+    if (CMARK_NODE_BLOCK_P(node)) {
       return node;
     } else {
       node = node->parent;
@@ -191,6 +192,11 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
           cmark_node_get_list_tight(tmp->parent->parent)));
   }
 
+  if (node->extension && node->extension->commonmark_render_func) {
+    node->extension->commonmark_render_func(node->extension, renderer, node, ev_type, options);
+    return 1;
+  }
+
   switch (node->type) {
   case CMARK_NODE_DOCUMENT:
     break;
@@ -234,7 +240,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
       snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
                list_delim == CMARK_PAREN_DELIM ? ")" : ".",
                list_number < 10 ? "  " : " ");
-      marker_width = strlen(listmarker);
+      marker_width = (bufsize_t)strlen(listmarker);
     }
     if (entering) {
       if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
@@ -466,10 +472,14 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
 }
 
 char *cmark_render_commonmark(cmark_node *root, int options, int width) {
+  return cmark_render_commonmark_with_mem(root, options, width, cmark_node_mem(root));
+}
+
+char *cmark_render_commonmark_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) {
   if (options & CMARK_OPT_HARDBREAKS) {
     // disable breaking on width, since it has
     // a different meaning with OPT_HARDBREAKS
     width = 0;
   }
-  return cmark_render(root, options, width, outc, S_render_node);
+  return cmark_render(mem, root, options, width, outc, S_render_node);
 }
diff --git a/cbits/core-extensions.c b/cbits/core-extensions.c
new file mode 100644
index 0000000..49bd8d4
--- /dev/null
+++ b/cbits/core-extensions.c
@@ -0,0 +1,14 @@
+#include "core-extensions.h"
+#include "autolink.h"
+#include "strikethrough.h"
+#include "table.h"
+#include "tagfilter.h"
+/* Register the table, strikethrough, autolink and tagfilter extensions on 'plugin'. */
+int core_extensions_registration(cmark_plugin *plugin) {
+  cmark_plugin_register_syntax_extension(plugin, create_table_extension());
+  cmark_plugin_register_syntax_extension(plugin,
+                                         create_strikethrough_extension());
+  cmark_plugin_register_syntax_extension(plugin, create_autolink_extension());
+  cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension());
+  return 1; /* unconditional: the results of the calls above are not inspected */
+}
diff --git a/cbits/core-extensions.h b/cbits/core-extensions.h
new file mode 100644
index 0000000..45f1994
--- /dev/null
+++ b/cbits/core-extensions.h
@@ -0,0 +1,24 @@
+#ifndef CORE_EXTENSIONS_H
+#define CORE_EXTENSIONS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <cmark_extension_api.h>
+#include "cmarkextensions_export.h"
+/** Registers the table, strikethrough, autolink and tagfilter extensions on 'plugin'; returns 1. */
+CMARKEXTENSIONS_EXPORT
+int core_extensions_registration(cmark_plugin *plugin);
+/* NOTE(review): presumably the number of columns of a table 'node'; implementation not in view. */
+CMARKEXTENSIONS_EXPORT
+uint16_t cmarkextensions_get_table_columns(cmark_node *node);
+/* NOTE(review): presumably one alignment byte per column; ownership of the result unconfirmed. */
+CMARKEXTENSIONS_EXPORT
+uint8_t *cmarkextensions_get_table_alignments(cmark_node *node);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/cbits/ext_scanners.c b/cbits/ext_scanners.c
new file mode 100644
index 0000000..7e0f5f2
--- /dev/null
+++ b/cbits/ext_scanners.c
@@ -0,0 +1,941 @@
+/* Generated by re2c 0.15.3 */
+#include <stdlib.h>
+#include "ext_scanners.h"
+/* Run 'scanner' on the bytes of 'ptr' from 'offset' on and return the length it reports. */
+bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
+                       unsigned char *ptr, int len, bufsize_t offset) {
+  bufsize_t res;
+  /* offset == len would start the scanner on the sentinel, making it read past ptr[len]. */
+  if (ptr == NULL || offset >= len) {
+    return 0;
+  } else {
+    unsigned char lim = ptr[len]; /* original byte, put back after the scan */
+    /* Place a NUL sentinel at ptr[len] for the duration of the scan (ptr[len] must be writable). */
+    ptr[len] = '\0';
+    res = scanner(ptr + offset);
+    ptr[len] = lim;
+  }
+
+  return res;
+}
+/* Match a table delimiter row ('-' runs, optional ':', '|', blanks) ending in \n or \r\n; returns its length or 0. */
+bufsize_t _scan_table_start(const unsigned char *p) {
+  const unsigned char *marker = NULL; /* backtrack position restored at yy8 */
+  const unsigned char *start = p;
+
+  {
+    unsigned char yych;
+    static const unsigned char yybm[] = { /* bit 128: '-'; bit 64: tab, VT, FF, space */
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 64, 0,  64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  64, 0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 128, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0,   0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,
+    };
+    yych = *(marker = p);
+    if (yych <= '{') {
+      if (yych <= 0x1F) {
+        if (yych <= '\t') {
+          if (yych <= 0x08)
+            goto yy6;
+          goto yy3;
+        } else {
+          if (yych <= '\n')
+            goto yy2;
+          if (yych <= '\f')
+            goto yy3;
+          goto yy6;
+        }
+      } else {
+        if (yych <= '-') {
+          if (yych <= ' ')
+            goto yy3;
+          if (yych <= ',')
+            goto yy6;
+          goto yy5;
+        } else {
+          if (yych == ':')
+            goto yy4;
+          goto yy6;
+        }
+      }
+    } else {
+      if (yych <= 0xEC) {
+        if (yych <= 0xC1) {
+          if (yych <= '|')
+            goto yy3;
+          if (yych <= 0x7F)
+            goto yy6;
+        } else {
+          if (yych <= 0xDF)
+            goto yy7;
+          if (yych <= 0xE0)
+            goto yy9;
+          goto yy10;
+        }
+      } else {
+        if (yych <= 0xF0) {
+          if (yych <= 0xED)
+            goto yy14;
+          if (yych <= 0xEF)
+            goto yy10;
+          goto yy11;
+        } else {
+          if (yych <= 0xF3)
+            goto yy12;
+          if (yych <= 0xF4)
+            goto yy13;
+        }
+      }
+    }
+  yy2 : { return 0; } /* no match */
+  yy3:
+    yych = *(marker = ++p);
+    if (yybm[0 + yych] & 128) {
+      goto yy22;
+    }
+    if (yych <= '\f') {
+      if (yych == '\t')
+        goto yy29;
+      if (yych <= '\n')
+        goto yy2;
+      goto yy29;
+    } else {
+      if (yych <= ' ') {
+        if (yych <= 0x1F)
+          goto yy2;
+        goto yy29;
+      } else {
+        if (yych == ':')
+          goto yy31;
+        goto yy2;
+      }
+    }
+  yy4:
+    yych = *(marker = ++p);
+    if (yybm[0 + yych] & 128) {
+      goto yy22;
+    }
+    goto yy2;
+  yy5:
+    yych = *(marker = ++p);
+    if (yybm[0 + yych] & 128) {
+      goto yy22;
+    }
+    if (yych <= ' ') {
+      if (yych <= 0x08)
+        goto yy2;
+      if (yych <= '\r')
+        goto yy16;
+      if (yych <= 0x1F)
+        goto yy2;
+      goto yy16;
+    } else {
+      if (yych <= ':') {
+        if (yych <= '9')
+          goto yy2;
+        goto yy15;
+      } else {
+        if (yych == '|')
+          goto yy16;
+        goto yy2;
+      }
+    }
+  yy6:
+    yych = *++p;
+    goto yy2;
+  yy7:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy8;
+    if (yych <= 0xBF)
+      goto yy6;
+  yy8:
+    p = marker; /* rewind to the saved position before reporting no match */
+    goto yy2;
+  yy9:
+    yych = *++p;
+    if (yych <= 0x9F)
+      goto yy8;
+    if (yych <= 0xBF)
+      goto yy7;
+    goto yy8;
+  yy10:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy8;
+    if (yych <= 0xBF)
+      goto yy7;
+    goto yy8;
+  yy11:
+    yych = *++p;
+    if (yych <= 0x8F)
+      goto yy8;
+    if (yych <= 0xBF)
+      goto yy10;
+    goto yy8;
+  yy12:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy8;
+    if (yych <= 0xBF)
+      goto yy10;
+    goto yy8;
+  yy13:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy8;
+    if (yych <= 0x8F)
+      goto yy10;
+    goto yy8;
+  yy14:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy8;
+    if (yych <= 0x9F)
+      goto yy7;
+    goto yy8;
+  yy15:
+    ++p;
+    yych = *p;
+  yy16:
+    if (yybm[0 + yych] & 64) {
+      goto yy15;
+    }
+    if (yych <= '\r') {
+      if (yych <= 0x08)
+        goto yy8;
+      if (yych <= '\n')
+        goto yy20;
+      goto yy19;
+    } else {
+      if (yych != '|')
+        goto yy8;
+    }
+  yy17:
+    ++p;
+    yych = *p;
+    if (yych <= 0x1F) {
+      if (yych <= '\n') {
+        if (yych <= 0x08)
+          goto yy8;
+        if (yych <= '\t')
+          goto yy17;
+        goto yy20;
+      } else {
+        if (yych <= '\f')
+          goto yy17;
+        if (yych >= 0x0E)
+          goto yy8;
+      }
+    } else {
+      if (yych <= '-') {
+        if (yych <= ' ')
+          goto yy17;
+        if (yych <= ',')
+          goto yy8;
+        goto yy25;
+      } else {
+        if (yych == ':')
+          goto yy24;
+        goto yy8;
+      }
+    }
+  yy19:
+    yych = *++p;
+    if (yych != '\n')
+      goto yy8;
+  yy20:
+    ++p;
+    { return (bufsize_t)(p - start); } /* match: bytes consumed, line ending included */
+  yy22:
+    ++p;
+    yych = *p;
+    if (yybm[0 + yych] & 128) {
+      goto yy22;
+    }
+    if (yych <= 0x1F) {
+      if (yych <= '\n') {
+        if (yych <= 0x08)
+          goto yy8;
+        if (yych <= '\t')
+          goto yy15;
+        goto yy20;
+      } else {
+        if (yych <= '\f')
+          goto yy15;
+        if (yych <= '\r')
+          goto yy19;
+        goto yy8;
+      }
+    } else {
+      if (yych <= ':') {
+        if (yych <= ' ')
+          goto yy15;
+        if (yych <= '9')
+          goto yy8;
+        goto yy15;
+      } else {
+        if (yych == '|')
+          goto yy17;
+        goto yy8;
+      }
+    }
+  yy24:
+    ++p;
+    yych = *p;
+    if (yych != '-')
+      goto yy8;
+  yy25:
+    ++p;
+    yych = *p;
+    if (yych <= ' ') {
+      if (yych <= '\n') {
+        if (yych <= 0x08)
+          goto yy8;
+        if (yych >= '\n')
+          goto yy20;
+      } else {
+        if (yych <= '\f')
+          goto yy27;
+        if (yych <= '\r')
+          goto yy19;
+        if (yych <= 0x1F)
+          goto yy8;
+      }
+    } else {
+      if (yych <= '9') {
+        if (yych == '-')
+          goto yy25;
+        goto yy8;
+      } else {
+        if (yych <= ':')
+          goto yy27;
+        if (yych == '|')
+          goto yy17;
+        goto yy8;
+      }
+    }
+  yy27:
+    ++p;
+    yych = *p;
+    if (yych <= '\r') {
+      if (yych <= '\t') {
+        if (yych <= 0x08)
+          goto yy8;
+        goto yy27;
+      } else {
+        if (yych <= '\n')
+          goto yy20;
+        if (yych <= '\f')
+          goto yy27;
+        goto yy19;
+      }
+    } else {
+      if (yych <= ' ') {
+        if (yych <= 0x1F)
+          goto yy8;
+        goto yy27;
+      } else {
+        if (yych == '|')
+          goto yy17;
+        goto yy8;
+      }
+    }
+  yy29:
+    ++p;
+    yych = *p;
+    if (yybm[0 + yych] & 128) {
+      goto yy22;
+    }
+    if (yych <= '\f') {
+      if (yych == '\t')
+        goto yy29;
+      if (yych <= '\n')
+        goto yy8;
+      goto yy29;
+    } else {
+      if (yych <= ' ') {
+        if (yych <= 0x1F)
+          goto yy8;
+        goto yy29;
+      } else {
+        if (yych != ':')
+          goto yy8;
+      }
+    }
+  yy31:
+    ++p;
+    if (yybm[0 + (yych = *p)] & 128) {
+      goto yy22;
+    }
+    goto yy8;
+  }
+}
+/* Consume cell text up to a '|' that does not directly follow a backslash, or a line ending; returns the length. */
+bufsize_t _scan_table_cell(const unsigned char *p) {
+  const unsigned char *marker = NULL; /* last accepted position, restored at yy38 */
+  const unsigned char *start = p;
+
+  {
+    unsigned char yych;
+    static const unsigned char yybm[] = { /* bit 128: ASCII except \n, \r, '\\', '|'; bit 64: '\\' */
+        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0,   128, 128, 0,
+        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+        128, 128, 128, 128, 128, 128, 128, 128, 64,  128, 128, 128, 128, 128,
+        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 0,   128,
+        128, 128, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,
+    };
+    yych = *(marker = p);
+    if (yych <= 0x7F) {
+      if (yych <= '\r') {
+        if (yych == '\n')
+          goto yy34;
+        if (yych <= '\f')
+          goto yy36;
+        goto yy45;
+      } else {
+        if (yych <= '\\') {
+          if (yych <= '[')
+            goto yy36;
+          goto yy35;
+        } else {
+          if (yych == '|')
+            goto yy45;
+          goto yy36;
+        }
+      }
+    } else {
+      if (yych <= 0xED) {
+        if (yych <= 0xDF) {
+          if (yych >= 0xC2)
+            goto yy37;
+        } else {
+          if (yych <= 0xE0)
+            goto yy39;
+          if (yych <= 0xEC)
+            goto yy40;
+          goto yy44;
+        }
+      } else {
+        if (yych <= 0xF0) {
+          if (yych <= 0xEF)
+            goto yy40;
+          goto yy41;
+        } else {
+          if (yych <= 0xF3)
+            goto yy42;
+          if (yych <= 0xF4)
+            goto yy43;
+        }
+      }
+    }
+  yy34 : { return (bufsize_t)(p - start); } /* bytes consumed so far; 0 if none */
+  yy35:
+    yych = *(marker = ++p);
+    if (yych == '|')
+      goto yy49;
+    goto yy50;
+  yy36:
+    yych = *(marker = ++p);
+    goto yy50;
+  yy37:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy36;
+  yy38:
+    p = marker; /* rewind to the last accepted position */
+    goto yy34;
+  yy39:
+    yych = *++p;
+    if (yych <= 0x9F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy37;
+    goto yy38;
+  yy40:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy37;
+    goto yy38;
+  yy41:
+    yych = *++p;
+    if (yych <= 0x8F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy40;
+    goto yy38;
+  yy42:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy40;
+    goto yy38;
+  yy43:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0x8F)
+      goto yy40;
+    goto yy38;
+  yy44:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0x9F)
+      goto yy37;
+    goto yy38;
+  yy45:
+    ++p;
+    { return 0; } /* input starts with '|' or '\r': nothing to consume */
+  yy47:
+    marker = ++p;
+    yych = *p;
+    if (yybm[0 + yych] & 64) {
+      goto yy47;
+    }
+    if (yych <= 0xDF) {
+      if (yych <= '\f') {
+        if (yych == '\n')
+          goto yy34;
+      } else {
+        if (yych <= '\r')
+          goto yy34;
+        if (yych <= 0x7F)
+          goto yy49;
+        if (yych <= 0xC1)
+          goto yy34;
+        goto yy51;
+      }
+    } else {
+      if (yych <= 0xEF) {
+        if (yych <= 0xE0)
+          goto yy52;
+        if (yych == 0xED)
+          goto yy57;
+        goto yy53;
+      } else {
+        if (yych <= 0xF0)
+          goto yy54;
+        if (yych <= 0xF3)
+          goto yy55;
+        if (yych <= 0xF4)
+          goto yy56;
+        goto yy34;
+      }
+    }
+  yy49:
+    marker = ++p;
+    yych = *p;
+  yy50:
+    if (yybm[0 + yych] & 128) {
+      goto yy49;
+    }
+    if (yych <= 0xEC) {
+      if (yych <= 0xC1) {
+        if (yych <= '\r')
+          goto yy34;
+        if (yych <= '\\')
+          goto yy47;
+        goto yy34;
+      } else {
+        if (yych <= 0xDF)
+          goto yy51;
+        if (yych <= 0xE0)
+          goto yy52;
+        goto yy53;
+      }
+    } else {
+      if (yych <= 0xF0) {
+        if (yych <= 0xED)
+          goto yy57;
+        if (yych <= 0xEF)
+          goto yy53;
+        goto yy54;
+      } else {
+        if (yych <= 0xF3)
+          goto yy55;
+        if (yych <= 0xF4)
+          goto yy56;
+        goto yy34;
+      }
+    }
+  yy51:
+    ++p;
+    yych = *p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy49;
+    goto yy38;
+  yy52:
+    ++p;
+    yych = *p;
+    if (yych <= 0x9F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy51;
+    goto yy38;
+  yy53:
+    ++p;
+    yych = *p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy51;
+    goto yy38;
+  yy54:
+    ++p;
+    yych = *p;
+    if (yych <= 0x8F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy53;
+    goto yy38;
+  yy55:
+    ++p;
+    yych = *p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0xBF)
+      goto yy53;
+    goto yy38;
+  yy56:
+    ++p;
+    yych = *p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0x8F)
+      goto yy53;
+    goto yy38;
+  yy57:
+    ++p;
+    yych = *p;
+    if (yych <= 0x7F)
+      goto yy38;
+    if (yych <= 0x9F)
+      goto yy51;
+    goto yy38;
+  }
+}
+/* Match '|' plus optional blanks and an optional \n or \r\n; returns the matched length, or 0 without a leading '|'. */
+bufsize_t _scan_table_cell_end(const unsigned char *p) {
+  const unsigned char *marker = NULL; /* last accepted position, restored at yy65 */
+  const unsigned char *start = p;
+
+  {
+    unsigned char yych;
+    unsigned int yyaccept = 0; /* set to 1 once a '|' has been accepted (see yy65) */
+    static const unsigned char yybm[] = { /* bit 128: tab, VT, FF, space */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   128, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0,
+    };
+    yych = *(marker = p);
+    if (yych <= 0xDF) {
+      if (yych <= '{') {
+        if (yych != '\n')
+          goto yy63;
+      } else {
+        if (yych <= '|')
+          goto yy61;
+        if (yych <= 0x7F)
+          goto yy63;
+        if (yych >= 0xC2)
+          goto yy64;
+      }
+    } else {
+      if (yych <= 0xEF) {
+        if (yych <= 0xE0)
+          goto yy66;
+        if (yych == 0xED)
+          goto yy71;
+        goto yy67;
+      } else {
+        if (yych <= 0xF0)
+          goto yy68;
+        if (yych <= 0xF3)
+          goto yy69;
+        if (yych <= 0xF4)
+          goto yy70;
+      }
+    }
+  yy60 : { return 0; } /* no match */
+  yy61:
+    yyaccept = 1;
+    yych = *(marker = ++p);
+    goto yy73;
+  yy62 : { return (bufsize_t)(p - start); } /* match: bytes consumed */
+  yy63:
+    yych = *++p;
+    goto yy60;
+  yy64:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy65;
+    if (yych <= 0xBF)
+      goto yy63;
+  yy65:
+    p = marker; /* rewind, then report either no match or the match saved so far */
+    if (yyaccept == 0) {
+      goto yy60;
+    } else {
+      goto yy62;
+    }
+  yy66:
+    yych = *++p;
+    if (yych <= 0x9F)
+      goto yy65;
+    if (yych <= 0xBF)
+      goto yy64;
+    goto yy65;
+  yy67:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy65;
+    if (yych <= 0xBF)
+      goto yy64;
+    goto yy65;
+  yy68:
+    yych = *++p;
+    if (yych <= 0x8F)
+      goto yy65;
+    if (yych <= 0xBF)
+      goto yy67;
+    goto yy65;
+  yy69:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy65;
+    if (yych <= 0xBF)
+      goto yy67;
+    goto yy65;
+  yy70:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy65;
+    if (yych <= 0x8F)
+      goto yy67;
+    goto yy65;
+  yy71:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy65;
+    if (yych <= 0x9F)
+      goto yy64;
+    goto yy65;
+  yy72:
+    yyaccept = 1;
+    marker = ++p;
+    yych = *p;
+  yy73:
+    if (yybm[0 + yych] & 128) {
+      goto yy72;
+    }
+    if (yych <= 0x08)
+      goto yy62;
+    if (yych <= '\n')
+      goto yy75;
+    if (yych >= 0x0E)
+      goto yy62;
+    yych = *++p;
+    if (yych != '\n')
+      goto yy65;
+  yy75:
+    ++p;
+    yych = *p;
+    goto yy62;
+  }
+}
+
+bufsize_t _scan_table_row_end(const unsigned char *p) { /* returns length of [ \t\v\f]* followed by "\n" or "\r\n" at p, or 0 if p does not start with that */
+  const unsigned char *marker = NULL; /* backtrack position; p is reset to it at yy85 before failing */
+  const unsigned char *start = p; /* match length is measured from here */
+/* NOTE(review): the yy-labelled state machine looks generated (re2c-style); presumably it should be regenerated from its source rather than hand-edited. The scan stops only at a non-matching byte, so p is assumed to be terminated (e.g. NUL) - confirm against the caller. */
+  {
+    unsigned char yych;
+    static const unsigned char yybm[] = { /* bit 128 is set only for '\t' (9), '\v' (11), '\f' (12) and ' ' (32) */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 0, 128, 128, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   128, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   0, 0,   0,   0, 0, 0,
+    };
+    yych = *(marker = p); /* dispatch on the first byte */
+    if (yych <= 0xC1) {
+      if (yych <= '\f') {
+        if (yych <= 0x08)
+          goto yy83;
+        if (yych == '\n')
+          goto yy81;
+        goto yy79;
+      } else {
+        if (yych <= 0x1F) {
+          if (yych <= '\r')
+            goto yy80;
+          goto yy83;
+        } else {
+          if (yych <= ' ')
+            goto yy79;
+          if (yych <= 0x7F)
+            goto yy83;
+        }
+      }
+    } else { /* bytes above 0xC1 are handled as UTF-8 lead bytes; every such path ends in failure */
+      if (yych <= 0xED) {
+        if (yych <= 0xDF)
+          goto yy84;
+        if (yych <= 0xE0)
+          goto yy86;
+        if (yych <= 0xEC)
+          goto yy87;
+        goto yy91;
+      } else {
+        if (yych <= 0xF0) {
+          if (yych <= 0xEF)
+            goto yy87;
+          goto yy88;
+        } else {
+          if (yych <= 0xF3)
+            goto yy89;
+          if (yych <= 0xF4)
+            goto yy90;
+        }
+      }
+    }
+  yy78 : { return 0; } /* no match */
+  yy79: /* first byte was whitespace; remember the position and look at the next byte */
+    yych = *(marker = ++p);
+    if (yych <= 0x08)
+      goto yy78;
+    if (yych <= '\r')
+      goto yy94;
+    if (yych == ' ')
+      goto yy94;
+    goto yy78;
+  yy80: /* first byte was '\r'; only "\r\n" is accepted */
+    yych = *++p;
+    if (yych != '\n')
+      goto yy78;
+  yy81: /* '\n' reached: step past it and report the matched length */
+    ++p;
+    { return (bufsize_t)(p - start); }
+  yy83: /* any other byte (or a completed multi-byte sequence): fail */
+    yych = *++p;
+    goto yy78;
+  yy84: /* expects one continuation byte 0x80..0xBF */
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy85;
+    if (yych <= 0xBF)
+      goto yy83;
+  yy85: /* restore p from marker, then fail */
+    p = marker;
+    goto yy78;
+  yy86:
+    yych = *++p;
+    if (yych <= 0x9F)
+      goto yy85;
+    if (yych <= 0xBF)
+      goto yy84;
+    goto yy85;
+  yy87:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy85;
+    if (yych <= 0xBF)
+      goto yy84;
+    goto yy85;
+  yy88:
+    yych = *++p;
+    if (yych <= 0x8F)
+      goto yy85;
+    if (yych <= 0xBF)
+      goto yy87;
+    goto yy85;
+  yy89:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy85;
+    if (yych <= 0xBF)
+      goto yy87;
+    goto yy85;
+  yy90:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy85;
+    if (yych <= 0x8F)
+      goto yy87;
+    goto yy85;
+  yy91:
+    yych = *++p;
+    if (yych <= 0x7F)
+      goto yy85;
+    if (yych <= 0x9F)
+      goto yy84;
+    goto yy85;
+  yy92: /* '\r' after leading whitespace must be followed by '\n' */
+    yych = *++p;
+    if (yych == '\n')
+      goto yy81;
+    goto yy85;
+  yy93: /* consume one more whitespace byte */
+    ++p;
+    yych = *p;
+  yy94: /* loop while the byte is in the yybm whitespace class, then require a line ending */
+    if (yybm[0 + yych] & 128) {
+      goto yy93;
+    }
+    if (yych <= 0x08)
+      goto yy85;
+    if (yych <= '\n')
+      goto yy81;
+    if (yych <= '\r')
+      goto yy92;
+    goto yy85;
+  }
+}
diff --git a/cbits/ext_scanners.h b/cbits/ext_scanners.h
new file mode 100644
index 0000000..53584d8
--- /dev/null
+++ b/cbits/ext_scanners.h
@@ -0,0 +1,22 @@
+#include "chunk.h"
+#include "cmark.h"
+/* Declarations of the table scanners and of the macros that invoke them through _ext_scan_at. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* NOTE(review): _ext_scan_at is only declared here; presumably it runs `scanner` on the buffer (ptr, len) starting at `offset` - confirm against its definition. */
+bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *),
+                       unsigned char *ptr, int len, bufsize_t offset);
+bufsize_t _scan_table_start(const unsigned char *p);
+bufsize_t _scan_table_cell(const unsigned char *p);
+bufsize_t _scan_table_cell_end(const unsigned char *p);
+bufsize_t _scan_table_row_end(const unsigned char *p);
+/* Wrapper macros: (c, l, n) are forwarded unchanged as _ext_scan_at's (ptr, len, offset), paired with the matching _scan_* function. */
+#define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n)
+#define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n)
+#define scan_table_cell_end(c, l, n) _ext_scan_at(&_scan_table_cell_end, c, l, n)
+#define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n)
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/cbits/houdini.h b/cbits/houdini.h
index f738e82..7852c3a 100644
--- a/cbits/houdini.h
+++ b/cbits/houdini.h
@@ -31,17 +31,23 @@ extern "C" {
 #define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
 #define HOUDINI_UNESCAPED_SIZE(x) (x)
 
-extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
+CMARK_EXPORT
+bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
                                       bufsize_t size);
-extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
+CMARK_EXPORT
+int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
                                bufsize_t size);
-extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
+CMARK_EXPORT
+int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
                                 bufsize_t size, int secure);
-extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
+CMARK_EXPORT
+int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
                                  bufsize_t size);
-extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
+CMARK_EXPORT
+void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
                                     bufsize_t size);
-extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
+CMARK_EXPORT
+int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
                                bufsize_t size);
 
 #ifdef __cplusplus
diff --git a/cbits/houdini_html_e.c b/cbits/houdini_html_e.c
index 0e539f0..da0b15c 100644
--- a/cbits/houdini_html_e.c
+++ b/cbits/houdini_html_e.c
@@ -48,7 +48,7 @@ int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
     if (unlikely(i >= size))
       break;
 
-    /* The forward slash is only escaped in secure mode */
+    /* The forward slash and single quote are only escaped in secure mode */
     if ((src[i] == '/' || src[i] == '\'') && !secure) {
       cmark_strbuf_putc(ob, src[i]);
     } else {
diff --git a/cbits/html.c b/cbits/html.c
index a680e4a..aaf2b74 100644
--- a/cbits/html.c
+++ b/cbits/html.c
@@ -5,12 +5,10 @@
 #include "cmark_ctype.h"
 #include "config.h"
 #include "cmark.h"
-#include "node.h"
-#include "buffer.h"
 #include "houdini.h"
 #include "scanners.h"
-
-#define BUFFER_SIZE 100
+#include "syntax_extension.h"
+#include "html.h"
 
 // Functions to convert cmark_nodes to HTML strings.
 
@@ -19,44 +17,67 @@ static void escape_html(cmark_strbuf *dest, const unsigned char *source,
   houdini_escape_html0(dest, source, length, 0);
 }
 
-static CMARK_INLINE void cr(cmark_strbuf *html) {
-  if (html->size && html->ptr[html->size - 1] != '\n')
-    cmark_strbuf_putc(html, '\n');
-}
+/* Copy the raw HTML in data[0..len) to renderer->html, escaping the '<' of
+ * any tag that a filtering extension rejects.
+ *
+ * The input is walked '<' by '<'. Text before each '<' is copied verbatim.
+ * At each '<', every extension in renderer->filter_extensions is asked via
+ * html_filter_func(ext, data, len), with data pointing at the '<' and len
+ * the bytes remaining; a zero return marks the tag as filtered. A filtered
+ * '<' is written as the entity "&lt;" so the tag is rendered as text; an
+ * unfiltered '<' is written as-is. Bytes after the last '<' are copied
+ * verbatim.
+ *
+ * renderer: supplies the output buffer (html) and the filter_extensions list.
+ * data:     start of the HTML block; not required to be NUL-terminated here,
+ *           since only len bytes are examined by this function.
+ * len:      number of bytes in data.
+ */
+static void filter_html_block(cmark_html_renderer *renderer, uint8_t *data, size_t len) {
+  cmark_strbuf *html = renderer->html;
+  cmark_llist *it;
+  cmark_syntax_extension *ext;
+  bool filtered;
+  uint8_t *match;
 
-struct render_state {
-  cmark_strbuf *html;
-  cmark_node *plain;
-};
+  while (len) {
+    match = (uint8_t *) memchr(data, '<', len);
+    if (!match)
+      break; /* no further '<': the tail is flushed after the loop */
 
-static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html,
-                               int options) {
-  char buffer[BUFFER_SIZE];
-  if (CMARK_OPT_SOURCEPOS & options) {
-    snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"",
-             cmark_node_get_start_line(node), cmark_node_get_start_column(node),
-             cmark_node_get_end_line(node), cmark_node_get_end_column(node));
-    cmark_strbuf_puts(html, buffer);
+    if (match != data) {
+      /* copy the text preceding the '<' unchanged */
+      cmark_strbuf_put(html, data, (bufsize_t)(match - data));
+      len -= (match - data);
+      data = match;
+    }
+
+    /* data now points at '<'; the first extension returning 0 filters it */
+    filtered = false;
+    for (it = renderer->filter_extensions; it; it = it->next) {
+      ext = ((cmark_syntax_extension *) it->data);
+      if (!ext->html_filter_func(ext, data, len)) {
+        filtered = true;
+        break;
+      }
+    }
+
+    if (!filtered) {
+      cmark_strbuf_putc(html, '<');
+    } else {
+      /* escape the opening bracket so the rejected tag is shown as text */
+      cmark_strbuf_puts(html, "&lt;");
+    }
+
+    ++data;
+    --len;
   }
+
+  if (len)
+    cmark_strbuf_put(html, data, (bufsize_t)len);
 }
 
-static int S_render_node(cmark_node *node, cmark_event_type ev_type,
-                         struct render_state *state, int options) {
+static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
+                         cmark_event_type ev_type, int options) {
   cmark_node *parent;
   cmark_node *grandparent;
-  cmark_strbuf *html = state->html;
+  cmark_strbuf *html = renderer->html;
+  cmark_llist *it;
+  cmark_syntax_extension *ext;
   char start_heading[] = "plain == node) { // back at original node
-    state->plain = NULL;
+  if (renderer->plain == node) { // back at original node
+    renderer->plain = NULL;
   }
 
-  if (state->plain != NULL) {
+  if (renderer->plain != NULL) {
     switch (node->type) {
     case CMARK_NODE_TEXT:
     case CMARK_NODE_CODE:
@@ -75,18 +96,23 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
     return 1;
   }
 
+  if (node->extension && node->extension->html_render_func) {
+    node->extension->html_render_func(node->extension, renderer, node, ev_type, options);
+    return 1;
+  }
+
   switch (node->type) {
   case CMARK_NODE_DOCUMENT:
     break;
 
   case CMARK_NODE_BLOCK_QUOTE:
     if (entering) {
-      cr(html);
+      cmark_html_render_cr(html);
       cmark_strbuf_puts(html, "\n");
     } else {
-      cr(html);
+      cmark_html_render_cr(html);
       cmark_strbuf_puts(html, "\n");
     }
     break;
@@ -96,19 +122,19 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
     int start = node->as.list.start;
 
     if (entering) {
-      cr(html);
+      cmark_html_render_cr(html);
       if (list_type == CMARK_BULLET_LIST) {
         cmark_strbuf_puts(html, "\n");
       } else if (start == 1) {
         cmark_strbuf_puts(html, "\n");
       } else {
         snprintf(buffer, BUFFER_SIZE, "
    \n"); } } else { @@ -120,9 +146,9 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_ITEM: if (entering) { - cr(html); + cmark_html_render_cr(html); cmark_strbuf_puts(html, "'); } else { cmark_strbuf_puts(html, "\n"); @@ -131,10 +157,10 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_HEADING: if (entering) { - cr(html); + cmark_html_render_cr(html); start_heading[2] = (char)('0' + node->as.heading.level); cmark_strbuf_puts(html, start_heading); - S_render_sourcepos(node, html, options); + cmark_html_render_sourcepos(node, html, options); cmark_strbuf_putc(html, '>'); } else { end_heading[3] = (char)('0' + node->as.heading.level); @@ -144,11 +170,11 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, break; case CMARK_NODE_CODE_BLOCK: - cr(html); + cmark_html_render_cr(html); if (node->as.code.info.len == 0) { cmark_strbuf_puts(html, ""); } else { bufsize_t first_tag = 0; @@ -157,11 +183,19 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, first_tag += 1; } - cmark_strbuf_puts(html, "as.code.info.data, first_tag); - cmark_strbuf_puts(html, "\">"); + if (options & CMARK_OPT_GITHUB_PRE_LANG) { + cmark_strbuf_puts(html, "as.code.info.data, first_tag); + cmark_strbuf_puts(html, "\">"); + } else { + cmark_strbuf_puts(html, "as.code.info.data, first_tag); + cmark_strbuf_puts(html, "\">"); + } } escape_html(html, node->as.code.literal.data, node->as.code.literal.len); @@ -169,17 +203,19 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, break; case CMARK_NODE_HTML_BLOCK: - cr(html); + cmark_html_render_cr(html); if (options & CMARK_OPT_SAFE) { cmark_strbuf_puts(html, ""); + } else if (renderer->filter_extensions) { + filter_html_block(renderer, node->as.literal.data, node->as.literal.len); } else { cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); } - cr(html); + cmark_html_render_cr(html); break; case 
CMARK_NODE_CUSTOM_BLOCK: - cr(html); + cmark_html_render_cr(html); if (entering) { cmark_strbuf_put(html, node->as.custom.on_enter.data, node->as.custom.on_enter.len); @@ -187,13 +223,13 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, cmark_strbuf_put(html, node->as.custom.on_exit.data, node->as.custom.on_exit.len); } - cr(html); + cmark_html_render_cr(html); break; case CMARK_NODE_THEMATIC_BREAK: - cr(html); + cmark_html_render_cr(html); cmark_strbuf_puts(html, "\n"); break; @@ -207,9 +243,9 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } if (!tight) { if (entering) { - cr(html); + cmark_html_render_cr(html); cmark_strbuf_puts(html, "'); } else { cmark_strbuf_puts(html, "

    \n"); @@ -245,7 +281,20 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, if (options & CMARK_OPT_SAFE) { cmark_strbuf_puts(html, ""); } else { - cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); + filtered = false; + for (it = renderer->filter_extensions; it; it = it->next) { + ext = (cmark_syntax_extension *) it->data; + if (!ext->html_filter_func(ext, node->as.literal.data, node->as.literal.len)) { + filtered = true; + break; + } + } + if (!filtered) { + cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len); + } else { + cmark_strbuf_puts(html, "<"); + cmark_strbuf_put(html, node->as.literal.data + 1, node->as.literal.len - 1); + } } break; @@ -302,7 +351,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, node->as.link.url.len); } cmark_strbuf_puts(html, "\" alt=\""); - state->plain = node; + renderer->plain = node; } else { if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); @@ -318,24 +367,36 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, break; } - // cmark_strbuf_putc(html, 'x'); return 1; } -char *cmark_render_html(cmark_node *root, int options) { +char *cmark_render_html(cmark_node *root, int options, cmark_llist *extensions) { + return cmark_render_html_with_mem(root, options, extensions, cmark_node_mem(root)); +} + +char *cmark_render_html_with_mem(cmark_node *root, int options, cmark_llist *extensions, cmark_mem *mem) { char *result; - cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root)); + cmark_strbuf html = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; - struct render_state state = {&html, NULL}; + cmark_html_renderer renderer = {&html, NULL, NULL, NULL}; cmark_iter *iter = cmark_iter_new(root); + for (; extensions; extensions = extensions->next) + if (((cmark_syntax_extension *) extensions->data)->html_filter_func) + renderer.filter_extensions = cmark_llist_append( + mem, + renderer.filter_extensions, + 
(cmark_syntax_extension *) extensions->data); + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); - S_render_node(cur, ev_type, &state, options); + S_render_node(&renderer, cur, ev_type, options); } result = (char *)cmark_strbuf_detach(&html); + cmark_llist_free(mem, renderer.filter_extensions); + cmark_iter_free(iter); return result; } diff --git a/cbits/html.h b/cbits/html.h new file mode 100644 index 0000000..aeba7bc --- /dev/null +++ b/cbits/html.h @@ -0,0 +1,27 @@ +#ifndef CMARK_HTML_H +#define CMARK_HTML_H + +#include "buffer.h" +#include "node.h" + +CMARK_INLINE +static void cmark_html_render_cr(cmark_strbuf *html) { + if (html->size && html->ptr[html->size - 1] != '\n') + cmark_strbuf_putc(html, '\n'); +} + +#define BUFFER_SIZE 100 + +CMARK_INLINE +static void cmark_html_render_sourcepos(cmark_node *node, cmark_strbuf *html, int options) { + char buffer[BUFFER_SIZE]; + if (CMARK_OPT_SOURCEPOS & options) { + snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"", + cmark_node_get_start_line(node), cmark_node_get_start_column(node), + cmark_node_get_end_line(node), cmark_node_get_end_column(node)); + cmark_strbuf_puts(html, buffer); + } +} + + +#endif diff --git a/cbits/inlines.c b/cbits/inlines.c index f223baf..e30c2af 100644 --- a/cbits/inlines.c +++ b/cbits/inlines.c @@ -12,6 +12,7 @@ #include "utf8.h" #include "scanners.h" #include "inlines.h" +#include "syntax_extension.h" static const char *EMDASH = "\xE2\x80\x94"; static const char *ENDASH = "\xE2\x80\x93"; @@ -30,17 +31,7 @@ static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; #define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH) #define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG) -#define MAXBACKTICKS 1000 - -typedef struct delimiter { - struct delimiter *previous; - struct delimiter *next; - cmark_node *inl_text; - bufsize_t length; - unsigned char delim_char; - bool can_open; - bool can_close; -} delimiter; +#define MAXBACKTICKS 
80 typedef struct bracket { struct bracket *previous; @@ -52,7 +43,7 @@ typedef struct bracket { bool bracket_after; } bracket; -typedef struct { +typedef struct subject{ cmark_mem *mem; cmark_chunk input; bufsize_t pos; @@ -70,7 +61,7 @@ static CMARK_INLINE bool S_is_line_end_char(char c) { static delimiter *S_insert_emph(subject *subj, delimiter *opener, delimiter *closer); -static int parse_inline(subject *subj, cmark_node *parent, int options); +static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options); static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, cmark_reference_map *refmap); @@ -81,7 +72,7 @@ static CMARK_INLINE cmark_node *make_literal(cmark_mem *mem, cmark_node_type t, cmark_chunk s) { cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e)); cmark_strbuf_init(mem, &e->content, 0); - e->type = t; + e->type = (uint16_t)t; e->as.literal = s; return e; } @@ -90,7 +81,7 @@ static CMARK_INLINE cmark_node *make_literal(cmark_mem *mem, cmark_node_type t, static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) { cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e)); cmark_strbuf_init(mem, &e->content, 0); - e->type = t; + e->type = (uint16_t)t; return e; } @@ -511,14 +502,41 @@ static cmark_node *handle_period(subject *subj, bool smart) { } } -static void process_emphasis(subject *subj, delimiter *stack_bottom) { +static cmark_syntax_extension *get_extension_for_special_char(cmark_parser *parser, unsigned char c) { + cmark_llist *tmp_ext; + + for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data; + cmark_llist *tmp_char; + for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { + unsigned char tmp_c = (unsigned char)(size_t)tmp_char->data; + + if (tmp_c == c) { + return ext; + } + } + } + + return NULL; +} + +static void 
process_emphasis(cmark_parser *parser, subject *subj, delimiter *stack_bottom) { delimiter *closer = subj->last_delim; delimiter *opener; delimiter *old_closer; bool opener_found; - int openers_bottom_index; - delimiter *openers_bottom[6] = {stack_bottom, stack_bottom, stack_bottom, - stack_bottom, stack_bottom, stack_bottom}; + bool odd_match; + delimiter *openers_bottom[3][128]; + int i; + + // initialize openers_bottom: + memset(&openers_bottom, 0, sizeof(openers_bottom)); + for (i=0; i < 3; i++) { + openers_bottom[i]['*'] = stack_bottom; + openers_bottom[i]['_'] = stack_bottom; + openers_bottom[i]['\''] = stack_bottom; + openers_bottom[i]['"'] = stack_bottom; + } // move back to first relevant delim. while (closer != NULL && closer->previous != stack_bottom) { @@ -527,33 +545,20 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { // now move forward, looking for closers, and handling each while (closer != NULL) { + cmark_syntax_extension *extension = get_extension_for_special_char(parser, closer->delim_char); if (closer->can_close) { - switch (closer->delim_char) { - case '"': - openers_bottom_index = 0; - break; - case '\'': - openers_bottom_index = 1; - break; - case '_': - openers_bottom_index = 2; - break; - case '*': - openers_bottom_index = 3 + (closer->length % 3); - break; - default: - assert(false); - } - // Now look backwards for first matching opener: opener = closer->previous; opener_found = false; - while (opener != NULL && opener != openers_bottom[openers_bottom_index]) { + odd_match = false; + while (opener != NULL && opener != stack_bottom && + opener != openers_bottom[closer->length % 3][closer->delim_char]) { if (opener->can_open && opener->delim_char == closer->delim_char) { // interior closer of size 2 can't match opener of size 1 // or of size 1 can't match 2 - if (!(closer->can_open || opener->can_close) || - ((opener->length + closer->length) % 3) != 0) { + odd_match = (closer->can_open || opener->can_close) && + 
((opener->length + closer->length) % 3 == 0); + if (!odd_match) { opener_found = true; break; } @@ -561,7 +566,13 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { opener = opener->previous; } old_closer = closer; - if (closer->delim_char == '*' || closer->delim_char == '_') { + + if (extension) { + if (opener_found) + closer = extension->insert_inline_from_delim(extension, parser, subj, opener, closer); + else + closer = closer->next; + } else if (closer->delim_char == '*' || closer->delim_char == '_') { if (opener_found) { closer = S_insert_emph(subj, opener, closer); } else { @@ -586,7 +597,8 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { } if (!opener_found) { // set lower bound for future searches for openers - openers_bottom[openers_bottom_index] = old_closer->previous; + openers_bottom[old_closer->length % 3][old_closer->delim_char] = + old_closer->previous; if (!old_closer->can_open) { // we can remove a closer that can't be an // opener, once we've seen there's no @@ -663,11 +675,11 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, } // Parse backslash-escape or just a backslash, returning an inline. -static cmark_node *handle_backslash(subject *subj) { +static cmark_node *handle_backslash(cmark_parser *parser, subject *subj) { advance(subj); unsigned char nextchar = peek_char(subj); - if (cmark_ispunct( - nextchar)) { // only ascii symbols and newline can be escaped + if ((parser->backslash_ispunct ? parser->backslash_ispunct : cmark_ispunct)(nextchar)) { + // only ascii symbols and newline can be escaped advance(subj); return make_str(subj->mem, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (!is_eof(subj) && skip_line_end(subj)) { @@ -695,8 +707,8 @@ static cmark_node *handle_entity(subject *subj) { return make_str(subj->mem, cmark_chunk_buf_detach(&ent)); } -// Clean a URL: remove surrounding whitespace, and remove \ that escape -// punctuation. 
+// Clean a URL: remove surrounding whitespace and surrounding <>, +// and remove \ that escape punctuation. cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) { cmark_strbuf buf = CMARK_BUF_INIT(mem); @@ -707,7 +719,11 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) { return result; } - houdini_unescape_html_f(&buf, url->data, url->len); + if (url->data[0] == '<' && url->data[url->len - 1] == '>') { + houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); + } else { + houdini_unescape_html_f(&buf, url->data, url->len); + } cmark_strbuf_unescape(&buf); return cmark_chunk_buf_detach(&buf); @@ -739,7 +755,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) { // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. -static cmark_node *handle_pointy_brace(subject *subj) { +static cmark_node *handle_pointy_brace(subject *subj, bool liberal_html_tag) { bufsize_t matchlen = 0; cmark_chunk contents; @@ -771,6 +787,15 @@ static cmark_node *handle_pointy_brace(subject *subj) { return make_raw_html(subj->mem, contents); } + if (liberal_html_tag) { + matchlen = scan_liberal_html_tag(&subj->input, subj->pos); + if (matchlen > 0) { + contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); + subj->pos += matchlen; + return make_raw_html(subj->mem, contents); + } + } + // if nothing matches, just return the opening <: return make_str(subj->mem, cmark_chunk_literal("<")); } @@ -820,43 +845,10 @@ static int link_label(subject *subj, cmark_chunk *raw_label) { subj->pos = startpos; // rewind return 0; } - -static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset, cmark_chunk *output) { +static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) { bufsize_t i = offset; size_t nb_p = 0; - while (i < input->len) { - if (input->data[i] == '\\' && - i + 1 < input-> len && - cmark_ispunct(input->data[i+1])) - i += 2; - else if (input->data[i] 
== '(') { - ++nb_p; - ++i; - } else if (input->data[i] == ')') { - if (nb_p == 0) - break; - --nb_p; - ++i; - } else if (cmark_isspace(input->data[i])) - break; - else - ++i; - } - - if (i >= input->len) - return -1; - - { - cmark_chunk result = {input->data + offset, i - offset, 0}; - *output = result; - } - return i - offset; -} - -static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset, cmark_chunk *output) { - bufsize_t i = offset; - if (i < input->len && input->data[i] == '<') { ++i; while (i < input->len) { @@ -865,30 +857,44 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset, cmar break; } else if (input->data[i] == '\\') i += 2; - else if (cmark_isspace(input->data[i]) || input->data[i] == '<') - return manual_scan_link_url_2(input, offset, output); + else if (cmark_isspace(input->data[i])) + return -1; else ++i; } } else { - return manual_scan_link_url_2(input, offset, output); + while (i < input->len) { + if (input->data[i] == '\\' && + i + 1 < input-> len && + cmark_ispunct(input->data[i+1])) + i += 2; + else if (input->data[i] == '(') { + ++nb_p; + ++i; + if (nb_p > 32) + return -1; + } else if (input->data[i] == ')') { + if (nb_p == 0) + break; + --nb_p; + ++i; + } else if (cmark_isspace(input->data[i])) + break; + else + ++i; + } } if (i >= input->len) return -1; - - { - cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0}; - *output = result; - } return i - offset; } - // Return a link, an image, or a literal close bracket. 
-static cmark_node *handle_close_bracket(subject *subj) { +static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { bufsize_t initial_pos, after_link_text_pos; - bufsize_t endurl, starttitle, endtitle, endall; - bufsize_t sps, n; + bufsize_t starturl, endurl, starttitle, endtitle, endall; + bufsize_t n; + bufsize_t sps; cmark_reference *ref = NULL; cmark_chunk url_chunk, title_chunk; cmark_chunk url, title; @@ -924,10 +930,11 @@ static cmark_node *handle_close_bracket(subject *subj) { // First, look for an inline link. if (peek_char(subj) == '(' && ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && - ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps, &url_chunk)) > -1)) { + ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { // try to parse an explicit link: - endurl = subj->pos + 1 + sps + n; + starturl = subj->pos + 1 + sps; // after ( + endurl = starturl + n; starttitle = endurl + scan_spacechars(&subj->input, endurl); // ensure there are spaces btw url and title @@ -940,6 +947,7 @@ static cmark_node *handle_close_bracket(subject *subj) { if (peek_at(subj, endall) == ')') { subj->pos = endall + 1; + url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl); title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle); url = cmark_clean_url(subj->mem, &url_chunk); @@ -1006,7 +1014,7 @@ static cmark_node *handle_close_bracket(subject *subj) { // Free the bracket [: cmark_node_free(opener->inl_text); - process_emphasis(subj, opener->previous_delimiter); + process_emphasis(parser, subj, opener->previous_delimiter); pop_bracket(subj); // Now, if we have a link, we also want to deactivate earlier link @@ -1050,9 +1058,8 @@ static cmark_node *handle_newline(subject *subj) { } } -static bufsize_t subject_find_special_char(subject *subj, int options) { - // "\r\n\\`&_*[]pos + 1; while (n < subj->input.len) { @@ -1093,9 +1101,35 @@ static bufsize_t 
subject_find_special_char(subject *subj, int options) { return subj->input.len; } +void cmark_inlines_add_special_character(unsigned char c) { + SPECIAL_CHARS[c] = 1; +} + +void cmark_inlines_remove_special_character(unsigned char c) { + SPECIAL_CHARS[c] = 0; +} + +static cmark_node *try_extensions(cmark_parser *parser, + cmark_node *parent, + unsigned char c, + subject *subj) { + cmark_node *res = NULL; + cmark_llist *tmp; + + for (tmp = parser->inline_syntax_extensions; tmp; tmp = tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + res = ext->match_inline(ext, parser, parent, c, subj); + + if (res) + break; + } + + return res; +} + // Parse an inline, advancing subject, and add it as a child of parent. // Return 0 if no inline can be parsed, 1 otherwise. -static int parse_inline(subject *subj, cmark_node *parent, int options) { +static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options) { cmark_node *new_inl = NULL; cmark_chunk contents; unsigned char c; @@ -1113,13 +1147,13 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { new_inl = handle_backticks(subj); break; case '\\': - new_inl = handle_backslash(subj); + new_inl = handle_backslash(parser, subj); break; case '&': new_inl = handle_entity(subj); break; case '<': - new_inl = handle_pointy_brace(subj); + new_inl = handle_pointy_brace(subj, (options & CMARK_OPT_LIBERAL_HTML_TAG) != 0); break; case '*': case '_': @@ -1139,7 +1173,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { push_bracket(subj, false, new_inl); break; case ']': - new_inl = handle_close_bracket(subj); + new_inl = handle_close_bracket(parser, subj); break; case '!': advance(subj); @@ -1152,6 +1186,10 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { } break; default: + new_inl = try_extensions(parser, parent, c, subj); + if (new_inl != NULL) + break; + endpos = subject_find_special_char(subj, 
options); contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); subj->pos = endpos; @@ -1171,16 +1209,18 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { } // Parse inlines from parent's string_content, adding as children of parent. -extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, - cmark_reference_map *refmap, int options) { +void cmark_parse_inlines(cmark_parser *parser, + cmark_node *parent, + cmark_reference_map *refmap, + int options) { subject subj; - subject_from_buf(mem, &subj, &parent->content, refmap); + subject_from_buf(parser->mem, &subj, &parent->content, refmap); cmark_chunk_rtrim(&subj.input); - while (!is_eof(&subj) && parse_inline(&subj, parent, options)) + while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options)) ; - process_emphasis(&subj, NULL); + process_emphasis(parser, &subj, NULL); // free bracket and delim stack while (subj.last_delim) { remove_delimiter(&subj, subj.last_delim); @@ -1228,8 +1268,9 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, // parse link url: spnl(&subj); - if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 && - url.len > 0) { + matchlen = manual_scan_link_url(&subj.input, subj.pos); + if (matchlen > 0) { + url = cmark_chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { return 0; @@ -1264,3 +1305,151 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_create(refmap, &lab, &url, &title); return subj.pos; } + +unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser) { + return peek_char(parser); +} + +unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, bufsize_t pos) { + return peek_at(parser, pos); +} + +int cmark_inline_parser_is_eof(cmark_inline_parser *parser) { + return is_eof(parser); +} + +static char * +my_strndup (const char *s, size_t n) +{ + char *result; + size_t len = strlen (s); 
+ + if (n < len) + len = n; + + result = (char *) malloc (len + 1); + if (!result) + return 0; + + result[len] = '\0'; + return (char *) memcpy (result, s, len); +} + +char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred) { + unsigned char c; + bufsize_t startpos = parser->pos; + bufsize_t len = 0; + + while ((c = peek_char(parser)) && (*pred)(c)) { + advance(parser); + len++; + } + + return my_strndup((const char *) parser->input.data + startpos, len); +} + +void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser, + unsigned char c, + int can_open, + int can_close, + cmark_node *inl_text) { + push_delimiter(parser, c, can_open != 0, can_close != 0, inl_text); +} + +void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim) { + remove_delimiter(parser, delim); +} + +int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, + int max_delims, + unsigned char c, + int *left_flanking, + int *right_flanking, + int *punct_before, + int *punct_after) { + int numdelims = 0; + bufsize_t before_char_pos; + int32_t after_char = 0; + int32_t before_char = 0; + int len; + bool space_before, space_after; + + if (parser->pos == 0) { + before_char = 10; + } else { + before_char_pos = parser->pos - 1; + // walk back to the beginning of the UTF_8 sequence: + while (peek_at(parser, before_char_pos) >> 6 == 2 && before_char_pos > 0) { + before_char_pos -= 1; + } + len = cmark_utf8proc_iterate(parser->input.data + before_char_pos, + parser->pos - before_char_pos, &before_char); + if (len == -1) { + before_char = 10; + } + } + + while (peek_char(parser) == c && numdelims <= max_delims) { + numdelims++; + advance(parser); + } + + len = cmark_utf8proc_iterate(parser->input.data + parser->pos, + parser->input.len - parser->pos, &after_char); + if (len == -1) { + after_char = 10; + } + + *punct_before = cmark_utf8proc_is_punctuation(before_char); + *punct_after = 
cmark_utf8proc_is_punctuation(after_char); + space_before = cmark_utf8proc_is_space(before_char) != 0; + space_after = cmark_utf8proc_is_space(after_char) != 0; + + *left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) && + !(*punct_after && !space_before && !*punct_before); + *right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) && + !(*punct_before && !space_after && !*punct_after); + + return numdelims; +} + +void cmark_inline_parser_advance_offset(cmark_inline_parser *parser) { + advance(parser); +} + +int cmark_inline_parser_get_offset(cmark_inline_parser *parser) { + return parser->pos; +} + +void cmark_inline_parser_set_offset(cmark_inline_parser *parser, int offset) { + parser->pos = offset; +} + +cmark_chunk *cmark_inline_parser_get_chunk(cmark_inline_parser *parser) { + return &parser->input; +} + +int cmark_inline_parser_in_bracket(cmark_inline_parser *parser, int image) { + for (bracket *b = parser->last_bracket; b; b = b->previous) + if (b->active && b->image == (image != 0)) + return 1; + return 0; +} + +void cmark_node_unput(cmark_node *node, int n) { + node = node->last_child; + while (n > 0 && node && node->type == CMARK_NODE_TEXT) { + if (node->as.literal.len < n) { + n -= node->as.literal.len; + node->as.literal.len = 0; + } else { + node->as.literal.len -= n; + n = 0; + } + node = node->prev; + } +} + +delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) { + return parser->last_delim; +} diff --git a/cbits/inlines.h b/cbits/inlines.h index 52be768..0d8305c 100644 --- a/cbits/inlines.h +++ b/cbits/inlines.h @@ -5,15 +5,23 @@ extern "C" { #endif +#include "references.h" + cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); -void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, - cmark_reference_map *refmap, int options); +CMARK_EXPORT +void cmark_parse_inlines(cmark_parser *parser, + cmark_node *parent, 
+ cmark_reference_map *refmap, + int options); bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_map *refmap); +void cmark_inlines_add_special_character(unsigned char c); +void cmark_inlines_remove_special_character(unsigned char c); + #ifdef __cplusplus } #endif diff --git a/cbits/iterator.c b/cbits/iterator.c index 24423a2..149a445 100644 --- a/cbits/iterator.c +++ b/cbits/iterator.c @@ -6,12 +6,6 @@ #include "cmark.h" #include "iterator.h" -static const int S_leaf_mask = - (1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) | - (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) | - (1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) | - (1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE); - cmark_iter *cmark_iter_new(cmark_node *root) { if (root == NULL) { return NULL; @@ -30,7 +24,18 @@ cmark_iter *cmark_iter_new(cmark_node *root) { void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); } static bool S_is_leaf(cmark_node *node) { - return ((1 << node->type) & S_leaf_mask) != 0; + switch (node->type) { + case CMARK_NODE_HTML_BLOCK: + case CMARK_NODE_THEMATIC_BREAK: + case CMARK_NODE_CODE_BLOCK: + case CMARK_NODE_TEXT: + case CMARK_NODE_SOFTBREAK: + case CMARK_NODE_LINEBREAK: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_INLINE: + return 1; + } + return 0; } cmark_event_type cmark_iter_next(cmark_iter *iter) { @@ -118,3 +123,36 @@ void cmark_consolidate_text_nodes(cmark_node *root) { cmark_strbuf_free(&buf); cmark_iter_free(iter); } + +void cmark_node_own(cmark_node *root) { + if (root == NULL) { + return; + } + cmark_iter *iter = cmark_iter_new(root); + cmark_event_type ev_type; + cmark_node *cur; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (ev_type == CMARK_EVENT_ENTER) { + switch (cur->type) { + case CMARK_NODE_TEXT: + case CMARK_NODE_HTML_INLINE: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_BLOCK: + 
cmark_chunk_to_cstr(iter->mem, &cur->as.literal); + break; + case CMARK_NODE_LINK: + cmark_chunk_to_cstr(iter->mem, &cur->as.link.url); + cmark_chunk_to_cstr(iter->mem, &cur->as.link.title); + break; + case CMARK_NODE_CUSTOM_INLINE: + cmark_chunk_to_cstr(iter->mem, &cur->as.custom.on_enter); + cmark_chunk_to_cstr(iter->mem, &cur->as.custom.on_exit); + break; + } + } + } + + cmark_iter_free(iter); +} diff --git a/cbits/latex.c b/cbits/latex.c index f372a13..29572e1 100644 --- a/cbits/latex.c +++ b/cbits/latex.c @@ -10,14 +10,16 @@ #include "utf8.h" #include "scanners.h" #include "render.h" +#include "syntax_extension.h" -#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) -#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, node, s, false, LITERAL) #define CR() renderer->cr(renderer) #define BLANKLINE() renderer->blankline(renderer) #define LIST_NUMBER_STRING_SIZE 20 -static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, +static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, + cmark_escaping escape, int32_t c, unsigned char nextc) { if (escape == LITERAL) { cmark_render_code_point(renderer, c); @@ -226,8 +228,10 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, cmark_list_type list_type; bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); - // avoid warning about unused parameter: - (void)(options); + if (node->extension && node->extension->latex_render_func) { + node->extension->latex_render_func(node->extension, renderer, node, ev_type, options); + return 1; + } switch (node->type) { case CMARK_NODE_DOCUMENT: @@ -449,5 +453,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } char *cmark_render_latex(cmark_node *root, int options, int width) { - return cmark_render(root, options, width, outc, S_render_node); 
+ return cmark_render_latex_with_mem(root, options, width, cmark_node_mem(root)); +} + +char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { + return cmark_render(mem, root, options, width, outc, S_render_node); } diff --git a/cbits/linked_list.c b/cbits/linked_list.c new file mode 100644 index 0000000..f8bc604 --- /dev/null +++ b/cbits/linked_list.c @@ -0,0 +1,37 @@ +#include + +#include "cmark.h" + +cmark_llist *cmark_llist_append(cmark_mem *mem, cmark_llist *head, void *data) { + cmark_llist *tmp; + cmark_llist *new_node = (cmark_llist *) mem->calloc(1, sizeof(cmark_llist)); + + new_node->data = data; + new_node->next = NULL; + + if (!head) + return new_node; + + for (tmp = head; tmp->next; tmp=tmp->next); + + tmp->next = new_node; + + return head; +} + +void cmark_llist_free_full(cmark_mem *mem, cmark_llist *head, cmark_free_func free_func) { + cmark_llist *tmp, *prev; + + for (tmp = head; tmp;) { + if (free_func) + free_func(mem, tmp->data); + + prev = tmp; + tmp = tmp->next; + mem->free(prev); + } +} + +void cmark_llist_free(cmark_mem *mem, cmark_llist *head) { + cmark_llist_free_full(mem, head, NULL); +} diff --git a/cbits/man.c b/cbits/man.c index 1c76f68..2b52ad5 100644 --- a/cbits/man.c +++ b/cbits/man.c @@ -9,15 +9,17 @@ #include "buffer.h" #include "utf8.h" #include "render.h" +#include "syntax_extension.h" -#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) -#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, node, s, false, LITERAL) #define CR() renderer->cr(renderer) #define BLANKLINE() renderer->blankline(renderer) #define LIST_NUMBER_SIZE 20 // Functions to convert cmark_nodes to groff man strings. 
-static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c, +static void S_outc(cmark_renderer *renderer, cmark_node *node, + cmark_escaping escape, int32_t c, unsigned char nextc) { (void)(nextc); @@ -77,11 +79,26 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, bool entering = (ev_type == CMARK_EVENT_ENTER); bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options); - // avoid unused parameter error: - (void)(options); + if (node->extension && node->extension->man_render_func) { + node->extension->man_render_func(node->extension, renderer, node, ev_type, options); + return 1; + } switch (node->type) { case CMARK_NODE_DOCUMENT: + if (entering) { + /* Define a strikethrough macro */ + /* Commenting out because this makes tests fail + LIT(".de ST"); + CR(); + LIT(".nr ww \\w'\\\\$1'"); + CR(); + LIT("\\Z@\\v'-.25m'\\l'\\\\n[ww]u'@\\\\$1"); + CR(); + LIT(".."); + CR(); + */ + } break; case CMARK_NODE_BLOCK_QUOTE: @@ -248,5 +265,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } char *cmark_render_man(cmark_node *root, int options, int width) { - return cmark_render(root, options, width, S_outc, S_render_node); + return cmark_render_man_with_mem(root, options, width, cmark_node_mem(root)); +} + +char *cmark_render_man_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { + return cmark_render(mem, root, options, width, S_outc, S_render_node); } diff --git a/cbits/node.c b/cbits/node.c index c6c2902..bcf322f 100644 --- a/cbits/node.c +++ b/cbits/node.c @@ -3,55 +3,29 @@ #include "config.h" #include "node.h" +#include "syntax_extension.h" static void S_node_unlink(cmark_node *node); #define NODE_MEM(node) cmark_node_mem(node) -static CMARK_INLINE bool S_is_block(cmark_node *node) { - if (node == NULL) { - return false; - } - return node->type >= CMARK_NODE_FIRST_BLOCK && - node->type <= CMARK_NODE_LAST_BLOCK; -} - -static CMARK_INLINE bool S_is_inline(cmark_node *node) { - 
if (node == NULL) { - return false; - } - return node->type >= CMARK_NODE_FIRST_INLINE && - node->type <= CMARK_NODE_LAST_INLINE; -} - -static bool S_can_contain(cmark_node *node, cmark_node *child) { - cmark_node *cur; - - if (node == NULL || child == NULL) { - return false; - } - - // Verify that child is not an ancestor of node or equal to node. - cur = node; - do { - if (cur == child) { +bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) { + if (child_type == CMARK_NODE_DOCUMENT) { return false; } - cur = cur->parent; - } while (cur != NULL); - if (child->type == CMARK_NODE_DOCUMENT) { - return false; + if (node->extension && node->extension->can_contain_func) { + return node->extension->can_contain_func(node->extension, node, child_type) != 0; } switch (node->type) { case CMARK_NODE_DOCUMENT: case CMARK_NODE_BLOCK_QUOTE: case CMARK_NODE_ITEM: - return S_is_block(child) && child->type != CMARK_NODE_ITEM; + return CMARK_NODE_TYPE_BLOCK_P(child_type) && child_type != CMARK_NODE_ITEM; case CMARK_NODE_LIST: - return child->type == CMARK_NODE_ITEM; + return child_type == CMARK_NODE_ITEM; case CMARK_NODE_CUSTOM_BLOCK: return true; @@ -63,7 +37,7 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: case CMARK_NODE_CUSTOM_INLINE: - return S_is_inline(child); + return CMARK_NODE_TYPE_INLINE_P(child_type); default: break; @@ -72,6 +46,28 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { return false; } +static bool S_can_contain(cmark_node *node, cmark_node *child) { + cmark_node *cur; + + if (node == NULL || child == NULL) { + return false; + } + if (NODE_MEM(node) != NODE_MEM(child)) { + return 0; + } + + // Verify that child is not an ancestor of node or equal to node. 
+ cur = node; + do { + if (cur == child) { + return false; + } + cur = cur->parent; + } while (cur != NULL); + + return cmark_node_can_contain_type(node, (cmark_node_type) child->type); +} + cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) { cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node)); cmark_strbuf_init(mem, &node->content, 0); @@ -98,39 +94,51 @@ cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) { } cmark_node *cmark_node_new(cmark_node_type type) { - extern cmark_mem DEFAULT_MEM_ALLOCATOR; - return cmark_node_new_with_mem(type, &DEFAULT_MEM_ALLOCATOR); + extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; + return cmark_node_new_with_mem(type, &CMARK_DEFAULT_MEM_ALLOCATOR); } -// Free a cmark_node list and any children. -static void S_free_nodes(cmark_node *e) { - cmark_node *next; - while (e != NULL) { - cmark_strbuf_free(&e->content); - switch (e->type) { +static void free_node_as(cmark_node *node) { + switch (node->type) { case CMARK_NODE_CODE_BLOCK: - cmark_chunk_free(NODE_MEM(e), &e->as.code.info); - cmark_chunk_free(NODE_MEM(e), &e->as.code.literal); + cmark_chunk_free(NODE_MEM(node), &node->as.code.info); + cmark_chunk_free(NODE_MEM(node), &node->as.code.literal); break; case CMARK_NODE_TEXT: case CMARK_NODE_HTML_INLINE: case CMARK_NODE_CODE: case CMARK_NODE_HTML_BLOCK: - cmark_chunk_free(NODE_MEM(e), &e->as.literal); + cmark_chunk_free(NODE_MEM(node), &node->as.literal); break; case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: - cmark_chunk_free(NODE_MEM(e), &e->as.link.url); - cmark_chunk_free(NODE_MEM(e), &e->as.link.title); + cmark_chunk_free(NODE_MEM(node), &node->as.link.url); + cmark_chunk_free(NODE_MEM(node), &node->as.link.title); break; case CMARK_NODE_CUSTOM_BLOCK: case CMARK_NODE_CUSTOM_INLINE: - cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_enter); - cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_exit); + cmark_chunk_free(NODE_MEM(node), &node->as.custom.on_enter); + 
cmark_chunk_free(NODE_MEM(node), &node->as.custom.on_exit); break; default: break; } +} + +// Free a cmark_node list and any children. +static void S_free_nodes(cmark_node *e) { + cmark_node *next; + while (e != NULL) { + cmark_strbuf_free(&e->content); + + if (e->user_data && e->user_data_free_func) + e->user_data_free_func(NODE_MEM(e), e->user_data); + + if (e->as.opaque && e->extension && e->extension->opaque_free_func) + e->extension->opaque_free_func(e->extension, NODE_MEM(e), e); + + free_node_as(e); + if (e->last_child) { // Splice children into list e->last_child->next = e->next; @@ -156,11 +164,38 @@ cmark_node_type cmark_node_get_type(cmark_node *node) { } } +int cmark_node_set_type(cmark_node * node, cmark_node_type type) { + cmark_node_type initial_type; + + if (type == node->type) + return 1; + + initial_type = (cmark_node_type) node->type; + node->type = (uint16_t)type; + + if (!S_can_contain(node->parent, node)) { + node->type = (uint16_t)initial_type; + return 0; + } + + /* We rollback the type to free the union members appropriately */ + node->type = (uint16_t)initial_type; + free_node_as(node); + + node->type = (uint16_t)type; + + return 1; +} + const char *cmark_node_get_type_string(cmark_node *node) { if (node == NULL) { return "NONE"; } + if (node->extension && node->extension->get_type_string_func) { + return node->extension->get_type_string_func(node->extension, node); + } + switch (node->type) { case CMARK_NODE_NONE: return "none"; @@ -265,6 +300,15 @@ int cmark_node_set_user_data(cmark_node *node, void *user_data) { return 1; } +int cmark_node_set_user_data_free_func(cmark_node *node, + cmark_free_func free_func) { + if (node == NULL) { + return 0; + } + node->user_data_free_func = free_func; + return 1; +} + const char *cmark_node_get_literal(cmark_node *node) { if (node == NULL) { return NULL; @@ -311,6 +355,15 @@ int cmark_node_set_literal(cmark_node *node, const char *content) { return 0; } +const char 
*cmark_node_get_string_content(cmark_node *node) { + return (char *) node->content.ptr; +} + +int cmark_node_set_string_content(cmark_node *node, const char *content) { + cmark_strbuf_sets(&node->content, content); + return true; +} + int cmark_node_get_heading_level(cmark_node *node) { if (node == NULL) { return 0; @@ -477,6 +530,38 @@ int cmark_node_set_fence_info(cmark_node *node, const char *info) { } } +int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_CODE_BLOCK) { + *length = node->as.code.fence_length; + *offset = node->as.code.fence_offset; + *character = node->as.code.fence_char; + return node->as.code.fenced; + } else { + return 0; + } +} + +int cmark_node_set_fenced(cmark_node * node, int fenced, + int length, int offset, char character) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_CODE_BLOCK) { + node->as.code.fenced = (int8_t)fenced; + node->as.code.fence_length = (uint8_t)length; + node->as.code.fence_offset = (uint8_t)offset; + node->as.code.fence_char = character; + return 1; + } else { + return 0; + } +} + const char *cmark_node_get_url(cmark_node *node) { if (node == NULL) { return NULL; @@ -609,6 +694,23 @@ int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) { return 0; } +cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + return node->extension; +} + +int cmark_node_set_syntax_extension(cmark_node *node, cmark_syntax_extension *extension) { + if (node == NULL) { + return 0; + } + + node->extension = extension; + return 1; +} + int cmark_node_get_start_line(cmark_node *node) { if (node == NULL) { return 0; diff --git a/cbits/node.h b/cbits/node.h index 65d857f..e32814b 100644 --- a/cbits/node.h +++ b/cbits/node.h @@ -9,6 +9,7 @@ extern "C" { #include #include "cmark.h" +#include "cmark_extension_api.h" #include "buffer.h" 
#include "chunk.h" @@ -61,6 +62,7 @@ struct cmark_node { struct cmark_node *last_child; void *user_data; + cmark_free_func user_data_free_func; int start_line; int start_column; @@ -69,6 +71,8 @@ struct cmark_node { uint16_t type; uint16_t flags; + cmark_syntax_extension *extension; + union { cmark_chunk literal; cmark_list list; @@ -77,6 +81,7 @@ struct cmark_node { cmark_link link; cmark_custom custom; int html_block_type; + void *opaque; } as; }; @@ -85,6 +90,24 @@ static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) { } CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out); +static CMARK_INLINE bool CMARK_NODE_TYPE_BLOCK_P(cmark_node_type node_type) { + return (node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_BLOCK; +} + +static CMARK_INLINE bool CMARK_NODE_BLOCK_P(cmark_node *node) { + return node != NULL && CMARK_NODE_TYPE_BLOCK_P((cmark_node_type) node->type); +} + +static CMARK_INLINE bool CMARK_NODE_TYPE_INLINE_P(cmark_node_type node_type) { + return (node_type & CMARK_NODE_TYPE_MASK) == CMARK_NODE_TYPE_INLINE; +} + +static CMARK_INLINE bool CMARK_NODE_INLINE_P(cmark_node *node) { + return node != NULL && CMARK_NODE_TYPE_INLINE_P((cmark_node_type) node->type); +} + +CMARK_EXPORT bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type); + #ifdef __cplusplus } #endif diff --git a/cbits/parser.h b/cbits/parser.h index 0c5033b..89c4209 100644 --- a/cbits/parser.h +++ b/cbits/parser.h @@ -1,5 +1,5 @@ -#ifndef CMARK_AST_H -#define CMARK_AST_H +#ifndef CMARK_PARSER_H +#define CMARK_PARSER_H #include #include "node.h" @@ -14,22 +14,40 @@ extern "C" { struct cmark_parser { struct cmark_mem *mem; + /* A hashtable of urls in the current document for cross-references */ struct cmark_reference_map *refmap; + /* The root node of the parser, always a CMARK_NODE_DOCUMENT */ struct cmark_node *root; + /* The last open block after a line is fully processed */ struct cmark_node *current; + /* See the documentation for 
cmark_parser_get_line_number() in cmark.h */ int line_number; + /* See the documentation for cmark_parser_get_offset() in cmark.h */ bufsize_t offset; + /* See the documentation for cmark_parser_get_column() in cmark.h */ bufsize_t column; + /* See the documentation for cmark_parser_get_first_nonspace() in cmark.h */ bufsize_t first_nonspace; + /* See the documentation for cmark_parser_get_first_nonspace_column() in cmark.h */ bufsize_t first_nonspace_column; + /* See the documentation for cmark_parser_get_indent() in cmark.h */ int indent; + /* See the documentation for cmark_parser_is_blank() in cmark.h */ bool blank; + /* See the documentation for cmark_parser_has_partially_consumed_tab() in cmark.h */ bool partially_consumed_tab; + /* Contains the currently processed line */ cmark_strbuf curline; + /* See the documentation for cmark_parser_get_last_line_length() in cmark.h */ bufsize_t last_line_length; + /* FIXME: not sure about the difference with curline */ cmark_strbuf linebuf; + /* Options set by the user, see the Options section in cmark.h */ int options; bool last_buffer_ended_with_cr; + cmark_llist *syntax_extensions; + cmark_llist *inline_syntax_extensions; + cmark_ispunct_func backslash_ispunct; }; #ifdef __cplusplus diff --git a/cbits/plaintext.c b/cbits/plaintext.c new file mode 100644 index 0000000..a274827 --- /dev/null +++ b/cbits/plaintext.c @@ -0,0 +1,212 @@ +#include "node.h" +#include "syntax_extension.h" + +#define OUT(s, wrap, escaping) renderer->out(renderer, node, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, node, s, false, LITERAL) +#define CR() renderer->cr(renderer) +#define BLANKLINE() renderer->blankline(renderer) +#define LISTMARKER_SIZE 20 + +// Functions to convert cmark_nodes to plain text strings. 
+ +static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node, + cmark_escaping escape, + int32_t c, unsigned char nextc) { + cmark_render_code_point(renderer, c); +} + +// if node is a block node, returns node. +// otherwise returns first block-level node that is an ancestor of node. +// if there is no block-level ancestor, returns NULL. +static cmark_node *get_containing_block(cmark_node *node) { + while (node) { + if (CMARK_NODE_BLOCK_P(node)) { + return node; + } else { + node = node->parent; + } + } + return NULL; +} + +static int S_render_node(cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + cmark_node *tmp; + int list_number; + cmark_delim_type list_delim; + int i; + bool entering = (ev_type == CMARK_EVENT_ENTER); + char listmarker[LISTMARKER_SIZE]; + bool first_in_list_item; + bufsize_t marker_width; + bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && + !(CMARK_OPT_HARDBREAKS & options); + + // Don't adjust tight list status til we've started the list. + // Otherwise we loose the blank line between a paragraph and + // a following list. 
+ if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { + tmp = get_containing_block(node); + renderer->in_tight_list_item = + tmp && // tmp might be NULL if there is no containing block + ((tmp->type == CMARK_NODE_ITEM && + cmark_node_get_list_tight(tmp->parent)) || + (tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM && + cmark_node_get_list_tight(tmp->parent->parent))); + } + + if (node->extension && node->extension->plaintext_render_func) { + node->extension->plaintext_render_func(node->extension, renderer, node, ev_type, options); + return 1; + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + break; + + case CMARK_NODE_BLOCK_QUOTE: + break; + + case CMARK_NODE_LIST: + if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK || + node->next->type == CMARK_NODE_LIST)) { + CR(); + } + break; + + case CMARK_NODE_ITEM: + if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { + marker_width = 4; + } else { + list_number = cmark_node_get_list_start(node->parent); + list_delim = cmark_node_get_list_delim(node->parent); + tmp = node; + while (tmp->prev) { + tmp = tmp->prev; + list_number += 1; + } + // we ensure a width of at least 4 so + // we get nice transition from single digits + // to double + snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, + list_delim == CMARK_PAREN_DELIM ? ")" : ".", + list_number < 10 ? 
" " : " "); + marker_width = (bufsize_t)strlen(listmarker); + } + if (entering) { + if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { + LIT(" - "); + renderer->begin_content = true; + } else { + LIT(listmarker); + renderer->begin_content = true; + } + for (i = marker_width; i--;) { + cmark_strbuf_putc(renderer->prefix, ' '); + } + } else { + cmark_strbuf_truncate(renderer->prefix, + renderer->prefix->size - marker_width); + CR(); + } + break; + + case CMARK_NODE_HEADING: + if (entering) { + renderer->begin_content = true; + renderer->no_linebreaks = true; + } else { + renderer->no_linebreaks = false; + BLANKLINE(); + } + break; + + case CMARK_NODE_CODE_BLOCK: + first_in_list_item = node->prev == NULL && node->parent && + node->parent->type == CMARK_NODE_ITEM; + + if (!first_in_list_item) { + BLANKLINE(); + } + OUT(cmark_node_get_literal(node), false, LITERAL); + BLANKLINE(); + break; + + case CMARK_NODE_HTML_BLOCK: + break; + + case CMARK_NODE_CUSTOM_BLOCK: + break; + + case CMARK_NODE_THEMATIC_BREAK: + BLANKLINE(); + break; + + case CMARK_NODE_PARAGRAPH: + if (!entering) { + BLANKLINE(); + } + break; + + case CMARK_NODE_TEXT: + OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); + break; + + case CMARK_NODE_LINEBREAK: + CR(); + break; + + case CMARK_NODE_SOFTBREAK: + if (CMARK_OPT_HARDBREAKS & options) { + CR(); + } else if (!renderer->no_linebreaks && renderer->width == 0 && + !(CMARK_OPT_HARDBREAKS & options) && + !(CMARK_OPT_NOBREAKS & options)) { + CR(); + } else { + OUT(" ", allow_wrap, LITERAL); + } + break; + + case CMARK_NODE_CODE: + OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); + break; + + case CMARK_NODE_HTML_INLINE: + break; + + case CMARK_NODE_CUSTOM_INLINE: + break; + + case CMARK_NODE_STRONG: + break; + + case CMARK_NODE_EMPH: + break; + + case CMARK_NODE_LINK: + break; + + case CMARK_NODE_IMAGE: + break; + + default: + assert(false); + break; + } + + return 1; +} + +char *cmark_render_plaintext(cmark_node *root, int 
options, int width) { + return cmark_render_plaintext_with_mem(root, options, width, cmark_node_mem(root)); +} + +char *cmark_render_plaintext_with_mem(cmark_node *root, int options, int width, cmark_mem *mem) { + if (options & CMARK_OPT_HARDBREAKS) { + // disable breaking on width, since it has + // a different meaning with OPT_HARDBREAKS + width = 0; + } + return cmark_render(mem, root, options, width, outc, S_render_node); +} diff --git a/cbits/plugin.c b/cbits/plugin.c new file mode 100644 index 0000000..3992fe1 --- /dev/null +++ b/cbits/plugin.c @@ -0,0 +1,36 @@ +#include + +#include "plugin.h" + +extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; + +int cmark_plugin_register_syntax_extension(cmark_plugin * plugin, + cmark_syntax_extension * extension) { + plugin->syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, plugin->syntax_extensions, extension); + return 1; +} + +cmark_plugin * +cmark_plugin_new(void) { + cmark_plugin *res = (cmark_plugin *) CMARK_DEFAULT_MEM_ALLOCATOR.calloc(1, sizeof(cmark_plugin)); + + res->syntax_extensions = NULL; + + return res; +} + +void +cmark_plugin_free(cmark_plugin *plugin) { + cmark_llist_free_full(&CMARK_DEFAULT_MEM_ALLOCATOR, + plugin->syntax_extensions, + (cmark_free_func) cmark_syntax_extension_free); + CMARK_DEFAULT_MEM_ALLOCATOR.free(plugin); +} + +cmark_llist * +cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin) { + cmark_llist *res = plugin->syntax_extensions; + + plugin->syntax_extensions = NULL; + return res; +} diff --git a/cbits/plugin.h b/cbits/plugin.h new file mode 100644 index 0000000..b9e9d29 --- /dev/null +++ b/cbits/plugin.h @@ -0,0 +1,34 @@ +#ifndef CMARK_PLUGIN_H +#define CMARK_PLUGIN_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cmark.h" +#include "cmark_extension_api.h" + +/** + * cmark_plugin: + * + * A plugin structure, which should be filled by plugin's + * init functions. 
+ */ +struct cmark_plugin { + cmark_llist *syntax_extensions; +}; + +cmark_llist * +cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin); + +cmark_plugin * +cmark_plugin_new(void); + +void +cmark_plugin_free(cmark_plugin *plugin); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/cbits/references.c b/cbits/references.c index 89f2dc8..1648e9b 100644 --- a/cbits/references.c +++ b/cbits/references.c @@ -5,15 +5,6 @@ #include "inlines.h" #include "chunk.h" -static unsigned int refhash(const unsigned char *link_ref) { - unsigned int hash = 0; - - while (*link_ref) - hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash; - - return hash; -} - static void reference_free(cmark_reference_map *map, cmark_reference *ref) { cmark_mem *mem = map->mem; if (ref != NULL) { @@ -53,21 +44,6 @@ static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) { return result; } -static void add_reference(cmark_reference_map *map, cmark_reference *ref) { - cmark_reference *t = ref->next = map->table[ref->hash % REFMAP_SIZE]; - - while (t) { - if (t->hash == ref->hash && !strcmp((char *)t->label, (char *)ref->label)) { - reference_free(map, ref); - return; - } - - t = t->next; - } - - map->table[ref->hash % REFMAP_SIZE] = ref; -} - void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, cmark_chunk *url, cmark_chunk *title) { cmark_reference *ref; @@ -77,64 +53,98 @@ void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, if (reflabel == NULL) return; + assert(map->sorted == NULL); + ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref)); ref->label = reflabel; - ref->hash = refhash(ref->label); ref->url = cmark_clean_url(map->mem, url); ref->title = cmark_clean_title(map->mem, title); - ref->next = NULL; + ref->age = map->size; + ref->next = map->refs; + + map->refs = ref; + map->size++; +} + +static int +labelcmp(const unsigned char *a, const unsigned char *b) { + return strcmp((const char *)a, (const char *)b); +} + 
+static int +refcmp(const void *p1, const void *p2) { + cmark_reference *r1 = *(cmark_reference **)p1; + cmark_reference *r2 = *(cmark_reference **)p2; + int res = labelcmp(r1->label, r2->label); + return res ? res : ((int)r1->age - (int)r2->age); +} + +static int +refsearch(const void *label, const void *p2) { + cmark_reference *ref = *(cmark_reference **)p2; + return labelcmp((const unsigned char *)label, ref->label); +} + +static void sort_references(cmark_reference_map *map) { + unsigned int i = 0, last = 0, size = map->size; + cmark_reference *r = map->refs, **sorted = NULL; + + sorted = (cmark_reference **)map->mem->calloc(size, sizeof(cmark_reference *)); + while (r) { + sorted[i++] = r; + r = r->next; + } + + qsort(sorted, size, sizeof(cmark_reference *), refcmp); + + for (i = 1; i < size; i++) { + if (labelcmp(sorted[i]->label, sorted[last]->label) != 0) + sorted[++last] = sorted[i]; + } - add_reference(map, ref); + map->sorted = sorted; + map->size = last + 1; } // Returns reference if refmap contains a reference with matching // label, otherwise NULL. cmark_reference *cmark_reference_lookup(cmark_reference_map *map, cmark_chunk *label) { - cmark_reference *ref = NULL; + cmark_reference **ref = NULL; unsigned char *norm; - unsigned int hash; if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) return NULL; - if (map == NULL) + if (map == NULL || !map->size) return NULL; norm = normalize_reference(map->mem, label); if (norm == NULL) return NULL; - hash = refhash(norm); - ref = map->table[hash % REFMAP_SIZE]; - - while (ref) { - if (ref->hash == hash && !strcmp((char *)ref->label, (char *)norm)) - break; - ref = ref->next; - } + if (!map->sorted) + sort_references(map); + ref = (cmark_reference **)bsearch(norm, map->sorted, map->size, sizeof(cmark_reference *), refsearch); map->mem->free(norm); - return ref; + return ref ? 
ref[0] : NULL; } void cmark_reference_map_free(cmark_reference_map *map) { - unsigned int i; + cmark_reference *ref; if (map == NULL) return; - for (i = 0; i < REFMAP_SIZE; ++i) { - cmark_reference *ref = map->table[i]; - cmark_reference *next; - - while (ref) { - next = ref->next; - reference_free(map, ref); - ref = next; - } + ref = map->refs; + while (ref) { + cmark_reference *next = ref->next; + reference_free(map, ref); + ref = next; } + map->mem->free(map->sorted); map->mem->free(map); } diff --git a/cbits/references.h b/cbits/references.h index f075bbb..0bbbd5f 100644 --- a/cbits/references.h +++ b/cbits/references.h @@ -8,21 +8,21 @@ extern "C" { #endif -#define REFMAP_SIZE 16 - struct cmark_reference { struct cmark_reference *next; unsigned char *label; cmark_chunk url; cmark_chunk title; - unsigned int hash; + unsigned int age; }; typedef struct cmark_reference cmark_reference; struct cmark_reference_map { cmark_mem *mem; - cmark_reference *table[REFMAP_SIZE]; + cmark_reference *refs; + cmark_reference **sorted; + unsigned int size; }; typedef struct cmark_reference_map cmark_reference_map; diff --git a/cbits/registry.c b/cbits/registry.c new file mode 100644 index 0000000..3ff01f2 --- /dev/null +++ b/cbits/registry.c @@ -0,0 +1,63 @@ +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "syntax_extension.h" +#include "registry.h" +#include "plugin.h" + +extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; + +static cmark_llist *syntax_extensions = NULL; + +void cmark_register_plugin(cmark_plugin_init_func reg_fn) { + cmark_plugin *plugin = cmark_plugin_new(); + + if (!reg_fn(plugin)) { + cmark_plugin_free(plugin); + return; + } + + cmark_llist *syntax_extensions_list = cmark_plugin_steal_syntax_extensions(plugin), + *it; + + for (it = syntax_extensions_list; it; it = it->next) { + syntax_extensions = cmark_llist_append(&CMARK_DEFAULT_MEM_ALLOCATOR, syntax_extensions, it->data); + } + + cmark_llist_free(&CMARK_DEFAULT_MEM_ALLOCATOR, 
syntax_extensions_list); + cmark_plugin_free(plugin); +} + +void cmark_release_plugins(void) { + if (syntax_extensions) { + cmark_llist_free_full( + &CMARK_DEFAULT_MEM_ALLOCATOR, + syntax_extensions, + (cmark_free_func) cmark_syntax_extension_free); + syntax_extensions = NULL; + } +} + +cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem) { + cmark_llist *it; + cmark_llist *res = NULL; + + for (it = syntax_extensions; it; it = it->next) { + res = cmark_llist_append(mem, res, it->data); + } + return res; +} + +cmark_syntax_extension *cmark_find_syntax_extension(const char *name) { + cmark_llist *tmp; + + for (tmp = syntax_extensions; tmp; tmp = tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + if (!strcmp(ext->name, name)) + return ext; + } + return NULL; +} diff --git a/cbits/registry.h b/cbits/registry.h new file mode 100644 index 0000000..0f0fbae --- /dev/null +++ b/cbits/registry.h @@ -0,0 +1,24 @@ +#ifndef CMARK_REGISTRY_H +#define CMARK_REGISTRY_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cmark.h" +#include "plugin.h" + +CMARK_EXPORT +void cmark_register_plugin(cmark_plugin_init_func reg_fn); + +CMARK_EXPORT +void cmark_release_plugins(void); + +CMARK_EXPORT +cmark_llist *cmark_list_syntax_extensions(cmark_mem *mem); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/cbits/render.c b/cbits/render.c index 20dca5f..e731748 100644 --- a/cbits/render.c +++ b/cbits/render.c @@ -5,6 +5,7 @@ #include "utf8.h" #include "render.h" #include "node.h" +#include "syntax_extension.h" static CMARK_INLINE void S_cr(cmark_renderer *renderer) { if (renderer->need_cr < 1) { @@ -18,9 +19,10 @@ static CMARK_INLINE void S_blankline(cmark_renderer *renderer) { } } -static void S_out(cmark_renderer *renderer, const char *source, bool wrap, +static void S_out(cmark_renderer *renderer, cmark_node *node, + const char *source, bool wrap, cmark_escaping escape) { - int length = strlen(source); + int length = (int)strlen(source); 
unsigned char nextc; int32_t c; int i = 0; @@ -29,6 +31,16 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap, cmark_chunk remainder = cmark_chunk_literal(""); int k = renderer->buffer->size - 1; + cmark_syntax_extension *ext = NULL; + cmark_node *n = node; + while (n && !ext) { + ext = n->extension; + if (!ext) + n = n->parent; + } + if (ext && !ext->commonmark_escape_func) + ext = NULL; + wrap = wrap && !renderer->no_linebreaks; if (renderer->in_tight_list_item && renderer->need_cr > 1) { @@ -62,6 +74,10 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap, if (len == -1) { // error condition return; // return without rendering rest of string } + + if (ext && ext->commonmark_escape_func(ext, node, c)) + cmark_strbuf_putc(renderer->buffer, '\\'); + nextc = source[i + len]; if (c == 32 && wrap) { if (!renderer->begin_line) { @@ -95,12 +111,12 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap, // we need to escape a potential list marker after // a digit: renderer->begin_content = - renderer->begin_content && cmark_isdigit(c) == 1; + renderer->begin_content && cmark_isdigit((char)c) == 1; } else { - (renderer->outc)(renderer, escape, c, nextc); + (renderer->outc)(renderer, node, escape, c, nextc); renderer->begin_line = false; renderer->begin_content = - renderer->begin_content && cmark_isdigit(c) == 1; + renderer->begin_content && cmark_isdigit((char)c) == 1; } // If adding the character went beyond width, look for an @@ -142,13 +158,13 @@ void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) { renderer->column += 1; } -char *cmark_render(cmark_node *root, int options, int width, - void (*outc)(cmark_renderer *, cmark_escaping, int32_t, +char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, + void (*outc)(cmark_renderer *, cmark_node *, + cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, cmark_node *node, 
cmark_event_type ev_type, int options)) { - cmark_mem *mem = cmark_node_mem(root); cmark_strbuf pref = CMARK_BUF_INIT(mem); cmark_strbuf buf = CMARK_BUF_INIT(mem); cmark_node *cur; diff --git a/cbits/render.h b/cbits/render.h index 35eb0a6..36c4820 100644 --- a/cbits/render.h +++ b/cbits/render.h @@ -24,20 +24,30 @@ struct cmark_renderer { bool begin_content; bool no_linebreaks; bool in_tight_list_item; - void (*outc)(struct cmark_renderer *, cmark_escaping, int32_t, unsigned char); + void (*outc)(struct cmark_renderer *, cmark_node *, cmark_escaping, int32_t, unsigned char); void (*cr)(struct cmark_renderer *); void (*blankline)(struct cmark_renderer *); - void (*out)(struct cmark_renderer *, const char *, bool, cmark_escaping); + void (*out)(struct cmark_renderer *, cmark_node *, const char *, bool, cmark_escaping); }; typedef struct cmark_renderer cmark_renderer; +struct cmark_html_renderer { + cmark_strbuf *html; + cmark_node *plain; + cmark_llist *filter_extensions; + void *opaque; +}; + +typedef struct cmark_html_renderer cmark_html_renderer; + void cmark_render_ascii(cmark_renderer *renderer, const char *s); void cmark_render_code_point(cmark_renderer *renderer, uint32_t c); -char *cmark_render(cmark_node *root, int options, int width, - void (*outc)(cmark_renderer *, cmark_escaping, int32_t, +char *cmark_render(cmark_mem *mem, cmark_node *root, int options, int width, + void (*outc)(cmark_renderer *, cmark_node *, + cmark_escaping, int32_t, unsigned char), int (*render_node)(cmark_renderer *renderer, cmark_node *node, diff --git a/cbits/scanners.c b/cbits/scanners.c index b312f66..319a0c0 100644 --- a/cbits/scanners.c +++ b/cbits/scanners.c @@ -752,7 +752,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 0, 128, 0, 128, 128, 128, 128, 128, 128, 128, + 128, 
128, 128, 128, 128, 128, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, @@ -839,7 +839,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) { } if (yych <= 0xEC) { if (yych <= 0xC1) { - if (yych <= '<') + if (yych <= ' ') goto yy45; if (yych <= '>') goto yy85; @@ -7887,45 +7887,35 @@ bufsize_t _scan_html_tag(const unsigned char *p) { unsigned char yych; static const unsigned char yybm[] = { /* table 1 .. 8: 0 */ - 0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, + 0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, - 239, 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255, + 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, 239, + 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239, + 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, - 239, 239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, + 239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* table 9 .. 11: 256 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 128, 0, 0, 0, 0, 0, 0, 160, 160, 160, 160, 160, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 128, 0, + 0, 0, 0, 0, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, 0, 128, 0, 160, + 160, 0, 0, 0, 0, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, + 160, 160, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; yych = *p; if (yych <= '>') { @@ -9754,6 +9744,272 @@ bufsize_t _scan_html_tag(const unsigned char *p) { } } +// Try to (liberally) match an HTML tag after first <, returning num of chars +// matched. +bufsize_t _scan_liberal_html_tag(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *p; + if (yych <= 0xE0) { + if (yych <= '\n') { + if (yych <= 0x00) + goto yy493; + if (yych <= '\t') + goto yy495; + } else { + if (yych <= 0x7F) + goto yy495; + if (yych <= 0xC1) + goto yy493; + if (yych <= 0xDF) + goto yy496; + goto yy497; + } + } else { + if (yych <= 0xEF) { + if (yych == 0xED) + goto yy499; + goto yy498; + } else { + if (yych <= 0xF0) + goto yy500; + if (yych <= 0xF3) + goto yy501; + if (yych <= 0xF4) + goto 
yy502; + } + } + yy493: + ++p; + yy494 : { return 0; } + yy495: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '\n') { + if (yych <= 0x00) + goto yy494; + if (yych <= '\t') + goto yy507; + goto yy494; + } else { + if (yych <= 0x7F) + goto yy507; + if (yych <= 0xC1) + goto yy494; + if (yych <= 0xF4) + goto yy507; + goto yy494; + } + yy496: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych <= 0xBF) + goto yy506; + goto yy494; + yy497: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x9F) + goto yy494; + if (yych <= 0xBF) + goto yy505; + goto yy494; + yy498: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych <= 0xBF) + goto yy505; + goto yy494; + yy499: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych <= 0x9F) + goto yy505; + goto yy494; + yy500: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x8F) + goto yy494; + if (yych <= 0xBF) + goto yy503; + goto yy494; + yy501: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych <= 0xBF) + goto yy503; + goto yy494; + yy502: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= 0x7F) + goto yy494; + if (yych >= 0x90) + goto yy494; + yy503: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych <= 0xBF) + goto yy505; + yy504: + p = marker; + if (yyaccept == 0) { + goto yy494; + } else { + goto yy510; + } + yy505: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych >= 0xC0) + goto yy504; + yy506: + ++p; + yych = *p; + yy507: + if (yybm[0 + yych] & 64) { + goto yy506; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy504; + if (yych >= '?') + goto yy504; + } else { + if (yych <= 0xDF) + goto yy505; + if (yych <= 0xE0) + goto yy511; + goto yy503; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy515; + if (yych <= 0xEF) + goto yy503; + goto yy512; + } else { + if (yych <= 0xF3) + goto yy513; + if 
(yych <= 0xF4) + goto yy514; + goto yy504; + } + } + yy508: + yyaccept = 1; + marker = ++p; + yych = *p; + if (yybm[0 + yych] & 64) { + goto yy506; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\n') + goto yy510; + if (yych <= '>') + goto yy508; + } else { + if (yych <= 0xDF) + goto yy505; + if (yych <= 0xE0) + goto yy511; + goto yy503; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy515; + if (yych <= 0xEF) + goto yy503; + goto yy512; + } else { + if (yych <= 0xF3) + goto yy513; + if (yych <= 0xF4) + goto yy514; + } + } + yy510 : { return (bufsize_t)(p - start); } + yy511: + ++p; + yych = *p; + if (yych <= 0x9F) + goto yy504; + if (yych <= 0xBF) + goto yy505; + goto yy504; + yy512: + ++p; + yych = *p; + if (yych <= 0x8F) + goto yy504; + if (yych <= 0xBF) + goto yy503; + goto yy504; + yy513: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych <= 0xBF) + goto yy503; + goto yy504; + yy514: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych <= 0x8F) + goto yy503; + goto yy504; + yy515: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy504; + if (yych <= 0x9F) + goto yy505; + goto yy504; + } +} + // Try to match an HTML block tag start line, returning // an integer code for the type of block (1-6, matching the spec). // #7 is handled by a separate function, below. 
@@ -9764,239 +10020,239 @@ bufsize_t _scan_html_block_start(const unsigned char *p) { unsigned char yych; yych = *p; if (yych == '<') - goto yy495; + goto yy520; ++p; - yy494 : { return 0; } - yy495: + yy519 : { return 0; } + yy520: yych = *(marker = ++p); switch (yych) { case '!': - goto yy513; + goto yy538; case '/': - goto yy496; + goto yy521; case '?': - goto yy514; + goto yy539; case 'A': case 'a': - goto yy499; + goto yy524; case 'B': case 'b': - goto yy500; + goto yy525; case 'C': case 'c': - goto yy501; + goto yy526; case 'D': case 'd': - goto yy502; + goto yy527; case 'F': case 'f': - goto yy503; + goto yy528; case 'H': case 'h': - goto yy504; + goto yy529; case 'I': case 'i': - goto yy505; + goto yy530; case 'L': case 'l': - goto yy506; + goto yy531; case 'M': case 'm': - goto yy507; + goto yy532; case 'N': case 'n': - goto yy508; + goto yy533; case 'O': case 'o': - goto yy509; + goto yy534; case 'P': case 'p': - goto yy498; + goto yy523; case 'S': case 's': - goto yy510; + goto yy535; case 'T': case 't': - goto yy511; + goto yy536; case 'U': case 'u': - goto yy512; + goto yy537; default: - goto yy494; + goto yy519; } - yy496: + yy521: yych = *++p; switch (yych) { case 'A': case 'a': - goto yy499; + goto yy524; case 'B': case 'b': - goto yy500; + goto yy525; case 'C': case 'c': - goto yy501; + goto yy526; case 'D': case 'd': - goto yy502; + goto yy527; case 'F': case 'f': - goto yy503; + goto yy528; case 'H': case 'h': - goto yy504; + goto yy529; case 'I': case 'i': - goto yy505; + goto yy530; case 'L': case 'l': - goto yy506; + goto yy531; case 'M': case 'm': - goto yy507; + goto yy532; case 'N': case 'n': - goto yy508; + goto yy533; case 'O': case 'o': - goto yy509; + goto yy534; case 'P': case 'p': - goto yy698; + goto yy723; case 'S': case 's': - goto yy699; + goto yy724; case 'T': case 't': - goto yy511; + goto yy536; case 'U': case 'u': - goto yy512; + goto yy537; default: - goto yy497; + goto yy522; } - yy497: + yy522: p = marker; - goto yy494; - 
yy498: + goto yy519; + yy523: yych = *++p; if (yych <= '>') { if (yych <= ' ') { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; + goto yy555; if (yych <= 0x1F) - goto yy497; - goto yy530; + goto yy522; + goto yy555; } else { if (yych == '/') - goto yy532; + goto yy557; if (yych <= '=') - goto yy497; - goto yy530; + goto yy522; + goto yy555; } } else { if (yych <= 'R') { if (yych == 'A') - goto yy695; + goto yy720; if (yych <= 'Q') - goto yy497; - goto yy694; + goto yy522; + goto yy719; } else { if (yych <= 'a') { if (yych <= '`') - goto yy497; - goto yy695; + goto yy522; + goto yy720; } else { if (yych == 'r') - goto yy694; - goto yy497; + goto yy719; + goto yy522; } } } - yy499: + yy524: yych = *++p; if (yych <= 'S') { if (yych <= 'D') { if (yych <= 'C') - goto yy497; - goto yy683; + goto yy522; + goto yy708; } else { if (yych <= 'Q') - goto yy497; + goto yy522; if (yych <= 'R') - goto yy682; - goto yy681; + goto yy707; + goto yy706; } } else { if (yych <= 'q') { if (yych == 'd') - goto yy683; - goto yy497; + goto yy708; + goto yy522; } else { if (yych <= 'r') - goto yy682; + goto yy707; if (yych <= 's') - goto yy681; - goto yy497; + goto yy706; + goto yy522; } } - yy500: + yy525: yych = *++p; if (yych <= 'O') { if (yych <= 'K') { if (yych == 'A') - goto yy667; - goto yy497; + goto yy692; + goto yy522; } else { if (yych <= 'L') - goto yy666; + goto yy691; if (yych <= 'N') - goto yy497; - goto yy665; + goto yy522; + goto yy690; } } else { if (yych <= 'k') { if (yych == 'a') - goto yy667; - goto yy497; + goto yy692; + goto yy522; } else { if (yych <= 'l') - goto yy666; + goto yy691; if (yych == 'o') - goto yy665; - goto yy497; + goto yy690; + goto yy522; } } - yy501: + yy526: yych = *++p; if (yych <= 'O') { if (yych <= 'D') { if (yych == 'A') - goto yy652; - goto yy497; + goto yy677; + goto yy522; } else { if (yych <= 'E') - goto yy651; + goto yy676; if (yych <= 'N') - goto yy497; - goto yy650; + goto yy522; + goto yy675; } } else { if 
(yych <= 'd') { if (yych == 'a') - goto yy652; - goto yy497; + goto yy677; + goto yy522; } else { if (yych <= 'e') - goto yy651; + goto yy676; if (yych == 'o') - goto yy650; - goto yy497; + goto yy675; + goto yy522; } } - yy502: + yy527: yych = *++p; switch (yych) { case 'D': @@ -10005,1664 +10261,1664 @@ bufsize_t _scan_html_block_start(const unsigned char *p) { case 'd': case 'l': case 't': - goto yy529; + goto yy554; case 'E': case 'e': - goto yy642; + goto yy667; case 'I': case 'i': - goto yy641; + goto yy666; default: - goto yy497; + goto yy522; } - yy503: + yy528: yych = *++p; if (yych <= 'R') { if (yych <= 'N') { if (yych == 'I') - goto yy617; - goto yy497; + goto yy642; + goto yy522; } else { if (yych <= 'O') - goto yy616; + goto yy641; if (yych <= 'Q') - goto yy497; - goto yy615; + goto yy522; + goto yy640; } } else { if (yych <= 'n') { if (yych == 'i') - goto yy617; - goto yy497; + goto yy642; + goto yy522; } else { if (yych <= 'o') - goto yy616; + goto yy641; if (yych == 'r') - goto yy615; - goto yy497; + goto yy640; + goto yy522; } } - yy504: + yy529: yych = *++p; if (yych <= 'S') { if (yych <= 'D') { if (yych <= '0') - goto yy497; + goto yy522; if (yych <= '6') - goto yy529; - goto yy497; + goto yy554; + goto yy522; } else { if (yych <= 'E') - goto yy610; + goto yy635; if (yych == 'R') - goto yy529; - goto yy497; + goto yy554; + goto yy522; } } else { if (yych <= 'q') { if (yych <= 'T') - goto yy609; + goto yy634; if (yych == 'e') - goto yy610; - goto yy497; + goto yy635; + goto yy522; } else { if (yych <= 'r') - goto yy529; + goto yy554; if (yych == 't') - goto yy609; - goto yy497; + goto yy634; + goto yy522; } } - yy505: + yy530: yych = *++p; if (yych == 'F') - goto yy605; + goto yy630; if (yych == 'f') - goto yy605; - goto yy497; - yy506: + goto yy630; + goto yy522; + yy531: yych = *++p; if (yych <= 'I') { if (yych == 'E') - goto yy600; + goto yy625; if (yych <= 'H') - goto yy497; - goto yy599; + goto yy522; + goto yy624; } else { if (yych <= 'e') { 
if (yych <= 'd') - goto yy497; - goto yy600; + goto yy522; + goto yy625; } else { if (yych == 'i') - goto yy599; - goto yy497; + goto yy624; + goto yy522; } } - yy507: + yy532: yych = *++p; if (yych <= 'E') { if (yych == 'A') - goto yy591; + goto yy616; if (yych <= 'D') - goto yy497; - goto yy590; + goto yy522; + goto yy615; } else { if (yych <= 'a') { if (yych <= '`') - goto yy497; - goto yy591; + goto yy522; + goto yy616; } else { if (yych == 'e') - goto yy590; - goto yy497; + goto yy615; + goto yy522; } } - yy508: + yy533: yych = *++p; if (yych <= 'O') { if (yych == 'A') - goto yy584; + goto yy609; if (yych <= 'N') - goto yy497; - goto yy583; + goto yy522; + goto yy608; } else { if (yych <= 'a') { if (yych <= '`') - goto yy497; - goto yy584; + goto yy522; + goto yy609; } else { if (yych == 'o') - goto yy583; - goto yy497; + goto yy608; + goto yy522; } } - yy509: + yy534: yych = *++p; if (yych <= 'P') { if (yych == 'L') - goto yy529; + goto yy554; if (yych <= 'O') - goto yy497; - goto yy575; + goto yy522; + goto yy600; } else { if (yych <= 'l') { if (yych <= 'k') - goto yy497; - goto yy529; + goto yy522; + goto yy554; } else { if (yych == 'p') - goto yy575; - goto yy497; + goto yy600; + goto yy522; } } - yy510: + yy535: yych = *++p; switch (yych) { case 'C': case 'c': - goto yy552; + goto yy577; case 'E': case 'e': - goto yy555; + goto yy580; case 'O': case 'o': - goto yy554; + goto yy579; case 'T': case 't': - goto yy551; + goto yy576; case 'U': case 'u': - goto yy553; + goto yy578; default: - goto yy497; + goto yy522; } - yy511: + yy536: yych = *++p; switch (yych) { case 'A': case 'a': - goto yy538; + goto yy563; case 'B': case 'b': - goto yy537; + goto yy562; case 'D': case 'd': - goto yy529; + goto yy554; case 'F': case 'f': - goto yy536; + goto yy561; case 'H': case 'h': - goto yy535; + goto yy560; case 'I': case 'i': - goto yy534; + goto yy559; case 'R': case 'r': - goto yy533; + goto yy558; default: - goto yy497; + goto yy522; } - yy512: + yy537: yych = 
*++p; if (yych == 'L') - goto yy529; + goto yy554; if (yych == 'l') - goto yy529; - goto yy497; - yy513: + goto yy554; + goto yy522; + yy538: yych = *++p; if (yych <= '@') { if (yych == '-') - goto yy516; - goto yy497; + goto yy541; + goto yy522; } else { if (yych <= 'Z') - goto yy517; + goto yy542; if (yych <= '[') - goto yy519; - goto yy497; + goto yy544; + goto yy522; } - yy514: + yy539: ++p; { return 3; } - yy516: + yy541: yych = *++p; if (yych == '-') - goto yy527; - goto yy497; - yy517: + goto yy552; + goto yy522; + yy542: ++p; { return 4; } - yy519: + yy544: yych = *++p; if (yych == 'C') - goto yy520; + goto yy545; if (yych != 'c') - goto yy497; - yy520: + goto yy522; + yy545: yych = *++p; if (yych == 'D') - goto yy521; + goto yy546; if (yych != 'd') - goto yy497; - yy521: + goto yy522; + yy546: yych = *++p; if (yych == 'A') - goto yy522; + goto yy547; if (yych != 'a') - goto yy497; - yy522: + goto yy522; + yy547: yych = *++p; if (yych == 'T') - goto yy523; + goto yy548; if (yych != 't') - goto yy497; - yy523: + goto yy522; + yy548: yych = *++p; if (yych == 'A') - goto yy524; + goto yy549; if (yych != 'a') - goto yy497; - yy524: + goto yy522; + yy549: yych = *++p; if (yych != '[') - goto yy497; + goto yy522; ++p; { return 5; } - yy527: + yy552: ++p; { return 2; } - yy529: + yy554: yych = *++p; if (yych <= ' ') { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; + goto yy555; if (yych <= 0x1F) - goto yy497; + goto yy522; } else { if (yych <= '/') { if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } else { if (yych != '>') - goto yy497; + goto yy522; } } - yy530: + yy555: ++p; { return 6; } - yy532: + yy557: yych = *++p; if (yych == '>') - goto yy530; - goto yy497; - yy533: + goto yy555; + goto yy522; + yy558: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - 
goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= '@') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'A') - goto yy549; + goto yy574; if (yych == 'a') - goto yy549; - goto yy497; + goto yy574; + goto yy522; } } - yy534: + yy559: yych = *++p; if (yych == 'T') - goto yy547; + goto yy572; if (yych == 't') - goto yy547; - goto yy497; - yy535: + goto yy572; + goto yy522; + yy560: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'D') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'E') - goto yy545; + goto yy570; if (yych == 'e') - goto yy545; - goto yy497; + goto yy570; + goto yy522; } } - yy536: + yy561: yych = *++p; if (yych == 'O') - goto yy543; + goto yy568; if (yych == 'o') - goto yy543; - goto yy497; - yy537: + goto yy568; + goto yy522; + yy562: yych = *++p; if (yych == 'O') - goto yy541; + goto yy566; if (yych == 'o') - goto yy541; - goto yy497; - yy538: + goto yy566; + goto yy522; + yy563: yych = *++p; if (yych == 'B') - goto yy539; + goto yy564; if (yych != 'b') - goto yy497; - yy539: + goto yy522; + yy564: yych = *++p; if (yych == 'L') - goto yy540; + goto yy565; if (yych != 'l') - goto yy497; - yy540: + goto yy522; + yy565: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy541: + goto yy554; + goto yy522; + yy566: yych = *++p; if (yych == 'D') - goto yy542; + goto yy567; if (yych != 'd') - goto yy497; - yy542: + goto yy522; + yy567: yych = *++p; if (yych == 'Y') - goto yy529; + goto yy554; if (yych == 'y') - goto yy529; - goto yy497; - yy543: + goto yy554; + goto 
yy522; + yy568: yych = *++p; if (yych == 'O') - goto yy544; + goto yy569; if (yych != 'o') - goto yy497; - yy544: + goto yy522; + yy569: yych = *++p; if (yych == 'T') - goto yy529; + goto yy554; if (yych == 't') - goto yy529; - goto yy497; - yy545: + goto yy554; + goto yy522; + yy570: yych = *++p; if (yych == 'A') - goto yy546; + goto yy571; if (yych != 'a') - goto yy497; - yy546: + goto yy522; + yy571: yych = *++p; if (yych == 'D') - goto yy529; + goto yy554; if (yych == 'd') - goto yy529; - goto yy497; - yy547: + goto yy554; + goto yy522; + yy572: yych = *++p; if (yych == 'L') - goto yy548; + goto yy573; if (yych != 'l') - goto yy497; - yy548: + goto yy522; + yy573: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy549: + goto yy554; + goto yy522; + yy574: yych = *++p; if (yych == 'C') - goto yy550; + goto yy575; if (yych != 'c') - goto yy497; - yy550: - yych = *++p; + goto yy522; + yy575: + yych = *++p; if (yych == 'K') - goto yy529; + goto yy554; if (yych == 'k') - goto yy529; - goto yy497; - yy551: + goto yy554; + goto yy522; + yy576: yych = *++p; if (yych == 'Y') - goto yy573; + goto yy598; if (yych == 'y') - goto yy573; - goto yy497; - yy552: + goto yy598; + goto yy522; + yy577: yych = *++p; if (yych == 'R') - goto yy567; + goto yy592; if (yych == 'r') - goto yy567; - goto yy497; - yy553: + goto yy592; + goto yy522; + yy578: yych = *++p; if (yych == 'M') - goto yy563; + goto yy588; if (yych == 'm') - goto yy563; - goto yy497; - yy554: + goto yy588; + goto yy522; + yy579: yych = *++p; if (yych == 'U') - goto yy560; + goto yy585; if (yych == 'u') - goto yy560; - goto yy497; - yy555: + goto yy585; + goto yy522; + yy580: yych = *++p; if (yych == 'C') - goto yy556; + goto yy581; if (yych != 'c') - goto yy497; - yy556: + goto yy522; + yy581: yych = *++p; if (yych == 'T') - goto yy557; + goto yy582; if (yych != 't') - goto yy497; - yy557: + goto yy522; + yy582: yych = *++p; if (yych == 'I') - goto yy558; + 
goto yy583; if (yych != 'i') - goto yy497; - yy558: + goto yy522; + yy583: yych = *++p; if (yych == 'O') - goto yy559; + goto yy584; if (yych != 'o') - goto yy497; - yy559: + goto yy522; + yy584: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; - yy560: + goto yy554; + goto yy522; + yy585: yych = *++p; if (yych == 'R') - goto yy561; + goto yy586; if (yych != 'r') - goto yy497; - yy561: + goto yy522; + yy586: yych = *++p; if (yych == 'C') - goto yy562; + goto yy587; if (yych != 'c') - goto yy497; - yy562: + goto yy522; + yy587: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy563: + goto yy554; + goto yy522; + yy588: yych = *++p; if (yych == 'M') - goto yy564; + goto yy589; if (yych != 'm') - goto yy497; - yy564: + goto yy522; + yy589: yych = *++p; if (yych == 'A') - goto yy565; + goto yy590; if (yych != 'a') - goto yy497; - yy565: + goto yy522; + yy590: yych = *++p; if (yych == 'R') - goto yy566; + goto yy591; if (yych != 'r') - goto yy497; - yy566: + goto yy522; + yy591: yych = *++p; if (yych == 'Y') - goto yy529; + goto yy554; if (yych == 'y') - goto yy529; - goto yy497; - yy567: + goto yy554; + goto yy522; + yy592: yych = *++p; if (yych == 'I') - goto yy568; + goto yy593; if (yych != 'i') - goto yy497; - yy568: + goto yy522; + yy593: yych = *++p; if (yych == 'P') - goto yy569; + goto yy594; if (yych != 'p') - goto yy497; - yy569: + goto yy522; + yy594: yych = *++p; if (yych == 'T') - goto yy570; + goto yy595; if (yych != 't') - goto yy497; - yy570: + goto yy522; + yy595: yych = *++p; if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych >= 0x0E) - goto yy497; + goto yy522; } else { if (yych <= ' ') - goto yy571; + goto yy596; if (yych != '>') - goto yy497; + goto yy522; } - yy571: + yy596: ++p; { return 1; } - yy573: + yy598: yych = *++p; if (yych == 'L') - goto yy574; + goto yy599; if (yych != 'l') - goto yy497; - yy574: + goto 
yy522; + yy599: yych = *++p; if (yych == 'E') - goto yy570; + goto yy595; if (yych == 'e') - goto yy570; - goto yy497; - yy575: + goto yy595; + goto yy522; + yy600: yych = *++p; if (yych == 'T') - goto yy576; + goto yy601; if (yych != 't') - goto yy497; - yy576: + goto yy522; + yy601: yych = *++p; if (yych <= 'I') { if (yych == 'G') - goto yy578; + goto yy603; if (yych <= 'H') - goto yy497; + goto yy522; } else { if (yych <= 'g') { if (yych <= 'f') - goto yy497; - goto yy578; + goto yy522; + goto yy603; } else { if (yych != 'i') - goto yy497; + goto yy522; } } yych = *++p; if (yych == 'O') - goto yy582; + goto yy607; if (yych == 'o') - goto yy582; - goto yy497; - yy578: + goto yy607; + goto yy522; + yy603: yych = *++p; if (yych == 'R') - goto yy579; + goto yy604; if (yych != 'r') - goto yy497; - yy579: + goto yy522; + yy604: yych = *++p; if (yych == 'O') - goto yy580; + goto yy605; if (yych != 'o') - goto yy497; - yy580: + goto yy522; + yy605: yych = *++p; if (yych == 'U') - goto yy581; + goto yy606; if (yych != 'u') - goto yy497; - yy581: + goto yy522; + yy606: yych = *++p; if (yych == 'P') - goto yy529; + goto yy554; if (yych == 'p') - goto yy529; - goto yy497; - yy582: + goto yy554; + goto yy522; + yy607: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; - yy583: + goto yy554; + goto yy522; + yy608: yych = *++p; if (yych == 'F') - goto yy585; + goto yy610; if (yych == 'f') - goto yy585; - goto yy497; - yy584: + goto yy610; + goto yy522; + yy609: yych = *++p; if (yych == 'V') - goto yy529; + goto yy554; if (yych == 'v') - goto yy529; - goto yy497; - yy585: + goto yy554; + goto yy522; + yy610: yych = *++p; if (yych == 'R') - goto yy586; + goto yy611; if (yych != 'r') - goto yy497; - yy586: + goto yy522; + yy611: yych = *++p; if (yych == 'A') - goto yy587; + goto yy612; if (yych != 'a') - goto yy497; - yy587: + goto yy522; + yy612: yych = *++p; if (yych == 'M') - goto yy588; + goto yy613; if (yych != 'm') - goto 
yy497; - yy588: + goto yy522; + yy613: yych = *++p; if (yych == 'E') - goto yy589; + goto yy614; if (yych != 'e') - goto yy497; - yy589: + goto yy522; + yy614: yych = *++p; if (yych == 'S') - goto yy529; + goto yy554; if (yych == 's') - goto yy529; - goto yy497; - yy590: + goto yy554; + goto yy522; + yy615: yych = *++p; if (yych <= 'T') { if (yych == 'N') - goto yy593; + goto yy618; if (yych <= 'S') - goto yy497; - goto yy594; + goto yy522; + goto yy619; } else { if (yych <= 'n') { if (yych <= 'm') - goto yy497; - goto yy593; + goto yy522; + goto yy618; } else { if (yych == 't') - goto yy594; - goto yy497; + goto yy619; + goto yy522; } } - yy591: + yy616: yych = *++p; if (yych == 'I') - goto yy592; + goto yy617; if (yych != 'i') - goto yy497; - yy592: + goto yy522; + yy617: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; - yy593: + goto yy554; + goto yy522; + yy618: yych = *++p; if (yych == 'U') - goto yy595; + goto yy620; if (yych == 'u') - goto yy595; - goto yy497; - yy594: + goto yy620; + goto yy522; + yy619: yych = *++p; if (yych == 'A') - goto yy529; + goto yy554; if (yych == 'a') - goto yy529; - goto yy497; - yy595: + goto yy554; + goto yy522; + yy620: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'H') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'I') - goto yy596; + goto yy621; if (yych != 'i') - goto yy497; + goto yy522; } } - yy596: + yy621: yych = *++p; if (yych == 'T') - goto yy597; + goto yy622; if (yych != 't') - goto yy497; - yy597: + goto yy522; + yy622: yych = *++p; if (yych == 'E') - goto yy598; + goto yy623; if (yych != 'e') - goto yy497; - yy598: + goto yy522; + yy623: 
yych = *++p; if (yych == 'M') - goto yy529; + goto yy554; if (yych == 'm') - goto yy529; - goto yy497; - yy599: + goto yy554; + goto yy522; + yy624: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'M') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'N') - goto yy604; + goto yy629; if (yych == 'n') - goto yy604; - goto yy497; + goto yy629; + goto yy522; } } - yy600: + yy625: yych = *++p; if (yych == 'G') - goto yy601; + goto yy626; if (yych != 'g') - goto yy497; - yy601: + goto yy522; + yy626: yych = *++p; if (yych == 'E') - goto yy602; + goto yy627; if (yych != 'e') - goto yy497; - yy602: + goto yy522; + yy627: yych = *++p; if (yych == 'N') - goto yy603; + goto yy628; if (yych != 'n') - goto yy497; - yy603: + goto yy522; + yy628: yych = *++p; if (yych == 'D') - goto yy529; + goto yy554; if (yych == 'd') - goto yy529; - goto yy497; - yy604: + goto yy554; + goto yy522; + yy629: yych = *++p; if (yych == 'K') - goto yy529; + goto yy554; if (yych == 'k') - goto yy529; - goto yy497; - yy605: + goto yy554; + goto yy522; + yy630: yych = *++p; if (yych == 'R') - goto yy606; + goto yy631; if (yych != 'r') - goto yy497; - yy606: + goto yy522; + yy631: yych = *++p; if (yych == 'A') - goto yy607; + goto yy632; if (yych != 'a') - goto yy497; - yy607: + goto yy522; + yy632: yych = *++p; if (yych == 'M') - goto yy608; + goto yy633; if (yych != 'm') - goto yy497; - yy608: + goto yy522; + yy633: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy609: + goto yy554; + goto yy522; + yy634: yych = *++p; if (yych == 'M') - goto yy614; + goto yy639; if (yych == 'm') - goto yy614; - goto yy497; - yy610: + goto yy639; 
+ goto yy522; + yy635: yych = *++p; if (yych == 'A') - goto yy611; + goto yy636; if (yych != 'a') - goto yy497; - yy611: + goto yy522; + yy636: yych = *++p; if (yych == 'D') - goto yy612; + goto yy637; if (yych != 'd') - goto yy497; - yy612: + goto yy522; + yy637: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'D') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'E') - goto yy613; + goto yy638; if (yych != 'e') - goto yy497; + goto yy522; } } - yy613: + yy638: yych = *++p; if (yych == 'R') - goto yy529; + goto yy554; if (yych == 'r') - goto yy529; - goto yy497; - yy614: + goto yy554; + goto yy522; + yy639: yych = *++p; if (yych == 'L') - goto yy529; + goto yy554; if (yych == 'l') - goto yy529; - goto yy497; - yy615: + goto yy554; + goto yy522; + yy640: yych = *++p; if (yych == 'A') - goto yy636; + goto yy661; if (yych == 'a') - goto yy636; - goto yy497; - yy616: + goto yy661; + goto yy522; + yy641: yych = *++p; if (yych <= 'R') { if (yych == 'O') - goto yy632; + goto yy657; if (yych <= 'Q') - goto yy497; - goto yy633; + goto yy522; + goto yy658; } else { if (yych <= 'o') { if (yych <= 'n') - goto yy497; - goto yy632; + goto yy522; + goto yy657; } else { if (yych == 'r') - goto yy633; - goto yy497; + goto yy658; + goto yy522; } } - yy617: + yy642: yych = *++p; if (yych <= 'G') { if (yych == 'E') - goto yy618; + goto yy643; if (yych <= 'F') - goto yy497; - goto yy619; + goto yy522; + goto yy644; } else { if (yych <= 'e') { if (yych <= 'd') - goto yy497; + goto yy522; } else { if (yych == 'g') - goto yy619; - goto yy497; + goto yy644; + goto yy522; } } - yy618: + yy643: yych = *++p; if (yych == 'L') - goto yy628; + goto yy653; if (yych == 'l') - goto 
yy628; - goto yy497; - yy619: + goto yy653; + goto yy522; + yy644: yych = *++p; if (yych <= 'U') { if (yych == 'C') - goto yy621; + goto yy646; if (yych <= 'T') - goto yy497; + goto yy522; } else { if (yych <= 'c') { if (yych <= 'b') - goto yy497; - goto yy621; + goto yy522; + goto yy646; } else { if (yych != 'u') - goto yy497; + goto yy522; } } yych = *++p; if (yych == 'R') - goto yy627; + goto yy652; if (yych == 'r') - goto yy627; - goto yy497; - yy621: + goto yy652; + goto yy522; + yy646: yych = *++p; if (yych == 'A') - goto yy622; + goto yy647; if (yych != 'a') - goto yy497; - yy622: + goto yy522; + yy647: yych = *++p; if (yych == 'P') - goto yy623; + goto yy648; if (yych != 'p') - goto yy497; - yy623: + goto yy522; + yy648: yych = *++p; if (yych == 'T') - goto yy624; + goto yy649; if (yych != 't') - goto yy497; - yy624: + goto yy522; + yy649: yych = *++p; if (yych == 'I') - goto yy625; + goto yy650; if (yych != 'i') - goto yy497; - yy625: + goto yy522; + yy650: yych = *++p; if (yych == 'O') - goto yy626; + goto yy651; if (yych != 'o') - goto yy497; - yy626: + goto yy522; + yy651: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; - yy627: + goto yy554; + goto yy522; + yy652: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy628: + goto yy554; + goto yy522; + yy653: yych = *++p; if (yych == 'D') - goto yy629; + goto yy654; if (yych != 'd') - goto yy497; - yy629: + goto yy522; + yy654: yych = *++p; if (yych == 'S') - goto yy630; + goto yy655; if (yych != 's') - goto yy497; - yy630: + goto yy522; + yy655: yych = *++p; if (yych == 'E') - goto yy631; + goto yy656; if (yych != 'e') - goto yy497; - yy631: + goto yy522; + yy656: yych = *++p; if (yych == 'T') - goto yy529; + goto yy554; if (yych == 't') - goto yy529; - goto yy497; - yy632: + goto yy554; + goto yy522; + yy657: yych = *++p; if (yych == 'T') - goto yy634; + goto yy659; if (yych == 't') - goto 
yy634; - goto yy497; - yy633: + goto yy659; + goto yy522; + yy658: yych = *++p; if (yych == 'M') - goto yy529; + goto yy554; if (yych == 'm') - goto yy529; - goto yy497; - yy634: + goto yy554; + goto yy522; + yy659: yych = *++p; if (yych == 'E') - goto yy635; + goto yy660; if (yych != 'e') - goto yy497; - yy635: + goto yy522; + yy660: yych = *++p; if (yych == 'R') - goto yy529; + goto yy554; if (yych == 'r') - goto yy529; - goto yy497; - yy636: + goto yy554; + goto yy522; + yy661: yych = *++p; if (yych == 'M') - goto yy637; + goto yy662; if (yych != 'm') - goto yy497; - yy637: + goto yy522; + yy662: yych = *++p; if (yych == 'E') - goto yy638; + goto yy663; if (yych != 'e') - goto yy497; - yy638: + goto yy522; + yy663: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'R') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'S') - goto yy639; + goto yy664; if (yych != 's') - goto yy497; + goto yy522; } } - yy639: + yy664: yych = *++p; if (yych == 'E') - goto yy640; + goto yy665; if (yych != 'e') - goto yy497; - yy640: + goto yy522; + yy665: yych = *++p; if (yych == 'T') - goto yy529; + goto yy554; if (yych == 't') - goto yy529; - goto yy497; - yy641: + goto yy554; + goto yy522; + yy666: yych = *++p; if (yych <= 'V') { if (yych <= 'Q') { if (yych == 'A') - goto yy647; - goto yy497; + goto yy672; + goto yy522; } else { if (yych <= 'R') - goto yy529; + goto yy554; if (yych <= 'U') - goto yy497; - goto yy529; + goto yy522; + goto yy554; } } else { if (yych <= 'q') { if (yych == 'a') - goto yy647; - goto yy497; + goto yy672; + goto yy522; } else { if (yych <= 'r') - goto yy529; + goto yy554; if (yych == 'v') - goto yy529; - goto yy497; + goto yy554; + goto 
yy522; } } - yy642: + yy667: yych = *++p; if (yych == 'T') - goto yy643; + goto yy668; if (yych != 't') - goto yy497; - yy643: + goto yy522; + yy668: yych = *++p; if (yych == 'A') - goto yy644; + goto yy669; if (yych != 'a') - goto yy497; - yy644: + goto yy522; + yy669: yych = *++p; if (yych == 'I') - goto yy645; + goto yy670; if (yych != 'i') - goto yy497; - yy645: + goto yy522; + yy670: yych = *++p; if (yych == 'L') - goto yy646; + goto yy671; if (yych != 'l') - goto yy497; - yy646: + goto yy522; + yy671: yych = *++p; if (yych == 'S') - goto yy529; + goto yy554; if (yych == 's') - goto yy529; - goto yy497; - yy647: + goto yy554; + goto yy522; + yy672: yych = *++p; if (yych == 'L') - goto yy648; + goto yy673; if (yych != 'l') - goto yy497; - yy648: + goto yy522; + yy673: yych = *++p; if (yych == 'O') - goto yy649; + goto yy674; if (yych != 'o') - goto yy497; - yy649: + goto yy522; + yy674: yych = *++p; if (yych == 'G') - goto yy529; + goto yy554; if (yych == 'g') - goto yy529; - goto yy497; - yy650: + goto yy554; + goto yy522; + yy675: yych = *++p; if (yych == 'L') - goto yy660; + goto yy685; if (yych == 'l') - goto yy660; - goto yy497; - yy651: + goto yy685; + goto yy522; + yy676: yych = *++p; if (yych == 'N') - goto yy657; + goto yy682; if (yych == 'n') - goto yy657; - goto yy497; - yy652: + goto yy682; + goto yy522; + yy677: yych = *++p; if (yych == 'P') - goto yy653; + goto yy678; if (yych != 'p') - goto yy497; - yy653: + goto yy522; + yy678: yych = *++p; if (yych == 'T') - goto yy654; + goto yy679; if (yych != 't') - goto yy497; - yy654: + goto yy522; + yy679: yych = *++p; if (yych == 'I') - goto yy655; + goto yy680; if (yych != 'i') - goto yy497; - yy655: + goto yy522; + yy680: yych = *++p; if (yych == 'O') - goto yy656; + goto yy681; if (yych != 'o') - goto yy497; - yy656: + goto yy522; + yy681: yych = *++p; if (yych == 'N') - goto yy529; + goto yy554; if (yych == 'n') - goto yy529; - goto yy497; - yy657: + goto yy554; + goto yy522; + yy682: yych = *++p; if 
(yych == 'T') - goto yy658; + goto yy683; if (yych != 't') - goto yy497; - yy658: + goto yy522; + yy683: yych = *++p; if (yych == 'E') - goto yy659; + goto yy684; if (yych != 'e') - goto yy497; - yy659: + goto yy522; + yy684: yych = *++p; if (yych == 'R') - goto yy529; + goto yy554; if (yych == 'r') - goto yy529; - goto yy497; - yy660: + goto yy554; + goto yy522; + yy685: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'F') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'G') - goto yy661; + goto yy686; if (yych != 'g') - goto yy497; + goto yy522; } } - yy661: + yy686: yych = *++p; if (yych == 'R') - goto yy662; + goto yy687; if (yych != 'r') - goto yy497; - yy662: + goto yy522; + yy687: yych = *++p; if (yych == 'O') - goto yy663; + goto yy688; if (yych != 'o') - goto yy497; - yy663: + goto yy522; + yy688: yych = *++p; if (yych == 'U') - goto yy664; + goto yy689; if (yych != 'u') - goto yy497; - yy664: + goto yy522; + yy689: yych = *++p; if (yych == 'P') - goto yy529; + goto yy554; if (yych == 'p') - goto yy529; - goto yy497; - yy665: + goto yy554; + goto yy522; + yy690: yych = *++p; if (yych == 'D') - goto yy680; + goto yy705; if (yych == 'd') - goto yy680; - goto yy497; - yy666: + goto yy705; + goto yy522; + yy691: yych = *++p; if (yych == 'O') - goto yy673; + goto yy698; if (yych == 'o') - goto yy673; - goto yy497; - yy667: + goto yy698; + goto yy522; + yy692: yych = *++p; if (yych == 'S') - goto yy668; + goto yy693; if (yych != 's') - goto yy497; - yy668: + goto yy522; + yy693: yych = *++p; if (yych == 'E') - goto yy669; + goto yy694; if (yych != 'e') - goto yy497; - yy669: + goto yy522; + yy694: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) 
{ if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= 'E') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'F') - goto yy670; + goto yy695; if (yych != 'f') - goto yy497; + goto yy522; } } - yy670: + yy695: yych = *++p; if (yych == 'O') - goto yy671; + goto yy696; if (yych != 'o') - goto yy497; - yy671: + goto yy522; + yy696: yych = *++p; if (yych == 'N') - goto yy672; + goto yy697; if (yych != 'n') - goto yy497; - yy672: + goto yy522; + yy697: yych = *++p; if (yych == 'T') - goto yy529; + goto yy554; if (yych == 't') - goto yy529; - goto yy497; - yy673: + goto yy554; + goto yy522; + yy698: yych = *++p; if (yych == 'C') - goto yy674; + goto yy699; if (yych != 'c') - goto yy497; - yy674: + goto yy522; + yy699: yych = *++p; if (yych == 'K') - goto yy675; + goto yy700; if (yych != 'k') - goto yy497; - yy675: + goto yy522; + yy700: yych = *++p; if (yych == 'Q') - goto yy676; + goto yy701; if (yych != 'q') - goto yy497; - yy676: + goto yy522; + yy701: yych = *++p; if (yych == 'U') - goto yy677; + goto yy702; if (yych != 'u') - goto yy497; - yy677: + goto yy522; + yy702: yych = *++p; if (yych == 'O') - goto yy678; + goto yy703; if (yych != 'o') - goto yy497; - yy678: + goto yy522; + yy703: yych = *++p; if (yych == 'T') - goto yy679; + goto yy704; if (yych != 't') - goto yy497; - yy679: + goto yy522; + yy704: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy680: + goto yy554; + goto yy522; + yy705: yych = *++p; if (yych == 'Y') - goto yy529; + goto yy554; if (yych == 'y') - goto yy529; - goto yy497; - yy681: + goto yy554; + goto yy522; + yy706: yych = *++p; if (yych == 'I') - goto yy692; + goto yy717; if (yych == 'i') - goto yy692; - goto yy497; - yy682: 
+ goto yy717; + goto yy522; + yy707: yych = *++p; if (yych == 'T') - goto yy688; + goto yy713; if (yych == 't') - goto yy688; - goto yy497; - yy683: + goto yy713; + goto yy522; + yy708: yych = *++p; if (yych == 'D') - goto yy684; + goto yy709; if (yych != 'd') - goto yy497; - yy684: + goto yy522; + yy709: yych = *++p; if (yych == 'R') - goto yy685; + goto yy710; if (yych != 'r') - goto yy497; - yy685: + goto yy522; + yy710: yych = *++p; if (yych == 'E') - goto yy686; + goto yy711; if (yych != 'e') - goto yy497; - yy686: + goto yy522; + yy711: yych = *++p; if (yych == 'S') - goto yy687; + goto yy712; if (yych != 's') - goto yy497; - yy687: + goto yy522; + yy712: yych = *++p; if (yych == 'S') - goto yy529; + goto yy554; if (yych == 's') - goto yy529; - goto yy497; - yy688: + goto yy554; + goto yy522; + yy713: yych = *++p; if (yych == 'I') - goto yy689; + goto yy714; if (yych != 'i') - goto yy497; - yy689: + goto yy522; + yy714: yych = *++p; if (yych == 'C') - goto yy690; + goto yy715; if (yych != 'c') - goto yy497; - yy690: + goto yy522; + yy715: yych = *++p; if (yych == 'L') - goto yy691; + goto yy716; if (yych != 'l') - goto yy497; - yy691: + goto yy522; + yy716: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy692: + goto yy554; + goto yy522; + yy717: yych = *++p; if (yych == 'D') - goto yy693; + goto yy718; if (yych != 'd') - goto yy497; - yy693: + goto yy522; + yy718: yych = *++p; if (yych == 'E') - goto yy529; + goto yy554; if (yych == 'e') - goto yy529; - goto yy497; - yy694: + goto yy554; + goto yy522; + yy719: yych = *++p; if (yych == 'E') - goto yy570; + goto yy595; if (yych == 'e') - goto yy570; - goto yy497; - yy695: + goto yy595; + goto yy522; + yy720: yych = *++p; if (yych == 'R') - goto yy696; + goto yy721; if (yych != 'r') - goto yy497; - yy696: + goto yy522; + yy721: yych = *++p; if (yych == 'A') - goto yy697; + goto yy722; if (yych != 'a') - goto yy497; - yy697: + goto yy522; + yy722: yych = 
*++p; if (yych == 'M') - goto yy529; + goto yy554; if (yych == 'm') - goto yy529; - goto yy497; - yy698: + goto yy554; + goto yy522; + yy723: yych = *++p; if (yych <= '/') { if (yych <= 0x1F) { if (yych <= 0x08) - goto yy497; + goto yy522; if (yych <= '\r') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= ' ') - goto yy530; + goto yy555; if (yych <= '.') - goto yy497; - goto yy532; + goto yy522; + goto yy557; } } else { if (yych <= '@') { if (yych == '>') - goto yy530; - goto yy497; + goto yy555; + goto yy522; } else { if (yych <= 'A') - goto yy695; + goto yy720; if (yych == 'a') - goto yy695; - goto yy497; + goto yy720; + goto yy522; } } - yy699: + yy724: ++p; if ((yych = *p) <= 'U') { if (yych <= 'N') { if (yych == 'E') - goto yy555; - goto yy497; + goto yy580; + goto yy522; } else { if (yych <= 'O') - goto yy554; + goto yy579; if (yych <= 'T') - goto yy497; - goto yy553; + goto yy522; + goto yy578; } } else { if (yych <= 'n') { if (yych == 'e') - goto yy555; - goto yy497; + goto yy580; + goto yy522; } else { if (yych <= 'o') - goto yy554; + goto yy579; if (yych == 'u') - goto yy553; - goto yy497; + goto yy578; + goto yy522; } } } @@ -11699,735 +11955,735 @@ bufsize_t _scan_html_block_start_7(const unsigned char *p) { }; yych = *p; if (yych == '<') - goto yy704; + goto yy729; ++p; - yy703 : { return 0; } - yy704: + yy728 : { return 0; } + yy729: yyaccept = 0; yych = *(marker = ++p); if (yych <= '@') { if (yych == '/') - goto yy708; - goto yy703; + goto yy733; + goto yy728; } else { if (yych <= 'Z') - goto yy705; + goto yy730; if (yych <= '`') - goto yy703; + goto yy728; if (yych >= '{') - goto yy703; + goto yy728; } - yy705: + yy730: ++p; yych = *p; if (yybm[0 + yych] & 1) { - goto yy705; + goto yy730; } if (yych <= ' ') { if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\r') - goto yy719; + goto yy744; if (yych >= ' ') - goto yy719; + goto yy744; } else { if (yych <= '/') { if (yych >= '/') - goto yy721; + goto yy746; } else 
{ if (yych == '>') - goto yy713; + goto yy738; } } - yy707: + yy732: p = marker; if (yyaccept == 0) { - goto yy703; + goto yy728; } else { - goto yy717; + goto yy742; } - yy708: + yy733: yych = *++p; if (yych <= '@') - goto yy707; + goto yy732; if (yych <= 'Z') - goto yy709; + goto yy734; if (yych <= '`') - goto yy707; + goto yy732; if (yych >= '{') - goto yy707; - yy709: + goto yy732; + yy734: ++p; yych = *p; if (yybm[0 + yych] & 2) { - goto yy711; + goto yy736; } if (yych <= '=') { if (yych <= '-') { if (yych <= ',') - goto yy707; - goto yy709; + goto yy732; + goto yy734; } else { if (yych <= '/') - goto yy707; + goto yy732; if (yych <= '9') - goto yy709; - goto yy707; + goto yy734; + goto yy732; } } else { if (yych <= 'Z') { if (yych <= '>') - goto yy713; + goto yy738; if (yych <= '@') - goto yy707; - goto yy709; + goto yy732; + goto yy734; } else { if (yych <= '`') - goto yy707; + goto yy732; if (yych <= 'z') - goto yy709; - goto yy707; + goto yy734; + goto yy732; } } - yy711: + yy736: ++p; yych = *p; if (yybm[0 + yych] & 2) { - goto yy711; + goto yy736; } if (yych != '>') - goto yy707; - yy713: + goto yy732; + yy738: ++p; yych = *p; if (yybm[0 + yych] & 4) { - goto yy713; + goto yy738; } if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\n') - goto yy715; + goto yy740; if (yych <= '\v') - goto yy707; + goto yy732; if (yych <= '\r') - goto yy718; - goto yy707; - yy715: + goto yy743; + goto yy732; + yy740: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 4) { - goto yy713; + goto yy738; } if (yych <= 0x08) - goto yy717; + goto yy742; if (yych <= '\n') - goto yy715; + goto yy740; if (yych <= '\v') - goto yy717; + goto yy742; if (yych <= '\r') - goto yy718; - yy717 : { return 7; } - yy718: + goto yy743; + yy742 : { return 7; } + yy743: yych = *++p; - goto yy717; - yy719: + goto yy742; + yy744: ++p; yych = *p; if (yych <= ':') { if (yych <= ' ') { if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\r') - goto yy719; + goto yy744; if 
(yych <= 0x1F) - goto yy707; - goto yy719; + goto yy732; + goto yy744; } else { if (yych == '/') - goto yy721; + goto yy746; if (yych <= '9') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } } else { if (yych <= 'Z') { if (yych == '>') - goto yy713; + goto yy738; if (yych <= '@') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } else { if (yych <= '_') { if (yych <= '^') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } else { if (yych <= '`') - goto yy707; + goto yy732; if (yych <= 'z') - goto yy722; - goto yy707; + goto yy747; + goto yy732; } } } - yy721: + yy746: yych = *++p; if (yych == '>') - goto yy713; - goto yy707; - yy722: + goto yy738; + goto yy732; + yy747: ++p; yych = *p; if (yybm[0 + yych] & 16) { - goto yy722; + goto yy747; } if (yych <= ',') { if (yych <= '\r') { if (yych <= 0x08) - goto yy707; + goto yy732; } else { if (yych != ' ') - goto yy707; + goto yy732; } } else { if (yych <= '<') { if (yych <= '/') - goto yy721; - goto yy707; + goto yy746; + goto yy732; } else { if (yych <= '=') - goto yy726; + goto yy751; if (yych <= '>') - goto yy713; - goto yy707; + goto yy738; + goto yy732; } } - yy724: + yy749: ++p; yych = *p; if (yych <= '<') { if (yych <= ' ') { if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\r') - goto yy724; + goto yy749; if (yych <= 0x1F) - goto yy707; - goto yy724; + goto yy732; + goto yy749; } else { if (yych <= '/') { if (yych <= '.') - goto yy707; - goto yy721; + goto yy732; + goto yy746; } else { if (yych == ':') - goto yy722; - goto yy707; + goto yy747; + goto yy732; } } } else { if (yych <= 'Z') { if (yych <= '=') - goto yy726; + goto yy751; if (yych <= '>') - goto yy713; + goto yy738; if (yych <= '@') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } else { if (yych <= '_') { if (yych <= '^') - goto yy707; - goto yy722; + goto yy732; + goto yy747; } else { if (yych <= '`') - goto yy707; + goto yy732; if (yych <= 'z') - goto yy722; - goto yy707; + goto yy747; + goto yy732; } } } - 
yy726: + yy751: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy728; + goto yy753; } if (yych <= 0xE0) { if (yych <= '"') { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych >= '!') - goto yy739; + goto yy764; } else { if (yych <= '\'') - goto yy737; + goto yy762; if (yych <= 0xC1) - goto yy707; + goto yy732; if (yych <= 0xDF) - goto yy730; - goto yy731; + goto yy755; + goto yy756; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy728; + goto yy753; } if (yych <= 0xDF) { if (yych <= '\'') { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= ' ') - goto yy762; + goto yy787; if (yych <= '"') - goto yy739; - goto yy737; + goto yy764; + goto yy762; } else { if (yych == '>') - goto yy713; + goto yy738; if (yych <= 0xC1) - goto yy707; - goto yy730; + goto yy732; + goto yy755; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } - yy728: + yy753: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy728; + goto yy753; } if (yych <= 0xE0) { if (yych <= '=') { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= ' ') - goto yy756; - goto yy707; + goto yy781; + goto yy732; } else { if (yych <= '>') - goto yy713; + goto yy738; if (yych <= 0xC1) - goto yy707; + goto yy732; if (yych >= 0xE0) - goto yy731; + goto yy756; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; 
+ goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } - yy730: + yy755: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy728; - goto yy707; - yy731: + goto yy753; + goto yy732; + yy756: ++p; yych = *p; if (yych <= 0x9F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy730; - goto yy707; - yy732: + goto yy755; + goto yy732; + yy757: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy730; - goto yy707; - yy733: + goto yy755; + goto yy732; + yy758: ++p; yych = *p; if (yych <= 0x8F) - goto yy707; - if (yych <= 0xBF) goto yy732; - goto yy707; - yy734: + if (yych <= 0xBF) + goto yy757; + goto yy732; + yy759: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; - if (yych <= 0xBF) goto yy732; - goto yy707; - yy735: + if (yych <= 0xBF) + goto yy757; + goto yy732; + yy760: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; - if (yych <= 0x8F) goto yy732; - goto yy707; - yy736: + if (yych <= 0x8F) + goto yy757; + goto yy732; + yy761: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x9F) - goto yy730; - goto yy707; - yy737: + goto yy755; + goto yy732; + yy762: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy737; + goto yy762; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= '\'') - goto yy748; - goto yy707; + goto yy773; + goto yy732; } else { if (yych <= 0xDF) - goto yy749; + goto yy774; if (yych <= 0xE0) - goto yy750; - goto yy751; + goto yy775; + goto yy776; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy755; + goto yy780; if (yych <= 0xEF) - goto yy751; - goto yy752; + goto yy776; + goto yy777; } else { if (yych <= 0xF3) - goto yy753; + goto yy778; if (yych <= 0xF4) - goto yy754; - goto yy707; + goto yy779; + goto yy732; } } - yy739: + yy764: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy739; + goto yy764; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 
0x00) - goto yy707; + goto yy732; if (yych <= '"') - goto yy748; - goto yy707; + goto yy773; + goto yy732; } else { if (yych <= 0xDF) - goto yy741; + goto yy766; if (yych <= 0xE0) - goto yy742; - goto yy743; + goto yy767; + goto yy768; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy747; + goto yy772; if (yych <= 0xEF) - goto yy743; - goto yy744; + goto yy768; + goto yy769; } else { if (yych <= 0xF3) - goto yy745; + goto yy770; if (yych <= 0xF4) - goto yy746; - goto yy707; + goto yy771; + goto yy732; } } - yy741: + yy766: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy739; - goto yy707; - yy742: + goto yy764; + goto yy732; + yy767: ++p; yych = *p; if (yych <= 0x9F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy741; - goto yy707; - yy743: + goto yy766; + goto yy732; + yy768: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy741; - goto yy707; - yy744: + goto yy766; + goto yy732; + yy769: ++p; yych = *p; if (yych <= 0x8F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy743; - goto yy707; - yy745: + goto yy768; + goto yy732; + yy770: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy743; - goto yy707; - yy746: + goto yy768; + goto yy732; + yy771: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x8F) - goto yy743; - goto yy707; - yy747: + goto yy768; + goto yy732; + yy772: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x9F) - goto yy741; - goto yy707; - yy748: + goto yy766; + goto yy732; + yy773: ++p; yych = *p; if (yych <= ' ') { if (yych <= 0x08) - goto yy707; + goto yy732; if (yych <= '\r') - goto yy719; + goto yy744; if (yych <= 0x1F) - goto yy707; - goto yy719; + goto yy732; + goto yy744; } else { if (yych <= '/') { if (yych <= '.') - goto yy707; - goto yy721; + goto yy732; + goto yy746; } else { if (yych == '>') - goto yy713; - goto yy707; + goto yy738; + goto yy732; 
} } - yy749: + yy774: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy737; - goto yy707; - yy750: + goto yy762; + goto yy732; + yy775: ++p; yych = *p; if (yych <= 0x9F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy749; - goto yy707; - yy751: + goto yy774; + goto yy732; + yy776: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy749; - goto yy707; - yy752: + goto yy774; + goto yy732; + yy777: ++p; yych = *p; if (yych <= 0x8F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy751; - goto yy707; - yy753: + goto yy776; + goto yy732; + yy778: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0xBF) - goto yy751; - goto yy707; - yy754: + goto yy776; + goto yy732; + yy779: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x8F) - goto yy751; - goto yy707; - yy755: + goto yy776; + goto yy732; + yy780: ++p; yych = *p; if (yych <= 0x7F) - goto yy707; + goto yy732; if (yych <= 0x9F) - goto yy749; - goto yy707; - yy756: + goto yy774; + goto yy732; + yy781: ++p; yych = *p; if (yych <= '@') { if (yych <= '"') { if (yych <= '\r') { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= 0x08) - goto yy728; - goto yy756; + goto yy753; + goto yy781; } else { if (yych == ' ') - goto yy756; + goto yy781; if (yych <= '!') - goto yy728; - goto yy707; + goto yy753; + goto yy732; } } else { if (yych <= ':') { if (yych == '\'') - goto yy707; + goto yy732; if (yych <= '9') - goto yy728; + goto yy753; } else { if (yych <= ';') - goto yy728; + goto yy753; if (yych <= '=') - goto yy707; + goto yy732; if (yych <= '>') - goto yy713; - goto yy728; + goto yy738; + goto yy753; } } } else { if (yych <= 0xDF) { if (yych <= '`') { if (yych <= 'Z') - goto yy758; + goto yy783; if (yych <= '^') - goto yy728; + goto yy753; if (yych >= '`') - goto yy707; + goto yy732; } else { if (yych <= 'z') - goto yy758; + goto yy783; if (yych <= 0x7F) - goto yy728; + goto 
yy753; if (yych <= 0xC1) - goto yy707; - goto yy730; + goto yy732; + goto yy755; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } } - yy758: + yy783: ++p; yych = *p; if (yych <= '>') { if (yych <= '&') { if (yych <= 0x1F) { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= 0x08) - goto yy728; + goto yy753; if (yych >= 0x0E) - goto yy728; + goto yy753; } else { if (yych <= ' ') - goto yy760; + goto yy785; if (yych == '"') - goto yy707; - goto yy728; + goto yy732; + goto yy753; } } else { if (yych <= '/') { if (yych <= '\'') - goto yy707; + goto yy732; if (yych <= ',') - goto yy728; + goto yy753; if (yych <= '.') - goto yy758; - goto yy728; + goto yy783; + goto yy753; } else { if (yych <= ';') { if (yych <= ':') - goto yy758; - goto yy728; + goto yy783; + goto yy753; } else { if (yych <= '<') - goto yy707; + goto yy732; if (yych <= '=') - goto yy726; - goto yy713; + goto yy751; + goto yy738; } } } @@ -12435,188 +12691,188 @@ bufsize_t _scan_html_block_start_7(const unsigned char *p) { if (yych <= 0xC1) { if (yych <= '_') { if (yych <= '@') - goto yy728; + goto yy753; if (yych <= 'Z') - goto yy758; + goto yy783; if (yych <= '^') - goto yy728; - goto yy758; + goto yy753; + goto yy783; } else { if (yych <= '`') - goto yy707; + goto yy732; if (yych <= 'z') - goto yy758; + goto yy783; if (yych <= 0x7F) - goto yy728; - goto yy707; + goto yy753; + goto yy732; } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy730; + goto yy755; if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych <= 0xEC) - goto yy732; - goto yy736; + goto yy757; + goto yy761; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy732; - goto yy733; + goto yy757; + goto yy758; } else { if (yych <= 0xF3) - goto 
yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } } } - yy760: + yy785: ++p; yych = *p; if (yych <= '@') { if (yych <= '&') { if (yych <= 0x1F) { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= 0x08) - goto yy728; + goto yy753; if (yych <= '\r') - goto yy760; - goto yy728; + goto yy785; + goto yy753; } else { if (yych <= ' ') - goto yy760; + goto yy785; if (yych == '"') - goto yy707; - goto yy728; + goto yy732; + goto yy753; } } else { if (yych <= ';') { if (yych <= '\'') - goto yy707; + goto yy732; if (yych == ':') - goto yy758; - goto yy728; + goto yy783; + goto yy753; } else { if (yych <= '<') - goto yy707; + goto yy732; if (yych <= '=') - goto yy726; + goto yy751; if (yych <= '>') - goto yy713; - goto yy728; + goto yy738; + goto yy753; } } } else { if (yych <= 0xDF) { if (yych <= '`') { if (yych <= 'Z') - goto yy758; + goto yy783; if (yych <= '^') - goto yy728; + goto yy753; if (yych <= '_') - goto yy758; - goto yy707; + goto yy783; + goto yy732; } else { if (yych <= 'z') - goto yy758; + goto yy783; if (yych <= 0x7F) - goto yy728; + goto yy753; if (yych <= 0xC1) - goto yy707; - goto yy730; + goto yy732; + goto yy755; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } } - yy762: + yy787: ++p; yych = *p; if (yych <= '@') { if (yych <= '"') { if (yych <= '\r') { if (yych <= 0x00) - goto yy707; + goto yy732; if (yych <= 0x08) - goto yy728; - goto yy762; + goto yy753; + goto yy787; } else { if (yych == ' ') - goto yy762; + goto yy787; if (yych <= '!') - goto yy728; - goto yy739; + goto yy753; + goto yy764; } } else { if (yych <= ':') { if (yych == '\'') - goto yy737; + goto yy762; if (yych <= '9') - goto yy728; - goto yy758; + goto 
yy753; + goto yy783; } else { if (yych <= ';') - goto yy728; + goto yy753; if (yych <= '=') - goto yy707; + goto yy732; if (yych <= '>') - goto yy713; - goto yy728; + goto yy738; + goto yy753; } } } else { if (yych <= 0xDF) { if (yych <= '`') { if (yych <= 'Z') - goto yy758; + goto yy783; if (yych <= '^') - goto yy728; + goto yy753; if (yych <= '_') - goto yy758; - goto yy707; + goto yy783; + goto yy732; } else { if (yych <= 'z') - goto yy758; + goto yy783; if (yych <= 0x7F) - goto yy728; + goto yy753; if (yych <= 0xC1) - goto yy707; - goto yy730; + goto yy732; + goto yy755; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy731; + goto yy756; if (yych == 0xED) - goto yy736; - goto yy732; + goto yy761; + goto yy757; } else { if (yych <= 0xF0) - goto yy733; + goto yy758; if (yych <= 0xF3) - goto yy734; + goto yy759; if (yych <= 0xF4) - goto yy735; - goto yy707; + goto yy760; + goto yy732; } } } @@ -12652,891 +12908,891 @@ bufsize_t _scan_html_block_end_1(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= ';') { if (yych <= 0x00) - goto yy766; + goto yy791; if (yych != '\n') - goto yy768; + goto yy793; } else { if (yych <= '<') - goto yy769; + goto yy794; if (yych <= 0x7F) - goto yy768; + goto yy793; if (yych >= 0xC2) - goto yy770; + goto yy795; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy771; + goto yy796; if (yych == 0xED) - goto yy773; - goto yy772; + goto yy798; + goto yy797; } else { if (yych <= 0xF0) - goto yy774; + goto yy799; if (yych <= 0xF3) - goto yy775; + goto yy800; if (yych <= 0xF4) - goto yy776; + goto yy801; } } - yy766: + yy791: ++p; - yy767 : { return 0; } - yy768: + yy792 : { return 0; } + yy793: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy767; + goto yy792; if (yych <= '\t') - goto yy781; - goto yy767; + goto yy806; + goto yy792; } else { if (yych <= 0x7F) - goto yy781; + goto yy806; if (yych <= 0xC1) - goto yy767; + goto yy792; if (yych <= 0xF4) - goto yy781; - goto yy767; + 
goto yy806; + goto yy792; } - yy769: + yy794: yyaccept = 0; yych = *(marker = ++p); if (yych <= '.') { if (yych <= 0x00) - goto yy767; + goto yy792; if (yych == '\n') - goto yy767; - goto yy781; + goto yy792; + goto yy806; } else { if (yych <= 0x7F) { if (yych <= '/') - goto yy789; - goto yy781; + goto yy814; + goto yy806; } else { if (yych <= 0xC1) - goto yy767; + goto yy792; if (yych <= 0xF4) - goto yy781; - goto yy767; + goto yy806; + goto yy792; } } - yy770: + yy795: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy780; - goto yy767; - yy771: + goto yy805; + goto yy792; + yy796: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy779; - goto yy767; - yy772: + goto yy804; + goto yy792; + yy797: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy779; - goto yy767; - yy773: + goto yy804; + goto yy792; + yy798: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych <= 0x9F) - goto yy779; - goto yy767; - yy774: + goto yy804; + goto yy792; + yy799: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x8F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy777; - goto yy767; - yy775: + goto yy802; + goto yy792; + yy800: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych <= 0xBF) - goto yy777; - goto yy767; - yy776: + goto yy802; + goto yy792; + yy801: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy767; + goto yy792; if (yych >= 0x90) - goto yy767; - yy777: + goto yy792; + yy802: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych <= 0xBF) - goto yy779; - yy778: + goto yy804; + yy803: p = marker; if (yyaccept == 0) { - goto yy767; + goto yy792; } else { - goto yy795; + goto yy820; } - yy779: + yy804: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych >= 
0xC0) - goto yy778; - yy780: + goto yy803; + yy805: ++p; yych = *p; - yy781: + yy806: if (yybm[0 + yych] & 64) { - goto yy780; + goto yy805; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy778; + goto yy803; if (yych >= '=') - goto yy778; + goto yy803; } else { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; - goto yy777; + goto yy809; + goto yy802; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy788; + goto yy813; if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } - yy782: + yy807: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xDF) { if (yych <= '.') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= '/') - goto yy789; + goto yy814; if (yych <= 0x7F) - goto yy780; + goto yy805; if (yych <= 0xC1) - goto yy778; - goto yy779; + goto yy803; + goto yy804; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych == 0xED) - goto yy788; - goto yy777; + goto yy813; + goto yy802; } else { if (yych <= 0xF0) - goto yy785; + goto yy810; if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } - yy784: + yy809: ++p; yych = *p; if (yych <= 0x9F) - goto yy778; + goto yy803; if (yych <= 0xBF) - goto yy779; - goto yy778; - yy785: + goto yy804; + goto yy803; + yy810: ++p; yych = *p; if (yych <= 0x8F) - goto yy778; + goto yy803; if (yych <= 0xBF) - goto yy777; - goto yy778; - yy786: + goto yy802; + goto yy803; + yy811: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych <= 0xBF) - goto yy777; - goto yy778; - yy787: + goto yy802; + goto yy803; + yy812: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych <= 
0x8F) - goto yy777; - goto yy778; - yy788: + goto yy802; + goto yy803; + yy813: ++p; yych = *p; if (yych <= 0x7F) - goto yy778; + goto yy803; if (yych <= 0x9F) - goto yy779; - goto yy778; - yy789: + goto yy804; + goto yy803; + yy814: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 's') { if (yych <= 'P') { if (yych <= '\t') { if (yych <= 0x00) - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= '\n') - goto yy778; + goto yy803; if (yych <= 'O') - goto yy780; - goto yy791; + goto yy805; + goto yy816; } } else { if (yych <= 'o') { if (yych != 'S') - goto yy780; + goto yy805; } else { if (yych <= 'p') - goto yy791; + goto yy816; if (yych <= 'r') - goto yy780; + goto yy805; } } } else { if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } else { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; - goto yy777; + goto yy809; + goto yy802; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy788; + goto yy813; if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 't') { if (yych <= 'C') { if (yych <= '\t') { if (yych <= 0x00) - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= '\n') - goto yy778; + goto yy803; if (yych <= 'B') - goto yy780; - goto yy796; + goto yy805; + goto yy821; } } else { if (yych <= 'b') { if (yych == 'T') - goto yy797; - goto yy780; + goto yy822; + goto yy805; } else { if (yych <= 'c') - goto yy796; + goto yy821; if (yych <= 's') - goto yy780; - goto yy797; + goto yy805; + goto yy822; } } } else { if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } else { if (yych <= 
0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; - goto yy777; + goto yy809; + goto yy802; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy788; + goto yy813; if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy791: + yy816: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'Q') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'q') { if (yych >= 'S') - goto yy780; + goto yy805; } else { if (yych <= 'r') - goto yy792; + goto yy817; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy792: + yy817: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'D') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'd') { if (yych >= 'F') - goto yy780; + goto yy805; } else { if (yych <= 'e') - goto yy793; + goto yy818; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - 
goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy793: + yy818: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= '>') - goto yy794; + goto yy819; if (yych <= 0x7F) - goto yy780; + goto yy805; if (yych <= 0xC1) - goto yy778; - goto yy779; + goto yy803; + goto yy804; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych == 0xED) - goto yy788; - goto yy777; + goto yy813; + goto yy802; } else { if (yych <= 0xF0) - goto yy785; + goto yy810; if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } - yy794: + yy819: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy780; + goto yy805; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy795; + goto yy820; if (yych <= '<') - goto yy782; + goto yy807; } else { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; - goto yy777; + goto yy809; + goto yy802; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy788; + goto yy813; if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; + goto yy812; } } - yy795 : { return (bufsize_t)(p - start); } - yy796: + yy820 : { return (bufsize_t)(p - start); } + yy821: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'Q') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'q') { if (yych <= 'R') - goto yy800; - goto yy780; + goto yy825; + goto 
yy805; } else { if (yych <= 'r') - goto yy800; + goto yy825; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy797: + yy822: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'X') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'x') { if (yych >= 'Z') - goto yy780; + goto yy805; } else { if (yych <= 'y') - goto yy798; + goto yy823; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy798: + yy823: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'K') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'k') { if (yych >= 'M') - goto yy780; + goto yy805; } else { if (yych <= 'l') - goto yy799; + goto yy824; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if 
(yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy799: + yy824: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'D') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'd') { if (yych <= 'E') - goto yy793; - goto yy780; + goto yy818; + goto yy805; } else { if (yych <= 'e') - goto yy793; + goto yy818; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy800: + yy825: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'H') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'h') { if (yych >= 'J') - goto yy780; + goto yy805; } else { if (yych <= 'i') - goto yy801; + goto yy826; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + 
goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy801: + yy826: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'O') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 'o') { if (yych >= 'Q') - goto yy780; + goto yy805; } else { if (yych <= 'p') - goto yy802; + goto yy827; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } - yy802: + yy827: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy782; + goto yy807; } if (yych <= 0xC1) { if (yych <= 'S') { if (yych <= 0x00) - goto yy778; + goto yy803; if (yych == '\n') - goto yy778; - goto yy780; + goto yy803; + goto yy805; } else { if (yych <= 's') { if (yych <= 'T') - goto yy793; - goto yy780; + goto yy818; + goto yy805; } else { if (yych <= 't') - goto yy793; + goto yy818; if (yych <= 0x7F) - goto yy780; - goto yy778; + goto yy805; + goto yy803; } } } else { if (yych <= 0xED) { if (yych <= 0xDF) - goto yy779; + goto yy804; if (yych <= 0xE0) - goto yy784; + goto yy809; if (yych <= 0xEC) - goto yy777; - goto yy788; + goto yy802; + goto yy813; } else { if (yych <= 0xF0) { if (yych <= 0xEF) - goto yy777; - goto yy785; + goto yy802; + goto yy810; } else { if (yych <= 0xF3) - goto yy786; + goto yy811; if (yych <= 0xF4) - goto yy787; - goto yy778; + goto yy812; + goto yy803; } } } @@ -13572,334 +13828,334 @@ 
bufsize_t _scan_html_block_end_2(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= ',') { if (yych <= 0x00) - goto yy805; + goto yy830; if (yych != '\n') - goto yy807; + goto yy832; } else { if (yych <= '-') - goto yy808; + goto yy833; if (yych <= 0x7F) - goto yy807; + goto yy832; if (yych >= 0xC2) - goto yy809; + goto yy834; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy810; + goto yy835; if (yych == 0xED) - goto yy812; - goto yy811; + goto yy837; + goto yy836; } else { if (yych <= 0xF0) - goto yy813; + goto yy838; if (yych <= 0xF3) - goto yy814; + goto yy839; if (yych <= 0xF4) - goto yy815; + goto yy840; } } - yy805: + yy830: ++p; - yy806 : { return 0; } - yy807: + yy831 : { return 0; } + yy832: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy806; + goto yy831; if (yych <= '\t') - goto yy820; - goto yy806; + goto yy845; + goto yy831; } else { if (yych <= 0x7F) - goto yy820; + goto yy845; if (yych <= 0xC1) - goto yy806; + goto yy831; if (yych <= 0xF4) - goto yy820; - goto yy806; + goto yy845; + goto yy831; } - yy808: + yy833: yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy827; + goto yy852; } if (yych <= '\n') { if (yych <= 0x00) - goto yy806; + goto yy831; if (yych <= '\t') - goto yy820; - goto yy806; + goto yy845; + goto yy831; } else { if (yych <= 0x7F) - goto yy820; + goto yy845; if (yych <= 0xC1) - goto yy806; + goto yy831; if (yych <= 0xF4) - goto yy820; - goto yy806; + goto yy845; + goto yy831; } - yy809: + yy834: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy819; - goto yy806; - yy810: + goto yy844; + goto yy831; + yy835: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy818; - goto yy806; - yy811: + goto yy843; + goto yy831; + yy836: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy818; 
- goto yy806; - yy812: + goto yy843; + goto yy831; + yy837: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych <= 0x9F) - goto yy818; - goto yy806; - yy813: + goto yy843; + goto yy831; + yy838: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x8F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy816; - goto yy806; - yy814: + goto yy841; + goto yy831; + yy839: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych <= 0xBF) - goto yy816; - goto yy806; - yy815: + goto yy841; + goto yy831; + yy840: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy806; + goto yy831; if (yych >= 0x90) - goto yy806; - yy816: + goto yy831; + yy841: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych <= 0xBF) - goto yy818; - yy817: + goto yy843; + yy842: p = marker; if (yyaccept == 0) { - goto yy806; + goto yy831; } else { - goto yy830; + goto yy855; } - yy818: + yy843: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych >= 0xC0) - goto yy817; - yy819: + goto yy842; + yy844: ++p; yych = *p; - yy820: + yy845: if (yybm[0 + yych] & 64) { - goto yy819; + goto yy844; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy817; + goto yy842; if (yych >= '.') - goto yy817; + goto yy842; } else { if (yych <= 0xDF) - goto yy818; + goto yy843; if (yych <= 0xE0) - goto yy822; - goto yy816; + goto yy847; + goto yy841; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy826; + goto yy851; if (yych <= 0xEF) - goto yy816; - goto yy823; + goto yy841; + goto yy848; } else { if (yych <= 0xF3) - goto yy824; + goto yy849; if (yych <= 0xF4) - goto yy825; - goto yy817; + goto yy850; + goto yy842; } } - yy821: + yy846: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy819; + goto yy844; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy817; + goto yy842; if (yych <= '-') - goto yy827; - goto yy817; + goto yy852; + goto yy842; } else { 
if (yych <= 0xDF) - goto yy818; + goto yy843; if (yych >= 0xE1) - goto yy816; + goto yy841; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy826; + goto yy851; if (yych <= 0xEF) - goto yy816; - goto yy823; + goto yy841; + goto yy848; } else { if (yych <= 0xF3) - goto yy824; + goto yy849; if (yych <= 0xF4) - goto yy825; - goto yy817; + goto yy850; + goto yy842; } } - yy822: + yy847: ++p; yych = *p; if (yych <= 0x9F) - goto yy817; + goto yy842; if (yych <= 0xBF) - goto yy818; - goto yy817; - yy823: + goto yy843; + goto yy842; + yy848: ++p; yych = *p; if (yych <= 0x8F) - goto yy817; + goto yy842; if (yych <= 0xBF) - goto yy816; - goto yy817; - yy824: + goto yy841; + goto yy842; + yy849: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych <= 0xBF) - goto yy816; - goto yy817; - yy825: + goto yy841; + goto yy842; + yy850: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych <= 0x8F) - goto yy816; - goto yy817; - yy826: + goto yy841; + goto yy842; + yy851: ++p; yych = *p; if (yych <= 0x7F) - goto yy817; + goto yy842; if (yych <= 0x9F) - goto yy818; - goto yy817; - yy827: + goto yy843; + goto yy842; + yy852: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy827; + goto yy852; } if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy817; + goto yy842; if (yych == '\n') - goto yy817; - goto yy819; + goto yy842; + goto yy844; } else { if (yych <= '>') - goto yy829; + goto yy854; if (yych <= 0x7F) - goto yy819; + goto yy844; if (yych <= 0xC1) - goto yy817; - goto yy818; + goto yy842; + goto yy843; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy822; + goto yy847; if (yych == 0xED) - goto yy826; - goto yy816; + goto yy851; + goto yy841; } else { if (yych <= 0xF0) - goto yy823; + goto yy848; if (yych <= 0xF3) - goto yy824; + goto yy849; if (yych <= 0xF4) - goto yy825; - goto yy817; + goto yy850; + goto yy842; } } - yy829: + yy854: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy819; 
+ goto yy844; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy830; + goto yy855; if (yych <= '-') - goto yy821; + goto yy846; } else { if (yych <= 0xDF) - goto yy818; + goto yy843; if (yych <= 0xE0) - goto yy822; - goto yy816; + goto yy847; + goto yy841; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy826; + goto yy851; if (yych <= 0xEF) - goto yy816; - goto yy823; + goto yy841; + goto yy848; } else { if (yych <= 0xF3) - goto yy824; + goto yy849; if (yych <= 0xF4) - goto yy825; + goto yy850; } } - yy830 : { return (bufsize_t)(p - start); } + yy855 : { return (bufsize_t)(p - start); } } } @@ -13932,301 +14188,301 @@ bufsize_t _scan_html_block_end_3(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= '>') { if (yych <= 0x00) - goto yy833; + goto yy858; if (yych != '\n') - goto yy835; + goto yy860; } else { if (yych <= '?') - goto yy836; + goto yy861; if (yych <= 0x7F) - goto yy835; + goto yy860; if (yych >= 0xC2) - goto yy837; + goto yy862; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy838; + goto yy863; if (yych == 0xED) - goto yy840; - goto yy839; + goto yy865; + goto yy864; } else { if (yych <= 0xF0) - goto yy841; + goto yy866; if (yych <= 0xF3) - goto yy842; + goto yy867; if (yych <= 0xF4) - goto yy843; + goto yy868; } } - yy833: + yy858: ++p; - yy834 : { return 0; } - yy835: + yy859 : { return 0; } + yy860: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy834; + goto yy859; if (yych <= '\t') - goto yy848; - goto yy834; + goto yy873; + goto yy859; } else { if (yych <= 0x7F) - goto yy848; + goto yy873; if (yych <= 0xC1) - goto yy834; + goto yy859; if (yych <= 0xF4) - goto yy848; - goto yy834; + goto yy873; + goto yy859; } - yy836: + yy861: yyaccept = 0; yych = *(marker = ++p); if (yych <= '=') { if (yych <= 0x00) - goto yy834; + goto yy859; if (yych == '\n') - goto yy834; - goto yy848; + goto yy859; + goto yy873; } else { if (yych <= 0x7F) { if (yych <= '>') - goto yy856; - goto 
yy848; + goto yy881; + goto yy873; } else { if (yych <= 0xC1) - goto yy834; + goto yy859; if (yych <= 0xF4) - goto yy848; - goto yy834; + goto yy873; + goto yy859; } } - yy837: + yy862: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy847; - goto yy834; - yy838: + goto yy872; + goto yy859; + yy863: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy846; - goto yy834; - yy839: + goto yy871; + goto yy859; + yy864: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy846; - goto yy834; - yy840: + goto yy871; + goto yy859; + yy865: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych <= 0x9F) - goto yy846; - goto yy834; - yy841: + goto yy871; + goto yy859; + yy866: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x8F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy844; - goto yy834; - yy842: + goto yy869; + goto yy859; + yy867: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych <= 0xBF) - goto yy844; - goto yy834; - yy843: + goto yy869; + goto yy859; + yy868: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy834; + goto yy859; if (yych >= 0x90) - goto yy834; - yy844: + goto yy859; + yy869: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych <= 0xBF) - goto yy846; - yy845: + goto yy871; + yy870: p = marker; if (yyaccept == 0) { - goto yy834; + goto yy859; } else { - goto yy857; + goto yy882; } - yy846: + yy871: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych >= 0xC0) - goto yy845; - yy847: + goto yy870; + yy872: ++p; yych = *p; - yy848: + yy873: if (yybm[0 + yych] & 64) { - goto yy847; + goto yy872; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy845; + goto yy870; if (yych >= '@') - goto yy845; + goto yy870; } else { if (yych 
<= 0xDF) - goto yy846; + goto yy871; if (yych <= 0xE0) - goto yy851; - goto yy844; + goto yy876; + goto yy869; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy855; + goto yy880; if (yych <= 0xEF) - goto yy844; - goto yy852; + goto yy869; + goto yy877; } else { if (yych <= 0xF3) - goto yy853; + goto yy878; if (yych <= 0xF4) - goto yy854; - goto yy845; + goto yy879; + goto yy870; } } - yy849: + yy874: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy849; + goto yy874; } if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy845; + goto yy870; if (yych == '\n') - goto yy845; - goto yy847; + goto yy870; + goto yy872; } else { if (yych <= '>') - goto yy856; + goto yy881; if (yych <= 0x7F) - goto yy847; + goto yy872; if (yych <= 0xC1) - goto yy845; - goto yy846; + goto yy870; + goto yy871; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy851; + goto yy876; if (yych == 0xED) - goto yy855; - goto yy844; + goto yy880; + goto yy869; } else { if (yych <= 0xF0) - goto yy852; + goto yy877; if (yych <= 0xF3) - goto yy853; + goto yy878; if (yych <= 0xF4) - goto yy854; - goto yy845; + goto yy879; + goto yy870; } } - yy851: + yy876: ++p; yych = *p; if (yych <= 0x9F) - goto yy845; + goto yy870; if (yych <= 0xBF) - goto yy846; - goto yy845; - yy852: + goto yy871; + goto yy870; + yy877: ++p; yych = *p; if (yych <= 0x8F) - goto yy845; + goto yy870; if (yych <= 0xBF) - goto yy844; - goto yy845; - yy853: + goto yy869; + goto yy870; + yy878: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych <= 0xBF) - goto yy844; - goto yy845; - yy854: + goto yy869; + goto yy870; + yy879: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych <= 0x8F) - goto yy844; - goto yy845; - yy855: + goto yy869; + goto yy870; + yy880: ++p; yych = *p; if (yych <= 0x7F) - goto yy845; + goto yy870; if (yych <= 0x9F) - goto yy846; - goto yy845; - yy856: + goto yy871; + goto yy870; + yy881: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 
64) { - goto yy847; + goto yy872; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy857; + goto yy882; if (yych <= '?') - goto yy849; + goto yy874; } else { if (yych <= 0xDF) - goto yy846; + goto yy871; if (yych <= 0xE0) - goto yy851; - goto yy844; + goto yy876; + goto yy869; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy855; + goto yy880; if (yych <= 0xEF) - goto yy844; - goto yy852; + goto yy869; + goto yy877; } else { if (yych <= 0xF3) - goto yy853; + goto yy878; if (yych <= 0xF4) - goto yy854; + goto yy879; } } - yy857 : { return (bufsize_t)(p - start); } + yy882 : { return (bufsize_t)(p - start); } } } @@ -14259,257 +14515,257 @@ bufsize_t _scan_html_block_end_4(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy860; + goto yy885; if (yych != '\n') - goto yy862; + goto yy887; } else { if (yych <= '>') - goto yy863; + goto yy888; if (yych <= 0x7F) - goto yy862; + goto yy887; if (yych >= 0xC2) - goto yy865; + goto yy890; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy866; + goto yy891; if (yych == 0xED) - goto yy868; - goto yy867; + goto yy893; + goto yy892; } else { if (yych <= 0xF0) - goto yy869; + goto yy894; if (yych <= 0xF3) - goto yy870; + goto yy895; if (yych <= 0xF4) - goto yy871; + goto yy896; } } - yy860: + yy885: ++p; - yy861 : { return 0; } - yy862: + yy886 : { return 0; } + yy887: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy861; + goto yy886; if (yych <= '\t') - goto yy876; - goto yy861; + goto yy901; + goto yy886; } else { if (yych <= 0x7F) - goto yy876; + goto yy901; if (yych <= 0xC1) - goto yy861; + goto yy886; if (yych <= 0xF4) - goto yy876; - goto yy861; + goto yy901; + goto yy886; } - yy863: + yy888: yyaccept = 1; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy864; + goto yy889; if (yych <= '\t') - goto yy876; + goto yy901; } else { if (yych <= 0x7F) - goto yy876; + goto yy901; if (yych <= 0xC1) - 
goto yy864; + goto yy889; if (yych <= 0xF4) - goto yy876; + goto yy901; } - yy864 : { return (bufsize_t)(p - start); } - yy865: + yy889 : { return (bufsize_t)(p - start); } + yy890: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy875; - goto yy861; - yy866: + goto yy900; + goto yy886; + yy891: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy874; - goto yy861; - yy867: + goto yy899; + goto yy886; + yy892: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy874; - goto yy861; - yy868: + goto yy899; + goto yy886; + yy893: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych <= 0x9F) - goto yy874; - goto yy861; - yy869: + goto yy899; + goto yy886; + yy894: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x8F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy872; - goto yy861; - yy870: + goto yy897; + goto yy886; + yy895: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych <= 0xBF) - goto yy872; - goto yy861; - yy871: + goto yy897; + goto yy886; + yy896: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy861; + goto yy886; if (yych >= 0x90) - goto yy861; - yy872: + goto yy886; + yy897: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych <= 0xBF) - goto yy874; - yy873: + goto yy899; + yy898: p = marker; if (yyaccept == 0) { - goto yy861; + goto yy886; } else { - goto yy864; + goto yy889; } - yy874: + yy899: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych >= 0xC0) - goto yy873; - yy875: + goto yy898; + yy900: ++p; yych = *p; - yy876: + yy901: if (yybm[0 + yych] & 64) { - goto yy875; + goto yy900; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy873; + goto yy898; if (yych >= '?') - goto yy873; + goto yy898; } else { if (yych <= 
0xDF) - goto yy874; + goto yy899; if (yych <= 0xE0) - goto yy879; - goto yy872; + goto yy904; + goto yy897; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy883; + goto yy908; if (yych <= 0xEF) - goto yy872; - goto yy880; + goto yy897; + goto yy905; } else { if (yych <= 0xF3) - goto yy881; + goto yy906; if (yych <= 0xF4) - goto yy882; - goto yy873; + goto yy907; + goto yy898; } } - yy877: + yy902: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy875; + goto yy900; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy864; + goto yy889; if (yych <= '>') - goto yy877; - goto yy864; + goto yy902; + goto yy889; } else { if (yych <= 0xDF) - goto yy874; + goto yy899; if (yych >= 0xE1) - goto yy872; + goto yy897; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy883; + goto yy908; if (yych <= 0xEF) - goto yy872; - goto yy880; + goto yy897; + goto yy905; } else { if (yych <= 0xF3) - goto yy881; + goto yy906; if (yych <= 0xF4) - goto yy882; - goto yy864; + goto yy907; + goto yy889; } } - yy879: + yy904: ++p; yych = *p; if (yych <= 0x9F) - goto yy873; + goto yy898; if (yych <= 0xBF) - goto yy874; - goto yy873; - yy880: + goto yy899; + goto yy898; + yy905: ++p; yych = *p; if (yych <= 0x8F) - goto yy873; + goto yy898; if (yych <= 0xBF) - goto yy872; - goto yy873; - yy881: + goto yy897; + goto yy898; + yy906: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych <= 0xBF) - goto yy872; - goto yy873; - yy882: + goto yy897; + goto yy898; + yy907: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych <= 0x8F) - goto yy872; - goto yy873; - yy883: + goto yy897; + goto yy898; + yy908: ++p; yych = *p; if (yych <= 0x7F) - goto yy873; + goto yy898; if (yych <= 0x9F) - goto yy874; - goto yy873; + goto yy899; + goto yy898; } } @@ -14542,334 +14798,334 @@ bufsize_t _scan_html_block_end_5(const unsigned char *p) { if (yych <= 0xDF) { if (yych <= '\\') { if (yych <= 0x00) - goto yy886; + goto yy911; if 
(yych != '\n') - goto yy888; + goto yy913; } else { if (yych <= ']') - goto yy889; + goto yy914; if (yych <= 0x7F) - goto yy888; + goto yy913; if (yych >= 0xC2) - goto yy890; + goto yy915; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy891; + goto yy916; if (yych == 0xED) - goto yy893; - goto yy892; + goto yy918; + goto yy917; } else { if (yych <= 0xF0) - goto yy894; + goto yy919; if (yych <= 0xF3) - goto yy895; + goto yy920; if (yych <= 0xF4) - goto yy896; + goto yy921; } } - yy886: + yy911: ++p; - yy887 : { return 0; } - yy888: + yy912 : { return 0; } + yy913: yyaccept = 0; yych = *(marker = ++p); if (yych <= '\n') { if (yych <= 0x00) - goto yy887; + goto yy912; if (yych <= '\t') - goto yy901; - goto yy887; + goto yy926; + goto yy912; } else { if (yych <= 0x7F) - goto yy901; + goto yy926; if (yych <= 0xC1) - goto yy887; + goto yy912; if (yych <= 0xF4) - goto yy901; - goto yy887; + goto yy926; + goto yy912; } - yy889: + yy914: yyaccept = 0; yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy908; + goto yy933; } if (yych <= '\n') { if (yych <= 0x00) - goto yy887; + goto yy912; if (yych <= '\t') - goto yy901; - goto yy887; + goto yy926; + goto yy912; } else { if (yych <= 0x7F) - goto yy901; + goto yy926; if (yych <= 0xC1) - goto yy887; + goto yy912; if (yych <= 0xF4) - goto yy901; - goto yy887; + goto yy926; + goto yy912; } - yy890: + yy915: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy900; - goto yy887; - yy891: + goto yy925; + goto yy912; + yy916: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x9F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy899; - goto yy887; - yy892: + goto yy924; + goto yy912; + yy917: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy899; - goto yy887; - yy893: + goto yy924; + goto yy912; + yy918: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if 
(yych <= 0x9F) - goto yy899; - goto yy887; - yy894: + goto yy924; + goto yy912; + yy919: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x8F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy897; - goto yy887; - yy895: + goto yy922; + goto yy912; + yy920: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if (yych <= 0xBF) - goto yy897; - goto yy887; - yy896: + goto yy922; + goto yy912; + yy921: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x7F) - goto yy887; + goto yy912; if (yych >= 0x90) - goto yy887; - yy897: + goto yy912; + yy922: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych <= 0xBF) - goto yy899; - yy898: + goto yy924; + yy923: p = marker; if (yyaccept == 0) { - goto yy887; + goto yy912; } else { - goto yy911; + goto yy936; } - yy899: + yy924: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych >= 0xC0) - goto yy898; - yy900: + goto yy923; + yy925: ++p; yych = *p; - yy901: + yy926: if (yybm[0 + yych] & 64) { - goto yy900; + goto yy925; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy898; + goto yy923; if (yych >= '^') - goto yy898; + goto yy923; } else { if (yych <= 0xDF) - goto yy899; + goto yy924; if (yych <= 0xE0) - goto yy903; - goto yy897; + goto yy928; + goto yy922; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy907; + goto yy932; if (yych <= 0xEF) - goto yy897; - goto yy904; + goto yy922; + goto yy929; } else { if (yych <= 0xF3) - goto yy905; + goto yy930; if (yych <= 0xF4) - goto yy906; - goto yy898; + goto yy931; + goto yy923; } } - yy902: + yy927: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy900; + goto yy925; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy898; + goto yy923; if (yych <= ']') - goto yy908; - goto yy898; + goto yy933; + goto yy923; } else { if (yych <= 0xDF) - goto yy899; + goto yy924; if (yych >= 0xE1) - goto yy897; + goto yy922; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - 
goto yy907; + goto yy932; if (yych <= 0xEF) - goto yy897; - goto yy904; + goto yy922; + goto yy929; } else { if (yych <= 0xF3) - goto yy905; + goto yy930; if (yych <= 0xF4) - goto yy906; - goto yy898; + goto yy931; + goto yy923; } } - yy903: + yy928: ++p; yych = *p; if (yych <= 0x9F) - goto yy898; + goto yy923; if (yych <= 0xBF) - goto yy899; - goto yy898; - yy904: + goto yy924; + goto yy923; + yy929: ++p; yych = *p; if (yych <= 0x8F) - goto yy898; + goto yy923; if (yych <= 0xBF) - goto yy897; - goto yy898; - yy905: + goto yy922; + goto yy923; + yy930: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych <= 0xBF) - goto yy897; - goto yy898; - yy906: + goto yy922; + goto yy923; + yy931: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych <= 0x8F) - goto yy897; - goto yy898; - yy907: + goto yy922; + goto yy923; + yy932: ++p; yych = *p; if (yych <= 0x7F) - goto yy898; + goto yy923; if (yych <= 0x9F) - goto yy899; - goto yy898; - yy908: + goto yy924; + goto yy923; + yy933: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy908; + goto yy933; } if (yych <= 0xDF) { if (yych <= '=') { if (yych <= 0x00) - goto yy898; + goto yy923; if (yych == '\n') - goto yy898; - goto yy900; + goto yy923; + goto yy925; } else { if (yych <= '>') - goto yy910; + goto yy935; if (yych <= 0x7F) - goto yy900; + goto yy925; if (yych <= 0xC1) - goto yy898; - goto yy899; + goto yy923; + goto yy924; } } else { if (yych <= 0xEF) { if (yych <= 0xE0) - goto yy903; + goto yy928; if (yych == 0xED) - goto yy907; - goto yy897; + goto yy932; + goto yy922; } else { if (yych <= 0xF0) - goto yy904; + goto yy929; if (yych <= 0xF3) - goto yy905; + goto yy930; if (yych <= 0xF4) - goto yy906; - goto yy898; + goto yy931; + goto yy923; } } - yy910: + yy935: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy900; + goto yy925; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= '\n') - goto yy911; + goto yy936; if (yych <= ']') - goto yy902; + goto 
yy927; } else { if (yych <= 0xDF) - goto yy899; + goto yy924; if (yych <= 0xE0) - goto yy903; - goto yy897; + goto yy928; + goto yy922; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy907; + goto yy932; if (yych <= 0xEF) - goto yy897; - goto yy904; + goto yy922; + goto yy929; } else { if (yych <= 0xF3) - goto yy905; + goto yy930; if (yych <= 0xF4) - goto yy906; + goto yy931; } } - yy911 : { return (bufsize_t)(p - start); } + yy936 : { return (bufsize_t)(p - start); } } } @@ -14907,558 +15163,558 @@ bufsize_t _scan_link_title(const unsigned char *p) { yych = *p; if (yych <= '&') { if (yych == '"') - goto yy916; + goto yy941; } else { if (yych <= '\'') - goto yy917; + goto yy942; if (yych <= '(') - goto yy918; + goto yy943; } ++p; - yy915 : { return 0; } - yy916: + yy940 : { return 0; } + yy941: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x00) - goto yy915; + goto yy940; if (yych <= 0x7F) - goto yy951; + goto yy976; if (yych <= 0xC1) - goto yy915; + goto yy940; if (yych <= 0xF4) - goto yy951; - goto yy915; - yy917: + goto yy976; + goto yy940; + yy942: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x00) - goto yy915; + goto yy940; if (yych <= 0x7F) - goto yy937; + goto yy962; if (yych <= 0xC1) - goto yy915; + goto yy940; if (yych <= 0xF4) - goto yy937; - goto yy915; - yy918: + goto yy962; + goto yy940; + yy943: yyaccept = 0; yych = *(marker = ++p); if (yych <= 0x00) - goto yy915; + goto yy940; if (yych <= 0x7F) - goto yy923; + goto yy948; if (yych <= 0xC1) - goto yy915; + goto yy940; if (yych <= 0xF4) - goto yy923; - goto yy915; - yy919: + goto yy948; + goto yy940; + yy944: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy922; + goto yy947; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= ')') - goto yy933; - goto yy919; + goto yy958; + goto yy944; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych <= 0xDF) - goto yy924; - goto yy925; + goto yy949; + goto yy950; } } else { if (yych 
<= 0xEF) { if (yych == 0xED) - goto yy930; - goto yy926; + goto yy955; + goto yy951; } else { if (yych <= 0xF0) - goto yy927; + goto yy952; if (yych <= 0xF3) - goto yy928; + goto yy953; if (yych <= 0xF4) - goto yy929; + goto yy954; } } - yy921: + yy946: p = marker; if (yyaccept <= 1) { if (yyaccept == 0) { - goto yy915; + goto yy940; } else { - goto yy932; + goto yy957; } } else { if (yyaccept == 2) { - goto yy946; + goto yy971; } else { - goto yy960; + goto yy985; } } - yy922: + yy947: ++p; yych = *p; - yy923: + yy948: if (yybm[0 + yych] & 32) { - goto yy922; + goto yy947; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= ')') - goto yy931; - goto yy919; + goto yy956; + goto yy944; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych >= 0xE0) - goto yy925; + goto yy950; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy930; - goto yy926; + goto yy955; + goto yy951; } else { if (yych <= 0xF0) - goto yy927; + goto yy952; if (yych <= 0xF3) - goto yy928; + goto yy953; if (yych <= 0xF4) - goto yy929; - goto yy921; + goto yy954; + goto yy946; } } - yy924: + yy949: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy922; - goto yy921; - yy925: + goto yy947; + goto yy946; + yy950: ++p; yych = *p; if (yych <= 0x9F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy924; - goto yy921; - yy926: + goto yy949; + goto yy946; + yy951: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy924; - goto yy921; - yy927: + goto yy949; + goto yy946; + yy952: ++p; yych = *p; if (yych <= 0x8F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy926; - goto yy921; - yy928: + goto yy951; + goto yy946; + yy953: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy926; - goto yy921; - yy929: + goto yy951; + goto yy946; + yy954: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x8F) - 
goto yy926; - goto yy921; - yy930: + goto yy951; + goto yy946; + yy955: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x9F) - goto yy924; - goto yy921; - yy931: + goto yy949; + goto yy946; + yy956: ++p; - yy932 : { return (bufsize_t)(p - start); } - yy933: + yy957 : { return (bufsize_t)(p - start); } + yy958: yyaccept = 1; marker = ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy922; + goto yy947; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy932; + goto yy957; if (yych <= ')') - goto yy931; - goto yy919; + goto yy956; + goto yy944; } else { if (yych <= 0xC1) - goto yy932; + goto yy957; if (yych <= 0xDF) - goto yy924; - goto yy925; + goto yy949; + goto yy950; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy930; - goto yy926; + goto yy955; + goto yy951; } else { if (yych <= 0xF0) - goto yy927; + goto yy952; if (yych <= 0xF3) - goto yy928; + goto yy953; if (yych <= 0xF4) - goto yy929; - goto yy932; + goto yy954; + goto yy957; } } - yy934: + yy959: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy936; + goto yy961; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= '\'') - goto yy947; - goto yy934; + goto yy972; + goto yy959; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych <= 0xDF) - goto yy938; - goto yy939; + goto yy963; + goto yy964; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy944; - goto yy940; + goto yy969; + goto yy965; } else { if (yych <= 0xF0) - goto yy941; + goto yy966; if (yych <= 0xF3) - goto yy942; + goto yy967; if (yych <= 0xF4) - goto yy943; - goto yy921; + goto yy968; + goto yy946; } } - yy936: + yy961: ++p; yych = *p; - yy937: + yy962: if (yybm[0 + yych] & 64) { - goto yy936; + goto yy961; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= '\'') - goto yy945; - goto yy934; + goto yy970; + goto yy959; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if 
(yych >= 0xE0) - goto yy939; + goto yy964; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy944; - goto yy940; + goto yy969; + goto yy965; } else { if (yych <= 0xF0) - goto yy941; + goto yy966; if (yych <= 0xF3) - goto yy942; + goto yy967; if (yych <= 0xF4) - goto yy943; - goto yy921; + goto yy968; + goto yy946; } } - yy938: + yy963: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy936; - goto yy921; - yy939: + goto yy961; + goto yy946; + yy964: ++p; yych = *p; if (yych <= 0x9F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy938; - goto yy921; - yy940: + goto yy963; + goto yy946; + yy965: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy938; - goto yy921; - yy941: + goto yy963; + goto yy946; + yy966: ++p; yych = *p; if (yych <= 0x8F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy940; - goto yy921; - yy942: + goto yy965; + goto yy946; + yy967: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy940; - goto yy921; - yy943: + goto yy965; + goto yy946; + yy968: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x8F) - goto yy940; - goto yy921; - yy944: + goto yy965; + goto yy946; + yy969: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x9F) - goto yy938; - goto yy921; - yy945: + goto yy963; + goto yy946; + yy970: ++p; - yy946 : { return (bufsize_t)(p - start); } - yy947: + yy971 : { return (bufsize_t)(p - start); } + yy972: yyaccept = 2; marker = ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy936; + goto yy961; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy946; + goto yy971; if (yych <= '\'') - goto yy945; - goto yy934; + goto yy970; + goto yy959; } else { if (yych <= 0xC1) - goto yy946; + goto yy971; if (yych <= 0xDF) - goto yy938; - goto yy939; + goto yy963; + goto yy964; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy944; - goto yy940; + 
goto yy969; + goto yy965; } else { if (yych <= 0xF0) - goto yy941; + goto yy966; if (yych <= 0xF3) - goto yy942; + goto yy967; if (yych <= 0xF4) - goto yy943; - goto yy946; + goto yy968; + goto yy971; } } - yy948: + yy973: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy950; + goto yy975; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= '"') - goto yy961; - goto yy948; + goto yy986; + goto yy973; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych <= 0xDF) - goto yy952; - goto yy953; + goto yy977; + goto yy978; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy958; - goto yy954; + goto yy983; + goto yy979; } else { if (yych <= 0xF0) - goto yy955; + goto yy980; if (yych <= 0xF3) - goto yy956; + goto yy981; if (yych <= 0xF4) - goto yy957; - goto yy921; + goto yy982; + goto yy946; } } - yy950: + yy975: ++p; yych = *p; - yy951: + yy976: if (yybm[0 + yych] & 128) { - goto yy950; + goto yy975; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy921; + goto yy946; if (yych <= '"') - goto yy959; - goto yy948; + goto yy984; + goto yy973; } else { if (yych <= 0xC1) - goto yy921; + goto yy946; if (yych >= 0xE0) - goto yy953; + goto yy978; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy958; - goto yy954; + goto yy983; + goto yy979; } else { if (yych <= 0xF0) - goto yy955; + goto yy980; if (yych <= 0xF3) - goto yy956; + goto yy981; if (yych <= 0xF4) - goto yy957; - goto yy921; + goto yy982; + goto yy946; } } - yy952: + yy977: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy950; - goto yy921; - yy953: + goto yy975; + goto yy946; + yy978: ++p; yych = *p; if (yych <= 0x9F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy952; - goto yy921; - yy954: + goto yy977; + goto yy946; + yy979: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy952; - goto yy921; - yy955: + goto yy977; + goto yy946; + 
yy980: ++p; yych = *p; if (yych <= 0x8F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy954; - goto yy921; - yy956: + goto yy979; + goto yy946; + yy981: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0xBF) - goto yy954; - goto yy921; - yy957: + goto yy979; + goto yy946; + yy982: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x8F) - goto yy954; - goto yy921; - yy958: + goto yy979; + goto yy946; + yy983: ++p; yych = *p; if (yych <= 0x7F) - goto yy921; + goto yy946; if (yych <= 0x9F) - goto yy952; - goto yy921; - yy959: + goto yy977; + goto yy946; + yy984: ++p; - yy960 : { return (bufsize_t)(p - start); } - yy961: + yy985 : { return (bufsize_t)(p - start); } + yy986: yyaccept = 3; marker = ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy950; + goto yy975; } if (yych <= 0xE0) { if (yych <= '\\') { if (yych <= 0x00) - goto yy960; + goto yy985; if (yych <= '"') - goto yy959; - goto yy948; + goto yy984; + goto yy973; } else { if (yych <= 0xC1) - goto yy960; + goto yy985; if (yych <= 0xDF) - goto yy952; - goto yy953; + goto yy977; + goto yy978; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy958; - goto yy954; + goto yy983; + goto yy979; } else { if (yych <= 0xF0) - goto yy955; + goto yy980; if (yych <= 0xF3) - goto yy956; + goto yy981; if (yych <= 0xF4) - goto yy957; - goto yy960; + goto yy982; + goto yy985; } } } @@ -15487,27 +15743,27 @@ bufsize_t _scan_spacechars(const unsigned char *p) { }; yych = *p; if (yych <= 0x08) - goto yy964; + goto yy989; if (yych <= '\r') - goto yy966; + goto yy991; if (yych == ' ') - goto yy966; - yy964: + goto yy991; + yy989: ++p; { return 0; } - yy966: + yy991: ++p; yych = *p; - goto yy969; - yy967 : { return (bufsize_t)(p - start); } - yy968: + goto yy994; + yy992 : { return (bufsize_t)(p - start); } + yy993: ++p; yych = *p; - yy969: + yy994: if (yybm[0 + yych] & 128) { - goto yy968; + goto yy993; } - goto yy967; + goto yy992; } } @@ -15534,115 +15790,115 @@ 
bufsize_t _scan_atx_heading_start(const unsigned char *p) { }; yych = *p; if (yych == '#') - goto yy974; + goto yy999; ++p; - yy973 : { return 0; } - yy974: + yy998 : { return 0; } + yy999: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy973; + goto yy998; if (yych >= '\v') - goto yy973; + goto yy998; } else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych == '#') - goto yy979; - goto yy973; + goto yy1004; + goto yy998; } - yy975: + yy1000: ++p; - yy976 : { return (bufsize_t)(p - start); } - yy977: + yy1001 : { return (bufsize_t)(p - start); } + yy1002: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } - goto yy976; - yy979: + goto yy1001; + yy1004: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; + goto yy1000; } else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych == '#') - goto yy981; + goto yy1006; } - yy980: + yy1005: p = marker; - goto yy973; - yy981: + goto yy998; + yy1006: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; - goto yy980; + goto yy1000; + goto yy1005; } else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych != '#') - goto yy980; + goto yy1005; } yych = *++p; if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; - goto yy980; + goto yy1000; + goto yy1005; } else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych != '#') - goto yy980; + goto yy1005; } yych = *++p; if (yybm[0 + yych] & 128) { - goto yy977; + goto yy1002; } if (yych <= '\f') { if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; - goto yy980; + goto yy1000; + goto yy1005; } 
else { if (yych <= '\r') - goto yy975; + goto yy1000; if (yych != '#') - goto yy980; + goto yy1005; } ++p; if (yybm[0 + (yych = *p)] & 128) { - goto yy977; + goto yy1002; } if (yych <= 0x08) - goto yy980; + goto yy1005; if (yych <= '\n') - goto yy975; + goto yy1000; if (yych == '\r') - goto yy975; - goto yy980; + goto yy1000; + goto yy1005; } } @@ -15669,126 +15925,126 @@ bufsize_t _scan_setext_heading_line(const unsigned char *p) { }; yych = *p; if (yych == '-') - goto yy989; + goto yy1014; if (yych == '=') - goto yy990; + goto yy1015; ++p; - yy988 : { return 0; } - yy989: + yy1013 : { return 0; } + yy1014: yych = *(marker = ++p); if (yybm[0 + yych] & 128) { - goto yy1002; + goto yy1027; } if (yych <= '\f') { if (yych <= 0x08) - goto yy988; + goto yy1013; if (yych <= '\n') - goto yy999; - goto yy988; + goto yy1024; + goto yy1013; } else { if (yych <= '\r') - goto yy999; + goto yy1024; if (yych == ' ') - goto yy999; - goto yy988; + goto yy1024; + goto yy1013; } - yy990: + yy1015: yych = *(marker = ++p); if (yybm[0 + yych] & 64) { - goto yy996; + goto yy1021; } if (yych <= '\f') { if (yych <= 0x08) - goto yy988; + goto yy1013; if (yych <= '\n') - goto yy992; - goto yy988; + goto yy1017; + goto yy1013; } else { if (yych <= '\r') - goto yy992; + goto yy1017; if (yych == ' ') - goto yy992; - goto yy988; + goto yy1017; + goto yy1013; } - yy991: + yy1016: ++p; yych = *p; - yy992: + yy1017: if (yybm[0 + yych] & 32) { - goto yy991; + goto yy1016; } if (yych <= 0x08) - goto yy993; + goto yy1018; if (yych <= '\n') - goto yy994; + goto yy1019; if (yych == '\r') - goto yy994; - yy993: + goto yy1019; + yy1018: p = marker; - goto yy988; - yy994: + goto yy1013; + yy1019: ++p; { return 1; } - yy996: + yy1021: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy991; + goto yy1016; } if (yych <= '\f') { if (yych <= 0x08) - goto yy993; + goto yy1018; if (yych <= '\n') - goto yy994; - goto yy993; + goto yy1019; + goto yy1018; } else { if (yych <= '\r') - goto yy994; + goto yy1019; if 
(yych == '=') - goto yy996; - goto yy993; + goto yy1021; + goto yy1018; } - yy998: + yy1023: ++p; yych = *p; - yy999: + yy1024: if (yych <= '\f') { if (yych <= 0x08) - goto yy993; + goto yy1018; if (yych <= '\t') - goto yy998; + goto yy1023; if (yych >= '\v') - goto yy993; + goto yy1018; } else { if (yych <= '\r') - goto yy1000; + goto yy1025; if (yych == ' ') - goto yy998; - goto yy993; + goto yy1023; + goto yy1018; } - yy1000: + yy1025: ++p; { return 2; } - yy1002: + yy1027: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy1002; + goto yy1027; } if (yych <= '\f') { if (yych <= 0x08) - goto yy993; + goto yy1018; if (yych <= '\t') - goto yy998; + goto yy1023; if (yych <= '\n') - goto yy1000; - goto yy993; + goto yy1025; + goto yy1018; } else { if (yych <= '\r') - goto yy1000; + goto yy1025; if (yych == ' ') - goto yy998; - goto yy993; + goto yy1023; + goto yy1018; } } } @@ -15820,248 +16076,248 @@ bufsize_t _scan_thematic_break(const unsigned char *p) { yych = *p; if (yych <= ',') { if (yych == '*') - goto yy1008; + goto yy1033; } else { if (yych <= '-') - goto yy1009; + goto yy1034; if (yych == '_') - goto yy1010; + goto yy1035; } ++p; - yy1007 : { return 0; } - yy1008: + yy1032 : { return 0; } + yy1033: yych = *(marker = ++p); if (yych <= 0x1F) { if (yych == '\t') - goto yy1032; - goto yy1007; + goto yy1057; + goto yy1032; } else { if (yych <= ' ') - goto yy1032; + goto yy1057; if (yych == '*') - goto yy1034; - goto yy1007; + goto yy1059; + goto yy1032; } - yy1009: + yy1034: yych = *(marker = ++p); if (yych <= 0x1F) { if (yych == '\t') - goto yy1022; - goto yy1007; + goto yy1047; + goto yy1032; } else { if (yych <= ' ') - goto yy1022; + goto yy1047; if (yych == '-') - goto yy1024; - goto yy1007; + goto yy1049; + goto yy1032; } - yy1010: + yy1035: yych = *(marker = ++p); if (yybm[0 + yych] & 16) { - goto yy1011; + goto yy1036; } if (yych == '_') - goto yy1014; - goto yy1007; - yy1011: + goto yy1039; + goto yy1032; + yy1036: ++p; yych = *p; if (yybm[0 + yych] & 
16) { - goto yy1011; + goto yy1036; } if (yych == '_') - goto yy1014; - yy1013: + goto yy1039; + yy1038: p = marker; - goto yy1007; - yy1014: + goto yy1032; + yy1039: ++p; yych = *p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1014; - goto yy1013; + goto yy1039; + goto yy1038; } else { if (yych <= ' ') - goto yy1014; + goto yy1039; if (yych != '_') - goto yy1013; + goto yy1038; } - yy1016: + yy1041: ++p; yych = *p; if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy1013; - goto yy1016; + goto yy1038; + goto yy1041; } else { if (yych <= '\n') - goto yy1018; + goto yy1043; if (yych <= '\f') - goto yy1013; + goto yy1038; } } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy1013; - goto yy1016; + goto yy1038; + goto yy1041; } else { if (yych == '_') - goto yy1020; - goto yy1013; + goto yy1045; + goto yy1038; } } - yy1018: + yy1043: ++p; { return (bufsize_t)(p - start); } - yy1020: + yy1045: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy1020; + goto yy1045; } if (yych <= 0x08) - goto yy1013; + goto yy1038; if (yych <= '\n') - goto yy1018; + goto yy1043; if (yych == '\r') - goto yy1018; - goto yy1013; - yy1022: + goto yy1043; + goto yy1038; + yy1047: ++p; yych = *p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1022; - goto yy1013; + goto yy1047; + goto yy1038; } else { if (yych <= ' ') - goto yy1022; + goto yy1047; if (yych != '-') - goto yy1013; + goto yy1038; } - yy1024: + yy1049: ++p; yych = *p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1024; - goto yy1013; + goto yy1049; + goto yy1038; } else { if (yych <= ' ') - goto yy1024; + goto yy1049; if (yych != '-') - goto yy1013; + goto yy1038; } - yy1026: + yy1051: ++p; yych = *p; if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy1013; - goto yy1026; + goto yy1038; + goto yy1051; } else { if (yych <= '\n') - goto yy1028; + goto yy1053; if (yych <= '\f') - goto yy1013; + goto yy1038; } } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy1013; - goto yy1026; + goto 
yy1038; + goto yy1051; } else { if (yych == '-') - goto yy1030; - goto yy1013; + goto yy1055; + goto yy1038; } } - yy1028: + yy1053: ++p; { return (bufsize_t)(p - start); } - yy1030: + yy1055: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy1030; + goto yy1055; } if (yych <= 0x08) - goto yy1013; + goto yy1038; if (yych <= '\n') - goto yy1028; + goto yy1053; if (yych == '\r') - goto yy1028; - goto yy1013; - yy1032: + goto yy1053; + goto yy1038; + yy1057: ++p; yych = *p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1032; - goto yy1013; + goto yy1057; + goto yy1038; } else { if (yych <= ' ') - goto yy1032; + goto yy1057; if (yych != '*') - goto yy1013; + goto yy1038; } - yy1034: + yy1059: ++p; yych = *p; if (yych <= 0x1F) { if (yych == '\t') - goto yy1034; - goto yy1013; + goto yy1059; + goto yy1038; } else { if (yych <= ' ') - goto yy1034; + goto yy1059; if (yych != '*') - goto yy1013; + goto yy1038; } - yy1036: + yy1061: ++p; yych = *p; if (yych <= '\r') { if (yych <= '\t') { if (yych <= 0x08) - goto yy1013; - goto yy1036; + goto yy1038; + goto yy1061; } else { if (yych <= '\n') - goto yy1038; + goto yy1063; if (yych <= '\f') - goto yy1013; + goto yy1038; } } else { if (yych <= ' ') { if (yych <= 0x1F) - goto yy1013; - goto yy1036; + goto yy1038; + goto yy1061; } else { if (yych == '*') - goto yy1040; - goto yy1013; + goto yy1065; + goto yy1038; } } - yy1038: + yy1063: ++p; { return (bufsize_t)(p - start); } - yy1040: + yy1065: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy1040; + goto yy1065; } if (yych <= 0x08) - goto yy1013; - if (yych <= '\n') goto yy1038; + if (yych <= '\n') + goto yy1063; if (yych == '\r') - goto yy1038; - goto yy1013; + goto yy1063; + goto yy1038; } } @@ -16095,292 +16351,292 @@ bufsize_t _scan_open_code_fence(const unsigned char *p) { }; yych = *p; if (yych == '`') - goto yy1046; + goto yy1071; if (yych == '~') - goto yy1047; + goto yy1072; ++p; - yy1045 : { return 0; } - yy1046: + yy1070 : { return 0; } + yy1071: yych = 
*(marker = ++p); if (yych == '`') - goto yy1063; - goto yy1045; - yy1047: + goto yy1088; + goto yy1070; + yy1072: yych = *(marker = ++p); if (yych != '~') - goto yy1045; + goto yy1070; yych = *++p; if (yybm[0 + yych] & 16) { - goto yy1050; + goto yy1075; } - yy1049: + yy1074: p = marker; - goto yy1045; - yy1050: + goto yy1070; + yy1075: ++p; yych = *p; marker = p; if (yybm[0 + yych] & 32) { - goto yy1052; + goto yy1077; } if (yych <= 0xE0) { if (yych <= '~') { if (yych <= 0x00) - goto yy1049; + goto yy1074; if (yych <= '\r') - goto yy1061; - goto yy1050; + goto yy1086; + goto yy1075; } else { if (yych <= 0xC1) - goto yy1049; + goto yy1074; if (yych <= 0xDF) - goto yy1054; - goto yy1055; + goto yy1079; + goto yy1080; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy1060; - goto yy1056; + goto yy1085; + goto yy1081; } else { if (yych <= 0xF0) - goto yy1057; + goto yy1082; if (yych <= 0xF3) - goto yy1058; + goto yy1083; if (yych <= 0xF4) - goto yy1059; - goto yy1049; + goto yy1084; + goto yy1074; } } - yy1052: + yy1077: ++p; yych = *p; if (yybm[0 + yych] & 32) { - goto yy1052; + goto yy1077; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x00) - goto yy1049; + goto yy1074; if (yych <= '\r') - goto yy1061; - goto yy1049; + goto yy1086; + goto yy1074; } else { if (yych <= 0xDF) - goto yy1054; + goto yy1079; if (yych <= 0xE0) - goto yy1055; - goto yy1056; + goto yy1080; + goto yy1081; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy1060; + goto yy1085; if (yych <= 0xEF) - goto yy1056; - goto yy1057; + goto yy1081; + goto yy1082; } else { if (yych <= 0xF3) - goto yy1058; + goto yy1083; if (yych <= 0xF4) - goto yy1059; - goto yy1049; + goto yy1084; + goto yy1074; } } - yy1054: + yy1079: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1052; - goto yy1049; - yy1055: + goto yy1077; + goto yy1074; + yy1080: ++p; yych = *p; if (yych <= 0x9F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1054; - goto 
yy1049; - yy1056: + goto yy1079; + goto yy1074; + yy1081: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1054; - goto yy1049; - yy1057: + goto yy1079; + goto yy1074; + yy1082: ++p; yych = *p; if (yych <= 0x8F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1056; - goto yy1049; - yy1058: + goto yy1081; + goto yy1074; + yy1083: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1056; - goto yy1049; - yy1059: + goto yy1081; + goto yy1074; + yy1084: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0x8F) - goto yy1056; - goto yy1049; - yy1060: + goto yy1081; + goto yy1074; + yy1085: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0x9F) - goto yy1054; - goto yy1049; - yy1061: + goto yy1079; + goto yy1074; + yy1086: ++p; p = marker; { return (bufsize_t)(p - start); } - yy1063: + yy1088: yych = *++p; if (yybm[0 + yych] & 64) { - goto yy1064; + goto yy1089; } - goto yy1049; - yy1064: + goto yy1074; + yy1089: ++p; yych = *p; marker = p; if (yybm[0 + yych] & 128) { - goto yy1066; + goto yy1091; } if (yych <= 0xE0) { if (yych <= '`') { if (yych <= 0x00) - goto yy1049; + goto yy1074; if (yych <= '\r') - goto yy1075; - goto yy1064; + goto yy1100; + goto yy1089; } else { if (yych <= 0xC1) - goto yy1049; + goto yy1074; if (yych <= 0xDF) - goto yy1068; - goto yy1069; + goto yy1093; + goto yy1094; } } else { if (yych <= 0xEF) { if (yych == 0xED) - goto yy1074; - goto yy1070; + goto yy1099; + goto yy1095; } else { if (yych <= 0xF0) - goto yy1071; + goto yy1096; if (yych <= 0xF3) - goto yy1072; + goto yy1097; if (yych <= 0xF4) - goto yy1073; - goto yy1049; + goto yy1098; + goto yy1074; } } - yy1066: + yy1091: ++p; yych = *p; if (yybm[0 + yych] & 128) { - goto yy1066; + goto yy1091; } if (yych <= 0xEC) { if (yych <= 0xC1) { if (yych <= 0x00) - goto yy1049; + goto yy1074; if (yych <= '\r') - goto yy1075; - goto yy1049; + goto yy1100; + goto 
yy1074; } else { if (yych <= 0xDF) - goto yy1068; + goto yy1093; if (yych <= 0xE0) - goto yy1069; - goto yy1070; + goto yy1094; + goto yy1095; } } else { if (yych <= 0xF0) { if (yych <= 0xED) - goto yy1074; + goto yy1099; if (yych <= 0xEF) - goto yy1070; - goto yy1071; + goto yy1095; + goto yy1096; } else { if (yych <= 0xF3) - goto yy1072; + goto yy1097; if (yych <= 0xF4) - goto yy1073; - goto yy1049; + goto yy1098; + goto yy1074; } } - yy1068: + yy1093: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1066; - goto yy1049; - yy1069: + goto yy1091; + goto yy1074; + yy1094: ++p; yych = *p; if (yych <= 0x9F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1068; - goto yy1049; - yy1070: + goto yy1093; + goto yy1074; + yy1095: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1068; - goto yy1049; - yy1071: + goto yy1093; + goto yy1074; + yy1096: ++p; yych = *p; if (yych <= 0x8F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1070; - goto yy1049; - yy1072: + goto yy1095; + goto yy1074; + yy1097: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0xBF) - goto yy1070; - goto yy1049; - yy1073: + goto yy1095; + goto yy1074; + yy1098: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0x8F) - goto yy1070; - goto yy1049; - yy1074: + goto yy1095; + goto yy1074; + yy1099: ++p; yych = *p; if (yych <= 0x7F) - goto yy1049; + goto yy1074; if (yych <= 0x9F) - goto yy1068; - goto yy1049; - yy1075: + goto yy1093; + goto yy1074; + yy1100: ++p; p = marker; { return (bufsize_t)(p - start); } @@ -16410,108 +16666,108 @@ bufsize_t _scan_close_code_fence(const unsigned char *p) { }; yych = *p; if (yych == '`') - goto yy1081; + goto yy1106; if (yych == '~') - goto yy1082; + goto yy1107; ++p; - yy1080 : { return 0; } - yy1081: + yy1105 : { return 0; } + yy1106: yych = *(marker = ++p); if (yych == '`') - goto yy1091; - goto yy1080; - yy1082: + 
goto yy1116; + goto yy1105; + yy1107: yych = *(marker = ++p); if (yych != '~') - goto yy1080; + goto yy1105; yych = *++p; if (yybm[0 + yych] & 32) { - goto yy1085; + goto yy1110; } - yy1084: + yy1109: p = marker; - goto yy1080; - yy1085: + goto yy1105; + yy1110: ++p; yych = *p; marker = p; if (yybm[0 + yych] & 64) { - goto yy1087; + goto yy1112; } if (yych <= '\f') { if (yych <= 0x08) - goto yy1084; + goto yy1109; if (yych <= '\n') - goto yy1089; - goto yy1084; + goto yy1114; + goto yy1109; } else { if (yych <= '\r') - goto yy1089; + goto yy1114; if (yych == '~') - goto yy1085; - goto yy1084; + goto yy1110; + goto yy1109; } - yy1087: + yy1112: ++p; yych = *p; if (yybm[0 + yych] & 64) { - goto yy1087; + goto yy1112; } if (yych <= 0x08) - goto yy1084; + goto yy1109; if (yych <= '\n') - goto yy1089; + goto yy1114; if (yych != '\r') - goto yy1084; - yy1089: + goto yy1109; + yy1114: ++p; p = marker; { return (bufsize_t)(p - start); } - yy1091: + yy1116: yych = *++p; if (yybm[0 + yych] & 128) { - goto yy1092; + goto yy1117; } - goto yy1084; - yy1092: + goto yy1109; + yy1117: ++p; yych = *p; marker = p; if (yybm[0 + yych] & 128) { - goto yy1092; + goto yy1117; } if (yych <= '\f') { if (yych <= 0x08) - goto yy1084; + goto yy1109; if (yych <= '\t') - goto yy1094; + goto yy1119; if (yych <= '\n') - goto yy1096; - goto yy1084; + goto yy1121; + goto yy1109; } else { if (yych <= '\r') - goto yy1096; + goto yy1121; if (yych != ' ') - goto yy1084; + goto yy1109; } - yy1094: + yy1119: ++p; yych = *p; if (yych <= '\f') { if (yych <= 0x08) - goto yy1084; + goto yy1109; if (yych <= '\t') - goto yy1094; + goto yy1119; if (yych >= '\v') - goto yy1084; + goto yy1109; } else { if (yych <= '\r') - goto yy1096; + goto yy1121; if (yych == ' ') - goto yy1094; - goto yy1084; + goto yy1119; + goto yy1109; } - yy1096: + yy1121: ++p; p = marker; { return (bufsize_t)(p - start); } @@ -16528,919 +16784,919 @@ bufsize_t _scan_entity(const unsigned char *p) { unsigned char yych; yych = *p; if (yych 
== '&') - goto yy1102; + goto yy1127; ++p; - yy1101 : { return 0; } - yy1102: + yy1126 : { return 0; } + yy1127: yych = *(marker = ++p); if (yych <= '@') { if (yych != '#') - goto yy1101; + goto yy1126; } else { if (yych <= 'Z') - goto yy1105; + goto yy1130; if (yych <= '`') - goto yy1101; + goto yy1126; if (yych <= 'z') - goto yy1105; - goto yy1101; + goto yy1130; + goto yy1126; } yych = *++p; if (yych <= 'W') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1140; + goto yy1165; } else { if (yych <= 'X') - goto yy1139; + goto yy1164; if (yych == 'x') - goto yy1139; + goto yy1164; } - yy1104: + yy1129: p = marker; - goto yy1101; - yy1105: + goto yy1126; + yy1130: yych = *++p; if (yych <= '@') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych >= ':') - goto yy1104; + goto yy1129; } else { if (yych <= 'Z') - goto yy1106; + goto yy1131; if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } - yy1106: + yy1131: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1109; + goto yy1134; if (yych <= ':') - goto yy1104; + goto yy1129; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; - goto yy1109; + goto yy1129; + goto yy1134; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych <= 'z') - goto yy1109; - goto yy1104; + goto yy1134; + goto yy1129; } } - yy1107: + yy1132: ++p; { return (bufsize_t)(p - start); } - yy1109: + yy1134: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1110; + goto yy1135; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1110: + yy1135: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto 
yy1111; + goto yy1136; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1111: + yy1136: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1112; + goto yy1137; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1112: + yy1137: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1113; + goto yy1138; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1113: + yy1138: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1114; + goto yy1139; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1114: + yy1139: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1115; + goto yy1140; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1115: + yy1140: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto 
yy1129; if (yych <= '9') - goto yy1116; + goto yy1141; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1116: + yy1141: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1117; + goto yy1142; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1117: + yy1142: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1118; + goto yy1143; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1118: + yy1143: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1119; + goto yy1144; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1119: + yy1144: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1120; + goto yy1145; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1120: + yy1145: yych = *++p; if (yych <= ';') { if (yych <= 
'/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1121; + goto yy1146; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1121: + yy1146: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1122; + goto yy1147; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1122: + yy1147: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1123; + goto yy1148; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1123: + yy1148: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1124; + goto yy1149; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1124: + yy1149: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1125; + goto yy1150; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1125: + yy1150: yych = *++p; if 
(yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1126; + goto yy1151; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1126: + yy1151: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1127; + goto yy1152; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1127: + yy1152: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1128; + goto yy1153; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1128: + yy1153: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; - if (yych <= '9') goto yy1129; + if (yych <= '9') + goto yy1154; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1129: + yy1154: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1130; + goto yy1155; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - 
yy1130: + yy1155: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1131; + goto yy1156; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1131: + yy1156: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1132; + goto yy1157; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1132: + yy1157: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1133; + goto yy1158; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1133: + yy1158: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1134; + goto yy1159; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1134: + yy1159: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1135; + goto yy1160; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto 
yy1104; + goto yy1129; } } - yy1135: + yy1160: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1136; + goto yy1161; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1136: + yy1161: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1137; + goto yy1162; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1137: + yy1162: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1138; + goto yy1163; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'Z') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= '{') - goto yy1104; + goto yy1129; } } - yy1138: + yy1163: yych = *++p; if (yych == ';') - goto yy1107; - goto yy1104; - yy1139: + goto yy1132; + goto yy1129; + yy1164: yych = *++p; if (yych <= '@') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1147; - goto yy1104; + goto yy1172; + goto yy1129; } else { if (yych <= 'F') - goto yy1147; + goto yy1172; if (yych <= '`') - goto yy1104; + goto yy1129; if (yych <= 'f') - goto yy1147; - goto yy1104; + goto yy1172; + goto yy1129; } - yy1140: + yy1165: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1141; + goto yy1166; if (yych == ';') - goto yy1107; - goto yy1104; - yy1141: + goto yy1132; + goto yy1129; + yy1166: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if 
(yych <= '9') - goto yy1142; + goto yy1167; if (yych == ';') - goto yy1107; - goto yy1104; - yy1142: + goto yy1132; + goto yy1129; + yy1167: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1143; + goto yy1168; if (yych == ';') - goto yy1107; - goto yy1104; - yy1143: + goto yy1132; + goto yy1129; + yy1168: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1144; + goto yy1169; if (yych == ';') - goto yy1107; - goto yy1104; - yy1144: + goto yy1132; + goto yy1129; + yy1169: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1145; + goto yy1170; if (yych == ';') - goto yy1107; - goto yy1104; - yy1145: + goto yy1132; + goto yy1129; + yy1170: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1146; + goto yy1171; if (yych == ';') - goto yy1107; - goto yy1104; - yy1146: + goto yy1132; + goto yy1129; + yy1171: yych = *++p; if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1138; + goto yy1163; if (yych == ';') - goto yy1107; - goto yy1104; - yy1147: + goto yy1132; + goto yy1129; + yy1172: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1148; + goto yy1173; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1148: + yy1173: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1149; + goto yy1174; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1149: + yy1174: yych = *++p; if (yych <= 
';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1150; + goto yy1175; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1150: + yy1175: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1151; + goto yy1176; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1151: + yy1176: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1152; + goto yy1177; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1152: + yy1177: yych = *++p; if (yych <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1153; + goto yy1178; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; + goto yy1129; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych >= 'g') - goto yy1104; + goto yy1129; } } - yy1153: + yy1178: ++p; if ((yych = *p) <= ';') { if (yych <= '/') - goto yy1104; + goto yy1129; if (yych <= '9') - goto yy1138; + goto yy1163; if (yych <= ':') - goto yy1104; - goto yy1107; + goto yy1129; + goto yy1132; } else { if (yych <= 'F') { if (yych <= '@') - goto yy1104; - goto yy1138; + goto yy1129; + goto yy1163; } else { if (yych <= '`') - goto yy1104; + goto yy1129; if (yych <= 'f') - goto yy1138; - goto 
yy1104; + goto yy1163; + goto yy1129; } } } @@ -17459,335 +17715,335 @@ bufsize_t _scan_dangerous_url(const unsigned char *p) { if (yych <= 'V') { if (yych <= 'F') { if (yych == 'D') - goto yy1158; + goto yy1183; if (yych >= 'F') - goto yy1159; + goto yy1184; } else { if (yych == 'J') - goto yy1160; + goto yy1185; if (yych >= 'V') - goto yy1161; + goto yy1186; } } else { if (yych <= 'f') { if (yych == 'd') - goto yy1158; + goto yy1183; if (yych >= 'f') - goto yy1159; + goto yy1184; } else { if (yych <= 'j') { if (yych >= 'j') - goto yy1160; + goto yy1185; } else { if (yych == 'v') - goto yy1161; + goto yy1186; } } } ++p; - yy1157 : { return 0; } - yy1158: + yy1182 : { return 0; } + yy1183: yyaccept = 0; yych = *(marker = ++p); if (yych == 'A') - goto yy1184; + goto yy1209; if (yych == 'a') - goto yy1184; - goto yy1157; - yy1159: + goto yy1209; + goto yy1182; + yy1184: yyaccept = 0; yych = *(marker = ++p); if (yych == 'I') - goto yy1181; + goto yy1206; if (yych == 'i') - goto yy1181; - goto yy1157; - yy1160: + goto yy1206; + goto yy1182; + yy1185: yyaccept = 0; yych = *(marker = ++p); if (yych == 'A') - goto yy1172; + goto yy1197; if (yych == 'a') - goto yy1172; - goto yy1157; - yy1161: + goto yy1197; + goto yy1182; + yy1186: yyaccept = 0; yych = *(marker = ++p); if (yych == 'B') - goto yy1162; + goto yy1187; if (yych != 'b') - goto yy1157; - yy1162: + goto yy1182; + yy1187: yych = *++p; if (yych == 'S') - goto yy1164; + goto yy1189; if (yych == 's') - goto yy1164; - yy1163: + goto yy1189; + yy1188: p = marker; if (yyaccept == 0) { - goto yy1157; + goto yy1182; } else { - goto yy1171; + goto yy1196; } - yy1164: + yy1189: yych = *++p; if (yych == 'C') - goto yy1165; + goto yy1190; if (yych != 'c') - goto yy1163; - yy1165: + goto yy1188; + yy1190: yych = *++p; if (yych == 'R') - goto yy1166; + goto yy1191; if (yych != 'r') - goto yy1163; - yy1166: + goto yy1188; + yy1191: yych = *++p; if (yych == 'I') - goto yy1167; + goto yy1192; if (yych != 'i') - goto yy1163; - 
yy1167: + goto yy1188; + yy1192: yych = *++p; if (yych == 'P') - goto yy1168; + goto yy1193; if (yych != 'p') - goto yy1163; - yy1168: + goto yy1188; + yy1193: yych = *++p; if (yych == 'T') - goto yy1169; + goto yy1194; if (yych != 't') - goto yy1163; - yy1169: + goto yy1188; + yy1194: yych = *++p; if (yych != ':') - goto yy1163; - yy1170: + goto yy1188; + yy1195: ++p; - yy1171 : { return (bufsize_t)(p - start); } - yy1172: + yy1196 : { return (bufsize_t)(p - start); } + yy1197: yych = *++p; if (yych == 'V') - goto yy1173; + goto yy1198; if (yych != 'v') - goto yy1163; - yy1173: + goto yy1188; + yy1198: yych = *++p; if (yych == 'A') - goto yy1174; + goto yy1199; if (yych != 'a') - goto yy1163; - yy1174: + goto yy1188; + yy1199: yych = *++p; if (yych == 'S') - goto yy1175; + goto yy1200; if (yych != 's') - goto yy1163; - yy1175: + goto yy1188; + yy1200: yych = *++p; if (yych == 'C') - goto yy1176; + goto yy1201; if (yych != 'c') - goto yy1163; - yy1176: + goto yy1188; + yy1201: yych = *++p; if (yych == 'R') - goto yy1177; + goto yy1202; if (yych != 'r') - goto yy1163; - yy1177: + goto yy1188; + yy1202: yych = *++p; if (yych == 'I') - goto yy1178; + goto yy1203; if (yych != 'i') - goto yy1163; - yy1178: + goto yy1188; + yy1203: yych = *++p; if (yych == 'P') - goto yy1179; + goto yy1204; if (yych != 'p') - goto yy1163; - yy1179: + goto yy1188; + yy1204: yych = *++p; if (yych == 'T') - goto yy1180; + goto yy1205; if (yych != 't') - goto yy1163; - yy1180: + goto yy1188; + yy1205: yych = *++p; if (yych == ':') - goto yy1170; - goto yy1163; - yy1181: + goto yy1195; + goto yy1188; + yy1206: yych = *++p; if (yych == 'L') - goto yy1182; + goto yy1207; if (yych != 'l') - goto yy1163; - yy1182: + goto yy1188; + yy1207: yych = *++p; if (yych == 'E') - goto yy1183; + goto yy1208; if (yych != 'e') - goto yy1163; - yy1183: + goto yy1188; + yy1208: yych = *++p; if (yych == ':') - goto yy1170; - goto yy1163; - yy1184: + goto yy1195; + goto yy1188; + yy1209: yych = *++p; if (yych == 
'T') - goto yy1185; + goto yy1210; if (yych != 't') - goto yy1163; - yy1185: + goto yy1188; + yy1210: yych = *++p; if (yych == 'A') - goto yy1186; + goto yy1211; if (yych != 'a') - goto yy1163; - yy1186: + goto yy1188; + yy1211: yych = *++p; if (yych != ':') - goto yy1163; + goto yy1188; yyaccept = 1; yych = *(marker = ++p); if (yych == 'I') - goto yy1188; + goto yy1213; if (yych != 'i') - goto yy1171; - yy1188: + goto yy1196; + yy1213: yych = *++p; if (yych == 'M') - goto yy1189; + goto yy1214; if (yych != 'm') - goto yy1163; - yy1189: + goto yy1188; + yy1214: yych = *++p; if (yych == 'A') - goto yy1190; + goto yy1215; if (yych != 'a') - goto yy1163; - yy1190: + goto yy1188; + yy1215: yych = *++p; if (yych == 'G') - goto yy1191; + goto yy1216; if (yych != 'g') - goto yy1163; - yy1191: + goto yy1188; + yy1216: yych = *++p; if (yych == 'E') - goto yy1192; + goto yy1217; if (yych != 'e') - goto yy1163; - yy1192: + goto yy1188; + yy1217: yych = *++p; if (yych != '/') - goto yy1163; + goto yy1188; yych = *++p; if (yych <= 'W') { if (yych <= 'J') { if (yych == 'G') - goto yy1195; + goto yy1220; if (yych <= 'I') - goto yy1163; - goto yy1196; + goto yy1188; + goto yy1221; } else { if (yych == 'P') - goto yy1194; + goto yy1219; if (yych <= 'V') - goto yy1163; - goto yy1197; + goto yy1188; + goto yy1222; } } else { if (yych <= 'j') { if (yych == 'g') - goto yy1195; + goto yy1220; if (yych <= 'i') - goto yy1163; - goto yy1196; + goto yy1188; + goto yy1221; } else { if (yych <= 'p') { if (yych <= 'o') - goto yy1163; + goto yy1188; } else { if (yych == 'w') - goto yy1197; - goto yy1163; + goto yy1222; + goto yy1188; } } } - yy1194: + yy1219: yych = *++p; if (yych == 'N') - goto yy1205; + goto yy1230; if (yych == 'n') - goto yy1205; - goto yy1163; - yy1195: + goto yy1230; + goto yy1188; + yy1220: yych = *++p; if (yych == 'I') - goto yy1204; + goto yy1229; if (yych == 'i') - goto yy1204; - goto yy1163; - yy1196: + goto yy1229; + goto yy1188; + yy1221: yych = *++p; if (yych == 
'P') - goto yy1202; + goto yy1227; if (yych == 'p') - goto yy1202; - goto yy1163; - yy1197: + goto yy1227; + goto yy1188; + yy1222: yych = *++p; if (yych == 'E') - goto yy1198; + goto yy1223; if (yych != 'e') - goto yy1163; - yy1198: + goto yy1188; + yy1223: yych = *++p; if (yych == 'B') - goto yy1199; + goto yy1224; if (yych != 'b') - goto yy1163; - yy1199: + goto yy1188; + yy1224: yych = *++p; if (yych == 'P') - goto yy1200; + goto yy1225; if (yych != 'p') - goto yy1163; - yy1200: + goto yy1188; + yy1225: ++p; { return 0; } - yy1202: + yy1227: yych = *++p; if (yych == 'E') - goto yy1203; + goto yy1228; if (yych != 'e') - goto yy1163; - yy1203: + goto yy1188; + yy1228: yych = *++p; if (yych == 'G') - goto yy1200; + goto yy1225; if (yych == 'g') - goto yy1200; - goto yy1163; - yy1204: + goto yy1225; + goto yy1188; + yy1229: yych = *++p; if (yych == 'F') - goto yy1200; + goto yy1225; if (yych == 'f') - goto yy1200; - goto yy1163; - yy1205: + goto yy1225; + goto yy1188; + yy1230: ++p; if ((yych = *p) == 'G') - goto yy1200; + goto yy1225; if (yych == 'g') - goto yy1200; - goto yy1163; + goto yy1225; + goto yy1188; } } diff --git a/cbits/scanners.h b/cbits/scanners.h index 207f91a..d54d9d2 100644 --- a/cbits/scanners.h +++ b/cbits/scanners.h @@ -1,3 +1,6 @@ +#ifndef CMARK_SCANNERS_H +#define CMARK_SCANNERS_H + #include "cmark.h" #include "chunk.h" @@ -11,6 +14,7 @@ bufsize_t _scan_scheme(const unsigned char *p); bufsize_t _scan_autolink_uri(const unsigned char *p); bufsize_t _scan_autolink_email(const unsigned char *p); bufsize_t _scan_html_tag(const unsigned char *p); +bufsize_t _scan_liberal_html_tag(const unsigned char *p); bufsize_t _scan_html_block_start(const unsigned char *p); bufsize_t _scan_html_block_start_7(const unsigned char *p); bufsize_t _scan_html_block_end_1(const unsigned char *p); @@ -32,6 +36,7 @@ bufsize_t _scan_dangerous_url(const unsigned char *p); #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) #define scan_autolink_email(c, 
n) _scan_at(&_scan_autolink_email, c, n) #define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n) +#define scan_liberal_html_tag(c, n) _scan_at(&_scan_liberal_html_tag, c, n) #define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n) #define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n) #define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n) @@ -53,3 +58,5 @@ bufsize_t _scan_dangerous_url(const unsigned char *p); #ifdef __cplusplus } #endif + +#endif diff --git a/cbits/strikethrough.c b/cbits/strikethrough.c new file mode 100644 index 0000000..802a6bb --- /dev/null +++ b/cbits/strikethrough.c @@ -0,0 +1,151 @@ +#include "strikethrough.h" +#include + +cmark_node_type CMARK_NODE_STRIKETHROUGH; + +static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, + cmark_node *parent, unsigned char character, + cmark_inline_parser *inline_parser) { + cmark_node *res = NULL; + int left_flanking, right_flanking, punct_before, punct_after, delims; + char buffer[101]; + + if (character != '~') + return NULL; + + delims = cmark_inline_parser_scan_delimiters( + inline_parser, sizeof(buffer) - 1, '~', + &left_flanking, + &right_flanking, &punct_before, &punct_after); + + memset(buffer, '~', delims); + buffer[delims] = 0; + + res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_node_set_literal(res, buffer); + + if (left_flanking || right_flanking) { + cmark_inline_parser_push_delimiter(inline_parser, character, left_flanking, + right_flanking, res); + } + + return res; +} + +static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser, + cmark_inline_parser *inline_parser, delimiter *opener, + delimiter *closer) { + cmark_node *strikethrough; + cmark_node *tmp, *next; + delimiter *delim, *tmp_delim; + delimiter *res = closer->next; + + strikethrough = opener->inl_text; + + if (!cmark_node_set_type(strikethrough, CMARK_NODE_STRIKETHROUGH)) + goto done; + + 
cmark_node_set_syntax_extension(strikethrough, self); + + cmark_node_set_string_content(strikethrough, "~"); + tmp = cmark_node_next(opener->inl_text); + + while (tmp) { + if (tmp == closer->inl_text) + break; + next = cmark_node_next(tmp); + cmark_node_append_child(strikethrough, tmp); + tmp = next; + } + + cmark_node_free(closer->inl_text); + + delim = closer; + while (delim != NULL && delim != opener) { + tmp_delim = delim->previous; + cmark_inline_parser_remove_delimiter(inline_parser, delim); + delim = tmp_delim; + } + + cmark_inline_parser_remove_delimiter(inline_parser, opener); + +done: + return res; +} + +static const char *get_type_string(cmark_syntax_extension *extension, + cmark_node *node) { + return node->type == CMARK_NODE_STRIKETHROUGH ? "strikethrough" : ""; +} + +static int can_contain(cmark_syntax_extension *extension, cmark_node *node, + cmark_node_type child_type) { + if (node->type != CMARK_NODE_STRIKETHROUGH) + return false; + + return CMARK_NODE_TYPE_INLINE_P(child_type); +} + +static void commonmark_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + renderer->out(renderer, node, cmark_node_get_string_content(node), false, LITERAL); +} + +static void latex_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + // requires \usepackage{ulem} + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + renderer->out(renderer, node, "\\sout{", false, LITERAL); + } else { + renderer->out(renderer, node, "}", false, LITERAL); + } +} + +static void man_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + renderer->cr(renderer); + renderer->out(renderer, node, ".ST \"", false, LITERAL); + } else { + renderer->out(renderer, node, "\"", 
false, LITERAL); + renderer->cr(renderer); + } +} + +static void html_render(cmark_syntax_extension *extension, + cmark_html_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + cmark_strbuf_puts(renderer->html, ""); + } else { + cmark_strbuf_puts(renderer->html, ""); + } +} + +cmark_syntax_extension *create_strikethrough_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("strikethrough"); + cmark_llist *special_chars = NULL; + + cmark_syntax_extension_set_get_type_string_func(ext, get_type_string); + cmark_syntax_extension_set_can_contain_func(ext, can_contain); + cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); + cmark_syntax_extension_set_latex_render_func(ext, latex_render); + cmark_syntax_extension_set_man_render_func(ext, man_render); + cmark_syntax_extension_set_html_render_func(ext, html_render); + CMARK_NODE_STRIKETHROUGH = cmark_syntax_extension_add_node(1); + + cmark_syntax_extension_set_match_inline_func(ext, match); + cmark_syntax_extension_set_inline_from_delim_func(ext, insert); + + cmark_mem *mem = cmark_get_default_mem_allocator(); + special_chars = cmark_llist_append(mem, special_chars, (void *)'~'); + cmark_syntax_extension_set_special_inline_chars(ext, special_chars); + + return ext; +} diff --git a/cbits/strikethrough.h b/cbits/strikethrough.h new file mode 100644 index 0000000..1c43f57 --- /dev/null +++ b/cbits/strikethrough.h @@ -0,0 +1,9 @@ +#ifndef STRIKETHROUGH_H +#define STRIKETHROUGH_H + +#include "core-extensions.h" + +extern cmark_node_type CMARK_NODE_STRIKETHROUGH; +cmark_syntax_extension *create_strikethrough_extension(void); + +#endif diff --git a/cbits/syntax_extension.c b/cbits/syntax_extension.c new file mode 100644 index 0000000..ee86b66 --- /dev/null +++ b/cbits/syntax_extension.c @@ -0,0 +1,133 @@ +#include + +#include "cmark.h" +#include "syntax_extension.h" +#include "buffer.h" 
+ +extern cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR; + +static cmark_mem *_mem = &CMARK_DEFAULT_MEM_ALLOCATOR; + +void cmark_syntax_extension_free(cmark_mem *mem, cmark_syntax_extension *extension) { + if (extension->free_function && extension->priv) { + extension->free_function(mem, extension->priv); + } + + cmark_llist_free(mem, extension->special_inline_chars); + mem->free(extension->name); + mem->free(extension); +} + +cmark_syntax_extension *cmark_syntax_extension_new(const char *name) { + cmark_syntax_extension *res = (cmark_syntax_extension *) _mem->calloc(1, sizeof(cmark_syntax_extension)); + res->name = (char *) _mem->calloc(1, sizeof(char) * (strlen(name)) + 1); + strcpy(res->name, name); + return res; +} + +cmark_node_type cmark_syntax_extension_add_node(int is_inline) { + cmark_node_type *ref = !is_inline ? &CMARK_NODE_LAST_BLOCK : &CMARK_NODE_LAST_INLINE; + + if ((*ref & CMARK_NODE_VALUE_MASK) == CMARK_NODE_VALUE_MASK) { + assert(false); + return (cmark_node_type) 0; + } + + return *ref = (cmark_node_type) ((int) *ref + 1); +} + +void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, + cmark_open_block_func func) { + extension->try_opening_block = func; +} + +void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension, + cmark_match_block_func func) { + extension->last_block_matches = func; +} + +void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension, + cmark_match_inline_func func) { + extension->match_inline = func; +} + +void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension, + cmark_inline_from_delim_func func) { + extension->insert_inline_from_delim = func; +} + +void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, + cmark_llist *special_chars) { + extension->special_inline_chars = special_chars; +} + +void cmark_syntax_extension_set_get_type_string_func(cmark_syntax_extension *extension, + 
cmark_get_type_string_func func) { + extension->get_type_string_func = func; +} + +void cmark_syntax_extension_set_can_contain_func(cmark_syntax_extension *extension, + cmark_can_contain_func func) { + extension->can_contain_func = func; +} + +void cmark_syntax_extension_set_contains_inlines_func(cmark_syntax_extension *extension, + cmark_contains_inlines_func func) { + extension->contains_inlines_func = func; +} + +void cmark_syntax_extension_set_commonmark_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func) { + extension->commonmark_render_func = func; +} + +void cmark_syntax_extension_set_plaintext_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func) { + extension->plaintext_render_func = func; +} + +void cmark_syntax_extension_set_latex_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func) { + extension->latex_render_func = func; +} + +void cmark_syntax_extension_set_man_render_func(cmark_syntax_extension *extension, + cmark_common_render_func func) { + extension->man_render_func = func; +} + +void cmark_syntax_extension_set_html_render_func(cmark_syntax_extension *extension, + cmark_html_render_func func) { + extension->html_render_func = func; +} + +void cmark_syntax_extension_set_html_filter_func(cmark_syntax_extension *extension, + cmark_html_filter_func func) { + extension->html_filter_func = func; +} + +void cmark_syntax_extension_set_postprocess_func(cmark_syntax_extension *extension, + cmark_postprocess_func func) { + extension->postprocess_func = func; +} + +void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, + void *priv, + cmark_free_func free_func) { + extension->priv = priv; + extension->free_function = free_func; +} + +void *cmark_syntax_extension_get_private(cmark_syntax_extension *extension) { + return extension->priv; +} + +void cmark_syntax_extension_set_opaque_free_func(cmark_syntax_extension *extension, + cmark_opaque_free_func func) { + 
extension->opaque_free_func = func; +} + +void cmark_syntax_extension_set_commonmark_escape_func(cmark_syntax_extension *extension, + cmark_commonmark_escape_func func) { + extension->commonmark_escape_func = func; +} diff --git a/cbits/syntax_extension.h b/cbits/syntax_extension.h new file mode 100644 index 0000000..6fcf109 --- /dev/null +++ b/cbits/syntax_extension.h @@ -0,0 +1,30 @@ +#ifndef CMARK_SYNTAX_EXTENSION_H +#define CMARK_SYNTAX_EXTENSION_H + +#include "cmark.h" +#include "cmark_extension_api.h" + +struct cmark_syntax_extension { + cmark_match_block_func last_block_matches; + cmark_open_block_func try_opening_block; + cmark_match_inline_func match_inline; + cmark_inline_from_delim_func insert_inline_from_delim; + cmark_llist * special_inline_chars; + char * name; + void * priv; + cmark_free_func free_function; + cmark_get_type_string_func get_type_string_func; + cmark_can_contain_func can_contain_func; + cmark_contains_inlines_func contains_inlines_func; + cmark_common_render_func commonmark_render_func; + cmark_common_render_func plaintext_render_func; + cmark_common_render_func latex_render_func; + cmark_common_render_func man_render_func; + cmark_html_render_func html_render_func; + cmark_html_filter_func html_filter_func; + cmark_postprocess_func postprocess_func; + cmark_opaque_free_func opaque_free_func; + cmark_commonmark_escape_func commonmark_escape_func; +}; + +#endif diff --git a/cbits/table.c b/cbits/table.c new file mode 100644 index 0000000..27b4697 --- /dev/null +++ b/cbits/table.c @@ -0,0 +1,660 @@ +#include +#include +#include +#include +#include +#include + +#include "ext_scanners.h" +#include "strikethrough.h" +#include "table.h" + +cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, + CMARK_NODE_TABLE_CELL; + +typedef struct { + uint16_t n_columns; + cmark_llist *cells; +} table_row; + +typedef struct { + uint16_t n_columns; + uint8_t *alignments; +} node_table; + +typedef struct { + bool is_header; +} node_table_row; + +static 
void free_table_cell(cmark_mem *mem, void *data) { + cmark_strbuf_free((cmark_strbuf *)data); + mem->free(data); +} + +static void free_table_row(cmark_mem *mem, table_row *row) { + if (!row) + return; + + cmark_llist_free_full(mem, row->cells, (cmark_free_func)free_table_cell); + + mem->free(row); +} + +static void free_node_table(cmark_mem *mem, void *ptr) { + node_table *t = (node_table *)ptr; + mem->free(t->alignments); + mem->free(t); +} + +static void free_node_table_row(cmark_mem *mem, void *ptr) { + mem->free(ptr); +} + +static int get_n_table_columns(cmark_node *node) { + if (!node || node->type != CMARK_NODE_TABLE) + return -1; + + return (int)((node_table *)node->as.opaque)->n_columns; +} + +static int set_n_table_columns(cmark_node *node, uint16_t n_columns) { + if (!node || node->type != CMARK_NODE_TABLE) + return 0; + + ((node_table *)node->as.opaque)->n_columns = n_columns; + return 1; +} + +static uint8_t *get_table_alignments(cmark_node *node) { + if (!node || node->type != CMARK_NODE_TABLE) + return 0; + + return ((node_table *)node->as.opaque)->alignments; +} + +static int set_table_alignments(cmark_node *node, uint8_t *alignments) { + if (!node || node->type != CMARK_NODE_TABLE) + return 0; + + ((node_table *)node->as.opaque)->alignments = alignments; + return 1; +} + +static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsize_t len) +{ + cmark_strbuf *res = (cmark_strbuf *)mem->calloc(1, sizeof(cmark_strbuf)); + bufsize_t r, w; + + cmark_strbuf_init(mem, res, len + 1); + cmark_strbuf_put(res, string, len); + cmark_strbuf_putc(res, '\0'); + + for (r = 0, w = 0; r < len; ++r) { + if (res->ptr[r] == '\\' && res->ptr[r + 1] == '|') + r++; + + res->ptr[w++] = res->ptr[r]; + } + + cmark_strbuf_truncate(res, w); + + return res; +} + +static table_row *row_from_string(cmark_syntax_extension *self, + cmark_parser *parser, unsigned char *string, + int len) { + table_row *row = NULL; + bufsize_t cell_matched, pipe_matched, offset = 
0; + + row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); + row->n_columns = 0; + row->cells = NULL; + + if (len > 0 && string[0] == '|') + ++offset; + + do { + cell_matched = scan_table_cell(string, len, offset); + pipe_matched = scan_table_cell_end(string, len, offset + cell_matched); + + if (cell_matched || pipe_matched) { + cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, + cell_matched); + cmark_strbuf_trim(cell_buf); + row->n_columns += 1; + row->cells = cmark_llist_append(parser->mem, row->cells, cell_buf); + } + + offset += cell_matched + pipe_matched; + + if (!pipe_matched) { + pipe_matched = scan_table_row_end(string, len, offset); + offset += pipe_matched; + } + } while ((cell_matched || pipe_matched) && offset < len); + + if (offset != len || !row->n_columns) { + free_table_row(parser->mem, row); + row = NULL; + } + + return row; +} + +static cmark_node *try_opening_table_header(cmark_syntax_extension *self, + cmark_parser *parser, + cmark_node *parent_container, + unsigned char *input, int len) { + bufsize_t matched = + scan_table_start(input, len, cmark_parser_get_first_nonspace(parser)); + cmark_node *table_header; + table_row *header_row = NULL; + table_row *marker_row = NULL; + node_table_row *ntr; + const char *parent_string; + uint16_t i; + + if (!matched) + return parent_container; + + parent_string = cmark_node_get_string_content(parent_container); + + cmark_arena_push(); + + header_row = row_from_string(self, parser, (unsigned char *)parent_string, + (int)strlen(parent_string)); + + if (!header_row) { + free_table_row(parser->mem, header_row); + cmark_arena_pop(); + return parent_container; + } + + marker_row = row_from_string(self, parser, + input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + + assert(marker_row); + + if (header_row->n_columns != marker_row->n_columns) { + free_table_row(parser->mem, header_row); + free_table_row(parser->mem, marker_row); + 
cmark_arena_pop(); + return parent_container; + } + + if (cmark_arena_pop()) { + header_row = row_from_string(self, parser, (unsigned char *)parent_string, + (int)strlen(parent_string)); + marker_row = row_from_string(self, parser, + input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + } + + if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { + free_table_row(parser->mem, header_row); + free_table_row(parser->mem, marker_row); + return parent_container; + } + + cmark_node_set_syntax_extension(parent_container, self); + + parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); + + + set_n_table_columns(parent_container, header_row->n_columns); + + uint8_t *alignments = + (uint8_t *)parser->mem->calloc(header_row->n_columns, sizeof(uint8_t)); + cmark_llist *it = marker_row->cells; + for (i = 0; it; it = it->next, ++i) { + cmark_strbuf *node = (cmark_strbuf *)it->data; + bool left = node->ptr[0] == ':', right = node->ptr[node->size - 1] == ':'; + + if (left && right) + alignments[i] = 'c'; + else if (left) + alignments[i] = 'l'; + else if (right) + alignments[i] = 'r'; + } + set_table_alignments(parent_container, alignments); + + table_header = + cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, + cmark_parser_get_offset(parser)); + cmark_node_set_syntax_extension(table_header, self); + + table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); + ntr->is_header = true; + + { + cmark_llist *tmp; + + for (tmp = header_row->cells; tmp; tmp = tmp->next) { + cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; + cmark_node *header_cell = cmark_parser_add_child(parser, table_header, + CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); + cmark_node_set_string_content(header_cell, (char *) cell_buf->ptr); + cmark_node_set_syntax_extension(header_cell, self); + } + } + + cmark_parser_advance_offset( + parser, (char *)input, + 
(int)strlen((char *)input) - 1 - cmark_parser_get_offset(parser), false); + + free_table_row(parser->mem, header_row); + free_table_row(parser->mem, marker_row); + return parent_container; +} + +static cmark_node *try_opening_table_row(cmark_syntax_extension *self, + cmark_parser *parser, + cmark_node *parent_container, + unsigned char *input, int len) { + cmark_node *table_row_block; + table_row *row; + + if (cmark_parser_is_blank(parser)) + return NULL; + + table_row_block = + cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW, + cmark_parser_get_offset(parser)); + + cmark_node_set_syntax_extension(table_row_block, self); + table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row)); + + row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + + { + cmark_llist *tmp; + int i, table_columns = get_n_table_columns(parent_container); + + for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) { + cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data; + cmark_node *cell = cmark_parser_add_child(parser, table_row_block, + CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); + cmark_node_set_string_content(cell, (char *) cell_buf->ptr); + cmark_node_set_syntax_extension(cell, self); + } + + for (; i < table_columns; ++i) { + cmark_node *cell = cmark_parser_add_child( + parser, table_row_block, CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser)); + cmark_node_set_syntax_extension(cell, self); + } + } + + free_table_row(parser->mem, row); + + cmark_parser_advance_offset(parser, (char *)input, + len - 1 - cmark_parser_get_offset(parser), false); + + return table_row_block; +} + +static cmark_node *try_opening_table_block(cmark_syntax_extension *self, + int indented, cmark_parser *parser, + cmark_node *parent_container, + unsigned char *input, int len) { + cmark_node_type parent_type = cmark_node_get_type(parent_container); + + if 
(!indented && parent_type == CMARK_NODE_PARAGRAPH) { + return try_opening_table_header(self, parser, parent_container, input, len); + } else if (!indented && parent_type == CMARK_NODE_TABLE) { + return try_opening_table_row(self, parser, parent_container, input, len); + } + + return NULL; +} + +static int matches(cmark_syntax_extension *self, cmark_parser *parser, + unsigned char *input, int len, + cmark_node *parent_container) { + int res = 0; + + if (cmark_node_get_type(parent_container) == CMARK_NODE_TABLE) { + cmark_arena_push(); + table_row *new_row = row_from_string( + self, parser, input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + if (new_row && new_row->n_columns) + res = 1; + free_table_row(parser->mem, new_row); + cmark_arena_pop(); + } + + return res; +} + +static const char *get_type_string(cmark_syntax_extension *self, + cmark_node *node) { + if (node->type == CMARK_NODE_TABLE) { + return "table"; + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (((node_table_row *)node->as.opaque)->is_header) + return "table_header"; + else + return "table_row"; + } else if (node->type == CMARK_NODE_TABLE_CELL) { + return "table_cell"; + } + + return ""; +} + +static int can_contain(cmark_syntax_extension *extension, cmark_node *node, + cmark_node_type child_type) { + if (node->type == CMARK_NODE_TABLE) { + return child_type == CMARK_NODE_TABLE_ROW; + } else if (node->type == CMARK_NODE_TABLE_ROW) { + return child_type == CMARK_NODE_TABLE_CELL; + } else if (node->type == CMARK_NODE_TABLE_CELL) { + return child_type == CMARK_NODE_TEXT || child_type == CMARK_NODE_CODE || + child_type == CMARK_NODE_EMPH || child_type == CMARK_NODE_STRONG || + child_type == CMARK_NODE_LINK || child_type == CMARK_NODE_IMAGE || + child_type == CMARK_NODE_STRIKETHROUGH || + child_type == CMARK_NODE_HTML_INLINE; + } + return false; +} + +static int contains_inlines(cmark_syntax_extension *extension, + cmark_node *node) { + return 
node->type == CMARK_NODE_TABLE_CELL; +} + +static void commonmark_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + + if (node->type == CMARK_NODE_TABLE) { + renderer->blankline(renderer); + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (entering) { + renderer->cr(renderer); + renderer->out(renderer, node, "|", false, LITERAL); + } + } else if (node->type == CMARK_NODE_TABLE_CELL) { + if (entering) { + renderer->out(renderer, node, " ", false, LITERAL); + } else { + renderer->out(renderer, node, " |", false, LITERAL); + if (((node_table_row *)node->parent->as.opaque)->is_header && + !node->next) { + int i; + uint8_t *alignments = get_table_alignments(node->parent->parent); + uint16_t n_cols = + ((node_table *)node->parent->parent->as.opaque)->n_columns; + renderer->cr(renderer); + renderer->out(renderer, node, "|", false, LITERAL); + for (i = 0; i < n_cols; i++) { + switch (alignments[i]) { + case 0: renderer->out(renderer, node, " --- |", false, LITERAL); break; + case 'l': renderer->out(renderer, node, " :-- |", false, LITERAL); break; + case 'c': renderer->out(renderer, node, " :-: |", false, LITERAL); break; + case 'r': renderer->out(renderer, node, " --: |", false, LITERAL); break; + } + } + renderer->cr(renderer); + } + } + } else { + assert(false); + } +} + +static void latex_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + + if (node->type == CMARK_NODE_TABLE) { + if (entering) { + int i; + uint16_t n_cols; + uint8_t *alignments = get_table_alignments(node); + + renderer->cr(renderer); + renderer->out(renderer, node, "\\begin{table}", false, LITERAL); + renderer->cr(renderer); + renderer->out(renderer, node, "\\begin{tabular}{", false, LITERAL); + + n_cols = ((node_table 
*)node->as.opaque)->n_columns; + for (i = 0; i < n_cols; i++) { + switch(alignments[i]) { + case 0: + case 'l': + renderer->out(renderer, node, "l", false, LITERAL); + break; + case 'c': + renderer->out(renderer, node, "c", false, LITERAL); + break; + case 'r': + renderer->out(renderer, node, "r", false, LITERAL); + break; + } + } + renderer->out(renderer, node, "}", false, LITERAL); + renderer->cr(renderer); + } else { + renderer->out(renderer, node, "\\end{tabular}", false, LITERAL); + renderer->cr(renderer); + renderer->out(renderer, node, "\\end{table}", false, LITERAL); + renderer->cr(renderer); + } + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (!entering) { + renderer->cr(renderer); + } + } else if (node->type == CMARK_NODE_TABLE_CELL) { + if (!entering) { + if (node->next) { + renderer->out(renderer, node, " & ", false, LITERAL); + } else { + renderer->out(renderer, node, " \\\\", false, LITERAL); + } + } + } else { + assert(false); + } +} + +static void man_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + + if (node->type == CMARK_NODE_TABLE) { + if (entering) { + int i; + uint16_t n_cols; + uint8_t *alignments = get_table_alignments(node); + + renderer->cr(renderer); + renderer->out(renderer, node, ".TS", false, LITERAL); + renderer->cr(renderer); + renderer->out(renderer, node, "tab(@);", false, LITERAL); + renderer->cr(renderer); + + n_cols = ((node_table *)node->as.opaque)->n_columns; + + for (i = 0; i < n_cols; i++) { + switch (alignments[i]) { + case 'l': + renderer->out(renderer, node, "l", false, LITERAL); + break; + case 0: + case 'c': + renderer->out(renderer, node, "c", false, LITERAL); + break; + case 'r': + renderer->out(renderer, node, "r", false, LITERAL); + break; + } + } + + if (n_cols) { + renderer->out(renderer, node, ".", false, LITERAL); + renderer->cr(renderer); + } + } else { + 
renderer->out(renderer, node, ".TE", false, LITERAL); + renderer->cr(renderer); + } + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (!entering) { + renderer->cr(renderer); + } + } else if (node->type == CMARK_NODE_TABLE_CELL) { + if (!entering && node->next) { + renderer->out(renderer, node, "@", false, LITERAL); + } + } else { + assert(false); + } +} + +struct html_table_state { + unsigned need_closing_table_body : 1; + unsigned in_table_header : 1; +}; + +static void html_render(cmark_syntax_extension *extension, + cmark_html_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + cmark_strbuf *html = renderer->html; + cmark_node *n; + + // XXX: we just monopolise renderer->opaque. + struct html_table_state *table_state = + (struct html_table_state *)&renderer->opaque; + + if (node->type == CMARK_NODE_TABLE) { + if (entering) { + cmark_html_render_cr(html); + cmark_strbuf_puts(html, "'); + table_state->need_closing_table_body = false; + } else { + if (table_state->need_closing_table_body) + cmark_strbuf_puts(html, ""); + table_state->need_closing_table_body = false; + cmark_strbuf_puts(html, "\n"); + } + } else if (node->type == CMARK_NODE_TABLE_ROW) { + if (entering) { + cmark_html_render_cr(html); + if (((node_table_row *)node->as.opaque)->is_header) { + table_state->in_table_header = 1; + cmark_strbuf_puts(html, ""); + cmark_html_render_cr(html); + } + cmark_strbuf_puts(html, "'); + } else { + cmark_html_render_cr(html); + cmark_strbuf_puts(html, ""); + if (((node_table_row *)node->as.opaque)->is_header) { + cmark_html_render_cr(html); + cmark_strbuf_puts(html, ""); + cmark_html_render_cr(html); + cmark_strbuf_puts(html, ""); + table_state->need_closing_table_body = 1; + table_state->in_table_header = false; + } + } + } else if (node->type == CMARK_NODE_TABLE_CELL) { + uint8_t *alignments = get_table_alignments(node->parent->parent); + if (entering) { + 
cmark_html_render_cr(html); + if (table_state->in_table_header) { + cmark_strbuf_puts(html, "parent->first_child; n; n = n->next, ++i) + if (n == node) + break; + + switch (alignments[i]) { + case 'l': cmark_strbuf_puts(html, " align=\"left\""); break; + case 'c': cmark_strbuf_puts(html, " align=\"center\""); break; + case 'r': cmark_strbuf_puts(html, " align=\"right\""); break; + } + + cmark_html_render_sourcepos(node, html, options); + cmark_strbuf_putc(html, '>'); + } else { + if (table_state->in_table_header) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_puts(html, ""); + } + } + } else { + assert(false); + } +} + +static void opaque_free(cmark_syntax_extension *self, cmark_mem *mem, cmark_node *node) { + if (node->type == CMARK_NODE_TABLE) { + free_node_table(mem, node->as.opaque); + } else if (node->type == CMARK_NODE_TABLE_ROW) { + free_node_table_row(mem, node->as.opaque); + } +} + +static int escape(cmark_syntax_extension *self, cmark_node *node, int c) { + return + node->type != CMARK_NODE_TABLE && + node->type != CMARK_NODE_TABLE_ROW && + node->type != CMARK_NODE_TABLE_CELL && + c == '|'; +} + +cmark_syntax_extension *create_table_extension(void) { + cmark_syntax_extension *self = cmark_syntax_extension_new("table"); + + cmark_syntax_extension_set_match_block_func(self, matches); + cmark_syntax_extension_set_open_block_func(self, try_opening_table_block); + cmark_syntax_extension_set_get_type_string_func(self, get_type_string); + cmark_syntax_extension_set_can_contain_func(self, can_contain); + cmark_syntax_extension_set_contains_inlines_func(self, contains_inlines); + cmark_syntax_extension_set_commonmark_render_func(self, commonmark_render); + cmark_syntax_extension_set_plaintext_render_func(self, commonmark_render); + cmark_syntax_extension_set_latex_render_func(self, latex_render); + cmark_syntax_extension_set_man_render_func(self, man_render); + cmark_syntax_extension_set_html_render_func(self, html_render); + 
cmark_syntax_extension_set_opaque_free_func(self, opaque_free); + cmark_syntax_extension_set_commonmark_escape_func(self, escape); + CMARK_NODE_TABLE = cmark_syntax_extension_add_node(0); + CMARK_NODE_TABLE_ROW = cmark_syntax_extension_add_node(0); + CMARK_NODE_TABLE_CELL = cmark_syntax_extension_add_node(0); + + return self; +} + +uint16_t cmarkextensions_get_table_columns(cmark_node *node) { + if (node->type != CMARK_NODE_TABLE) + return 0; + + return ((node_table *)node->as.opaque)->n_columns; +} + +uint8_t *cmarkextensions_get_table_alignments(cmark_node *node) { + if (node->type != CMARK_NODE_TABLE) + return 0; + + return ((node_table *)node->as.opaque)->alignments; +} diff --git a/cbits/table.h b/cbits/table.h new file mode 100644 index 0000000..ff630b2 --- /dev/null +++ b/cbits/table.h @@ -0,0 +1,8 @@ +#ifndef TABLE_H +#define TABLE_H + +#include "core-extensions.h" + +cmark_syntax_extension *create_table_extension(void); + +#endif diff --git a/cbits/tagfilter.c b/cbits/tagfilter.c new file mode 100644 index 0000000..80cbd6b --- /dev/null +++ b/cbits/tagfilter.c @@ -0,0 +1,59 @@ +#include "tagfilter.h" +#include + +static const char *blacklist[] = { + "title", "textarea", "style", "xmp", "iframe", + "noembed", "noframes", "script", "plaintext", NULL, +}; + +static int is_tag(const unsigned char *tag_data, size_t tag_size, + const char *tagname) { + size_t i; + + if (tag_size < 3 || tag_data[0] != '<') + return 0; + + i = 1; + + if (tag_data[i] == '/') { + i++; + } + + for (; i < tag_size; ++i, ++tagname) { + if (*tagname == 0) + break; + + if (tag_data[i] != *tagname) + return 0; + } + + if (i == tag_size) + return 0; + + if (cmark_isspace(tag_data[i]) || tag_data[i] == '>') + return 1; + + if (tag_data[i] == '/' && tag_size >= i + 2 && tag_data[i + 1] == '>') + return 1; + + return 0; +} + +static int filter(cmark_syntax_extension *ext, const unsigned char *tag, + size_t tag_len) { + const char **it; + + for (it = blacklist; *it; ++it) { + if (is_tag(tag, 
tag_len, *it)) { + return 0; + } + } + + return 1; +} + +cmark_syntax_extension *create_tagfilter_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("tagfilter"); + cmark_syntax_extension_set_html_filter_func(ext, filter); + return ext; +} diff --git a/cbits/tagfilter.h b/cbits/tagfilter.h new file mode 100644 index 0000000..4068b50 --- /dev/null +++ b/cbits/tagfilter.h @@ -0,0 +1,8 @@ +#ifndef TAGFILTER_H +#define TAGFILTER_H + +#include "core-extensions.h" + +cmark_syntax_extension *create_tagfilter_extension(void); + +#endif diff --git a/cbits/utf8.h b/cbits/utf8.h index 8e45714..5e64344 100644 --- a/cbits/utf8.h +++ b/cbits/utf8.h @@ -8,13 +8,24 @@ extern "C" { #endif +CMARK_EXPORT void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len); + +CMARK_EXPORT void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); + +CMARK_EXPORT int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); + +CMARK_EXPORT void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); + +CMARK_EXPORT int cmark_utf8proc_is_space(int32_t uc); + +CMARK_EXPORT int cmark_utf8proc_is_punctuation(int32_t uc); #ifdef __cplusplus diff --git a/cbits/xml.c b/cbits/xml.c index 4898cd2..ea53b99 100644 --- a/cbits/xml.c +++ b/cbits/xml.c @@ -148,8 +148,12 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } char *cmark_render_xml(cmark_node *root, int options) { + return cmark_render_xml_with_mem(root, options, cmark_node_mem(root)); +} + +char *cmark_render_xml_with_mem(cmark_node *root, int options, cmark_mem *mem) { char *result; - cmark_strbuf xml = CMARK_BUF_INIT(cmark_node_mem(root)); + cmark_strbuf xml = CMARK_BUF_INIT(mem); cmark_event_type ev_type; cmark_node *cur; struct render_state state = {&xml, 0}; diff --git a/cmark.cabal b/cmark-gfm.cabal similarity index 61% rename from cmark.cabal rename to cmark-gfm.cabal index fc1ec83..f823e69 100644 --- a/cmark.cabal +++ 
b/cmark-gfm.cabal @@ -1,20 +1,20 @@ -name: cmark -version: 0.5.6 -synopsis: Fast, accurate CommonMark (Markdown) parser and renderer +name: cmark-gfm +version: 0.1.0 +synopsis: Fast, accurate GitHub Flavored Markdown parser and renderer description: This package provides Haskell bindings for - , the reference - parser for , a fully + , the reference + parser for , a fully specified variant of Markdown. It includes sources for - libcmark (0.28.0) and does not require prior installation of the + libcmark-gfm (0.28.0) and does not require prior installation of the C library. -homepage: https://github.com/jgm/cmark-hs +homepage: https://github.com/kivikakk/cmark-gfm-hs license: BSD3 license-file: LICENSE -author: John MacFarlane -maintainer: jgm@berkeley.edu -copyright: (C) 2015--17 John MacFarlane +author: Yuki Izumi +maintainer: kivikakk@github.com +copyright: (C) 2015--17 John MacFarlane, (C) 2017 Yuki Izumi category: Text tested-with: GHC == 7.4.2, GHC == 7.6.3, GHC == 7.8.2, GHC == 7.10.3 build-type: Simple @@ -40,20 +40,32 @@ extra-source-files: README.md cbits/scanners.h cbits/case_fold_switch.inc cbits/entities.inc + cbits/cmark_extension_api.h + cbits/html.h + cbits/plugin.h + cbits/registry.h + cbits/syntax_extension.h + cbits/autolink.h + cbits/core-extensions.h + cbits/ext_scanners.h + cbits/strikethrough.h + cbits/table.h + cbits/tagfilter.h + cbits/cmarkextensions_export.h bench/sample.md bench/full-sample.md cabal-version: >=1.14 Source-repository head type: git - location: git://github.com/jgm/cmark-hs.git + location: git://github.com/kivikakk/cmark-gfm-hs.git flag pkgconfig default: False - description: Use system libcmark via pkgconfig + description: Use system libcmark-gfm via pkgconfig library - exposed-modules: CMark + exposed-modules: CMarkGFM build-depends: base >=4.5 && < 5.0, text >= 1.0 && < 1.3, bytestring @@ -62,7 +74,7 @@ library default-language: Haskell2010 ghc-options: -Wall -fno-warn-unused-do-bind if flag(pkgconfig) - 
pkgconfig-depends: libcmark + pkgconfig-depends: libcmark-gfm else cc-options: -Wall -std=c99 Include-dirs: cbits @@ -86,12 +98,24 @@ library cbits/latex.c cbits/xml.c cbits/render.c + cbits/arena.c + cbits/linked_list.c + cbits/plaintext.c + cbits/plugin.c + cbits/registry.c + cbits/syntax_extension.c + cbits/autolink.c + cbits/core-extensions.c + cbits/ext_scanners.c + cbits/strikethrough.c + cbits/table.c + cbits/tagfilter.c -benchmark bench-cmark +benchmark bench-cmark-gfm type: exitcode-stdio-1.0 hs-source-dirs: bench main-is: bench-cmark.hs - build-depends: base, text, cmark, criterion, + build-depends: base, text, cmark-gfm, criterion, sundown >= 0.6 && < 0.7, cheapskate >= 0.1 && < 0.2, markdown >= 0.1 && < 0.2, @@ -100,10 +124,10 @@ benchmark bench-cmark ghc-options: -O2 default-language: Haskell2010 -Test-Suite test-cmark +Test-Suite test-cmark-gfm type: exitcode-stdio-1.0 main-is: test-cmark.hs hs-source-dirs: test - build-depends: base, cmark, text, HUnit >= 1.2 && < 1.7 + build-depends: base, cmark-gfm, text, HUnit >= 1.2 && < 1.7 ghc-options: -Wall -fno-warn-unused-do-bind -threaded default-language: Haskell98 diff --git a/test/test-cmark.hs b/test/test-cmark.hs index bb54bb2..a7de476 100644 --- a/test/test-cmark.hs +++ b/test/test-cmark.hs @@ -1,12 +1,13 @@ {-# LANGUAGE OverloadedStrings #-} -import CMark +import CMarkGFM import Test.HUnit import System.Exit import Data.Text () main :: IO () main = do + registerPlugins counts' <- runTestTT tests case (errors counts' + failures counts') of 0 -> exitWith ExitSuccess @@ -16,11 +17,21 @@ main = do -- Here we just make sure it's basically working. tests :: Test tests = TestList [ - "

    Hi

    \n" ~=? commonmarkToHtml [] "# Hi" - , "

    dog’s

    \n" ~=? commonmarkToHtml [optSmart] "dog's" - , "

    trick

    \n" ~=? commonmarkToHtml [optSafe] "[trick](javascript:alert('hi'))" - , ".RS\n.PP\nquote\n.RE\n" ~=? commonmarkToMan [] Nothing "> quote" - , (Node (Just (PosInfo {startLine = 1, startColumn = 1, endLine = 1, endColumn = 13})) DOCUMENT [Node (Just (PosInfo {startLine = 1, startColumn = 1, endLine = 1, endColumn = 13})) PARAGRAPH [Node Nothing (TEXT "Hello ") [],Node Nothing EMPH [Node Nothing (TEXT "world") []]]]) ~=? commonmarkToNode [] "Hello *world*" + "

    Hi

    \n" ~=? commonmarkToHtml [] [] "# Hi" + , "

    dog’s

    \n" ~=? commonmarkToHtml [optSmart] [] "dog's" + , "

    trick

    \n" ~=? commonmarkToHtml [optSafe] [] "[trick](javascript:alert('hi'))" + , ".RS\n.PP\nquote\n.RE\n" ~=? commonmarkToMan [] [] Nothing "> quote" + , (Node (Just (PosInfo {startLine = 1, startColumn = 1, endLine = 1, endColumn = 13})) DOCUMENT [Node (Just (PosInfo {startLine = 1, startColumn = 1, endLine = 1, endColumn = 13})) PARAGRAPH [Node Nothing (TEXT "Hello ") [],Node Nothing EMPH [Node Nothing (TEXT "world") []]]]) ~=? commonmarkToNode [] [] "Hello *world*" , "> Hello\n> *world*\n" ~=? nodeToCommonmark [] (Just 12) (Node Nothing DOCUMENT [Node Nothing BLOCK_QUOTE [Node Nothing PARAGRAPH [Node Nothing (TEXT "Hello ") [],Node Nothing EMPH [Node Nothing (TEXT "world") []]]]]) + , "

    ~hi~

    \n" ~=? commonmarkToHtml [] [] "~hi~" + , "

    hi

    \n" ~=? commonmarkToHtml [] [extStrikethrough] "~hi~" + , (Node (Just (PosInfo {startLine = 1, startColumn = 1, endLine = 1, endColumn = 4})) DOCUMENT [Node (Just (PosInfo {startLine = 1, startColumn = 1, endLine = 1, endColumn = 4})) PARAGRAPH [Node Nothing STRIKETHROUGH [Node Nothing (TEXT "hi") []]]]) ~=? commonmarkToNode [] [extStrikethrough] "~hi~" + , "

    www.google.com

    \n" ~=? commonmarkToHtml [] [] "www.google.com" + , "

    www.google.com

    \n" ~=? commonmarkToHtml [] [extAutolink] "www.google.com" + , "

    | a |\n| --- |\n| b |

    \n" ~=? commonmarkToHtml [] [] "| a |\n| --- |\n| b |\n" + , "\n\n\n\n\n\n\n\n\n
    a
    b
    \n" ~=? commonmarkToHtml [] [extTable] "| a |\n| --- |\n| b |\n" + , (Node (Just (PosInfo {startLine = 1, startColumn = 1, endLine = 3, endColumn = 17})) DOCUMENT [Node (Just (PosInfo {startLine = 1, startColumn = 1, endLine = 3, endColumn = 17})) (TABLE [CMarkGFM.Left,Center,None,CMarkGFM.Right]) [Node (Just (PosInfo {startLine = 2, startColumn = 0, endLine = 2, endColumn = 0})) TABLE_ROW [Node (Just (PosInfo {startLine = 2, startColumn = 0, endLine = 2, endColumn = 0})) TABLE_CELL [Node Nothing (TEXT "a") []],Node (Just (PosInfo {startLine = 2, startColumn = 0, endLine = 2, endColumn = 0})) TABLE_CELL [Node Nothing (TEXT "b") []],Node (Just (PosInfo {startLine = 2, startColumn = 0, endLine = 2, endColumn = 0})) TABLE_CELL [Node Nothing (TEXT "c") []],Node (Just (PosInfo {startLine = 2, startColumn = 0, endLine = 2, endColumn = 0})) TABLE_CELL [Node Nothing (TEXT "d") []]],Node (Just (PosInfo {startLine = 3, startColumn = 0, endLine = 3, endColumn = 17})) TABLE_ROW [Node (Just (PosInfo {startLine = 3, startColumn = 0, endLine = 3, endColumn = 0})) TABLE_CELL [Node Nothing (TEXT "y") []],Node (Just (PosInfo {startLine = 3, startColumn = 0, endLine = 3, endColumn = 0})) TABLE_CELL [Node Nothing (TEXT "o") []],Node (Just (PosInfo {startLine = 3, startColumn = 0, endLine = 3, endColumn = 0})) TABLE_CELL [Node Nothing (TEXT "s") []],Node (Just (PosInfo {startLine = 3, startColumn = 0, endLine = 3, endColumn = 0})) TABLE_CELL [Node Nothing (TEXT "h") []]]]]) ~=? commonmarkToNode [] [extTable] "| a | b | c | d |\n| :-- | :-: | --- | --: |\n| y | o | s | h |" + , "\n" ~=? commonmarkToHtml [] [] "<xmp>" + , "&lt;xmp>\n" ~=? commonmarkToHtml [] [extTagfilter] "<xmp>" ]