Skip to content

Commit

Permalink
Merge remote-tracking branch 'jgm/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
kivikakk committed Apr 9, 2019
2 parents b0e8822 + bbba070 commit 3acaa52
Show file tree
Hide file tree
Showing 20 changed files with 952 additions and 1,040 deletions.
6 changes: 6 additions & 0 deletions CMarkGFM.hsc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ module CMarkGFM (
, optSourcePos
, optHardBreaks
, optSmart
, optSafe
, optUnsafe
, extStrikethrough
, extTable
Expand Down Expand Up @@ -277,6 +278,11 @@ optHardBreaks = CMarkOption #const CMARK_OPT_HARDBREAKS
optSmart :: CMarkOption
optSmart = CMarkOption #const CMARK_OPT_SMART

-- | 'optSafe' is kept only for API compatibility; it no longer has any
-- effect. Safe mode is now the default: set 'optUnsafe' to disable it.
optSafe :: CMarkOption
optSafe = CMarkOption #const CMARK_OPT_SAFE

-- | Allow rendering of raw HTML and potentially dangerous URLs in links
-- and images.
optUnsafe :: CMarkOption
Expand Down
118 changes: 89 additions & 29 deletions cbits/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ static bool S_last_line_blank(const cmark_node *node) {
return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
}

// Reports whether the CMARK_NODE__LAST_LINE_CHECKED bit is set in the
// node's flags.
static bool S_last_line_checked(const cmark_node *node) {
  if (node->flags & CMARK_NODE__LAST_LINE_CHECKED) {
    return true;
  }
  return false;
}

// Returns the node's stored `type` field converted to cmark_node_type.
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
  const cmark_node_type kind = (cmark_node_type)node->type;
  return kind;
}
Expand All @@ -47,6 +51,10 @@ static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
}

// Turns on the CMARK_NODE__LAST_LINE_CHECKED bit in the node's flags,
// leaving every other flag bit as it was.
static void S_set_last_line_checked(cmark_node *node) {
  node->flags = node->flags | CMARK_NODE__LAST_LINE_CHECKED;
}

// True when `c` is a line feed or a carriage return.
static CMARK_INLINE bool S_is_line_end_char(char c) {
  switch (c) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
Expand Down Expand Up @@ -121,8 +129,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
parser->root = document;
parser->current = document;

parser->last_buffer_ended_with_cr = false;

parser->syntax_extensions = saved_exts;
parser->inline_syntax_extensions = saved_inline_exts;
parser->options = saved_options;
Expand Down Expand Up @@ -234,26 +240,43 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {

// Check to see if a node ends with a blank line, descending
// if needed into lists and sublists.
static bool ends_with_blank_line(cmark_node *node) {
cmark_node *cur = node;
while (cur != NULL) {
if (S_last_line_blank(cur)) {
return true;
}
if (S_type(cur) == CMARK_NODE_LIST || S_type(cur) == CMARK_NODE_ITEM) {
cur = cur->last_child;
} else {
cur = NULL;
}
static bool S_ends_with_blank_line(cmark_node *node) {
if (S_last_line_checked(node)) {
return(S_last_line_blank(node));
} else if ((S_type(node) == CMARK_NODE_LIST ||
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
S_set_last_line_checked(node);
return(S_ends_with_blank_line(node->last_child));
} else {
S_set_last_line_checked(node);
return (S_last_line_blank(node));
}
return false;
}

// Consumes leading link reference definitions from the content of `b`.
// While the remaining content starts with '[', it is handed to
// cmark_parse_reference_inline (together with parser->refmap); each
// nonzero result is the number of bytes to skip. The skipped prefix is
// then dropped from the node's content buffer.
// Returns true if non-blank content remains afterwards.
static bool resolve_reference_link_definitions(
    cmark_parser *parser,
    cmark_node *b) {
  cmark_strbuf *content = &b->content;
  cmark_chunk rest = {content->ptr, content->size, 0};
  bufsize_t consumed;

  for (;;) {
    // A reference definition can only start with '['.
    if (rest.len == 0 || rest.data[0] != '[') {
      break;
    }
    consumed = cmark_parse_reference_inline(parser->mem, &rest,
                                            parser->refmap);
    if (consumed == 0) {
      // Not a definition: leave the rest of the content in place.
      break;
    }
    rest.data += consumed;
    rest.len -= consumed;
  }

  // Remove everything that was consumed from the front of the buffer.
  cmark_strbuf_drop(content, (content->size - rest.len));
  return !is_blank(content, 0);
}

static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
bufsize_t pos;
cmark_node *item;
cmark_node *subitem;
cmark_node *parent;
bool has_content;

parent = b->parent;
assert(b->flags &
Expand Down Expand Up @@ -283,15 +306,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
switch (S_type(b)) {
case CMARK_NODE_PARAGRAPH:
{
cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
while (chunk.len && chunk.data[0] == '[' &&
(pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) {

chunk.data += pos;
chunk.len -= pos;
}
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
if (is_blank(node_content, 0)) {
has_content = resolve_reference_link_definitions(parser, b);
if (!has_content) {
// remove blank node (former reference def)
cmark_node_free(b);
}
Expand Down Expand Up @@ -343,7 +359,8 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
// spaces between them:
subitem = item->first_child;
while (subitem) {
if (ends_with_blank_line(subitem) && (item->next || subitem->next)) {
if ((item->next || subitem->next) &&
S_ends_with_blank_line(subitem)) {
b->as.list.tight = false;
break;
}
Expand Down Expand Up @@ -748,6 +765,40 @@ static void chop_trailing_hashtags(cmark_chunk *ch) {
}
}

// Scan for a thematic break starting at `offset` in `input`.
// Returns the length of the match on success. On failure, returns 0 and
// stores in parser->thematic_break_kill_pos the index at which the scan
// gave up.
// A match needs the same marker character ('*', '_' or '-') at least
// three times, optionally separated by spaces or tabs, followed by a
// line ending ('\r' or '\n').
static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
                                 bufsize_t offset) {
  bufsize_t pos = offset;
  const char marker = peek_at(input, pos);
  char ch = '\0';
  int run = 1; // the marker at `offset` counts as the first one

  if (marker != '*' && marker != '_' && marker != '-') {
    parser->thematic_break_kill_pos = pos;
    return 0;
  }

  for (;;) {
    ch = peek_at(input, ++pos);
    if (ch == '\0') {
      break;
    }
    if (ch == marker) {
      run++;
      continue;
    }
    if (ch != ' ' && ch != '\t') {
      // Anything other than the marker or blank space ends the scan.
      break;
    }
  }

  if (run < 3 || (ch != '\r' && ch != '\n')) {
    parser->thematic_break_kill_pos = pos;
    return 0;
  }
  // Include the line-ending character in the reported length.
  return (pos - offset) + 1;
}

// Find first nonspace character from current offset, setting
// parser->first_nonspace, parser->first_nonspace_column,
// parser->indent, and parser->blank. Does not advance parser->offset.
Expand Down Expand Up @@ -1040,6 +1091,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
bufsize_t matched = 0;
int lev = 0;
bool save_partially_consumed_tab;
bool has_content;
int save_offset;
int save_column;

Expand Down Expand Up @@ -1112,13 +1164,20 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
} else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
(lev =
scan_setext_heading_line(input, parser->first_nonspace))) {
(*container)->type = (uint16_t)CMARK_NODE_HEADING;
(*container)->as.heading.level = lev;
(*container)->as.heading.setext = true;
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
// finalize paragraph, resolving reference links
has_content = resolve_reference_link_definitions(parser, *container);

if (has_content) {

(*container)->type = (uint16_t)CMARK_NODE_HEADING;
(*container)->as.heading.level = lev;
(*container)->as.heading.setext = true;
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
}
} else if (!indented &&
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
(matched = scan_thematic_break(input, parser->first_nonspace))) {
(parser->thematic_break_kill_pos <= parser->first_nonspace) &&
(matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
// it's only now that we know the line is not part of a setext heading:
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
parser->first_nonspace + 1);
Expand Down Expand Up @@ -1377,6 +1436,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
parser->column = 0;
parser->first_nonspace = 0;
parser->first_nonspace_column = 0;
parser->thematic_break_kill_pos = 0;
parser->indent = 0;
parser->blank = false;
parser->partially_consumed_tab = false;
Expand Down
1 change: 0 additions & 1 deletion cbits/buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "config.h"
#include "cmark_ctype.h"
#include "buffer.h"
#include "memory.h"

/* Used as default value for cmark_strbuf->ptr so that people can always
* assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
Expand Down
1 change: 0 additions & 1 deletion cbits/chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#include <assert.h>
#include "cmark-gfm.h"
#include "buffer.h"
#include "memory.h"
#include "cmark_ctype.h"

#define CMARK_CHUNK_EMPTY \
Expand Down
22 changes: 14 additions & 8 deletions cbits/cmark-gfm.h
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,20 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
*/
#define CMARK_OPT_HARDBREAKS (1 << 2)

/** `CMARK_OPT_SAFE` is defined here for API compatibility,
but it no longer has any effect. "Safe" mode is now the default:
set `CMARK_OPT_UNSAFE` to disable it.
*/
#define CMARK_OPT_SAFE (1 << 3)

/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
* `file:`, and `data:`, except for `image/png`, `image/gif`,
* `image/jpeg`, or `image/webp` mime types). By default,
* raw HTML is replaced by a placeholder HTML comment. Unsafe
* links are replaced by empty strings.
*/
#define CMARK_OPT_UNSAFE (1 << 17)

/** Render `softbreak` elements as spaces.
*/
#define CMARK_OPT_NOBREAKS (1 << 4)
Expand Down Expand Up @@ -738,14 +752,6 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
*/
#define CMARK_OPT_FULL_INFO_STRING (1 << 16)

/** Allow raw HTML and unsafe links, `javascript:`, `vbscript:`, `file:`, and
* all `data:` URLs -- by default, only `image/png`, `image/gif`, `image/jpeg`,
* or `image/webp` mime types are allowed. Without this option, raw HTML is
* replaced by a placeholder HTML comment, and unsafe links are replaced by
* empty strings.
*/
#define CMARK_OPT_UNSAFE (1 << 17)

/**
* ## Version information
*/
Expand Down
4 changes: 2 additions & 2 deletions cbits/cmark-gfm_version.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef CMARK_GFM_VERSION_H
#define CMARK_GFM_VERSION_H

#define CMARK_GFM_VERSION ((0 << 24) | (28 << 16) | (3 << 8) | 20)
#define CMARK_GFM_VERSION_STRING "0.28.3.gfm.20"
#define CMARK_GFM_VERSION ((0 << 24) | (29 << 16) | (0 << 8) | 0)
#define CMARK_GFM_VERSION_STRING "0.29.0.gfm.0"

#endif
13 changes: 9 additions & 4 deletions cbits/commonmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
needs_escaping =
c < 0x80 && escape != LITERAL &&
((escape == NORMAL &&
(c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
(c < 0x20 ||
c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
c == '>' || c == '\\' || c == '`' || c == '~' || c == '!' ||
(c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
(renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
Expand All @@ -50,14 +51,18 @@ static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_node *node,
(c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));

if (needs_escaping) {
if (cmark_isspace((char)c)) {
if (escape == URL && cmark_isspace((char)c)) {
// use percent encoding for spaces
snprintf(encoded, ENCODED_SIZE, "%%%2x", c);
snprintf(encoded, ENCODED_SIZE, "%%%2X", c);
cmark_strbuf_puts(renderer->buffer, encoded);
renderer->column += 3;
} else {
} else if (cmark_ispunct((char)c)) {
cmark_render_ascii(renderer, "\\");
cmark_render_code_point(renderer, c);
} else { // render as entity
snprintf(encoded, ENCODED_SIZE, "&#%d;", c);
cmark_strbuf_puts(renderer->buffer, encoded);
renderer->column += (int)strlen(encoded);
}
} else {
cmark_render_code_point(renderer, c);
Expand Down
8 changes: 4 additions & 4 deletions cbits/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
case CMARK_NODE_LINK:
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
if (!(!(options & CMARK_OPT_UNSAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
if ((options & CMARK_OPT_UNSAFE) ||
!(scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
Expand All @@ -372,8 +372,8 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
case CMARK_NODE_IMAGE:
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
if (!(!(options & CMARK_OPT_UNSAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
if ((options & CMARK_OPT_UNSAFE) ||
!(scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
Expand Down
Loading

0 comments on commit 3acaa52

Please sign in to comment.