// CommonMark renderer: converts a cmark node tree back to CommonMark text.
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include <assert.h>
|
|
|
|
#include "config.h"
|
|
#include "cmark.h"
|
|
#include "node.h"
|
|
#include "buffer.h"
|
|
#include "utf8.h"
|
|
#include "scanners.h"
|
|
#include "render.h"
|
|
|
|
// Shorthands for the renderer callbacks. Each one expects a variable named
// `renderer` to be in scope at the point of use.

// Emit string `s` with the given wrap flag and escaping mode.
#define OUT(s, wrap, escaping)                                                 \
  renderer->out(renderer, (s), (wrap), (escaping))
// Emit string `s` verbatim: no wrapping, LITERAL escaping.
#define LIT(s) renderer->out(renderer, (s), false, LITERAL)
// Invoke the renderer's `cr` callback.
#define CR() renderer->cr(renderer)
// Invoke the renderer's `blankline` callback.
#define BLANKLINE() renderer->blankline(renderer)

// Size of the scratch buffer used for a percent-encoded character.
#define ENCODED_SIZE 20
// Size of the scratch buffer used for an ordered-list marker.
#define LISTMARKER_SIZE 20
|
|
|
|
// Functions to convert cmark_nodes to commonmark strings.
|
|
|
|
// Writes the code point `c` to the renderer, escaping it first when the
// requested escaping mode calls for it.
//
//   renderer - output state; its buffer and begin_content flag are consulted
//              to decide whether `c` could be misread as block syntax.
//   escape   - LITERAL (never escape), NORMAL (inline text), URL (link
//              destination) or TITLE (link title).
//   c        - the code point to write. Only values below 0x80 are ever
//              escaped.
//   nextc    - the character after `c`; 0 is treated like whitespace when
//              deciding whether "1." / "1)" would start a list
//              (presumably 0 means end of input -- confirm against caller).
//
// Whitespace that needs escaping (only possible in URL mode) is
// percent-encoded; everything else is escaped with a backslash.
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  bool needs_escaping = false;
  bool follows_digit =
      renderer->buffer->size > 0 &&
      cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
  char encoded[ENCODED_SIZE];

  if (c < 0x80) {
    switch (escape) {
    case NORMAL:
      needs_escaping =
          c == '*' || c == '_' || c == '[' || c == ']' || c == '#' ||
          c == '<' || c == '>' || c == '\\' || c == '`' || c == '!' ||
          (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
          (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
           // begin_content doesn't get set to false til we've passed digits
           // at the beginning of line, so...
           !follows_digit) ||
          (renderer->begin_content && (c == '.' || c == ')') &&
           follows_digit && (nextc == 0 || cmark_isspace(nextc)));
      break;
    case URL:
      needs_escaping = c == '`' || c == '<' || c == '>' || cmark_isspace(c) ||
                       c == '\\' || c == ')' || c == '(';
      break;
    case TITLE:
      needs_escaping =
          c == '`' || c == '<' || c == '>' || c == '"' || c == '\\';
      break;
    default:
      // LITERAL: nothing is escaped.
      break;
    }
  }

  if (!needs_escaping) {
    cmark_render_code_point(renderer, c);
    return;
  }

  if (cmark_isspace(c)) {
    // Use percent encoding for whitespace. The value must be zero-padded to
    // two hex digits: "%2x" would render a tab as "% 9" instead of "%09".
    snprintf(encoded, ENCODED_SIZE, "%%%02x", (unsigned int)c);
    cmark_strbuf_puts(renderer->buffer, encoded);
    renderer->column += 3; // '%' plus two hex digits
  } else {
    cmark_render_ascii(renderer, "\\");
    cmark_render_code_point(renderer, c);
  }
}
|
|
|
|
// Returns the length of the longest run of consecutive backticks in the
// NUL-terminated string `code` (0 if it contains none).
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *p = code;

  // The terminating NUL is examined as well, so that a run ending at the
  // very end of the string is still counted.
  for (;;) {
    if (*p == '`') {
      run++;
    } else {
      if (run > best) {
        best = run;
      }
      run = 0;
      if (*p == '\0') {
        break;
      }
    }
    p++;
  }
  return best;
}
|
|
|
|
// Returns the smallest n >= 1 such that `code` contains no run of exactly n
// consecutive backticks.
//
// Run lengths are tracked in a 32-bit mask, so only runs shorter than 32 are
// recorded; if every length from 1 to 31 is in use the result is 32.
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k set means "a run of length k occurs". Bit 0 starts out set so that
  // 0 is never returned.
  uint32_t seen = 1;
  int run = 0;
  int candidate = 0;
  size_t pos;
  size_t len = strlen(code);

  // pos == len visits the terminating NUL, which flushes a trailing run.
  for (pos = 0; pos <= len; pos++) {
    if (code[pos] == '`') {
      run++;
      continue;
    }
    if (run > 0 && run < 32) {
      seen |= (1U << run);
    }
    run = 0;
  }

  // Find the lowest bit that is still clear.
  while (candidate < 32 && ((seen >> candidate) & 1U)) {
    candidate++;
  }
  return candidate;
}
|
|
|
|
// Returns true if `node` is a link that can be written in autolink form
// (<url>), i.e.:
//   - it is a CMARK_NODE_LINK whose URL is non-empty and starts with a scheme,
//   - it has no title,
//   - its only child is a text node, and
//   - that text equals the URL (ignoring a leading "mailto:" on the URL).
//
// Side effect: calls cmark_consolidate_text_nodes on the link's first child
// before comparing.
static bool is_autolink(cmark_node *node) {
  cmark_chunk *title;
  cmark_chunk *url;
  cmark_node *link_text;
  const char *realurl;
  int realurllen;

  if (node->type != CMARK_NODE_LINK) {
    return false;
  }

  url = &node->as.link.url;
  if (url->len == 0 || scan_scheme(url, 0) == 0) {
    return false;
  }

  title = &node->as.link.title;
  // if it has a title, we can't treat it as an autolink:
  if (title->len > 0) {
    return false;
  }

  link_text = node->first_child;
  if (link_text == NULL) {
    return false;
  }
  cmark_consolidate_text_nodes(link_text);

  // An autolink renders only the URL, so the link must consist of exactly
  // one plain text node. Anything else (inline code, emphasis, an image, or
  // additional siblings) would be lost or change meaning, and the literal
  // field read below is only meaningful for a text node.
  if (link_text->type != CMARK_NODE_TEXT || link_text->next != NULL) {
    return false;
  }

  realurl = (const char *)url->data;
  realurllen = url->len;
  // Compare against "mailto:" only when the URL is long enough: the
  // comparison is bounded by url->len rather than relying on a terminator.
  if (realurllen >= 7 && memcmp(realurl, "mailto:", 7) == 0) {
    realurl += 7;
    realurllen -= 7;
  }

  return (realurllen == link_text->as.literal.len &&
          strncmp(realurl, (char *)link_text->as.literal.data,
                  link_text->as.literal.len) == 0);
}
|
|
|
|
// if node is a block node, returns node.
|
|
// otherwise returns first block-level node that is an ancestor of node.
|
|
// if there is no block-level ancestor, returns NULL.
|
|
static cmark_node *get_containing_block(cmark_node *node) {
|
|
while (node) {
|
|
if (node->type >= CMARK_NODE_FIRST_BLOCK &&
|
|
node->type <= CMARK_NODE_LAST_BLOCK) {
|
|
return node;
|
|
} else {
|
|
node = node->parent;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
|
|
cmark_event_type ev_type, int options) {
|
|
cmark_node *tmp;
|
|
int list_number;
|
|
cmark_delim_type list_delim;
|
|
int numticks;
|
|
bool extra_spaces;
|
|
int i;
|
|
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
|
const char *info, *code, *title;
|
|
char fencechar[2] = {'\0', '\0'};
|
|
size_t info_len, code_len;
|
|
char listmarker[LISTMARKER_SIZE];
|
|
char *emph_delim;
|
|
bool first_in_list_item;
|
|
bufsize_t marker_width;
|
|
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
|
|
!(CMARK_OPT_HARDBREAKS & options);
|
|
|
|
// Don't adjust tight list status til we've started the list.
|
|
// Otherwise we loose the blank line between a paragraph and
|
|
// a following list.
|
|
if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
|
|
tmp = get_containing_block(node);
|
|
renderer->in_tight_list_item =
|
|
tmp && // tmp might be NULL if there is no containing block
|
|
((tmp->type == CMARK_NODE_ITEM &&
|
|
cmark_node_get_list_tight(tmp->parent)) ||
|
|
(tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
|
|
cmark_node_get_list_tight(tmp->parent->parent)));
|
|
}
|
|
|
|
switch (node->type) {
|
|
case CMARK_NODE_DOCUMENT:
|
|
break;
|
|
|
|
case CMARK_NODE_BLOCK_QUOTE:
|
|
if (entering) {
|
|
LIT("> ");
|
|
renderer->begin_content = true;
|
|
cmark_strbuf_puts(renderer->prefix, "> ");
|
|
} else {
|
|
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
|
|
BLANKLINE();
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_LIST:
|
|
if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK ||
|
|
node->next->type == CMARK_NODE_LIST)) {
|
|
// this ensures that a following indented code block or list will be
|
|
// inteprereted correctly.
|
|
CR();
|
|
LIT("<!-- end list -->");
|
|
BLANKLINE();
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_ITEM:
|
|
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
|
|
marker_width = 4;
|
|
} else {
|
|
list_number = cmark_node_get_list_start(node->parent);
|
|
list_delim = cmark_node_get_list_delim(node->parent);
|
|
tmp = node;
|
|
while (tmp->prev) {
|
|
tmp = tmp->prev;
|
|
list_number += 1;
|
|
}
|
|
// we ensure a width of at least 4 so
|
|
// we get nice transition from single digits
|
|
// to double
|
|
snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
|
|
list_delim == CMARK_PAREN_DELIM ? ")" : ".",
|
|
list_number < 10 ? " " : " ");
|
|
marker_width = strlen(listmarker);
|
|
}
|
|
if (entering) {
|
|
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
|
|
LIT(" - ");
|
|
renderer->begin_content = true;
|
|
} else {
|
|
LIT(listmarker);
|
|
renderer->begin_content = true;
|
|
}
|
|
for (i = marker_width; i--;) {
|
|
cmark_strbuf_putc(renderer->prefix, ' ');
|
|
}
|
|
} else {
|
|
cmark_strbuf_truncate(renderer->prefix,
|
|
renderer->prefix->size - marker_width);
|
|
CR();
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_HEADING:
|
|
if (entering) {
|
|
for (i = cmark_node_get_heading_level(node); i > 0; i--) {
|
|
LIT("#");
|
|
}
|
|
LIT(" ");
|
|
renderer->begin_content = true;
|
|
renderer->no_linebreaks = true;
|
|
} else {
|
|
renderer->no_linebreaks = false;
|
|
BLANKLINE();
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_CODE_BLOCK:
|
|
first_in_list_item = node->prev == NULL && node->parent &&
|
|
node->parent->type == CMARK_NODE_ITEM;
|
|
|
|
if (!first_in_list_item) {
|
|
BLANKLINE();
|
|
}
|
|
info = cmark_node_get_fence_info(node);
|
|
info_len = strlen(info);
|
|
fencechar[0] = strchr(info, '`') == NULL ? '`' : '~';
|
|
code = cmark_node_get_literal(node);
|
|
code_len = strlen(code);
|
|
// use indented form if no info, and code doesn't
|
|
// begin or end with a blank line, and code isn't
|
|
// first thing in a list item
|
|
if (info_len == 0 && (code_len > 2 && !cmark_isspace(code[0]) &&
|
|
!(cmark_isspace(code[code_len - 1]) &&
|
|
cmark_isspace(code[code_len - 2]))) &&
|
|
!first_in_list_item) {
|
|
LIT(" ");
|
|
cmark_strbuf_puts(renderer->prefix, " ");
|
|
OUT(cmark_node_get_literal(node), false, LITERAL);
|
|
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
|
|
} else {
|
|
numticks = longest_backtick_sequence(code) + 1;
|
|
if (numticks < 3) {
|
|
numticks = 3;
|
|
}
|
|
for (i = 0; i < numticks; i++) {
|
|
LIT(fencechar);
|
|
}
|
|
LIT(" ");
|
|
OUT(info, false, LITERAL);
|
|
CR();
|
|
OUT(cmark_node_get_literal(node), false, LITERAL);
|
|
CR();
|
|
for (i = 0; i < numticks; i++) {
|
|
LIT(fencechar);
|
|
}
|
|
}
|
|
BLANKLINE();
|
|
break;
|
|
|
|
case CMARK_NODE_HTML_BLOCK:
|
|
BLANKLINE();
|
|
OUT(cmark_node_get_literal(node), false, LITERAL);
|
|
BLANKLINE();
|
|
break;
|
|
|
|
case CMARK_NODE_CUSTOM_BLOCK:
|
|
BLANKLINE();
|
|
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
|
|
false, LITERAL);
|
|
BLANKLINE();
|
|
break;
|
|
|
|
case CMARK_NODE_THEMATIC_BREAK:
|
|
BLANKLINE();
|
|
LIT("-----");
|
|
BLANKLINE();
|
|
break;
|
|
|
|
case CMARK_NODE_PARAGRAPH:
|
|
if (!entering) {
|
|
BLANKLINE();
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_TEXT:
|
|
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
|
|
break;
|
|
|
|
case CMARK_NODE_LINEBREAK:
|
|
if (!(CMARK_OPT_HARDBREAKS & options)) {
|
|
LIT(" ");
|
|
}
|
|
CR();
|
|
break;
|
|
|
|
case CMARK_NODE_SOFTBREAK:
|
|
if (CMARK_OPT_HARDBREAKS & options) {
|
|
LIT(" ");
|
|
CR();
|
|
} else if (!renderer->no_linebreaks && renderer->width == 0 &&
|
|
!(CMARK_OPT_HARDBREAKS & options) &&
|
|
!(CMARK_OPT_NOBREAKS & options)) {
|
|
CR();
|
|
} else {
|
|
OUT(" ", allow_wrap, LITERAL);
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_CODE:
|
|
code = cmark_node_get_literal(node);
|
|
code_len = strlen(code);
|
|
numticks = shortest_unused_backtick_sequence(code);
|
|
extra_spaces = code_len == 0 ||
|
|
code[0] == '`' || code[code_len - 1] == '`' ||
|
|
code[0] == ' ' || code[code_len - 1] == ' ';
|
|
for (i = 0; i < numticks; i++) {
|
|
LIT("`");
|
|
}
|
|
if (extra_spaces) {
|
|
LIT(" ");
|
|
}
|
|
OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
|
|
if (extra_spaces) {
|
|
LIT(" ");
|
|
}
|
|
for (i = 0; i < numticks; i++) {
|
|
LIT("`");
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_HTML_INLINE:
|
|
OUT(cmark_node_get_literal(node), false, LITERAL);
|
|
break;
|
|
|
|
case CMARK_NODE_CUSTOM_INLINE:
|
|
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
|
|
false, LITERAL);
|
|
break;
|
|
|
|
case CMARK_NODE_STRONG:
|
|
if (entering) {
|
|
LIT("**");
|
|
} else {
|
|
LIT("**");
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_EMPH:
|
|
// If we have EMPH(EMPH(x)), we need to use *_x_*
|
|
// because **x** is STRONG(x):
|
|
if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
|
|
node->next == NULL && node->prev == NULL) {
|
|
emph_delim = "_";
|
|
} else {
|
|
emph_delim = "*";
|
|
}
|
|
if (entering) {
|
|
LIT(emph_delim);
|
|
} else {
|
|
LIT(emph_delim);
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_LINK:
|
|
if (is_autolink(node)) {
|
|
if (entering) {
|
|
LIT("<");
|
|
if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
|
|
LIT((const char *)cmark_node_get_url(node) + 7);
|
|
} else {
|
|
LIT((const char *)cmark_node_get_url(node));
|
|
}
|
|
LIT(">");
|
|
// return signal to skip contents of node...
|
|
return 0;
|
|
}
|
|
} else {
|
|
if (entering) {
|
|
LIT("[");
|
|
} else {
|
|
LIT("](");
|
|
OUT(cmark_node_get_url(node), false, URL);
|
|
title = cmark_node_get_title(node);
|
|
if (strlen(title) > 0) {
|
|
LIT(" \"");
|
|
OUT(title, false, TITLE);
|
|
LIT("\"");
|
|
}
|
|
LIT(")");
|
|
}
|
|
}
|
|
break;
|
|
|
|
case CMARK_NODE_IMAGE:
|
|
if (entering) {
|
|
LIT("![");
|
|
} else {
|
|
LIT("](");
|
|
OUT(cmark_node_get_url(node), false, URL);
|
|
title = cmark_node_get_title(node);
|
|
if (strlen(title) > 0) {
|
|
OUT(" \"", allow_wrap, LITERAL);
|
|
OUT(title, false, TITLE);
|
|
LIT("\"");
|
|
}
|
|
LIT(")");
|
|
}
|
|
break;
|
|
|
|
default:
|
|
assert(false);
|
|
break;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
// Renders the tree rooted at `root` as CommonMark and returns the result of
// cmark_render (NOTE(review): presumably a heap-allocated string owned by the
// caller -- confirm against cmark_render).
//
//   options - CMARK_OPT_* bit flags.
//   width   - requested wrap width. It is forced to 0 when
//             CMARK_OPT_HARDBREAKS is set, since breaking on width has a
//             different meaning with that option.
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  int effective_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;

  return cmark_render(root, options, effective_width, outc, S_render_node);
}
|