1375 lines
41 KiB
C
1375 lines
41 KiB
C
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
|
|
#include "cmark_ctype.h"
|
|
#include "config.h"
|
|
#include "node.h"
|
|
#include "parser.h"
|
|
#include "references.h"
|
|
#include "cmark.h"
|
|
#include "houdini.h"
|
|
#include "utf8.h"
|
|
#include "scanners.h"
|
|
#include "inlines.h"
|
|
|
|
// UTF-8 byte sequences for the typographic characters substituted by the
// "smart" punctuation handlers in this file. Both the bytes and the pointers
// themselves are const, so a constant cannot be reseated by accident.
static const char *const EMDASH = "\xE2\x80\x94";           // U+2014
static const char *const ENDASH = "\xE2\x80\x93";           // U+2013
static const char *const ELLIPSES = "\xE2\x80\xA6";         // U+2026
static const char *const LEFTDOUBLEQUOTE = "\xE2\x80\x9C";  // U+201C
static const char *const RIGHTDOUBLEQUOTE = "\xE2\x80\x9D"; // U+201D
static const char *const LEFTSINGLEQUOTE = "\xE2\x80\x98";  // U+2018
static const char *const RIGHTSINGLEQUOTE = "\xE2\x80\x99"; // U+2019
|
|
|
|
// Shorthand constructors for simple inline nodes.
//
// make_str / make_code / make_raw_html build a literal-carrying node through
// make_literal (subject, start column, end column, chunk). The remaining
// macros build a content-less node of the given type through make_simple.
#define make_str(subj, sc, ec, s)                                              \
  make_literal(subj, CMARK_NODE_TEXT, sc, ec, s)
#define make_code(subj, sc, ec, s)                                             \
  make_literal(subj, CMARK_NODE_CODE, sc, ec, s)
#define make_raw_html(subj, sc, ec, s)                                         \
  make_literal(subj, CMARK_NODE_HTML_INLINE, sc, ec, s)
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
#define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG)

// Longest backtick run for which closer positions are cached in
// subject.backticks; longer opening runs are never matched.
#define MAXBACKTICKS 1000
|
|
|
|
typedef struct delimiter {
|
|
struct delimiter *previous;
|
|
struct delimiter *next;
|
|
cmark_node *inl_text;
|
|
bufsize_t length;
|
|
unsigned char delim_char;
|
|
bool can_open;
|
|
bool can_close;
|
|
} delimiter;
|
|
|
|
typedef struct bracket {
|
|
struct bracket *previous;
|
|
struct delimiter *previous_delimiter;
|
|
cmark_node *inl_text;
|
|
bufsize_t position;
|
|
bool image;
|
|
bool active;
|
|
bool bracket_after;
|
|
} bracket;
|
|
|
|
typedef struct {
|
|
cmark_mem *mem;
|
|
cmark_chunk input;
|
|
int line;
|
|
bufsize_t pos;
|
|
int block_offset;
|
|
int column_offset;
|
|
cmark_reference_map *refmap;
|
|
delimiter *last_delim;
|
|
bracket *last_bracket;
|
|
bufsize_t backticks[MAXBACKTICKS + 1];
|
|
bool scanned_for_backticks;
|
|
} subject;
|
|
|
|
// Return true when `c` is a line feed or a carriage return.
static CMARK_INLINE bool S_is_line_end_char(char c) {
  switch (c) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
|
|
|
|
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
|
|
delimiter *closer);
|
|
|
|
static int parse_inline(subject *subj, cmark_node *parent, int options);
|
|
|
|
static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
|
|
cmark_chunk *chunk, cmark_reference_map *refmap);
|
|
static bufsize_t subject_find_special_char(subject *subj, int options);
|
|
|
|
// Create an inline with a literal string value.
|
|
static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
|
|
int start_column, int end_column,
|
|
cmark_chunk s) {
|
|
cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e));
|
|
cmark_strbuf_init(subj->mem, &e->content, 0);
|
|
e->type = (uint16_t)t;
|
|
e->as.literal = s;
|
|
e->start_line = e->end_line = subj->line;
|
|
// columns are 1 based.
|
|
e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
|
|
e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
|
|
return e;
|
|
}
|
|
|
|
// Create an inline with no value.
|
|
static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
|
|
cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
|
|
cmark_strbuf_init(mem, &e->content, 0);
|
|
e->type = t;
|
|
return e;
|
|
}
|
|
|
|
// Like make_str, but parses entities.
|
|
static cmark_node *make_str_with_entities(subject *subj,
|
|
int start_column, int end_column,
|
|
cmark_chunk *content) {
|
|
cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
|
|
|
|
if (houdini_unescape_html(&unescaped, content->data, content->len)) {
|
|
return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped));
|
|
} else {
|
|
return make_str(subj, start_column, end_column, *content);
|
|
}
|
|
}
|
|
|
|
// Duplicate a chunk by creating a copy of the buffer not by reusing the
|
|
// buffer like cmark_chunk_dup does.
|
|
static cmark_chunk chunk_clone(cmark_mem *mem, cmark_chunk *src) {
|
|
cmark_chunk c;
|
|
bufsize_t len = src->len;
|
|
|
|
c.len = len;
|
|
c.data = (unsigned char *)mem->calloc(len + 1, 1);
|
|
c.alloc = 1;
|
|
if (len)
|
|
memcpy(c.data, src->data, len);
|
|
c.data[len] = '\0';
|
|
|
|
return c;
|
|
}
|
|
|
|
// Build the destination for an autolink. `url` is trimmed in place; an empty
// result yields an empty chunk. Otherwise the HTML-unescaped URL is returned
// in a newly built buffer, prefixed with "mailto:" when `is_email` is nonzero.
static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
                                        int is_email) {
  cmark_strbuf buf = CMARK_BUF_INIT(mem);

  cmark_chunk_trim(url);

  if (url->len != 0) {
    if (is_email) {
      cmark_strbuf_puts(&buf, "mailto:");
    }
    houdini_unescape_html_f(&buf, url->data, url->len);
    return cmark_chunk_buf_detach(&buf);
  }

  {
    cmark_chunk empty = CMARK_CHUNK_EMPTY;
    return empty;
  }
}
|
|
|
|
// Create a link node for an autolink. The destination comes from
// cmark_clean_autolink (which trims `url` in place), the title is empty, and
// the link's single child is a text node built from `url` with entities
// decoded. The link's own columns are stored as column + 1 without the
// subject's offsets applied.
static CMARK_INLINE cmark_node *make_autolink(subject *subj,
                                              int start_column, int end_column,
                                              cmark_chunk url, int is_email) {
  cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
  cmark_node *label;

  link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
  link->as.link.title = cmark_chunk_literal("");

  link->end_line = subj->line;
  link->start_line = link->end_line;
  link->start_column = start_column + 1;
  link->end_column = end_column + 1;

  // The label sits just inside the surrounding angle brackets.
  label = make_str_with_entities(subj, start_column + 1, end_column - 1, &url);
  cmark_node_append_child(link, label);

  return link;
}
|
|
|
|
static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
|
|
cmark_chunk *chunk, cmark_reference_map *refmap) {
|
|
int i;
|
|
e->mem = mem;
|
|
e->input = *chunk;
|
|
e->line = line_number;
|
|
e->pos = 0;
|
|
e->block_offset = block_offset;
|
|
e->column_offset = 0;
|
|
e->refmap = refmap;
|
|
e->last_delim = NULL;
|
|
e->last_bracket = NULL;
|
|
for (i = 0; i <= MAXBACKTICKS; i++) {
|
|
e->backticks[i] = 0;
|
|
}
|
|
e->scanned_for_backticks = false;
|
|
}
|
|
|
|
// Predicate for take_while: 1 when `c` is a backtick, 0 otherwise.
static CMARK_INLINE int isbacktick(int c) {
  return c == '`' ? 1 : 0;
}
|
|
|
|
// Return the byte at the current position, or 0 when the position is at or
// past the end of the input. The position is not changed.
static CMARK_INLINE unsigned char peek_char(subject *subj) {
  const bool in_bounds = subj->pos < subj->input.len;

  // NULL bytes should have been stripped out by now. If they're
  // present, it's a programming error:
  assert(!(in_bounds && subj->input.data[subj->pos] == 0));

  if (!in_bounds) {
    return 0;
  }
  return subj->input.data[subj->pos];
}
|
|
|
|
// Return the input byte at offset `pos`. No bounds check is performed here;
// the caller is responsible for passing a readable offset.
static CMARK_INLINE unsigned char peek_at(subject *subj, bufsize_t pos) {
  const unsigned char *bytes = subj->input.data;

  return bytes[pos];
}
|
|
|
|
// Return true if there are more characters in the subject.
|
|
static CMARK_INLINE int is_eof(subject *subj) {
|
|
return (subj->pos >= subj->input.len);
|
|
}
|
|
|
|
// Advance the subject by one byte. Doesn't check for eof.
// The expansion is fully parenthesized so that it remains a single
// expression wherever the macro is used.
#define advance(subj) ((subj)->pos += 1)
|
|
|
|
// Advance past any run of spaces and tabs at the current position.
// Returns true if at least one byte was skipped.
static CMARK_INLINE bool skip_spaces(subject *subj) {
  bool moved = false;

  for (;;) {
    const unsigned char c = peek_char(subj);
    if (c != ' ' && c != '\t') {
      break;
    }
    advance(subj);
    moved = true;
  }

  return moved;
}
|
|
|
|
// Consume one line ending at the current position: an optional '\r'
// followed by an optional '\n'. Returns true if either byte was consumed or
// if the subject is at end of input.
static CMARK_INLINE bool skip_line_end(subject *subj) {
  bool consumed = false;

  if (peek_char(subj) == '\r') {
    consumed = true;
    advance(subj);
  }
  if (peek_char(subj) == '\n') {
    consumed = true;
    advance(subj);
  }

  if (consumed) {
    return true;
  }
  return is_eof(subj) != 0;
}
|
|
|
|
// Take characters while a predicate holds, and return a string.
|
|
static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
|
|
unsigned char c;
|
|
bufsize_t startpos = subj->pos;
|
|
bufsize_t len = 0;
|
|
|
|
while ((c = peek_char(subj)) && (*f)(c)) {
|
|
advance(subj);
|
|
len++;
|
|
}
|
|
|
|
return cmark_chunk_dup(&subj->input, startpos, len);
|
|
}
|
|
|
|
// Return the number of newlines in a given span of text in a subject. If
|
|
// the number is greater than zero, also return the number of characters
|
|
// between the last newline and the end of the span in `since_newline`.
|
|
static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
|
|
int nls = 0;
|
|
int since_nl = 0;
|
|
|
|
while (len--) {
|
|
if (subj->input.data[from++] == '\n') {
|
|
++nls;
|
|
since_nl = 0;
|
|
} else {
|
|
++since_nl;
|
|
}
|
|
}
|
|
|
|
if (!nls)
|
|
return 0;
|
|
|
|
*since_newline = since_nl;
|
|
return nls;
|
|
}
|
|
|
|
// Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and
|
|
// `column_offset` according to the number of newlines in a just-matched span
|
|
// of text in `subj`.
|
|
static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) {
|
|
if (!(options & CMARK_OPT_SOURCEPOS)) {
|
|
return;
|
|
}
|
|
|
|
int since_newline;
|
|
int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
|
|
if (newlines) {
|
|
subj->line += newlines;
|
|
node->end_line += newlines;
|
|
node->end_column = since_newline;
|
|
subj->column_offset = -subj->pos + since_newline + extra;
|
|
}
|
|
}
|
|
|
|
// Try to process a backtick code span that began with a
|
|
// span of ticks of length openticklength length (already
|
|
// parsed). Return 0 if you don't find matching closing
|
|
// backticks, otherwise return the position in the subject
|
|
// after the closing backticks.
|
|
static bufsize_t scan_to_closing_backticks(subject *subj,
|
|
bufsize_t openticklength) {
|
|
|
|
bool found = false;
|
|
if (openticklength > MAXBACKTICKS) {
|
|
// we limit backtick string length because of the array subj->backticks:
|
|
return 0;
|
|
}
|
|
if (subj->scanned_for_backticks &&
|
|
subj->backticks[openticklength] <= subj->pos) {
|
|
// return if we already know there's no closer
|
|
return 0;
|
|
}
|
|
while (!found) {
|
|
// read non backticks
|
|
unsigned char c;
|
|
while ((c = peek_char(subj)) && c != '`') {
|
|
advance(subj);
|
|
}
|
|
if (is_eof(subj)) {
|
|
break;
|
|
}
|
|
bufsize_t numticks = 0;
|
|
while (peek_char(subj) == '`') {
|
|
advance(subj);
|
|
numticks++;
|
|
}
|
|
// store position of ender
|
|
if (numticks <= MAXBACKTICKS) {
|
|
subj->backticks[numticks] = subj->pos - numticks;
|
|
}
|
|
if (numticks == openticklength) {
|
|
return (subj->pos);
|
|
}
|
|
}
|
|
// got through whole input without finding closer
|
|
subj->scanned_for_backticks = true;
|
|
return 0;
|
|
}
|
|
|
|
// Destructively modify string, converting newlines to
|
|
// spaces, then removing a single leading + trailing space.
|
|
static void S_normalize_code(cmark_strbuf *s) {
|
|
bufsize_t r, w;
|
|
|
|
for (r = 0, w = 0; r < s->size; ++r) {
|
|
switch (s->ptr[r]) {
|
|
case '\r':
|
|
if (s->ptr[r + 1] != '\n') {
|
|
s->ptr[w++] = ' ';
|
|
}
|
|
break;
|
|
case '\n':
|
|
s->ptr[w++] = ' ';
|
|
break;
|
|
default:
|
|
s->ptr[w++] = s->ptr[r];
|
|
}
|
|
}
|
|
|
|
// begins and ends with space?
|
|
if (s->ptr[0] == ' ' && s->ptr[w - 1] == ' ') {
|
|
cmark_strbuf_drop(s, 1);
|
|
cmark_strbuf_truncate(s, w - 2);
|
|
} else {
|
|
cmark_strbuf_truncate(s, w);
|
|
}
|
|
|
|
}
|
|
|
|
|
|
// Parse backtick code section or raw backticks, return an inline.
|
|
// Assumes that the subject has a backtick at the current position.
|
|
static cmark_node *handle_backticks(subject *subj, int options) {
|
|
cmark_chunk openticks = take_while(subj, isbacktick);
|
|
bufsize_t startpos = subj->pos;
|
|
bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
|
|
|
|
if (endpos == 0) { // not found
|
|
subj->pos = startpos; // rewind
|
|
return make_str(subj, subj->pos, subj->pos, openticks);
|
|
} else {
|
|
cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
|
|
|
|
cmark_strbuf_set(&buf, subj->input.data + startpos,
|
|
endpos - startpos - openticks.len);
|
|
S_normalize_code(&buf);
|
|
|
|
cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
|
|
adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
|
|
return node;
|
|
}
|
|
}
|
|
|
|
|
|
// Scan ***, **, or * and return number scanned, or 0.
|
|
// Advances position.
|
|
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
|
|
bool *can_close) {
|
|
int numdelims = 0;
|
|
bufsize_t before_char_pos;
|
|
int32_t after_char = 0;
|
|
int32_t before_char = 0;
|
|
int len;
|
|
bool left_flanking, right_flanking;
|
|
|
|
if (subj->pos == 0) {
|
|
before_char = 10;
|
|
} else {
|
|
before_char_pos = subj->pos - 1;
|
|
// walk back to the beginning of the UTF_8 sequence:
|
|
while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
|
|
before_char_pos -= 1;
|
|
}
|
|
len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
|
|
subj->pos - before_char_pos, &before_char);
|
|
if (len == -1) {
|
|
before_char = 10;
|
|
}
|
|
}
|
|
|
|
if (c == '\'' || c == '"') {
|
|
numdelims++;
|
|
advance(subj); // limit to 1 delim for quotes
|
|
} else {
|
|
while (peek_char(subj) == c) {
|
|
numdelims++;
|
|
advance(subj);
|
|
}
|
|
}
|
|
|
|
len = cmark_utf8proc_iterate(subj->input.data + subj->pos,
|
|
subj->input.len - subj->pos, &after_char);
|
|
if (len == -1) {
|
|
after_char = 10;
|
|
}
|
|
left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
|
|
(!cmark_utf8proc_is_punctuation(after_char) ||
|
|
cmark_utf8proc_is_space(before_char) ||
|
|
cmark_utf8proc_is_punctuation(before_char));
|
|
right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
|
|
(!cmark_utf8proc_is_punctuation(before_char) ||
|
|
cmark_utf8proc_is_space(after_char) ||
|
|
cmark_utf8proc_is_punctuation(after_char));
|
|
if (c == '_') {
|
|
*can_open = left_flanking &&
|
|
(!right_flanking || cmark_utf8proc_is_punctuation(before_char));
|
|
*can_close = right_flanking &&
|
|
(!left_flanking || cmark_utf8proc_is_punctuation(after_char));
|
|
} else if (c == '\'' || c == '"') {
|
|
*can_open = left_flanking && !right_flanking &&
|
|
before_char != ']' && before_char != ')';
|
|
*can_close = right_flanking;
|
|
} else {
|
|
*can_open = left_flanking;
|
|
*can_close = right_flanking;
|
|
}
|
|
return numdelims;
|
|
}
|
|
|
|
/*
|
|
static void print_delimiters(subject *subj)
|
|
{
|
|
delimiter *delim;
|
|
delim = subj->last_delim;
|
|
while (delim != NULL) {
|
|
printf("Item at stack pos %p: %d %d %d next(%p) prev(%p)\n",
|
|
(void*)delim, delim->delim_char,
|
|
delim->can_open, delim->can_close,
|
|
(void*)delim->next, (void*)delim->previous);
|
|
delim = delim->previous;
|
|
}
|
|
}
|
|
*/
|
|
|
|
// Unlink `delim` from the subject's delimiter stack and free it.
// A NULL `delim` is ignored.
static void remove_delimiter(subject *subj, delimiter *delim) {
  delimiter *older;
  delimiter *newer;

  if (delim == NULL) {
    return;
  }

  older = delim->previous;
  newer = delim->next;

  if (newer != NULL) {
    newer->previous = older;
  } else {
    // end of list:
    assert(delim == subj->last_delim);
    subj->last_delim = older;
  }

  if (older != NULL) {
    older->next = newer;
  }

  subj->mem->free(delim);
}
|
|
|
|
// Remove the newest entry from the bracket stack and free it.
// Does nothing when the stack is empty.
static void pop_bracket(subject *subj) {
  bracket *top = subj->last_bracket;

  if (top != NULL) {
    subj->last_bracket = top->previous;
    subj->mem->free(top);
  }
}
|
|
|
|
// Push a new entry for delimiter character `c` onto the delimiter stack.
// `inl_text` is the text node that holds the delimiter run; its current
// literal length is recorded as the entry's length.
static void push_delimiter(subject *subj, unsigned char c, bool can_open,
                           bool can_close, cmark_node *inl_text) {
  delimiter *entry = (delimiter *)subj->mem->calloc(1, sizeof(delimiter));
  delimiter *below = subj->last_delim;

  entry->inl_text = inl_text;
  entry->length = inl_text->as.literal.len;
  entry->delim_char = c;
  entry->can_open = can_open;
  entry->can_close = can_close;

  // link in as the newest element
  entry->next = NULL;
  entry->previous = below;
  if (below != NULL) {
    below->next = entry;
  }
  subj->last_delim = entry;
}
|
|
|
|
// Push a new active entry onto the bracket stack for an opener whose text
// node is `inl_text`. It records the current position and the current top
// of the delimiter stack, and marks the previous top bracket (if any) as
// having a bracket after it.
static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
  bracket *entry = (bracket *)subj->mem->calloc(1, sizeof(bracket));
  bracket *below = subj->last_bracket;

  if (below != NULL) {
    below->bracket_after = true;
  }

  entry->previous = below;
  entry->previous_delimiter = subj->last_delim;
  entry->inl_text = inl_text;
  entry->position = subj->pos;
  entry->image = image;
  entry->active = true;
  entry->bracket_after = false;

  subj->last_bracket = entry;
}
|
|
|
|
// Assumes the subject has a c at the current position.
|
|
static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
|
|
bufsize_t numdelims;
|
|
cmark_node *inl_text;
|
|
bool can_open, can_close;
|
|
cmark_chunk contents;
|
|
|
|
numdelims = scan_delims(subj, c, &can_open, &can_close);
|
|
|
|
if (c == '\'' && smart) {
|
|
contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
|
|
} else if (c == '"' && smart) {
|
|
contents =
|
|
cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
|
|
} else {
|
|
contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
|
|
}
|
|
|
|
inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);
|
|
|
|
if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
|
|
push_delimiter(subj, c, can_open, can_close, inl_text);
|
|
}
|
|
|
|
return inl_text;
|
|
}
|
|
|
|
// Assumes we have a hyphen at the current position.
|
|
static cmark_node *handle_hyphen(subject *subj, bool smart) {
|
|
int startpos = subj->pos;
|
|
|
|
advance(subj);
|
|
|
|
if (!smart || peek_char(subj) != '-') {
|
|
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
|
|
}
|
|
|
|
while (smart && peek_char(subj) == '-') {
|
|
advance(subj);
|
|
}
|
|
|
|
int numhyphens = subj->pos - startpos;
|
|
int en_count = 0;
|
|
int em_count = 0;
|
|
int i;
|
|
cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
|
|
|
|
if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
|
|
em_count = numhyphens / 3;
|
|
} else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
|
|
en_count = numhyphens / 2;
|
|
} else if (numhyphens % 3 == 2) { // use one en dash at end
|
|
en_count = 1;
|
|
em_count = (numhyphens - 2) / 3;
|
|
} else { // use two en dashes at the end
|
|
en_count = 2;
|
|
em_count = (numhyphens - 4) / 3;
|
|
}
|
|
|
|
for (i = em_count; i > 0; i--) {
|
|
cmark_strbuf_puts(&buf, EMDASH);
|
|
}
|
|
|
|
for (i = en_count; i > 0; i--) {
|
|
cmark_strbuf_puts(&buf, ENDASH);
|
|
}
|
|
|
|
return make_str(subj, startpos, subj->pos - 1, cmark_chunk_buf_detach(&buf));
|
|
}
|
|
|
|
// Assumes we have a period at the current position.
|
|
static cmark_node *handle_period(subject *subj, bool smart) {
|
|
advance(subj);
|
|
if (smart && peek_char(subj) == '.') {
|
|
advance(subj);
|
|
if (peek_char(subj) == '.') {
|
|
advance(subj);
|
|
return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
|
|
} else {
|
|
return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
|
|
}
|
|
} else {
|
|
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
|
|
}
|
|
}
|
|
|
|
// Resolve the delimiters on the stack above `stack_bottom`.
//
// The stack is walked from the oldest relevant delimiter towards the newest.
// For each delimiter that can close, the stack is searched backwards for a
// matching opener. '*' and '_' pairs are turned into emph/strong nodes by
// S_insert_emph; quote closers get a right curly quote and their matched
// opener a left curly quote. `openers_bottom` holds, per delimiter kind, the
// point below which an earlier search already failed: one slot each for '"',
// '\'' and '_', and three slots for '*' selected by closer length modulo 3.
// On return every delimiter above `stack_bottom` has been freed.
static void process_emphasis(subject *subj, delimiter *stack_bottom) {
  delimiter *closer = subj->last_delim;
  delimiter *opener;
  delimiter *old_closer;
  bool opener_found;
  // Initialized so that it is never read indeterminate, even in builds where
  // the assert in the default case below is compiled out.
  int openers_bottom_index = 0;
  delimiter *openers_bottom[6] = {stack_bottom, stack_bottom, stack_bottom,
                                  stack_bottom, stack_bottom, stack_bottom};

  // move back to first relevant delim.
  while (closer != NULL && closer->previous != stack_bottom) {
    closer = closer->previous;
  }

  // now move forward, looking for closers, and handling each
  while (closer != NULL) {
    if (closer->can_close) {
      switch (closer->delim_char) {
      case '"':
        openers_bottom_index = 0;
        break;
      case '\'':
        openers_bottom_index = 1;
        break;
      case '_':
        openers_bottom_index = 2;
        break;
      case '*':
        openers_bottom_index = 3 + (closer->length % 3);
        break;
      default:
        // Not expected to happen. When asserts are disabled, skip the
        // delimiter so the loop still makes progress instead of spinning on
        // an entry that no branch below would advance past.
        assert(false);
        closer = closer->next;
        continue;
      }

      // Now look backwards for first matching opener:
      opener = closer->previous;
      opener_found = false;
      while (opener != NULL && opener != openers_bottom[openers_bottom_index]) {
        if (opener->can_open && opener->delim_char == closer->delim_char) {
          // interior closer of size 2 can't match opener of size 1
          // or of size 1 can't match 2
          if (!(closer->can_open || opener->can_close) ||
              ((opener->length + closer->length) % 3) != 0) {
            opener_found = true;
            break;
          }
        }
        opener = opener->previous;
      }
      old_closer = closer;
      if (closer->delim_char == '*' || closer->delim_char == '_') {
        if (opener_found) {
          // S_insert_emph returns the delimiter to continue from.
          closer = S_insert_emph(subj, opener, closer);
        } else {
          closer = closer->next;
        }
      } else if (closer->delim_char == '\'') {
        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTSINGLEQUOTE);
        if (opener_found) {
          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
          opener->inl_text->as.literal = cmark_chunk_literal(LEFTSINGLEQUOTE);
        }
        closer = closer->next;
      } else if (closer->delim_char == '"') {
        cmark_chunk_free(subj->mem, &closer->inl_text->as.literal);
        closer->inl_text->as.literal = cmark_chunk_literal(RIGHTDOUBLEQUOTE);
        if (opener_found) {
          cmark_chunk_free(subj->mem, &opener->inl_text->as.literal);
          opener->inl_text->as.literal = cmark_chunk_literal(LEFTDOUBLEQUOTE);
        }
        closer = closer->next;
      }
      if (!opener_found) {
        // set lower bound for future searches for openers
        openers_bottom[openers_bottom_index] = old_closer->previous;
        if (!old_closer->can_open) {
          // we can remove a closer that can't be an
          // opener, once we've seen there's no
          // matching opener:
          remove_delimiter(subj, old_closer);
        }
      }
    } else {
      closer = closer->next;
    }
  }

  // free all delimiters in list until stack_bottom:
  while (subj->last_delim != NULL && subj->last_delim != stack_bottom) {
    remove_delimiter(subj, subj->last_delim);
  }
}
|
|
|
|
// Wrap the inlines between `opener` and `closer` in a new emphasis node.
//
// Two delimiter characters are consumed from each side (producing a strong
// node) when both text nodes still hold at least two; otherwise one is
// consumed (producing an emph node). Delimiters lying between the pair are
// discarded. A side whose text node is left empty has that node freed and
// its stack entry removed. Returns the delimiter from which the caller
// should continue: `closer` itself if it still has characters, else the
// entry that followed it.
static delimiter *S_insert_emph(subject *subj, delimiter *opener,
                                delimiter *closer) {
  cmark_node *open_node = opener->inl_text;
  cmark_node *close_node = closer->inl_text;
  bufsize_t open_left = open_node->as.literal.len;
  bufsize_t close_left = close_node->as.literal.len;
  bufsize_t used;
  delimiter *between, *older, *after_closer;
  cmark_node *cur, *following, *emph;

  // calculate the actual number of characters used from this closer
  used = (close_left >= 2 && open_left >= 2) ? 2 : 1;

  // remove used characters from associated inlines.
  open_left -= used;
  close_left -= used;
  open_node->as.literal.len = open_left;
  close_node->as.literal.len = close_left;

  // free delimiters between opener and closer
  for (between = closer->previous; between != NULL && between != opener;
       between = older) {
    older = between->previous;
    remove_delimiter(subj, between);
  }

  // create new emph or strong, and splice it in to our inlines
  // between the opener and closer
  if (used == 1) {
    emph = make_emph(subj->mem);
  } else {
    emph = make_strong(subj->mem);
  }

  for (cur = open_node->next; cur != NULL && cur != close_node;
       cur = following) {
    following = cur->next;
    cmark_node_append_child(emph, cur);
  }
  cmark_node_insert_after(open_node, emph);

  emph->start_line = open_node->start_line;
  emph->end_line = close_node->end_line;
  emph->start_column = open_node->start_column;
  emph->end_column = close_node->end_column;

  // if opener has 0 characters, remove it and its associated inline
  if (open_left == 0) {
    cmark_node_free(open_node);
    remove_delimiter(subj, opener);
  }

  // if closer has 0 characters, remove it and its associated inline
  if (close_left == 0) {
    cmark_node_free(close_node);
    after_closer = closer->next;
    remove_delimiter(subj, closer);
    closer = after_closer;
  }

  return closer;
}
|
|
|
|
// Parse backslash-escape or just a backslash, returning an inline.
|
|
static cmark_node *handle_backslash(subject *subj) {
|
|
advance(subj);
|
|
unsigned char nextchar = peek_char(subj);
|
|
if (cmark_ispunct(
|
|
nextchar)) { // only ascii symbols and newline can be escaped
|
|
advance(subj);
|
|
return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
|
|
} else if (!is_eof(subj) && skip_line_end(subj)) {
|
|
return make_linebreak(subj->mem);
|
|
} else {
|
|
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
|
|
}
|
|
}
|
|
|
|
// Parse an entity or a regular "&" string.
|
|
// Assumes the subject has an '&' character at the current position.
|
|
static cmark_node *handle_entity(subject *subj) {
|
|
cmark_strbuf ent = CMARK_BUF_INIT(subj->mem);
|
|
bufsize_t len;
|
|
|
|
advance(subj);
|
|
|
|
len = houdini_unescape_ent(&ent, subj->input.data + subj->pos,
|
|
subj->input.len - subj->pos);
|
|
|
|
if (len == 0)
|
|
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));
|
|
|
|
subj->pos += len;
|
|
return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent));
|
|
}
|
|
|
|
// Clean a URL: remove surrounding whitespace, and remove \ that escape
|
|
// punctuation.
|
|
cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
|
|
cmark_strbuf buf = CMARK_BUF_INIT(mem);
|
|
|
|
cmark_chunk_trim(url);
|
|
|
|
if (url->len == 0) {
|
|
cmark_chunk result = CMARK_CHUNK_EMPTY;
|
|
return result;
|
|
}
|
|
|
|
houdini_unescape_html_f(&buf, url->data, url->len);
|
|
|
|
cmark_strbuf_unescape(&buf);
|
|
return cmark_chunk_buf_detach(&buf);
|
|
}
|
|
|
|
// Clean a link title: strip one pair of surrounding delimiters when the
// first and last bytes are '...', "..." or (...), decode HTML entities, and
// remove backslash escapes. An empty title yields an empty chunk.
cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
  cmark_strbuf buf = CMARK_BUF_INIT(mem);
  unsigned char first, last;
  bool delimited;

  if (title->len == 0) {
    cmark_chunk empty = CMARK_CHUNK_EMPTY;
    return empty;
  }

  first = title->data[0];
  last = title->data[title->len - 1];

  // remove surrounding quotes if any:
  delimited = (first == '\'' && last == '\'') ||
              (first == '(' && last == ')') ||
              (first == '"' && last == '"');

  if (delimited) {
    houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
  } else {
    houdini_unescape_html_f(&buf, title->data, title->len);
  }

  cmark_strbuf_unescape(&buf);
  return cmark_chunk_buf_detach(&buf);
}
|
|
|
|
// Parse an autolink or HTML tag.
|
|
// Assumes the subject has a '<' character at the current position.
|
|
static cmark_node *handle_pointy_brace(subject *subj, int options) {
|
|
bufsize_t matchlen = 0;
|
|
cmark_chunk contents;
|
|
|
|
advance(subj); // advance past first <
|
|
|
|
// first try to match a URL autolink
|
|
matchlen = scan_autolink_uri(&subj->input, subj->pos);
|
|
if (matchlen > 0) {
|
|
contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
|
|
subj->pos += matchlen;
|
|
|
|
return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
|
|
}
|
|
|
|
// next try to match an email autolink
|
|
matchlen = scan_autolink_email(&subj->input, subj->pos);
|
|
if (matchlen > 0) {
|
|
contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
|
|
subj->pos += matchlen;
|
|
|
|
return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
|
|
}
|
|
|
|
// finally, try to match an html tag
|
|
matchlen = scan_html_tag(&subj->input, subj->pos);
|
|
if (matchlen > 0) {
|
|
contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
|
|
subj->pos += matchlen;
|
|
cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
|
|
adjust_subj_node_newlines(subj, node, matchlen, 1, options);
|
|
return node;
|
|
}
|
|
|
|
// if nothing matches, just return the opening <:
|
|
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
|
|
}
|
|
|
|
// Parse a link label. Returns 1 if successful.
|
|
// Note: unescaped brackets are not allowed in labels.
|
|
// The label begins with `[` and ends with the first `]` character
|
|
// encountered. Backticks in labels do not start code spans.
|
|
static int link_label(subject *subj, cmark_chunk *raw_label) {
|
|
bufsize_t startpos = subj->pos;
|
|
int length = 0;
|
|
unsigned char c;
|
|
|
|
// advance past [
|
|
if (peek_char(subj) == '[') {
|
|
advance(subj);
|
|
} else {
|
|
return 0;
|
|
}
|
|
|
|
while ((c = peek_char(subj)) && c != '[' && c != ']') {
|
|
if (c == '\\') {
|
|
advance(subj);
|
|
length++;
|
|
if (cmark_ispunct(peek_char(subj))) {
|
|
advance(subj);
|
|
length++;
|
|
}
|
|
} else {
|
|
advance(subj);
|
|
length++;
|
|
}
|
|
if (length > MAX_LINK_LABEL_LENGTH) {
|
|
goto noMatch;
|
|
}
|
|
}
|
|
|
|
if (c == ']') { // match found
|
|
*raw_label =
|
|
cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
|
|
cmark_chunk_trim(raw_label);
|
|
advance(subj); // advance past ]
|
|
return 1;
|
|
}
|
|
|
|
noMatch:
|
|
subj->pos = startpos; // rewind
|
|
return 0;
|
|
}
|
|
|
|
// Scan a link destination that is not wrapped in angle brackets, starting
// at `offset`. The destination ends at the first whitespace character or at
// a `)` that has no matching `(`; a backslash followed by punctuation
// escapes that character. On success `*output` refers to the destination
// inside `input` (no copy is made) and its length is returned. Returns -1
// when parentheses nest deeper than 32 or when the scan reaches the end of
// the input.
static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
                                        cmark_chunk *output) {
  bufsize_t i = offset;
  size_t nb_p = 0; // number of currently open parentheses

  while (i < input->len) {
    const unsigned char ch = input->data[i];

    if (ch == '\\') {
      const bool escapes =
          i + 1 < input->len && cmark_ispunct(input->data[i + 1]);
      i += escapes ? 2 : 1;
      continue;
    }

    if (ch == '(') {
      ++nb_p;
      ++i;
      if (nb_p > 32) {
        return -1;
      }
      continue;
    }

    if (ch == ')') {
      if (nb_p == 0) {
        break;
      }
      --nb_p;
      ++i;
      continue;
    }

    if (cmark_isspace(ch)) {
      break;
    }

    ++i;
  }

  if (i >= input->len) {
    return -1;
  }

  {
    cmark_chunk found = {input->data + offset, i - offset, 0};
    *output = found;
  }
  return i - offset;
}
|
|
|
|
// Scan a link destination starting at `offset`. A destination of the form
// `<...>` is handled here; everything else, as well as a `<...` form that
// runs into a newline or a second `<`, is delegated to
// manual_scan_link_url_2. On success `*output` refers to the destination
// inside `input` (without the angle brackets) and the number of bytes
// scanned, brackets included, is returned. Returns -1 when the scan reaches
// the end of the input.
static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
                                      cmark_chunk *output) {
  bufsize_t i = offset;
  bool closed = false;

  if (!(i < input->len && input->data[i] == '<')) {
    return manual_scan_link_url_2(input, offset, output);
  }

  ++i; // step past '<'
  while (!closed && i < input->len) {
    switch (input->data[i]) {
    case '>':
      ++i;
      closed = true;
      break;
    case '\\':
      // skip the backslash together with the byte after it
      i += 2;
      break;
    case '\n':
    case '<':
      return manual_scan_link_url_2(input, offset, output);
    default:
      ++i;
      break;
    }
  }

  if (i >= input->len) {
    return -1;
  }

  {
    cmark_chunk found = {input->data + offset + 1, i - 2 - offset, 0};
    *output = found;
  }
  return i - offset;
}
|
|
|
|
// Return a link, an image, or a literal close bracket.
|
|
static cmark_node *handle_close_bracket(subject *subj) {
|
|
bufsize_t initial_pos, after_link_text_pos;
|
|
bufsize_t endurl, starttitle, endtitle, endall;
|
|
bufsize_t sps, n;
|
|
cmark_reference *ref = NULL;
|
|
cmark_chunk url_chunk, title_chunk;
|
|
cmark_chunk url, title;
|
|
bracket *opener;
|
|
cmark_node *inl;
|
|
cmark_chunk raw_label;
|
|
int found_label;
|
|
cmark_node *tmp, *tmpnext;
|
|
bool is_image;
|
|
|
|
advance(subj); // advance past ]
|
|
initial_pos = subj->pos;
|
|
|
|
// get last [ or ![
|
|
opener = subj->last_bracket;
|
|
|
|
if (opener == NULL) {
|
|
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
|
|
}
|
|
|
|
if (!opener->active) {
|
|
// take delimiter off stack
|
|
pop_bracket(subj);
|
|
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
|
|
}
|
|
|
|
// If we got here, we matched a potential link/image text.
|
|
// Now we check to see if it's a link/image.
|
|
is_image = opener->image;
|
|
|
|
after_link_text_pos = subj->pos;
|
|
|
|
// First, look for an inline link.
|
|
if (peek_char(subj) == '(' &&
|
|
((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
|
|
((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
|
|
&url_chunk)) > -1)) {
|
|
|
|
// try to parse an explicit link:
|
|
endurl = subj->pos + 1 + sps + n;
|
|
starttitle = endurl + scan_spacechars(&subj->input, endurl);
|
|
|
|
// ensure there are spaces btw url and title
|
|
endtitle = (starttitle == endurl)
|
|
? starttitle
|
|
: starttitle + scan_link_title(&subj->input, starttitle);
|
|
|
|
endall = endtitle + scan_spacechars(&subj->input, endtitle);
|
|
|
|
if (peek_at(subj, endall) == ')') {
|
|
subj->pos = endall + 1;
|
|
|
|
title_chunk =
|
|
cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
|
|
url = cmark_clean_url(subj->mem, &url_chunk);
|
|
title = cmark_clean_title(subj->mem, &title_chunk);
|
|
cmark_chunk_free(subj->mem, &url_chunk);
|
|
cmark_chunk_free(subj->mem, &title_chunk);
|
|
goto match;
|
|
|
|
} else {
|
|
// it could still be a shortcut reference link
|
|
subj->pos = after_link_text_pos;
|
|
}
|
|
}
|
|
|
|
// Next, look for a following [link label] that matches in refmap.
|
|
// skip spaces
|
|
raw_label = cmark_chunk_literal("");
|
|
found_label = link_label(subj, &raw_label);
|
|
if (!found_label) {
|
|
// If we have a shortcut reference link, back up
|
|
// to before the spacse we skipped.
|
|
subj->pos = initial_pos;
|
|
}
|
|
|
|
if ((!found_label || raw_label.len == 0) && !opener->bracket_after) {
|
|
cmark_chunk_free(subj->mem, &raw_label);
|
|
raw_label = cmark_chunk_dup(&subj->input, opener->position,
|
|
initial_pos - opener->position - 1);
|
|
found_label = true;
|
|
}
|
|
|
|
if (found_label) {
|
|
ref = cmark_reference_lookup(subj->refmap, &raw_label);
|
|
cmark_chunk_free(subj->mem, &raw_label);
|
|
}
|
|
|
|
if (ref != NULL) { // found
|
|
url = chunk_clone(subj->mem, &ref->url);
|
|
title = chunk_clone(subj->mem, &ref->title);
|
|
goto match;
|
|
} else {
|
|
goto noMatch;
|
|
}
|
|
|
|
noMatch:
|
|
// If we fall through to here, it means we didn't match a link:
|
|
pop_bracket(subj); // remove this opener from delimiter list
|
|
subj->pos = initial_pos;
|
|
return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
|
|
|
|
match:
|
|
inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
|
|
inl->as.link.url = url;
|
|
inl->as.link.title = title;
|
|
inl->start_line = inl->end_line = subj->line;
|
|
inl->start_column = opener->inl_text->start_column;
|
|
inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
|
|
cmark_node_insert_before(opener->inl_text, inl);
|
|
// Add link text:
|
|
tmp = opener->inl_text->next;
|
|
while (tmp) {
|
|
tmpnext = tmp->next;
|
|
cmark_node_append_child(inl, tmp);
|
|
tmp = tmpnext;
|
|
}
|
|
|
|
// Free the bracket [:
|
|
cmark_node_free(opener->inl_text);
|
|
|
|
process_emphasis(subj, opener->previous_delimiter);
|
|
pop_bracket(subj);
|
|
|
|
// Now, if we have a link, we also want to deactivate earlier link
|
|
// delimiters. (This code can be removed if we decide to allow links
|
|
// inside links.)
|
|
if (!is_image) {
|
|
opener = subj->last_bracket;
|
|
while (opener != NULL) {
|
|
if (!opener->image) {
|
|
if (!opener->active) {
|
|
break;
|
|
} else {
|
|
opener->active = false;
|
|
}
|
|
}
|
|
opener = opener->previous;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
// Parse a hard or soft linebreak, returning an inline.
|
|
// Assumes the subject has a cr or newline at the current position.
|
|
static cmark_node *handle_newline(subject *subj) {
|
|
bufsize_t nlpos = subj->pos;
|
|
// skip over cr, crlf, or lf:
|
|
if (peek_at(subj, subj->pos) == '\r') {
|
|
advance(subj);
|
|
}
|
|
if (peek_at(subj, subj->pos) == '\n') {
|
|
advance(subj);
|
|
}
|
|
++subj->line;
|
|
subj->column_offset = -subj->pos;
|
|
// skip spaces at beginning of line
|
|
skip_spaces(subj);
|
|
if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
|
|
peek_at(subj, nlpos - 2) == ' ') {
|
|
return make_linebreak(subj->mem);
|
|
} else {
|
|
return make_softbreak(subj->mem);
|
|
}
|
|
}
|
|
|
|
// Return the index of the next byte after the current position that needs
// dedicated inline handling, or the input length when there is none.
// The byte at the current position itself is never examined: the search
// starts at subj->pos + 1. When CMARK_OPT_SMART is set in options, the
// smart-punctuation bytes also stop the search.
static bufsize_t subject_find_special_char(subject *subj, int options) {
  // Lookup table, 16 entries per row, flagging: \n \r ! & * < [ \ ] _ `
  // Entries from 0x70 upward are left to implicit zero-initialisation.
  static const int8_t SPECIAL_CHARS[256] = {
      /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
      /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      /* 0x20 */ 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
      /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
      /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
      /* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};

  // Lookup table, 16 entries per row, flagging: " ' - .
  // Entries from 0x30 upward are left to implicit zero-initialisation.
  static const char SMART_PUNCT_CHARS[256] = {
      /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      /* 0x20 */ 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0};

  const bool smart = (options & CMARK_OPT_SMART) != 0;
  bufsize_t i;

  for (i = subj->pos + 1; i < subj->input.len; i++) {
    if (SPECIAL_CHARS[subj->input.data[i]]) {
      return i;
    }
    if (smart && SMART_PUNCT_CHARS[subj->input.data[i]]) {
      return i;
    }
  }

  return subj->input.len;
}
|
|
|
|
// Parse an inline, advancing subject, and add it as a child of parent.
|
|
// Return 0 if no inline can be parsed, 1 otherwise.
|
|
static int parse_inline(subject *subj, cmark_node *parent, int options) {
|
|
cmark_node *new_inl = NULL;
|
|
cmark_chunk contents;
|
|
unsigned char c;
|
|
bufsize_t startpos, endpos;
|
|
c = peek_char(subj);
|
|
if (c == 0) {
|
|
return 0;
|
|
}
|
|
switch (c) {
|
|
case '\r':
|
|
case '\n':
|
|
new_inl = handle_newline(subj);
|
|
break;
|
|
case '`':
|
|
new_inl = handle_backticks(subj, options);
|
|
break;
|
|
case '\\':
|
|
new_inl = handle_backslash(subj);
|
|
break;
|
|
case '&':
|
|
new_inl = handle_entity(subj);
|
|
break;
|
|
case '<':
|
|
new_inl = handle_pointy_brace(subj, options);
|
|
break;
|
|
case '*':
|
|
case '_':
|
|
case '\'':
|
|
case '"':
|
|
new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
|
|
break;
|
|
case '-':
|
|
new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
|
|
break;
|
|
case '.':
|
|
new_inl = handle_period(subj, (options & CMARK_OPT_SMART) != 0);
|
|
break;
|
|
case '[':
|
|
advance(subj);
|
|
new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
|
|
push_bracket(subj, false, new_inl);
|
|
break;
|
|
case ']':
|
|
new_inl = handle_close_bracket(subj);
|
|
break;
|
|
case '!':
|
|
advance(subj);
|
|
if (peek_char(subj) == '[') {
|
|
advance(subj);
|
|
new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
|
|
push_bracket(subj, true, new_inl);
|
|
} else {
|
|
new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
|
|
}
|
|
break;
|
|
default:
|
|
endpos = subject_find_special_char(subj, options);
|
|
contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
|
|
startpos = subj->pos;
|
|
subj->pos = endpos;
|
|
|
|
// if we're at a newline, strip trailing spaces.
|
|
if (S_is_line_end_char(peek_char(subj))) {
|
|
cmark_chunk_rtrim(&contents);
|
|
}
|
|
|
|
new_inl = make_str(subj, startpos, endpos - 1, contents);
|
|
}
|
|
if (new_inl != NULL) {
|
|
cmark_node_append_child(parent, new_inl);
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
// Parse inlines from parent's string_content, adding as children of parent.
|
|
extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
|
|
cmark_reference_map *refmap, int options) {
|
|
subject subj;
|
|
cmark_chunk content = {parent->content.ptr, parent->content.size, 0};
|
|
subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap);
|
|
cmark_chunk_rtrim(&subj.input);
|
|
|
|
while (!is_eof(&subj) && parse_inline(&subj, parent, options))
|
|
;
|
|
|
|
process_emphasis(&subj, NULL);
|
|
// free bracket and delim stack
|
|
while (subj.last_delim) {
|
|
remove_delimiter(&subj, subj.last_delim);
|
|
}
|
|
while (subj.last_bracket) {
|
|
pop_bracket(&subj);
|
|
}
|
|
}
|
|
|
|
// Parse zero or more space characters, including at most one newline.
|
|
static void spnl(subject *subj) {
|
|
skip_spaces(subj);
|
|
if (skip_line_end(subj)) {
|
|
skip_spaces(subj);
|
|
}
|
|
}
|
|
|
|
// Parse reference. Assumes string begins with '[' character.
|
|
// Modify refmap if a reference is encountered.
|
|
// Return 0 if no reference found, otherwise position of subject
|
|
// after reference is parsed.
|
|
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
|
|
cmark_reference_map *refmap) {
|
|
subject subj;
|
|
|
|
cmark_chunk lab;
|
|
cmark_chunk url;
|
|
cmark_chunk title;
|
|
|
|
bufsize_t matchlen = 0;
|
|
bufsize_t beforetitle;
|
|
|
|
subject_from_buf(mem, -1, 0, &subj, input, NULL);
|
|
|
|
// parse label:
|
|
if (!link_label(&subj, &lab) || lab.len == 0)
|
|
return 0;
|
|
|
|
// colon:
|
|
if (peek_char(&subj) == ':') {
|
|
advance(&subj);
|
|
} else {
|
|
return 0;
|
|
}
|
|
|
|
// parse link url:
|
|
spnl(&subj);
|
|
if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 &&
|
|
url.len > 0) {
|
|
subj.pos += matchlen;
|
|
} else {
|
|
return 0;
|
|
}
|
|
|
|
// parse optional link_title
|
|
beforetitle = subj.pos;
|
|
spnl(&subj);
|
|
matchlen = subj.pos == beforetitle ? 0 : scan_link_title(&subj.input, subj.pos);
|
|
if (matchlen) {
|
|
title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
|
|
subj.pos += matchlen;
|
|
} else {
|
|
subj.pos = beforetitle;
|
|
title = cmark_chunk_literal("");
|
|
}
|
|
|
|
// parse final spaces and newline:
|
|
skip_spaces(&subj);
|
|
if (!skip_line_end(&subj)) {
|
|
if (matchlen) { // try rewinding before title
|
|
subj.pos = beforetitle;
|
|
skip_spaces(&subj);
|
|
if (!skip_line_end(&subj)) {
|
|
return 0;
|
|
}
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
// insert reference into refmap
|
|
cmark_reference_create(refmap, &lab, &url, &title);
|
|
return subj.pos;
|
|
}
|