Change markdown parser.

This commit is contained in:
Black Hat 2019-02-13 16:14:13 +08:00
parent b71359e92a
commit 1d1cd3410a
59 changed files with 5896 additions and 28407 deletions

View File

@ -3,13 +3,13 @@ image: Visual Studio 2017
environment:
DEPLOY_DIR: Spectral-%APPVEYOR_BUILD_VERSION%
matrix:
- QTDIR: C:\Qt\5.12.1\mingw73_64
- QTDIR: C:\Qt\5.12.1\msvc2017_64
VCVARS: "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Auxiliary\\Build\\vcvars64.bat"
PLATFORM:
init:
- call "%QTDIR%\bin\qtenv2.bat"
- set PATH=%PATH%;C:\Qt\Tools\QtCreator\bin;%QT_DIR%\bin;C:\MinGW\bin
- set PATH=%PATH%;C:\Qt\Tools\QtCreator\bin
- call "%VCVARS%" %platform%
- cd /D "%APPVEYOR_BUILD_FOLDER%"
@ -18,10 +18,10 @@ before_build:
build_script:
- qmake spectral.pro CONFIG+=release CONFIG+=qtquickcompiler PREFIX="%DEPLOY_DIR%"
- mingw32-make
- nmake
after_build:
- mingw32-make install
- nmake install
- windeployqt --release --qmldir qml --qmldir imports "%DEPLOY_DIR%\spectral.exe"
- 7z a spectral.zip "%DEPLOY_DIR%\"

View File

@ -1,170 +0,0 @@
Copyright (c) 2014, John MacFarlane
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-----
houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c
derive from https://github.com/vmg/houdini (with some modifications)
Copyright (C) 2012 Vicent Martí
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-----
buffer.h, buffer.c, chunk.h
are derived from code (C) 2012 Github, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-----
utf8.c and utf8.h
are derived from utf8proc
(<http://www.public-software-group.org/utf8proc>),
(C) 2009 Public Software Group e. V., Berlin, Germany.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
-----
The normalization code in normalize.py was derived from the
markdowntest project, Copyright 2013 Karl Dubost:
The MIT License (MIT)
Copyright (c) 2013 Karl Dubost
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-----
The CommonMark spec (test/spec.txt) is
Copyright (C) 2014-15 John MacFarlane
Released under the Creative Commons CC-BY-SA 4.0 license:
<http://creativecommons.org/licenses/by-sa/4.0/>.
-----
The test software in test/ is
Copyright (c) 2014, John MacFarlane
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File diff suppressed because it is too large Load Diff

View File

@ -1,279 +0,0 @@
#include <stdarg.h>
#include <string.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>
#include "config.h"
#include "cmark_ctype.h"
#include "buffer.h"
#include "memory.h"
/* Used as default value for cmark_strbuf->ptr so that people can always
* assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
*
* The array has static storage duration and no initializer, so its single
* byte is zero: it is a valid empty C string. Buffers pointing at it have
* asize == 0, and cmark_strbuf_free() checks for this address so that it is
* never handed to the allocator's free function.
*/
unsigned char cmark_strbuf__initbuf[1];
#ifndef MIN
/* Smaller of two values. Every use of an argument and the whole expansion
 * are parenthesised so that operands containing lower-precedence operators
 * (e.g. `a | b`) are compared as a unit. Each argument may still be
 * evaluated twice, so do not pass expressions with side effects. */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
/* Put `buf` into the empty state: bound to allocator `mem`, zero size and
 * capacity, pointing at the shared static empty string. When `initial_size`
 * is positive, storage for at least that many bytes is reserved up front. */
void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
                       bufsize_t initial_size) {
  buf->mem = mem;
  buf->ptr = cmark_strbuf__initbuf;
  buf->size = 0;
  buf->asize = 0;

  if (initial_size <= 0)
    return;

  cmark_strbuf_grow(buf, initial_size);
}
/* Ensure there is room for `add` more bytes beyond the current content. */
static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
  const bufsize_t wanted = buf->size + add;
  cmark_strbuf_grow(buf, wanted);
}
/* Make sure `buf` can hold `target_size` bytes plus a terminator.
 *
 * Nothing happens when the current capacity is already strictly larger than
 * the request. Requests above INT32_MAX / 2 print a diagnostic and abort.
 * Otherwise the buffer is reallocated through the buffer's allocator. */
void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
  bufsize_t capacity;
  unsigned char *previous;

  assert(target_size > 0);

  if (buf->asize > target_size)
    return;

  if (target_size > (bufsize_t)(INT32_MAX / 2)) {
    fprintf(stderr,
            "[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n",
            (INT32_MAX / 2));
    abort();
  }

  /* Over-allocate by 50% (amortised linear appends), add one byte for the
   * terminator, then round up to a multiple of 8. */
  capacity = target_size + target_size / 2 + 1;
  capacity = (capacity + 7) & ~7;

  /* With asize == 0 the pointer refers to the shared static buffer, which
   * must not be passed to realloc. */
  previous = (buf->asize == 0) ? NULL : buf->ptr;
  buf->ptr = (unsigned char *)buf->mem->realloc(previous, capacity);
  buf->asize = capacity;
}
/* Number of content bytes currently stored in `buf` (terminator excluded). */
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) {
  const bufsize_t length = buf->size;
  return length;
}
/* Release the storage owned by `buf` (if any) and return it to the empty
 * state. A NULL `buf` is ignored. */
void cmark_strbuf_free(cmark_strbuf *buf) {
  if (buf != NULL) {
    unsigned char *storage = buf->ptr;

    /* The shared static buffer was never allocated, so it is not freed. */
    if (storage != cmark_strbuf__initbuf)
      buf->mem->free(storage);

    cmark_strbuf_init(buf->mem, buf, 0);
  }
}
/* Discard the content of `buf` while keeping its allocated storage. */
void cmark_strbuf_clear(cmark_strbuf *buf) {
  buf->size = 0;

  if (buf->asize <= 0)
    return;

  buf->ptr[0] = '\0';
}
/* Replace the content of `buf` with the first `len` bytes of `data`.
 * A NULL `data` or a non-positive `len` clears the buffer instead. */
void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
                      bufsize_t len) {
  if (data == NULL || len <= 0) {
    cmark_strbuf_clear(buf);
    return;
  }

  /* When `data` is the buffer's own storage the bytes are already in place;
   * only the length and terminator need updating. */
  if (buf->ptr != data) {
    if (buf->asize <= len)
      cmark_strbuf_grow(buf, len);
    memmove(buf->ptr, data, len);
  }

  buf->size = len;
  buf->ptr[len] = '\0';
}
/* Replace the content of `buf` with the NUL-terminated `string`.
 * A NULL `string` is treated as length zero, which clears the buffer. */
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) {
  bufsize_t length = 0;

  if (string != NULL)
    length = (bufsize_t)strlen(string);

  cmark_strbuf_set(buf, (const unsigned char *)string, length);
}
/* Append the low eight bits of `c` to `buf` and re-terminate it. */
void cmark_strbuf_putc(cmark_strbuf *buf, int c) {
  const unsigned char byte = (unsigned char)(c & 0xFF);

  S_strbuf_grow_by(buf, 1);
  buf->ptr[buf->size] = byte;
  buf->size += 1;
  buf->ptr[buf->size] = '\0';
}
/* Append `len` bytes from `data` to `buf`. A non-positive `len` is a no-op. */
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
                      bufsize_t len) {
  if (len > 0) {
    /* Grow first: the storage pointer may change during reallocation. */
    S_strbuf_grow_by(buf, len);
    memmove(buf->ptr + buf->size, data, len);
    buf->size += len;
    buf->ptr[buf->size] = '\0';
  }
}
/* Append the NUL-terminated `string` to `buf`.
 *
 * A NULL `string` is treated as empty and leaves the buffer unchanged,
 * matching how cmark_strbuf_sets() tolerates NULL; previously NULL reached
 * strlen(), which is undefined behaviour. The length is narrowed to
 * bufsize_t explicitly rather than by implicit conversion. */
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) {
  if (string == NULL)
    return;

  cmark_strbuf_put(buf, (const unsigned char *)string,
                   (bufsize_t)strlen(string));
}
/* Copy the content of `buf` into the caller-supplied array `data` of
 * `datasize` bytes as a NUL-terminated string, truncating if necessary.
 * Nothing is written when `data` is NULL or `datasize` is not positive. */
void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
                            const cmark_strbuf *buf) {
  bufsize_t room;
  bufsize_t count;

  assert(buf);

  if (data == NULL || datasize <= 0)
    return;

  /* The destination is a valid empty string even if nothing is copied. */
  data[0] = '\0';

  if (buf->size == 0 || buf->asize <= 0)
    return;

  room = datasize - 1; /* one byte is reserved for the terminator */
  count = (buf->size > room) ? room : buf->size;

  memmove(data, buf->ptr, count);
  data[count] = '\0';
}
/* Exchange the complete state (allocator, storage, sizes) of two buffers. */
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) {
  cmark_strbuf saved = *buf_b;

  *buf_b = *buf_a;
  *buf_a = saved;
}
/* Hand the buffer's storage over to the caller and reset `buf` to empty.
 * When the buffer never allocated anything, a freshly allocated one-byte
 * empty string is returned instead and `buf` is left untouched. */
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) {
  unsigned char *owned;

  if (buf->asize == 0)
    return (unsigned char *)buf->mem->calloc(1, 1);

  owned = buf->ptr;
  cmark_strbuf_init(buf->mem, buf, 0);
  return owned;
}
/* Compare two buffers byte-wise over their common length. If that prefix is
 * equal, the shorter buffer sorts first (-1 / 1), and 0 means equal. A
 * non-zero prefix comparison returns memcmp's result unchanged. */
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) {
  const int order = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));

  if (order != 0)
    return order;
  if (a->size < b->size)
    return -1;
  if (a->size > b->size)
    return 1;
  return 0;
}
/* Find the first occurrence of byte `c` in `buf` at or after `pos`.
 * Returns its index, or -1 when `pos` is past the end or nothing is found.
 * A negative `pos` starts the search at index 0. */
bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
  const unsigned char *hit;

  if (pos >= buf->size)
    return -1;
  if (pos < 0)
    pos = 0;

  hit = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos);
  return hit ? (bufsize_t)(hit - (const unsigned char *)buf->ptr) : -1;
}
/* Search backwards from `pos` (clamped to the last byte) for byte `c`.
 * Returns the index of the match, or -1 when `pos` is negative, the buffer
 * is empty, or no match exists. */
bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
  const unsigned char wanted = (unsigned char)c;
  bufsize_t idx;

  if (pos < 0 || buf->size == 0)
    return -1;

  idx = (pos < buf->size) ? pos : buf->size - 1;
  while (idx >= 0) {
    if (buf->ptr[idx] == wanted)
      return idx;
    --idx;
  }
  return -1;
}
/* Shorten `buf` to at most `len` bytes (negative values count as 0).
 * Buffers already that short are left unchanged. */
void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) {
  const bufsize_t keep = (len < 0) ? 0 : len;

  if (keep >= buf->size)
    return;

  buf->size = keep;
  buf->ptr[keep] = '\0';
}
/* Remove the first `n` bytes of `buf`, shifting the remainder to the front.
 * `n` is clamped to the current size; non-positive values do nothing. */
void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) {
  bufsize_t removed;

  if (n <= 0)
    return;

  removed = (n > buf->size) ? buf->size : n;
  buf->size -= removed;

  if (buf->size != 0)
    memmove(buf->ptr, buf->ptr + removed, buf->size);

  buf->ptr[buf->size] = '\0';
}
/* Strip trailing whitespace (per cmark_isspace) from `buf`. */
void cmark_strbuf_rtrim(cmark_strbuf *buf) {
  if (buf->size == 0)
    return;

  while (buf->size > 0 && cmark_isspace(buf->ptr[buf->size - 1]))
    buf->size--;

  buf->ptr[buf->size] = '\0';
}
/* Strip whitespace (per cmark_isspace) from both ends of `buf`. */
void cmark_strbuf_trim(cmark_strbuf *buf) {
  bufsize_t lead;

  if (buf->size == 0)
    return;

  /* Count the leading whitespace bytes. */
  for (lead = 0; lead < buf->size; ++lead) {
    if (!cmark_isspace(buf->ptr[lead]))
      break;
  }

  cmark_strbuf_drop(buf, lead);
  cmark_strbuf_rtrim(buf);
}
// Collapse every run of whitespace bytes (per cmark_isspace) in `s` into a
// single ' ' character. The string is rewritten in place and shortened.
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) {
  bool in_space_run = false;
  bufsize_t src;
  bufsize_t dst = 0;

  for (src = 0; src < s->size; ++src) {
    const unsigned char ch = s->ptr[src];

    if (!cmark_isspace(ch)) {
      s->ptr[dst++] = ch;
      in_space_run = false;
      continue;
    }

    // Only the first whitespace byte of a run produces output.
    if (in_space_run)
      continue;

    s->ptr[dst++] = ' ';
    in_space_run = true;
  }

  cmark_strbuf_truncate(s, dst);
}
// Remove, in place, each backslash that is immediately followed by a
// punctuation character (per cmark_ispunct), keeping the punctuation itself.
extern void cmark_strbuf_unescape(cmark_strbuf *buf) {
  bufsize_t src = 0;
  bufsize_t dst = 0;

  while (src < buf->size) {
    // Skip the backslash so the escaped character is the one copied below.
    if (buf->ptr[src] == '\\' && cmark_ispunct(buf->ptr[src + 1]))
      ++src;

    buf->ptr[dst] = buf->ptr[src];
    ++dst;
    ++src;
  }

  cmark_strbuf_truncate(buf, dst);
}

View File

@ -1,82 +0,0 @@
#ifndef CMARK_BUFFER_H
#define CMARK_BUFFER_H
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#include "config.h"
#include "cmark.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Signed 32-bit type used for all buffer sizes and offsets. */
typedef int32_t bufsize_t;
/* Growable byte buffer. */
typedef struct {
/* Allocator used for this buffer's storage. */
cmark_mem *mem;
/* Content bytes; points at cmark_strbuf__initbuf while nothing is allocated. */
unsigned char *ptr;
/* asize: bytes allocated (0 when unallocated); size: bytes of content. */
bufsize_t asize, size;
} cmark_strbuf;
/* Shared placeholder storage used as `ptr` by unallocated buffers. */
extern unsigned char cmark_strbuf__initbuf[];
/* Static initializer for an empty cmark_strbuf bound to allocator `mem`. */
#define CMARK_BUF_INIT(mem) \
{ mem, cmark_strbuf__initbuf, 0, 0 }
/**
* Initialize a cmark_strbuf structure.
*
* For the cases where CMARK_BUF_INIT cannot be used to do static
* initialization.
*/
void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
bufsize_t initial_size);
/**
* Grow the buffer to hold at least `target_size` bytes.
*/
void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
/** Release the buffer's storage (if allocated) and reset it to empty. */
void cmark_strbuf_free(cmark_strbuf *buf);
/** Exchange the full contents of two buffer structures. */
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
/** Return the number of content bytes in the buffer. */
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf);
/** Byte-wise comparison; on an equal common prefix the shorter buffer
 * sorts first. Returns 0 when equal. */
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
/** Return the buffer's storage and reset the buffer to empty; an
 * unallocated buffer yields a newly allocated empty string. */
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
/** Copy the content into `data` (capacity `datasize`), truncating as
 * needed and always NUL-terminating when `datasize` is positive. */
void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
const cmark_strbuf *buf);
/** View the buffer's content as a C string; the buffer keeps ownership. */
static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) {
  const unsigned char *bytes = buf->ptr;
  return (const char *)bytes;
}
/** Access byte `n` of the buffer; no bounds checking is performed. */
#define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
/** Replace the content with `len` bytes from `data`; NULL data or a
 * non-positive length clears the buffer. */
void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
bufsize_t len);
/** Replace the content with the NUL-terminated `string` (NULL clears). */
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
/** Append the low eight bits of `c`. */
void cmark_strbuf_putc(cmark_strbuf *buf, int c);
/** Append `len` bytes from `data`; a non-positive length is a no-op. */
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
bufsize_t len);
/** Append the NUL-terminated `string`. */
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
/** Discard the content but keep the allocated storage. */
void cmark_strbuf_clear(cmark_strbuf *buf);
/** Index of the first byte `c` at or after `pos`, or -1 if none. */
bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos);
/** Index of the last byte `c` at or before `pos`, or -1 if none. */
bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos);
/** Remove the first `n` bytes (clamped to the content length). */
void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n);
/** Shorten the content to at most `len` bytes. */
void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len);
/** Remove trailing whitespace. */
void cmark_strbuf_rtrim(cmark_strbuf *buf);
/** Remove leading and trailing whitespace. */
void cmark_strbuf_trim(cmark_strbuf *buf);
/** Collapse each run of whitespace into a single space, in place. */
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
/** Remove backslashes that precede punctuation characters, in place. */
void cmark_strbuf_unescape(cmark_strbuf *s);
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,120 +0,0 @@
#ifndef CMARK_CHUNK_H
#define CMARK_CHUNK_H
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "cmark.h"
#include "buffer.h"
#include "memory.h"
#include "cmark_ctype.h"
/* Initializer for a chunk with no data, zero length and no owned storage. */
#define CMARK_CHUNK_EMPTY \
{ NULL, 0, 0 }
/* A run of bytes that is either borrowed (alloc == 0) or owned by the chunk
 * (alloc != 0). Owned data is released via cmark_chunk_free(). */
typedef struct {
unsigned char *data; /* first byte of the run */
bufsize_t len; /* number of bytes in the run */
bufsize_t alloc; // also implies a NULL-terminated string
} cmark_chunk;
/* Release the chunk's data through `mem` if the chunk owns it, then reset
 * the chunk to the empty state. */
static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
  if (c->alloc != 0)
    mem->free(c->data);

  c->len = 0;
  c->alloc = 0;
  c->data = NULL;
}
/* Advance the start of a borrowed chunk past leading whitespace. The chunk
 * must not own its data: moving `data` would lose the allocation address. */
static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) {
  assert(!c->alloc);

  for (; c->len != 0 && cmark_isspace(c->data[0]); c->len--)
    c->data++;
}
/* Shrink a borrowed chunk so that it no longer ends in whitespace. */
static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) {
  assert(!c->alloc);

  while (c->len > 0 && cmark_isspace(c->data[c->len - 1]))
    c->len--;
}
/* Strip whitespace from both ends of a borrowed chunk: leading first,
 * then trailing. */
static CMARK_INLINE void cmark_chunk_trim(cmark_chunk *c) {
  cmark_chunk *const chunk = c;

  cmark_chunk_ltrim(chunk);
  cmark_chunk_rtrim(chunk);
}
/* Find byte `c` in the chunk starting at `offset`. Returns the index of the
 * first match, or the chunk length when there is none. */
static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c,
                                                 bufsize_t offset) {
  const unsigned char *hit =
      (unsigned char *)memchr(ch->data + offset, c, ch->len - offset);

  if (!hit)
    return ch->len;
  return (bufsize_t)(hit - ch->data);
}
/* Return the chunk's content as a NUL-terminated string. A borrowed chunk
 * is first converted into an owned, terminated copy allocated from `mem`;
 * an already-owned chunk is returned as is. */
static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_mem *mem,
                                                    cmark_chunk *c) {
  if (!c->alloc) {
    unsigned char *copy = (unsigned char *)mem->calloc(c->len + 1, 1);

    if (c->len > 0)
      memcpy(copy, c->data, c->len);
    copy[c->len] = 0;

    c->data = copy;
    c->alloc = 1;
  }

  return (char *)c->data;
}
/* Make the chunk own a copy of `str` (or become empty when `str` is NULL).
 * Any data the chunk owned before is freed only after the copy is made, so
 * `str` may point into the chunk's previous data. */
static CMARK_INLINE void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c,
                                              const char *str) {
  unsigned char *previous = NULL;

  if (c->alloc)
    previous = c->data;

  if (str != NULL) {
    c->len = (bufsize_t)strlen(str);
    c->data = (unsigned char *)mem->calloc(c->len + 1, 1);
    c->alloc = 1;
    memcpy(c->data, str, c->len + 1); /* includes the terminator */
  } else {
    c->data = NULL;
    c->len = 0;
    c->alloc = 0;
  }

  if (previous != NULL)
    mem->free(previous);
}
/* Build a borrowed chunk viewing the NUL-terminated string `data`.
 * A NULL `data` yields a zero-length chunk. */
static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) {
  cmark_chunk view;

  view.data = (unsigned char *)data;
  view.len = data ? (bufsize_t)strlen(data) : 0;
  view.alloc = 0;
  return view;
}
/* Build a borrowed chunk viewing `len` bytes of `ch` starting at `pos`.
 * No data is copied and no bounds are checked. */
static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch,
                                                bufsize_t pos, bufsize_t len) {
  cmark_chunk view;

  view.data = ch->data + pos;
  view.len = len;
  view.alloc = 0;
  return view;
}
/* Turn the content of `buf` into an owned chunk, leaving `buf` empty. */
static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) {
  /* Read the length first: detaching resets the buffer's size. */
  const bufsize_t length = buf->size;
  unsigned char *bytes = cmark_strbuf_detach(buf);
  cmark_chunk owned;

  owned.data = bytes;
  owned.len = length;
  owned.alloc = 1;
  return owned;
}
#endif

View File

@ -1,43 +0,0 @@
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include "node.h"
#include "houdini.h"
#include "cmark.h"
#include "buffer.h"
/* Library version as an integer (the CMARK_VERSION constant). */
int cmark_version() {
  const int version = CMARK_VERSION;
  return version;
}
/* Library version as a string (the CMARK_VERSION_STRING constant). */
const char *cmark_version_string() {
  const char *version = CMARK_VERSION_STRING;
  return version;
}
/* calloc wrapper that never returns NULL: on allocation failure it prints a
 * diagnostic to stderr and aborts the process. */
static void *xcalloc(size_t nmem, size_t size) {
  void *block = calloc(nmem, size);

  if (block != NULL)
    return block;

  fputs("[cmark] calloc returned null pointer, aborting\n", stderr);
  abort();
}
/* realloc wrapper that never returns NULL: when realloc yields a null
 * pointer it prints a diagnostic to stderr and aborts the process. */
static void *xrealloc(void *ptr, size_t size) {
  void *resized = realloc(ptr, size);

  if (resized != NULL)
    return resized;

  fputs("[cmark] realloc returned null pointer, aborting\n", stderr);
  abort();
}
/* Allocator built on the C library: the aborting xcalloc/xrealloc wrappers
 * defined above in this file, plus plain free. */
cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
/* Parse `len` bytes of `text` into a document tree, render that tree to
 * HTML with the same `options`, free the tree and return the rendered
 * string. */
char *cmark_markdown_to_html(const char *text, size_t len, int options) {
  cmark_node *root = cmark_parse_document(text, len, options);
  char *html = cmark_render_html(root, options);

  cmark_node_free(root);
  return html;
}

View File

@ -1,644 +0,0 @@
#ifndef CMARK_H
#define CMARK_H
#include <stdio.h>
#include <cmark_export.h>
#include <cmark_version.h>
#ifdef __cplusplus
extern "C" {
#endif
/** # NAME
*
* **cmark** - CommonMark parsing, manipulating, and rendering
*/
/** # DESCRIPTION
*
* ## Simple Interface
*/
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
* 'len') from CommonMark Markdown to HTML, returning a null-terminated,
* UTF-8-encoded string. It is the caller's responsibility
* to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_markdown_to_html(const char *text, size_t len, int options);
/** ## Node Structure
*/
/* Kinds of node. Block kinds and inline kinds each occupy a contiguous range
 * of values, delimited by the FIRST and LAST aliases defined below. */
typedef enum {
/* Error status */
CMARK_NODE_NONE,
/* Block */
CMARK_NODE_DOCUMENT,
CMARK_NODE_BLOCK_QUOTE,
CMARK_NODE_LIST,
CMARK_NODE_ITEM,
CMARK_NODE_CODE_BLOCK,
CMARK_NODE_HTML_BLOCK,
CMARK_NODE_CUSTOM_BLOCK,
CMARK_NODE_PARAGRAPH,
CMARK_NODE_HEADING,
CMARK_NODE_THEMATIC_BREAK,
/* Aliases marking the bounds of the block range. */
CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK,
/* Inline */
/* Numbering resumes at CMARK_NODE_LAST_BLOCK + 1. */
CMARK_NODE_TEXT,
CMARK_NODE_SOFTBREAK,
CMARK_NODE_LINEBREAK,
CMARK_NODE_CODE,
CMARK_NODE_HTML_INLINE,
CMARK_NODE_CUSTOM_INLINE,
CMARK_NODE_EMPH,
CMARK_NODE_STRONG,
CMARK_NODE_LINK,
CMARK_NODE_IMAGE,
/* Aliases marking the bounds of the inline range. */
CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
} cmark_node_type;
/* For backwards compatibility: */
#define CMARK_NODE_HEADER CMARK_NODE_HEADING
#define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK
#define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK
#define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE
/* List kind as reported by cmark_node_get_list_type(); CMARK_NO_LIST is
 * returned for nodes that are not lists. */
typedef enum {
CMARK_NO_LIST,
CMARK_BULLET_LIST,
CMARK_ORDERED_LIST
} cmark_list_type;
/* List delimiter kind as reported by cmark_node_get_list_delim();
 * CMARK_NO_DELIM is returned for nodes that are not lists.
 * NOTE(review): PERIOD/PAREN presumably refer to "1." vs "1)" markers on
 * ordered lists - confirm against the parser. */
typedef enum {
CMARK_NO_DELIM,
CMARK_PERIOD_DELIM,
CMARK_PAREN_DELIM
} cmark_delim_type;
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
/**
* ## Custom memory allocator support
*/
/** Defines the memory allocation functions to be used by CMark
* when parsing and allocating a document tree
*/
typedef struct cmark_mem {
/* Allocation function with calloc's signature (count, element size). */
void *(*calloc)(size_t, size_t);
/* Resize function with realloc's signature (old pointer, new size). */
void *(*realloc)(void *, size_t);
/* Release function with free's signature. */
void (*free)(void *);
} cmark_mem;
/**
* ## Creating and Destroying Nodes
*/
/** Creates a new node of type 'type'. Note that the node may have
* other required properties, which it is the caller's responsibility
* to assign.
*/
CMARK_EXPORT cmark_node *cmark_node_new(cmark_node_type type);
/** Same as `cmark_node_new`, but explicitly listing the memory
* allocator used to allocate the node. Note: be sure to use the same
* allocator for every node in a tree, or bad things can happen.
*/
CMARK_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type,
cmark_mem *mem);
/** Frees the memory allocated for a node and any children.
*/
CMARK_EXPORT void cmark_node_free(cmark_node *node);
/**
* ## Tree Traversal
*/
/** Returns the next node in the sequence after 'node', or NULL if
* there is none.
*/
CMARK_EXPORT cmark_node *cmark_node_next(cmark_node *node);
/** Returns the previous node in the sequence before 'node', or NULL if
* there is none.
*/
CMARK_EXPORT cmark_node *cmark_node_previous(cmark_node *node);
/** Returns the parent of 'node', or NULL if there is none.
*/
CMARK_EXPORT cmark_node *cmark_node_parent(cmark_node *node);
/** Returns the first child of 'node', or NULL if 'node' has no children.
*/
CMARK_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
/** Returns the last child of 'node', or NULL if 'node' has no children.
*/
CMARK_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
/**
* ## Iterator
*
* An iterator will walk through a tree of nodes, starting from a root
* node, returning one node at a time, together with information about
* whether the node is being entered or exited. The iterator will
* first descend to a child node, if there is one. When there is no
* child, the iterator will go to the next sibling. When there is no
* next sibling, the iterator will return to the parent (but with
* a 'cmark_event_type' of `CMARK_EVENT_EXIT`). The iterator will
* return `CMARK_EVENT_DONE` when it reaches the root node again.
* One natural application is an HTML renderer, where an `ENTER` event
* outputs an open tag and an `EXIT` event outputs a close tag.
* An iterator might also be used to transform an AST in some systematic
* way, for example, turning all level-3 headings into regular paragraphs.
*
* void
* usage_example(cmark_node *root) {
* cmark_event_type ev_type;
* cmark_iter *iter = cmark_iter_new(root);
*
* while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
* cmark_node *cur = cmark_iter_get_node(iter);
* // Do something with `cur` and `ev_type`
* }
*
* cmark_iter_free(iter);
* }
*
* Iterators will never return `EXIT` events for leaf nodes, which are nodes
* of type:
*
* * CMARK_NODE_HTML_BLOCK
* * CMARK_NODE_THEMATIC_BREAK
* * CMARK_NODE_CODE_BLOCK
* * CMARK_NODE_TEXT
* * CMARK_NODE_SOFTBREAK
* * CMARK_NODE_LINEBREAK
* * CMARK_NODE_CODE
* * CMARK_NODE_HTML_INLINE
*
* Nodes must only be modified after an `EXIT` event, or an `ENTER` event for
* leaf nodes.
*/
/* Event kinds reported by cmark_iter_next(). */
typedef enum {
CMARK_EVENT_NONE,
/* Traversal has returned to the root node; iteration is finished. */
CMARK_EVENT_DONE,
/* The current node is being entered. */
CMARK_EVENT_ENTER,
/* The current node is being exited (never reported for leaf nodes). */
CMARK_EVENT_EXIT
} cmark_event_type;
/** Creates a new iterator starting at 'root'. The current node and event
* type are undefined until 'cmark_iter_next' is called for the first time.
* The memory allocated for the iterator should be released using
* 'cmark_iter_free' when it is no longer needed.
*/
CMARK_EXPORT
cmark_iter *cmark_iter_new(cmark_node *root);
/** Frees the memory allocated for an iterator.
*/
CMARK_EXPORT
void cmark_iter_free(cmark_iter *iter);
/** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`,
* `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`).
*/
CMARK_EXPORT
cmark_event_type cmark_iter_next(cmark_iter *iter);
/** Returns the current node.
*/
CMARK_EXPORT
cmark_node *cmark_iter_get_node(cmark_iter *iter);
/** Returns the current event type.
*/
CMARK_EXPORT
cmark_event_type cmark_iter_get_event_type(cmark_iter *iter);
/** Returns the root node.
*/
CMARK_EXPORT
cmark_node *cmark_iter_get_root(cmark_iter *iter);
/** Resets the iterator so that the current node is 'current' and
* the event type is 'event_type'. The new current node must be a
* descendant of the root node or the root node itself.
*/
CMARK_EXPORT
void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
cmark_event_type event_type);
/**
* ## Accessors
*/
/** Returns the user data of 'node'.
*/
CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node);
/** Sets arbitrary user data for 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data);
/** Returns the type of 'node', or `CMARK_NODE_NONE` on error.
*/
CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node);
/** Like 'cmark_node_get_type', but returns a string representation
of the type, or `"<unknown>"`.
*/
CMARK_EXPORT
const char *cmark_node_get_type_string(cmark_node *node);
/** Returns the string contents of 'node', or an empty
string if none is set. Returns NULL if called on a
node that does not have string content.
*/
CMARK_EXPORT const char *cmark_node_get_literal(cmark_node *node);
/** Sets the string contents of 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content);
/** Returns the heading level of 'node', or 0 if 'node' is not a heading.
*/
CMARK_EXPORT int cmark_node_get_heading_level(cmark_node *node);
/* For backwards compatibility */
#define cmark_node_get_header_level cmark_node_get_heading_level
#define cmark_node_set_header_level cmark_node_set_heading_level
/** Sets the heading level of 'node', returning 1 on success and 0 on error.
*/
CMARK_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level);
/** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node'
* is not a list.
*/
CMARK_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node);
/** Sets the list type of 'node', returning 1 on success and 0 on error.
*/
CMARK_EXPORT int cmark_node_set_list_type(cmark_node *node,
cmark_list_type type);
/** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node'
* is not a list.
*/
CMARK_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node);
/** Sets the list delimiter type of 'node', returning 1 on success and 0
* on error.
*/
CMARK_EXPORT int cmark_node_set_list_delim(cmark_node *node,
cmark_delim_type delim);
/** Returns starting number of 'node', if it is an ordered list, otherwise 0.
*/
CMARK_EXPORT int cmark_node_get_list_start(cmark_node *node);
/** Sets starting number of 'node', if it is an ordered list. Returns 1
* on success, 0 on failure.
*/
CMARK_EXPORT int cmark_node_set_list_start(cmark_node *node, int start);
/** Returns 1 if 'node' is a tight list, 0 otherwise.
*/
CMARK_EXPORT int cmark_node_get_list_tight(cmark_node *node);
/** Sets the "tightness" of a list. Returns 1 on success, 0 on failure.
*/
CMARK_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
/** Returns the info string from a fenced code block.
*/
CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);
/** Sets the info string in a fenced code block, returning 1 on
* success and 0 on failure.
*/
CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info);
/** Returns the URL of a link or image 'node', or an empty string
if no URL is set. Returns NULL if called on a node that is
not a link or image.
*/
CMARK_EXPORT const char *cmark_node_get_url(cmark_node *node);
/** Sets the URL of a link or image 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_EXPORT int cmark_node_set_url(cmark_node *node, const char *url);
/** Returns the title of a link or image 'node', or an empty
string if no title is set. Returns NULL if called on a node
that is not a link or image.
*/
CMARK_EXPORT const char *cmark_node_get_title(cmark_node *node);
/** Sets the title of a link or image 'node'. Returns 1 on success,
* 0 on failure.
*/
CMARK_EXPORT int cmark_node_set_title(cmark_node *node, const char *title);
/** Returns the literal "on enter" text for a custom 'node', or
an empty string if no on_enter is set. Returns NULL if called
on a non-custom node.
*/
CMARK_EXPORT const char *cmark_node_get_on_enter(cmark_node *node);
/** Sets the literal text to render "on enter" for a custom 'node'.
Any children of the node will be rendered after this text.
Returns 1 on success, 0 on failure.
*/
CMARK_EXPORT int cmark_node_set_on_enter(cmark_node *node,
const char *on_enter);
/** Returns the literal "on exit" text for a custom 'node', or
an empty string if no on_exit is set. Returns NULL if
called on a non-custom node.
*/
CMARK_EXPORT const char *cmark_node_get_on_exit(cmark_node *node);
/** Sets the literal text to render "on exit" for a custom 'node'.
Any children of the node will be rendered before this text.
Returns 1 on success, 0 on failure.
*/
CMARK_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit);
/** Returns the line on which 'node' begins.
*/
CMARK_EXPORT int cmark_node_get_start_line(cmark_node *node);
/** Returns the column at which 'node' begins.
*/
CMARK_EXPORT int cmark_node_get_start_column(cmark_node *node);
/** Returns the line on which 'node' ends.
*/
CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node);
/** Returns the column at which 'node' ends.
*/
CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node);
/**
* ## Tree Manipulation
*/
/** Unlinks a 'node', removing it from the tree, but not freeing its
* memory. (Use 'cmark_node_free' for that.)
*/
CMARK_EXPORT void cmark_node_unlink(cmark_node *node);
/** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure.
*/
CMARK_EXPORT int cmark_node_insert_before(cmark_node *node,
cmark_node *sibling);
/** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure.
*/
CMARK_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
/** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does
* not free its memory).
* Returns 1 on success, 0 on failure.
*/
CMARK_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode);
/** Adds 'child' to the beginning of the children of 'node'.
* Returns 1 on success, 0 on failure.
*/
CMARK_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child);
/** Adds 'child' to the end of the children of 'node'.
* Returns 1 on success, 0 on failure.
*/
CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child);
/** Consolidates adjacent text nodes.
*/
CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root);
/**
* ## Parsing
*
* Simple interface:
*
* cmark_node *document = cmark_parse_document("Hello *world*", 13,
* CMARK_OPT_DEFAULT);
*
* Streaming interface:
*
* cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
* FILE *fp = fopen("myfile.md", "rb");
* while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
* cmark_parser_feed(parser, buffer, bytes);
* if (bytes < sizeof(buffer)) {
* break;
* }
* }
* document = cmark_parser_finish(parser);
* cmark_parser_free(parser);
*/
/** Creates a new parser object.
*/
CMARK_EXPORT
cmark_parser *cmark_parser_new(int options);
/** Creates a new parser object with the given memory allocator
*/
CMARK_EXPORT
cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
/** Frees memory allocated for a parser object.
*/
CMARK_EXPORT
void cmark_parser_free(cmark_parser *parser);
/** Feeds a string of length 'len' to 'parser'.
*/
CMARK_EXPORT
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
/** Finish parsing and return a pointer to a tree of nodes.
*/
CMARK_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
/** Parse a CommonMark document in 'buffer' of length 'len'.
* Returns a pointer to a tree of nodes. The memory allocated for
* the node tree should be released using 'cmark_node_free'
* when it is no longer needed.
*/
CMARK_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
/** Parse a CommonMark document in file 'f', returning a pointer to
* a tree of nodes. The memory allocated for the node tree should be
* released using 'cmark_node_free' when it is no longer needed.
*/
CMARK_EXPORT
cmark_node *cmark_parse_file(FILE *f, int options);
/**
* ## Rendering
*/
/** Render a 'node' tree as XML. It is the caller's responsibility
* to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_render_xml(cmark_node *root, int options);
/** Render a 'node' tree as an HTML fragment. It is up to the user
* to add an appropriate header and footer. It is the caller's
* responsibility to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_render_html(cmark_node *root, int options);
/** Render a 'node' tree as a groff man page, without the header.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_render_man(cmark_node *root, int options, int width);
/** Render a 'node' tree as a commonmark document.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_render_commonmark(cmark_node *root, int options, int width);
/** Render a 'node' tree as a LaTeX document.
* It is the caller's responsibility to free the returned buffer.
*/
CMARK_EXPORT
char *cmark_render_latex(cmark_node *root, int options, int width);
/**
* ## Options
*/
/** Default options.
*/
#define CMARK_OPT_DEFAULT 0
/**
* ### Options affecting rendering
*/
/** Include a `data-sourcepos` attribute on all block elements.
*/
#define CMARK_OPT_SOURCEPOS (1 << 1)
/** Render `softbreak` elements as hard line breaks.
*/
#define CMARK_OPT_HARDBREAKS (1 << 2)
/** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`,
* `file:`, and `data:`, except for `image/png`, `image/gif`,
* `image/jpeg`, or `image/webp` mime types). Raw HTML is replaced
* by a placeholder HTML comment. Unsafe links are replaced by
* empty strings.
*/
#define CMARK_OPT_SAFE (1 << 3)
/** Render `softbreak` elements as spaces.
*/
#define CMARK_OPT_NOBREAKS (1 << 4)
/**
* ### Options affecting parsing
*/
/** Legacy option (no effect).
*/
#define CMARK_OPT_NORMALIZE (1 << 8)
/** Validate UTF-8 in the input before parsing, replacing illegal
* sequences with the replacement character U+FFFD.
*/
#define CMARK_OPT_VALIDATE_UTF8 (1 << 9)
/** Convert straight quotes to curly, --- to em dashes, -- to en dashes.
*/
#define CMARK_OPT_SMART (1 << 10)
/**
* ## Version information
*/
/** The library version as integer for runtime checks. Also available as
* macro CMARK_VERSION for compile time checks.
*
* * Bits 16-23 contain the major version.
* * Bits 8-15 contain the minor version.
* * Bits 0-7 contain the patchlevel.
*
* In hexadecimal format, the number 0x010203 represents version 1.2.3.
*/
CMARK_EXPORT
int cmark_version(void);
/** The library version string for runtime checks. Also available as
* macro CMARK_VERSION_STRING for compile time checks.
*/
CMARK_EXPORT
const char *cmark_version_string(void);
/** # AUTHORS
*
* John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
*/
#ifndef CMARK_NO_SHORT_NAMES
#define NODE_DOCUMENT CMARK_NODE_DOCUMENT
#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE
#define NODE_LIST CMARK_NODE_LIST
#define NODE_ITEM CMARK_NODE_ITEM
#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK
#define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK
#define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK
#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
#define NODE_HEADING CMARK_NODE_HEADING
#define NODE_HEADER CMARK_NODE_HEADER
#define NODE_THEMATIC_BREAK CMARK_NODE_THEMATIC_BREAK
#define NODE_HRULE CMARK_NODE_HRULE
#define NODE_TEXT CMARK_NODE_TEXT
#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK
#define NODE_LINEBREAK CMARK_NODE_LINEBREAK
#define NODE_CODE CMARK_NODE_CODE
#define NODE_HTML_INLINE CMARK_NODE_HTML_INLINE
#define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE
#define NODE_EMPH CMARK_NODE_EMPH
#define NODE_STRONG CMARK_NODE_STRONG
#define NODE_LINK CMARK_NODE_LINK
#define NODE_IMAGE CMARK_NODE_IMAGE
#define BULLET_LIST CMARK_BULLET_LIST
#define ORDERED_LIST CMARK_ORDERED_LIST
#define PERIOD_DELIM CMARK_PERIOD_DELIM
#define PAREN_DELIM CMARK_PAREN_DELIM
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,44 +0,0 @@
#include <stdint.h>
#include "cmark_ctype.h"
/** Character class of every possible byte value, indexed by the byte:
 * 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other.
 *
 * Each row is labelled with the high nibble of the byte and each column
 * with the low nibble. The only class-1 entries are 0x09, 0x0a, 0x0d and
 * 0x20; every byte >= 0x80 is class 0.
 */
static const uint8_t cmark_ctype_class[256] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
/* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
/* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2,
/* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
/* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0,
/* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
/**
 * Tests whether c is a "whitespace" character as defined by the spec,
 * i.e. whether its entry in cmark_ctype_class is 1.
 * Returns 1 if so, 0 otherwise.
 */
int cmark_isspace(char c) {
  const uint8_t char_class = cmark_ctype_class[(uint8_t)c];
  return char_class == 1;
}
/**
 * Tests whether c is an ASCII punctuation character, i.e. whether its
 * entry in cmark_ctype_class is 2. Returns 1 if so, 0 otherwise.
 */
int cmark_ispunct(char c) {
  const uint8_t char_class = cmark_ctype_class[(uint8_t)c];
  return char_class == 2;
}
/**
 * Tests whether c is an ASCII digit (class 3) or letter (class 4)
 * according to cmark_ctype_class. Returns 1 if so, 0 otherwise.
 */
int cmark_isalnum(char c) {
  switch (cmark_ctype_class[(uint8_t)c]) {
  case 3: /* digit */
  case 4: /* alpha */
    return 1;
  default:
    return 0;
  }
}
/**
 * Tests whether c is an ASCII digit, i.e. whether its entry in
 * cmark_ctype_class is 3. Returns 1 if so, 0 otherwise.
 */
int cmark_isdigit(char c) {
  const uint8_t char_class = cmark_ctype_class[(uint8_t)c];
  return char_class == 3;
}
/**
 * Tests whether c is an ASCII letter, i.e. whether its entry in
 * cmark_ctype_class is 4. Returns 1 if so, 0 otherwise.
 */
int cmark_isalpha(char c) {
  const uint8_t char_class = cmark_ctype_class[(uint8_t)c];
  return char_class == 4;
}

View File

@ -1,26 +0,0 @@
#ifndef CMARK_CMARK_CTYPE_H
#define CMARK_CMARK_CTYPE_H
#ifdef __cplusplus
extern "C" {
#endif
/** Locale-independent versions of functions from ctype.h.
* We want cmark to behave the same no matter what the system locale.
*
* Each predicate returns 1 when 'c' belongs to the named class and 0
* otherwise.
*/
/** Whitespace character as defined by the spec. */
int cmark_isspace(char c);
/** ASCII punctuation character. */
int cmark_ispunct(char c);
/** ASCII letter or digit. */
int cmark_isalnum(char c);
/** ASCII digit. */
int cmark_isdigit(char c);
/** ASCII letter. */
int cmark_isalpha(char c);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,42 +0,0 @@
/* Symbol visibility and deprecation macros for the cmark public API.
 *
 * CMARK_EXPORT             marks a symbol as visible outside the library.
 * CMARK_NO_EXPORT          marks a symbol as hidden.
 * CMARK_DEPRECATED         marks a symbol as deprecated.
 * CMARK_DEPRECATED_EXPORT / CMARK_DEPRECATED_NO_EXPORT combine the above.
 *
 * Defining CMARK_STATIC_DEFINE turns the two visibility macros into no-ops.
 * NOTE(review): the non-static definitions use the __attribute__ syntax
 * unconditionally; confirm the target compiler accepts it.
 */
#ifndef CMARK_EXPORT_H
#define CMARK_EXPORT_H
#ifdef CMARK_STATIC_DEFINE
# define CMARK_EXPORT
# define CMARK_NO_EXPORT
#else
# ifndef CMARK_EXPORT
# ifdef libcmark_EXPORTS
/* We are building this library */
# define CMARK_EXPORT __attribute__((visibility("default")))
# else
/* We are using this library */
/* (same definition as when building: default visibility either way) */
# define CMARK_EXPORT __attribute__((visibility("default")))
# endif
# endif
# ifndef CMARK_NO_EXPORT
# define CMARK_NO_EXPORT __attribute__((visibility("hidden")))
# endif
#endif
#ifndef CMARK_DEPRECATED
# define CMARK_DEPRECATED __attribute__ ((__deprecated__))
#endif
#ifndef CMARK_DEPRECATED_EXPORT
# define CMARK_DEPRECATED_EXPORT CMARK_EXPORT CMARK_DEPRECATED
#endif
#ifndef CMARK_DEPRECATED_NO_EXPORT
# define CMARK_DEPRECATED_NO_EXPORT CMARK_NO_EXPORT CMARK_DEPRECATED
#endif
/* Disabled block: CMARK_NO_DEPRECATED is never defined from here. */
#if 0 /* DEFINE_NO_DEPRECATED */
# ifndef CMARK_NO_DEPRECATED
# define CMARK_NO_DEPRECATED
# endif
#endif
#endif /* CMARK_EXPORT_H */

View File

@ -1,7 +0,0 @@
#ifndef CMARK_VERSION_H
#define CMARK_VERSION_H
/* Library version packed into one integer for compile-time checks:
 * (major << 16) | (minor << 8) | patchlevel, here 0.28.3. */
#define CMARK_VERSION ((0 << 16) | (28 << 8) | 3)
/* The same version as a string. */
#define CMARK_VERSION_STRING "0.28.3"
#endif

View File

@ -1,481 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include "config.h"
#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "utf8.h"
#include "scanners.h"
#include "render.h"
// Shorthands for the callbacks of the 'renderer' variable in scope.
// OUT: emit string s with the given wrap flag and escaping mode.
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
// LIT: emit string s with wrapping off and LITERAL escaping.
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
// CR: invoke the renderer's cr callback.
#define CR() renderer->cr(renderer)
// BLANKLINE: invoke the renderer's blankline callback.
#define BLANKLINE() renderer->blankline(renderer)
// Size of the scratch buffer used by outc() for a percent-encoded character.
#define ENCODED_SIZE 20
// Size of the buffer S_render_node() formats an ordered-list marker into.
#define LISTMARKER_SIZE 20
// Functions to convert cmark_nodes to commonmark strings.
/**
 * Writes the single code point 'c' to the renderer's buffer, escaping it
 * when the 'escape' mode requires it.
 *
 * renderer: output state (buffer, column, begin_content flag).
 * escape:   LITERAL never escapes; NORMAL, URL and TITLE each escape their
 *           own set of ASCII characters (see the condition below).
 * c:        the code point to emit. Only values below 0x80 are ever escaped.
 * nextc:    the character following 'c' (0 if none), used for context-
 *           sensitive decisions such as '&' before a letter.
 *
 * Characters needing escaping are written with a leading backslash, except
 * whitespace, which is percent-encoded.
 */
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  bool needs_escaping = false;
  bool follows_digit =
      renderer->buffer->size > 0 &&
      cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
  char encoded[ENCODED_SIZE];

  needs_escaping =
      c < 0x80 && escape != LITERAL &&
      ((escape == NORMAL &&
        (c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
         c == '>' || c == '\\' || c == '`' || c == '!' ||
         (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
         (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
          // begin_content doesn't get set to false til we've passed digits
          // at the beginning of line, so...
          !follows_digit) ||
         (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
          (nextc == 0 || cmark_isspace(nextc))))) ||
       (escape == URL &&
        (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' ||
         c == ')' || c == '(')) ||
       (escape == TITLE &&
        (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));

  if (needs_escaping) {
    if (cmark_isspace(c)) {
      // Use percent encoding for whitespace. The value is zero-padded to
      // two hex digits: a plain "%2x" pads with a blank, which turned a tab
      // into "% 9" instead of "%09". The output is always 3 characters.
      snprintf(encoded, ENCODED_SIZE, "%%%02x", c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += 3;
    } else {
      cmark_render_ascii(renderer, "\\");
      cmark_render_code_point(renderer, c);
    }
  } else {
    cmark_render_code_point(renderer, c);
  }
}
/**
 * Returns the length of the longest run of consecutive backticks in the
 * NUL-terminated string 'code', or 0 if it contains no backtick.
 */
static int longest_backtick_sequence(const char *code) {
  const char *const last = code + strlen(code);
  const char *p;
  int best = 0;
  int run = 0;

  // The terminating NUL is visited as well, so that a run reaching the end
  // of the string is still compared against the best one.
  for (p = code; p <= last; p++) {
    if (*p == '`') {
      run += 1;
      continue;
    }
    if (run > best) {
      best = run;
    }
    run = 0;
  }
  return best;
}
/**
 * Returns the smallest run length n >= 1 such that 'code' contains no run
 * of exactly n consecutive backticks.
 *
 * Run lengths are tracked in a 32-bit set, so only lengths 1..31 are
 * recorded; if all of them are in use the function returns 32.
 */
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k set means "a run of length k occurs". Bit 0 starts out set so
  // that the answer is never 0.
  uint32_t seen = 1;
  const char *const last = code + strlen(code);
  const char *p;
  int run = 0;
  int candidate;

  // Include the terminating NUL so that a trailing run gets recorded.
  for (p = code; p <= last; ++p) {
    if (*p == '`') {
      run++;
    } else {
      if (run > 0 && run < 32) {
        seen |= (1U << run);
      }
      run = 0;
    }
  }

  // Find the index of the lowest clear bit.
  for (candidate = 0; candidate < 32 && (seen & 1); candidate++) {
    seen >>= 1;
  }
  return candidate;
}
/**
 * Decides whether a link 'node' can be rendered in autolink form.
 *
 * That is the case when the node is a link whose URL is non-empty and is
 * accepted by scan_scheme(), which has no title and at least one child, and
 * whose first child's literal equals the URL (ignoring a leading "mailto:").
 *
 * Side effect: text nodes under the first child are consolidated via
 * cmark_consolidate_text_nodes() before the comparison.
 */
static bool is_autolink(cmark_node *node) {
  cmark_chunk *dest;
  cmark_node *label;
  char *target;
  int target_len;

  if (node->type != CMARK_NODE_LINK) {
    return false;
  }

  dest = &node->as.link.url;
  if (dest->len == 0 || scan_scheme(dest, 0) == 0) {
    return false;
  }

  // if it has a title, we can't treat it as an autolink:
  if (node->as.link.title.len > 0) {
    return false;
  }

  label = node->first_child;
  if (label == NULL) {
    return false;
  }
  cmark_consolidate_text_nodes(label);

  // Compare against the URL without its "mailto:" prefix, if any.
  target = (char *)dest->data;
  target_len = dest->len;
  if (strncmp(target, "mailto:", 7) == 0) {
    target += 7;
    target_len -= 7;
  }

  if (target_len != label->as.literal.len) {
    return false;
  }
  return strncmp(target, (char *)label->as.literal.data,
                 label->as.literal.len) == 0;
}
// Walks from node up through its ancestors and returns the first node
// (possibly node itself) whose type lies in the block-level range
// CMARK_NODE_FIRST_BLOCK..CMARK_NODE_LAST_BLOCK.
// Returns NULL if node is NULL or no such node exists.
static cmark_node *get_containing_block(cmark_node *node) {
  cmark_node *cur;

  for (cur = node; cur != NULL; cur = cur->parent) {
    if (cur->type >= CMARK_NODE_FIRST_BLOCK &&
        cur->type <= CMARK_NODE_LAST_BLOCK) {
      return cur;
    }
  }
  return NULL;
}
/**
 * Renders one traversal event for 'node' as CommonMark text.
 *
 * renderer: output state; written through the OUT/LIT/CR/BLANKLINE macros,
 *           and its prefix, begin_content, no_linebreaks and
 *           in_tight_list_item fields are updated here.
 * node:     the node being entered or exited.
 * ev_type:  CMARK_EVENT_ENTER when entering the node, otherwise exiting.
 * options:  CMARK_OPT_* flags; HARDBREAKS and NOBREAKS are consulted.
 *
 * Returns 1 normally, and 0 when an autolink has been written in full on
 * entry, as a signal to skip the contents of the node.
 */
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
cmark_event_type ev_type, int options) {
cmark_node *tmp;
int list_number;
cmark_delim_type list_delim;
int numticks;
bool extra_spaces;
int i;
bool entering = (ev_type == CMARK_EVENT_ENTER);
const char *info, *code, *title;
char fencechar[2] = {'\0', '\0'};
size_t info_len, code_len;
char listmarker[LISTMARKER_SIZE];
char *emph_delim;
bool first_in_list_item;
bufsize_t marker_width;
// Wrapping is only allowed with a positive width and neither break option.
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
!(CMARK_OPT_HARDBREAKS & options);
// Don't adjust tight list status til we've started the list.
// Otherwise we lose the blank line between a paragraph and
// a following list.
if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
tmp = get_containing_block(node);
renderer->in_tight_list_item =
tmp && // tmp might be NULL if there is no containing block
((tmp->type == CMARK_NODE_ITEM &&
cmark_node_get_list_tight(tmp->parent)) ||
(tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
cmark_node_get_list_tight(tmp->parent->parent)));
}
switch (node->type) {
case CMARK_NODE_DOCUMENT:
break;
case CMARK_NODE_BLOCK_QUOTE:
// The quote marker is written once and also pushed onto the prefix; on
// exit the same two characters are removed from the prefix again.
if (entering) {
LIT("> ");
renderer->begin_content = true;
cmark_strbuf_puts(renderer->prefix, "> ");
} else {
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
BLANKLINE();
}
break;
case CMARK_NODE_LIST:
if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK ||
node->next->type == CMARK_NODE_LIST)) {
// this ensures that a following indented code block or list will be
// interpreted correctly.
CR();
LIT("<!-- end list -->");
BLANKLINE();
}
break;
case CMARK_NODE_ITEM:
// marker_width is computed on both enter and exit, because the exit
// branch needs it to remove the padding added to the prefix on entry.
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
marker_width = 4;
} else {
list_number = cmark_node_get_list_start(node->parent);
list_delim = cmark_node_get_list_delim(node->parent);
// The item's number is the list start plus its count of preceding siblings.
tmp = node;
while (tmp->prev) {
tmp = tmp->prev;
list_number += 1;
}
// we ensure a width of at least 4 so
// we get nice transition from single digits
// to double
snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
list_delim == CMARK_PAREN_DELIM ? ")" : ".",
list_number < 10 ? " " : " ");
marker_width = strlen(listmarker);
}
if (entering) {
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
LIT(" - ");
renderer->begin_content = true;
} else {
LIT(listmarker);
renderer->begin_content = true;
}
// Pad the prefix with marker_width spaces.
for (i = marker_width; i--;) {
cmark_strbuf_putc(renderer->prefix, ' ');
}
} else {
cmark_strbuf_truncate(renderer->prefix,
renderer->prefix->size - marker_width);
CR();
}
break;
case CMARK_NODE_HEADING:
// One '#' per heading level; line breaks are suppressed until the
// heading is exited.
if (entering) {
for (i = cmark_node_get_heading_level(node); i > 0; i--) {
LIT("#");
}
LIT(" ");
renderer->begin_content = true;
renderer->no_linebreaks = true;
} else {
renderer->no_linebreaks = false;
BLANKLINE();
}
break;
case CMARK_NODE_CODE_BLOCK:
first_in_list_item = node->prev == NULL && node->parent &&
node->parent->type == CMARK_NODE_ITEM;
if (!first_in_list_item) {
BLANKLINE();
}
info = cmark_node_get_fence_info(node);
info_len = strlen(info);
// Fence with '~' when the info string itself contains a backtick.
fencechar[0] = strchr(info, '`') == NULL ? '`' : '~';
code = cmark_node_get_literal(node);
code_len = strlen(code);
// use indented form if no info, and code doesn't
// begin or end with a blank line, and code isn't
// first thing in a list item
if (info_len == 0 && (code_len > 2 && !cmark_isspace(code[0]) &&
!(cmark_isspace(code[code_len - 1]) &&
cmark_isspace(code[code_len - 2]))) &&
!first_in_list_item) {
LIT(" ");
cmark_strbuf_puts(renderer->prefix, " ");
OUT(cmark_node_get_literal(node), false, LITERAL);
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
} else {
// The fence is one character longer than the longest backtick run in
// the code, and at least 3 characters long.
numticks = longest_backtick_sequence(code) + 1;
if (numticks < 3) {
numticks = 3;
}
for (i = 0; i < numticks; i++) {
LIT(fencechar);
}
LIT(" ");
OUT(info, false, LITERAL);
CR();
OUT(cmark_node_get_literal(node), false, LITERAL);
CR();
for (i = 0; i < numticks; i++) {
LIT(fencechar);
}
}
BLANKLINE();
break;
case CMARK_NODE_HTML_BLOCK:
BLANKLINE();
OUT(cmark_node_get_literal(node), false, LITERAL);
BLANKLINE();
break;
case CMARK_NODE_CUSTOM_BLOCK:
BLANKLINE();
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
false, LITERAL);
BLANKLINE();
break;
case CMARK_NODE_THEMATIC_BREAK:
BLANKLINE();
LIT("-----");
BLANKLINE();
break;
case CMARK_NODE_PARAGRAPH:
if (!entering) {
BLANKLINE();
}
break;
case CMARK_NODE_TEXT:
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
break;
case CMARK_NODE_LINEBREAK:
if (!(CMARK_OPT_HARDBREAKS & options)) {
LIT(" ");
}
CR();
break;
case CMARK_NODE_SOFTBREAK:
if (CMARK_OPT_HARDBREAKS & options) {
LIT(" ");
CR();
} else if (!renderer->no_linebreaks && renderer->width == 0 &&
!(CMARK_OPT_HARDBREAKS & options) &&
!(CMARK_OPT_NOBREAKS & options)) {
CR();
} else {
OUT(" ", allow_wrap, LITERAL);
}
break;
case CMARK_NODE_CODE:
code = cmark_node_get_literal(node);
code_len = strlen(code);
// Delimit with a backtick run of a length that does not occur in the code.
numticks = shortest_unused_backtick_sequence(code);
// Pad with spaces when the code is empty or starts/ends with a backtick
// or a space.
extra_spaces = code_len == 0 ||
code[0] == '`' || code[code_len - 1] == '`' ||
code[0] == ' ' || code[code_len - 1] == ' ';
for (i = 0; i < numticks; i++) {
LIT("`");
}
if (extra_spaces) {
LIT(" ");
}
OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
if (extra_spaces) {
LIT(" ");
}
for (i = 0; i < numticks; i++) {
LIT("`");
}
break;
case CMARK_NODE_HTML_INLINE:
OUT(cmark_node_get_literal(node), false, LITERAL);
break;
case CMARK_NODE_CUSTOM_INLINE:
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
false, LITERAL);
break;
case CMARK_NODE_STRONG:
// The same delimiter is written on enter and on exit.
if (entering) {
LIT("**");
} else {
LIT("**");
}
break;
case CMARK_NODE_EMPH:
// If we have EMPH(EMPH(x)), we need to use *_x_*
// because **x** is STRONG(x):
if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
node->next == NULL && node->prev == NULL) {
emph_delim = "_";
} else {
emph_delim = "*";
}
if (entering) {
LIT(emph_delim);
} else {
LIT(emph_delim);
}
break;
case CMARK_NODE_LINK:
if (is_autolink(node)) {
if (entering) {
// Autolink form: <url>, with a leading "mailto:" dropped.
LIT("<");
if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
LIT((const char *)cmark_node_get_url(node) + 7);
} else {
LIT((const char *)cmark_node_get_url(node));
}
LIT(">");
// return signal to skip contents of node...
return 0;
}
} else {
// Inline form: "[" on entry, then "](url "title")" on exit.
if (entering) {
LIT("[");
} else {
LIT("](");
OUT(cmark_node_get_url(node), false, URL);
title = cmark_node_get_title(node);
if (strlen(title) > 0) {
LIT(" \"");
OUT(title, false, TITLE);
LIT("\"");
}
LIT(")");
}
}
break;
case CMARK_NODE_IMAGE:
if (entering) {
LIT("![");
} else {
LIT("](");
OUT(cmark_node_get_url(node), false, URL);
title = cmark_node_get_title(node);
if (strlen(title) > 0) {
OUT(" \"", allow_wrap, LITERAL);
OUT(title, false, TITLE);
LIT("\"");
}
LIT(")");
}
break;
default:
// Every node type is expected to be handled above.
assert(false);
break;
}
return 1;
}
/**
 * Renders the tree rooted at 'root' as CommonMark by running cmark_render()
 * with this file's outc() and S_render_node() callbacks.
 *
 * When CMARK_OPT_HARDBREAKS is set, a width of 0 is passed on instead of
 * 'width': breaking on width is disabled, since it has a different meaning
 * with OPT_HARDBREAKS.
 */
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  const int effective_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;

  return cmark_render(root, options, effective_width, outc, S_render_node);
}

View File

@ -1,76 +0,0 @@
#ifndef CMARK_CONFIG_H
#define CMARK_CONFIG_H
#ifdef __cplusplus
extern "C" {
#endif
#define HAVE_STDBOOL_H
#ifdef HAVE_STDBOOL_H
#include <stdbool.h>
#elif !defined(__cplusplus)
typedef char bool;
#endif
#define HAVE___BUILTIN_EXPECT
#define HAVE___ATTRIBUTE__
#ifdef HAVE___ATTRIBUTE__
#define CMARK_ATTRIBUTE(list) __attribute__ (list)
#else
#define CMARK_ATTRIBUTE(list)
#endif
#ifndef CMARK_INLINE
#if defined(_MSC_VER) && !defined(__cplusplus)
#define CMARK_INLINE __inline
#else
#define CMARK_INLINE inline
#endif
#endif
/* snprintf and vsnprintf fallbacks for MSVC before 2015,
due to Valentin Milea http://stackoverflow.com/questions/2915672/
*/
#if defined(_MSC_VER) && _MSC_VER < 1900
#include <stdio.h>
#include <stdarg.h>
#define snprintf c99_snprintf
#define vsnprintf c99_vsnprintf
/* vsnprintf replacement for MSVC before 2015.
 *
 * Formats into outBuf (at most 'size' bytes, truncating) unless size is 0.
 * If nothing was formatted, or _vsnprintf_s reported -1, the result of
 * _vscprintf(format, ap) is returned instead.
 * NOTE(review): 'ap' is handed to _vscprintf after _vsnprintf_s has already
 * consumed it; confirm this reuse is valid on the targeted compilers.
 */
CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap)
{
  int written = (size != 0)
                    ? _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap)
                    : -1;

  return (written == -1) ? _vscprintf(format, ap) : written;
}
/* snprintf replacement for MSVC before 2015: collects the variadic
 * arguments and forwards them to c99_vsnprintf(), returning its result. */
CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...)
{
  va_list args;
  int written;

  va_start(args, format);
  written = c99_vsnprintf(outBuf, size, format, args);
  va_end(args);

  return written;
}
#endif
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,51 +0,0 @@
#ifndef CMARK_HOUDINI_H
#define CMARK_HOUDINI_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include "config.h"
#include "buffer.h"
/* Branch-prediction hints; they expand to the bare expression when
 * __builtin_expect is not available. */
#ifdef HAVE___BUILTIN_EXPECT
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
#ifdef HOUDINI_USE_LOCALE
#define _isxdigit(c) isxdigit(c)
#define _isdigit(c) isdigit(c)
#else
/*
* Helper _isdigit methods -- do not trust the current locale
* */
/* NOTE(review): strchr() also matches the string's terminating NUL, so
 * _isxdigit(0) evaluates to true; confirm callers never pass a NUL byte. */
#define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
/* Size estimates for an input of x bytes: 1.2 * x for escaping (integer
 * arithmetic), x for unescaping. */
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
/* Decodes one entity whose text starts at src, just after the '&'. Returns
 * the number of bytes consumed, or 0 if src is not a recognized entity. */
extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
/* HTML-escapes src into ob; equivalent to houdini_escape_html0 with
 * secure = 1. */
extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
/* HTML-escapes src into ob; '/' and '\'' are only escaped when secure is
 * non-zero. */
extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size, int secure);
/* Appends src to ob with entities decoded. Returns 0, writing nothing, when
 * src is non-empty and contains no '&'; returns 1 otherwise. */
extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
/* Like houdini_unescape_html, but copies src verbatim into ob when that
 * function returns 0. */
extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
/* Appends src to ob escaped for use inside an href attribute. */
extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,100 +0,0 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "houdini.h"
/*
* The following characters will not be escaped:
*
* -_.+!*'(),%#@?=;:/,+&$ alphanum
*
* Note that this character set is the addition of:
*
* - The characters which are safe to be in an URL
* - The characters which are *not* safe to be in
* an URL because they are RESERVED characters.
*
* We assume (lazily) that any RESERVED char that
* appears inside an URL is actually meant to
* have its native function (i.e. as an URL
* component/separator) and hence needs no escaping.
*
* There are two exceptions: the characters & (amp)
* and ' (single quote) do not appear in the table.
* They are meant to appear in the URL as components,
* yet they require special HTML-entity escaping
* to generate valid HTML markup.
*
* All other characters will be escaped to %XX.
*
* The table below has one entry per byte value (256 in total, 24 per
* row): 1 means the byte is copied through unchanged, 0 means it needs
* escaping.
*/
static const char HREF_SAFE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Appends the 'size' bytes at 'src' to 'ob', escaped for use inside an
 * href attribute. Always returns 1.
 *
 * Bytes marked safe in HREF_SAFE are copied through in runs. Of the
 * remaining bytes, '&' becomes "&amp;", '\'' becomes "&#x27;", and anything
 * else is written as %XX with upper-case hex digits.
 */
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
  static const uint8_t hex_chars[] = "0123456789ABCDEF";
  uint8_t percent_escape[3];
  bufsize_t pos = 0;

  percent_escape[0] = '%';

  while (pos < size) {
    const bufsize_t run_start = pos;
    uint8_t ch;

    /* copy the longest run of safe bytes in one go */
    for (; pos < size && HREF_SAFE[src[pos]] != 0; pos++) {
    }
    if (likely(pos > run_start))
      cmark_strbuf_put(ob, src + run_start, pos - run_start);

    if (pos >= size)
      break;

    /* src[pos] needs escaping */
    ch = src[pos];
    pos++;

    if (ch == '&') {
      /* amp appears all the time in URLs, but needs
       * HTML-entity escaping to be inside an href */
      cmark_strbuf_puts(ob, "&amp;");
    } else if (ch == '\'') {
      /* the single quote is a valid URL character
       * according to the standard; it needs HTML
       * entity escaping too */
      cmark_strbuf_puts(ob, "&#x27;");
    } else {
      /* every other character goes with a %XX escaping; that includes
       * the space, which is written as %20 rather than as a plus sign */
      percent_escape[1] = hex_chars[(ch >> 4) & 0xF];
      percent_escape[2] = hex_chars[ch & 0xF];
      cmark_strbuf_put(ob, percent_escape, 3);
    }
  }

  return 1;
}

View File

@ -1,66 +0,0 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "houdini.h"
/**
* According to the OWASP rules:
*
* & --> &amp;
* < --> &lt;
* > --> &gt;
* " --> &quot;
* ' --> &#x27; &apos; is not recommended
* / --> &#x2F; forward slash is included as it helps end an HTML entity
*
* HTML_ESCAPE_TABLE has one entry per byte value (256 in total, 24 per
* row). A non-zero entry is the index into HTML_ESCAPES of the replacement
* for that byte; 0 means the byte needs no escaping.
*
* Note that HTML_ESCAPES spells the single quote and the forward slash as
* the decimal references &#39; and &#47;, not the hexadecimal forms listed
* above.
*/
static const char HTML_ESCAPE_TABLE[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Replacement strings, indexed by the values stored in HTML_ESCAPE_TABLE. */
static const char *HTML_ESCAPES[] = {"", "&quot;", "&amp;", "&#39;",
"&#47;", "&lt;", "&gt;"};
/**
 * Appends the 'size' bytes at 'src' to 'ob' with HTML special characters
 * replaced by the entities listed in HTML_ESCAPES. Always returns 1.
 *
 * When 'secure' is zero, the forward slash and the single quote are copied
 * through unescaped; all other flagged characters are always escaped.
 */
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
                         int secure) {
  bufsize_t pos = 0;

  while (pos < size) {
    const bufsize_t run_start = pos;
    bufsize_t escape_index = 0;
    uint8_t ch;

    /* advance over bytes that need no escaping */
    while (pos < size) {
      escape_index = HTML_ESCAPE_TABLE[src[pos]];
      if (escape_index != 0)
        break;
      pos++;
    }
    if (pos > run_start)
      cmark_strbuf_put(ob, src + run_start, pos - run_start);

    if (unlikely(pos >= size))
      break;

    /* src[pos] is a special character */
    ch = src[pos];
    if (!secure && (ch == '/' || ch == '\'')) {
      /* these two are only escaped in secure mode */
      cmark_strbuf_putc(ob, ch);
    } else {
      cmark_strbuf_puts(ob, HTML_ESCAPES[escape_index]);
    }
    pos++;
  }

  return 1;
}
/**
 * HTML-escapes 'src' into 'ob' in secure mode: calls houdini_escape_html0()
 * with secure = 1, so '/' and '\'' are escaped as well. Returns that
 * function's result.
 */
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
return houdini_escape_html0(ob, src, size, 1);
}

View File

@ -1,149 +0,0 @@
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "buffer.h"
#include "houdini.h"
#include "utf8.h"
#include "entities.inc"
/* Binary tree lookup code for entities added by JGM.
 *
 * Searches cmark_entities[low..hi] for the entity whose name is exactly the
 * first 'len' bytes of 's', probing index 'i' first and recursing into the
 * lower or upper part of the range. Returns the entity's replacement bytes,
 * or NULL if there is no match. */
static const unsigned char *S_lookup(int i, int low, int hi,
                                     const unsigned char *s, int len) {
  const int order =
      strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
  int step;

  /* exact match: same first 'len' bytes and the entity name ends there */
  if (order == 0 && cmark_entities[i].entity[len] == 0) {
    return (const unsigned char *)cmark_entities[i].bytes;
  }

  if (order <= 0) {
    /* continue below i, if anything is left there */
    if (i <= low) {
      return NULL;
    }
    step = (i - low) / 2;
    if (step == 0) {
      step = 1; /* always move by at least one slot */
    }
    return S_lookup(i - step, low, i - 1, s, len);
  }

  /* order > 0: continue above i, if anything is left there */
  if (i >= hi) {
    return NULL;
  }
  step = (hi - i) / 2;
  if (step == 0) {
    step = 1; /* always move by at least one slot */
  }
  return S_lookup(i + step, i + 1, hi, s, len);
}
/* Looks up the entity named by the first 'len' bytes of 's' across the
 * whole cmark_entities table, starting the search at its middle element.
 * Returns the replacement bytes, or NULL if the name is unknown. */
static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
}
/* Tries to decode a single entity whose text starts at 'src', i.e. just
 * after the '&'. At most 'size' bytes are examined.
 *
 * Numeric references ("#123;", "#x1F;" or "#X1F;") are appended to 'ob' as
 * UTF-8. Named entities are looked up with S_lookup_entity() and their
 * replacement bytes appended.
 *
 * Returns the number of bytes consumed from 'src', including the closing
 * ';', or 0 if no entity was recognized (in which case nothing is written).
 */
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
bufsize_t size) {
bufsize_t i = 0;
// Numeric character reference: needs at least "#", one digit and ";".
if (size >= 3 && src[0] == '#') {
int codepoint = 0;
int num_digits = 0;
if (_isdigit(src[1])) {
// Decimal form.
for (i = 1; i < size && _isdigit(src[i]); ++i) {
codepoint = (codepoint * 10) + (src[i] - '0');
if (codepoint >= 0x110000) {
// Keep counting digits but
// avoid integer overflow.
codepoint = 0x110000;
}
}
num_digits = i - 1;
}
else if (src[1] == 'x' || src[1] == 'X') {
// Hexadecimal form. (src[i] | 32) folds letters to lower case; the
// "% 39 - 9" then maps '0'..'9' to 0..9 and 'a'..'f' to 10..15.
for (i = 2; i < size && _isxdigit(src[i]); ++i) {
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
if (codepoint >= 0x110000) {
// Keep counting digits but
// avoid integer overflow.
codepoint = 0x110000;
}
}
num_digits = i - 2;
}
// Accept 1 to 8 digits, immediately followed by ';'.
if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') {
// Zero, the range 0xD800..0xDFFF and out-of-range values are replaced
// by U+FFFD.
if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
codepoint >= 0x110000) {
codepoint = 0xFFFD;
}
cmark_utf8proc_encode_char(codepoint, ob);
return i + 1;
}
}
else {
// Named entity: look for the ';' within the possible name lengths.
if (size > CMARK_ENTITY_MAX_LENGTH)
size = CMARK_ENTITY_MAX_LENGTH;
for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
if (src[i] == ' ')
break;
if (src[i] == ';') {
const unsigned char *entity = S_lookup_entity(src, i);
if (entity != NULL) {
cmark_strbuf_puts(ob, (const char *)entity);
return i + 1;
}
break;
}
}
}
return 0;
}
/* Appends the 'size' bytes at 'src' to 'ob' with HTML entities decoded.
 *
 * Returns 0, without writing anything, when 'src' is non-empty and
 * contains no '&' at all, so that the caller can use the input as it is.
 * Returns 1 in every other case. An '&' that does not introduce a
 * recognized entity is copied through literally.
 */
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
                          bufsize_t size) {
  bufsize_t pos = 0;

  while (pos < size) {
    const bufsize_t run_start = pos;
    bufsize_t consumed;

    /* find the end of the literal run: the next '&' or the end of input */
    for (; pos < size && src[pos] != '&'; pos++) {
    }

    if (likely(pos > run_start)) {
      if (unlikely(run_start == 0)) {
        /* the first run covers the whole input: nothing to unescape */
        if (pos >= size)
          return 0;
        cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
      }
      cmark_strbuf_put(ob, src + run_start, pos - run_start);
    }

    if (pos >= size)
      break;

    /* step over the '&' and try to decode what follows */
    pos++;
    consumed = houdini_unescape_ent(ob, src + pos, size - pos);
    pos += consumed;

    /* not really an entity */
    if (consumed == 0)
      cmark_strbuf_putc(ob, '&');
  }

  return 1;
}
/* Unescape src into ob; when houdini_unescape_html reports (by returning 0)
 * that it wrote nothing, append the source bytes verbatim instead. */
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
                             bufsize_t size) {
  const int rewritten = houdini_unescape_html(ob, src, size);
  if (rewritten == 0)
    cmark_strbuf_put(ob, src, size);
}

View File

@ -1,341 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "cmark_ctype.h"
#include "config.h"
#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "houdini.h"
#include "scanners.h"
#define BUFFER_SIZE 100
// Functions to convert cmark_nodes to HTML strings.
/* Append the HTML-escaped form of source[0..length) to dest.
 * The last argument of houdini_escape_html0 is always passed as 0 here. */
static void escape_html(cmark_strbuf *dest, const unsigned char *source,
                        bufsize_t length) {
  const int flag = 0;
  houdini_escape_html0(dest, source, length, flag);
}
/* Make sure the output ends with a newline, unless it is still empty. */
static CMARK_INLINE void cr(cmark_strbuf *html) {
  if (!html->size)
    return;
  if (html->ptr[html->size - 1] == '\n')
    return;
  cmark_strbuf_putc(html, '\n');
}
// Mutable state shared by successive S_render_node calls of one HTML render.
struct render_state {
// Buffer that receives the generated HTML.
cmark_strbuf *html;
// While non-NULL, S_render_node emits only escaped plain text. It is set to
// the image node when an image is entered and reset to NULL when that same
// node is visited again.
cmark_node *plain;
};
/* When CMARK_OPT_SOURCEPOS is set in options, append a
 * data-sourcepos="startline:startcol-endline:endcol" attribute for node to
 * html. Does nothing otherwise. */
static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html,
                               int options) {
  if ((options & CMARK_OPT_SOURCEPOS) == 0)
    return;

  char attr[BUFFER_SIZE];
  const int start_line = cmark_node_get_start_line(node);
  const int start_col = cmark_node_get_start_column(node);
  const int end_line = cmark_node_get_end_line(node);
  const int end_col = cmark_node_get_end_column(node);

  snprintf(attr, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"", start_line,
           start_col, end_line, end_col);
  cmark_strbuf_puts(html, attr);
}
// Emit the HTML for one iterator event.
//
// node     - node being entered or exited
// ev_type  - CMARK_EVENT_ENTER when the node is entered, otherwise treated as
//            an exit event
// state    - output buffer plus the "plain text" marker used for image alt text
// options  - CMARK_OPT_* bit flags (SOURCEPOS, SAFE, HARDBREAKS, NOBREAKS are
//            consulted here)
//
// Always returns 1.
static int S_render_node(cmark_node *node, cmark_event_type ev_type,
struct render_state *state, int options) {
cmark_node *parent;
cmark_node *grandparent;
cmark_strbuf *html = state->html;
// Templates for heading tags; the '0' is overwritten with the heading level.
char start_heading[] = "<h0";
char end_heading[] = "</h0";
bool tight;
char buffer[BUFFER_SIZE];
bool entering = (ev_type == CMARK_EVENT_ENTER);
if (state->plain == node) { // back at original node
state->plain = NULL;
}
// Plain mode (inside an image): only literal text is emitted, escaped, and
// line breaks become a single space; all other nodes produce nothing.
if (state->plain != NULL) {
switch (node->type) {
case CMARK_NODE_TEXT:
case CMARK_NODE_CODE:
case CMARK_NODE_HTML_INLINE:
escape_html(html, node->as.literal.data, node->as.literal.len);
break;
case CMARK_NODE_LINEBREAK:
case CMARK_NODE_SOFTBREAK:
cmark_strbuf_putc(html, ' ');
break;
default:
break;
}
return 1;
}
switch (node->type) {
case CMARK_NODE_DOCUMENT:
break;
case CMARK_NODE_BLOCK_QUOTE:
if (entering) {
cr(html);
cmark_strbuf_puts(html, "<blockquote");
S_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, ">\n");
} else {
cr(html);
cmark_strbuf_puts(html, "</blockquote>\n");
}
break;
case CMARK_NODE_LIST: {
cmark_list_type list_type = node->as.list.list_type;
int start = node->as.list.start;
if (entering) {
cr(html);
if (list_type == CMARK_BULLET_LIST) {
cmark_strbuf_puts(html, "<ul");
S_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, ">\n");
} else if (start == 1) {
cmark_strbuf_puts(html, "<ol");
S_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, ">\n");
} else {
// Ordered list that does not start at 1: emit an explicit start attribute.
snprintf(buffer, BUFFER_SIZE, "<ol start=\"%d\"", start);
cmark_strbuf_puts(html, buffer);
S_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, ">\n");
}
} else {
cmark_strbuf_puts(html,
list_type == CMARK_BULLET_LIST ? "</ul>\n" : "</ol>\n");
}
break;
}
case CMARK_NODE_ITEM:
if (entering) {
cr(html);
cmark_strbuf_puts(html, "<li");
S_render_sourcepos(node, html, options);
cmark_strbuf_putc(html, '>');
} else {
cmark_strbuf_puts(html, "</li>\n");
}
break;
case CMARK_NODE_HEADING:
if (entering) {
cr(html);
start_heading[2] = (char)('0' + node->as.heading.level);
cmark_strbuf_puts(html, start_heading);
S_render_sourcepos(node, html, options);
cmark_strbuf_putc(html, '>');
} else {
end_heading[3] = (char)('0' + node->as.heading.level);
cmark_strbuf_puts(html, end_heading);
cmark_strbuf_puts(html, ">\n");
}
break;
case CMARK_NODE_CODE_BLOCK:
cr(html);
if (node->as.code.info.len == 0) {
cmark_strbuf_puts(html, "<pre");
S_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, "><code>");
} else {
// Only the first whitespace-delimited word of the info string is used as
// the language class.
bufsize_t first_tag = 0;
while (first_tag < node->as.code.info.len &&
!cmark_isspace(node->as.code.info.data[first_tag])) {
first_tag += 1;
}
cmark_strbuf_puts(html, "<pre");
S_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, "><code class=\"language-");
escape_html(html, node->as.code.info.data, first_tag);
cmark_strbuf_puts(html, "\">");
}
escape_html(html, node->as.code.literal.data, node->as.code.literal.len);
cmark_strbuf_puts(html, "</code></pre>\n");
break;
case CMARK_NODE_HTML_BLOCK:
cr(html);
// With CMARK_OPT_SAFE the raw HTML is replaced by a placeholder comment;
// otherwise it is copied through unescaped.
if (options & CMARK_OPT_SAFE) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
}
cr(html);
break;
case CMARK_NODE_CUSTOM_BLOCK:
cr(html);
if (entering) {
cmark_strbuf_put(html, node->as.custom.on_enter.data,
node->as.custom.on_enter.len);
} else {
cmark_strbuf_put(html, node->as.custom.on_exit.data,
node->as.custom.on_exit.len);
}
cr(html);
break;
case CMARK_NODE_THEMATIC_BREAK:
cr(html);
cmark_strbuf_puts(html, "<hr");
S_render_sourcepos(node, html, options);
cmark_strbuf_puts(html, " />\n");
break;
case CMARK_NODE_PARAGRAPH:
// A paragraph whose grandparent is a tight list is rendered without <p> tags.
parent = cmark_node_parent(node);
grandparent = cmark_node_parent(parent);
if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) {
tight = grandparent->as.list.tight;
} else {
tight = false;
}
if (!tight) {
if (entering) {
cr(html);
cmark_strbuf_puts(html, "<p");
S_render_sourcepos(node, html, options);
cmark_strbuf_putc(html, '>');
} else {
cmark_strbuf_puts(html, "</p>\n");
}
}
break;
case CMARK_NODE_TEXT:
escape_html(html, node->as.literal.data, node->as.literal.len);
break;
case CMARK_NODE_LINEBREAK:
cmark_strbuf_puts(html, "<br />\n");
break;
case CMARK_NODE_SOFTBREAK:
// HARDBREAKS takes precedence over NOBREAKS; the default is a plain newline.
if (options & CMARK_OPT_HARDBREAKS) {
cmark_strbuf_puts(html, "<br />\n");
} else if (options & CMARK_OPT_NOBREAKS) {
cmark_strbuf_putc(html, ' ');
} else {
cmark_strbuf_putc(html, '\n');
}
break;
case CMARK_NODE_CODE:
cmark_strbuf_puts(html, "<code>");
escape_html(html, node->as.literal.data, node->as.literal.len);
cmark_strbuf_puts(html, "</code>");
break;
case CMARK_NODE_HTML_INLINE:
if (options & CMARK_OPT_SAFE) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
}
break;
case CMARK_NODE_CUSTOM_INLINE:
if (entering) {
cmark_strbuf_put(html, node->as.custom.on_enter.data,
node->as.custom.on_enter.len);
} else {
cmark_strbuf_put(html, node->as.custom.on_exit.data,
node->as.custom.on_exit.len);
}
break;
case CMARK_NODE_STRONG:
if (entering) {
cmark_strbuf_puts(html, "<strong>");
} else {
cmark_strbuf_puts(html, "</strong>");
}
break;
case CMARK_NODE_EMPH:
if (entering) {
cmark_strbuf_puts(html, "<em>");
} else {
cmark_strbuf_puts(html, "</em>");
}
break;
case CMARK_NODE_LINK:
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
// In safe mode a URL flagged by scan_dangerous_url is dropped, leaving an
// empty href.
if (!((options & CMARK_OPT_SAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
if (node->as.link.title.len) {
cmark_strbuf_puts(html, "\" title=\"");
escape_html(html, node->as.link.title.data, node->as.link.title.len);
}
cmark_strbuf_puts(html, "\">");
} else {
cmark_strbuf_puts(html, "</a>");
}
break;
case CMARK_NODE_IMAGE:
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
if (!((options & CMARK_OPT_SAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
cmark_strbuf_puts(html, "\" alt=\"");
// Switch to plain mode so the image's children are rendered as alt text.
state->plain = node;
} else {
if (node->as.link.title.len) {
cmark_strbuf_puts(html, "\" title=\"");
escape_html(html, node->as.link.title.data, node->as.link.title.len);
}
cmark_strbuf_puts(html, "\" />");
}
break;
default:
assert(false);
break;
}
// cmark_strbuf_putc(html, 'x');
return 1;
}
/* Render the tree rooted at root as HTML.
 *
 * Walks the tree with a cmark_iter, feeding every event to S_render_node,
 * and returns the detached output buffer. The buffer is created with the
 * allocator obtained from cmark_node_mem(root). */
char *cmark_render_html(cmark_node *root, int options) {
  cmark_strbuf out = CMARK_BUF_INIT(cmark_node_mem(root));
  struct render_state state = {&out, NULL};
  cmark_iter *walker = cmark_iter_new(root);

  for (cmark_event_type ev = cmark_iter_next(walker); ev != CMARK_EVENT_DONE;
       ev = cmark_iter_next(walker)) {
    S_render_node(cmark_iter_get_node(walker), ev, &state, options);
  }

  char *rendered = (char *)cmark_strbuf_detach(&out);
  cmark_iter_free(walker);
  return rendered;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,21 +0,0 @@
#ifndef CMARK_INLINES_H
#define CMARK_INLINES_H
#ifdef __cplusplus
extern "C" {
#endif
cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options);
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
cmark_reference_map *refmap);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,121 +0,0 @@
#include <assert.h>
#include <stdlib.h>
#include "config.h"
#include "node.h"
#include "cmark.h"
#include "iterator.h"
// Bit set of node types the iterator treats as leaves: bit N is set when the
// node type with numeric value N never gets a separate EXIT event.
static const int S_leaf_mask =
(1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) |
(1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) |
(1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) |
(1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE);
/* Create an iterator over the tree rooted at root, using the allocator stored
 * in root->content.mem. The first call to cmark_iter_next yields an ENTER
 * event for root. Returns NULL when root is NULL. */
cmark_iter *cmark_iter_new(cmark_node *root) {
  if (!root)
    return NULL;

  cmark_mem *allocator = root->content.mem;
  cmark_iter *it = (cmark_iter *)allocator->calloc(1, sizeof(*it));

  it->root = root;
  it->mem = allocator;

  /* Nothing visited yet. */
  it->cur.node = NULL;
  it->cur.ev_type = CMARK_EVENT_NONE;

  /* The walk starts by entering the root. */
  it->next.node = root;
  it->next.ev_type = CMARK_EVENT_ENTER;

  return it;
}
/* Release an iterator through the allocator it was created with.
 * A NULL iterator (which cmark_iter_new returns for a NULL root) is
 * accepted and ignored. */
void cmark_iter_free(cmark_iter *iter) {
  if (!iter)
    return;
  iter->mem->free(iter);
}
/* True when node's type has its bit set in S_leaf_mask. */
static bool S_is_leaf(cmark_node *node) {
  const int type_bit = 1 << node->type;
  return (type_bit & S_leaf_mask) ? true : false;
}
/* Advance the iterator by one event.
 *
 * The pending (node, event) pair becomes the current one and is returned;
 * the following pair is then precomputed. Once CMARK_EVENT_DONE has been
 * reached the iterator stays there. */
cmark_event_type cmark_iter_next(cmark_iter *iter) {
  const cmark_event_type current_ev = iter->next.ev_type;
  cmark_node *const current = iter->next.node;

  iter->cur.node = current;
  iter->cur.ev_type = current_ev;

  if (current_ev == CMARK_EVENT_DONE)
    return current_ev;

  /* Entering a container: descend, or exit right away when it is empty. */
  if (current_ev == CMARK_EVENT_ENTER && !S_is_leaf(current)) {
    if (current->first_child != NULL) {
      iter->next.node = current->first_child;
      iter->next.ev_type = CMARK_EVENT_ENTER;
    } else {
      /* Same node, but the next event is its exit. */
      iter->next.ev_type = CMARK_EVENT_EXIT;
    }
    return current_ev;
  }

  /* Leaving a node (or visiting a leaf): sibling, then parent, never past root. */
  if (current == iter->root) {
    iter->next.node = NULL;
    iter->next.ev_type = CMARK_EVENT_DONE;
  } else if (current->next) {
    iter->next.node = current->next;
    iter->next.ev_type = CMARK_EVENT_ENTER;
  } else if (current->parent) {
    iter->next.node = current->parent;
    iter->next.ev_type = CMARK_EVENT_EXIT;
  } else {
    /* A non-root node without sibling or parent should not occur. */
    assert(false);
    iter->next.node = NULL;
    iter->next.ev_type = CMARK_EVENT_DONE;
  }

  return current_ev;
}
/* Reposition the iterator: make (current, event_type) the pending event and
 * immediately step once so that it becomes the current event. */
void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
                      cmark_event_type event_type) {
  iter->next.node = current;
  iter->next.ev_type = event_type;
  (void)cmark_iter_next(iter);
}
/* Node of the event most recently returned by cmark_iter_next. */
cmark_node *cmark_iter_get_node(cmark_iter *iter) {
  cmark_node *visited = iter->cur.node;
  return visited;
}
/* Type of the event most recently returned by cmark_iter_next. */
cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) {
  const cmark_event_type last_event = iter->cur.ev_type;
  return last_event;
}
/* Root node the iterator was created for. */
cmark_node *cmark_iter_get_root(cmark_iter *iter) {
  cmark_node *top = iter->root;
  return top;
}
// Merge every run of adjacent TEXT siblings in the tree rooted at root into
// the first node of the run. The merged node receives the concatenated
// literal and the end_column of the last node in the run; the other nodes of
// the run are freed. Does nothing when root is NULL.
void cmark_consolidate_text_nodes(cmark_node *root) {
if (root == NULL) {
return;
}
cmark_iter *iter = cmark_iter_new(root);
// Scratch buffer reused for each run.
cmark_strbuf buf = CMARK_BUF_INIT(iter->mem);
cmark_event_type ev_type;
cmark_node *cur, *tmp, *next;
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
cur = cmark_iter_get_node(iter);
// A run starts at a TEXT node that is directly followed by another TEXT node.
if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT &&
cur->next && cur->next->type == CMARK_NODE_TEXT) {
cmark_strbuf_clear(&buf);
cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
tmp = cur->next;
while (tmp && tmp->type == CMARK_NODE_TEXT) {
// Step the iterator before tmp is freed so it is not left pointing at a
// freed node.
cmark_iter_next(iter); // advance pointer
cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
cur->end_column = tmp->end_column;
next = tmp->next;
cmark_node_free(tmp);
tmp = next;
}
// Replace cur's literal with the concatenated text.
cmark_chunk_free(iter->mem, &cur->as.literal);
cur->as.literal = cmark_chunk_buf_detach(&buf);
}
}
cmark_strbuf_free(&buf);
cmark_iter_free(iter);
}

View File

@ -1,27 +0,0 @@
#ifndef CMARK_ITERATOR_H
#define CMARK_ITERATOR_H
#ifdef __cplusplus
extern "C" {
#endif
#include "cmark.h"
#include "memory.h"
// One iterator position: an event type together with the node it refers to.
typedef struct {
cmark_event_type ev_type;
cmark_node *node;
} cmark_iter_state;
// Tree iterator state.
struct cmark_iter {
// Allocator associated with this iterator.
cmark_mem *mem;
// Root of the traversal.
cmark_node *root;
// Current position (event and node).
cmark_iter_state cur;
// Upcoming position (event and node).
cmark_iter_state next;
};
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,453 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "config.h"
#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "utf8.h"
#include "scanners.h"
#include "render.h"
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define BLANKLINE() renderer->blankline(renderer)
#define LIST_NUMBER_STRING_SIZE 20
/* Write one code point c to the LaTeX output, escaping it as required by the
 * escaping mode.
 *
 * LITERAL: c is emitted unchanged.
 * NORMAL / URL: characters that are special in LaTeX are replaced by a
 * command or prefixed with a backslash; a few typographic characters are
 * replaced by their ASCII LaTeX spelling in NORMAL mode only.
 * nextc is the following byte and is used to break up "--" ligatures. */
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  if (escape == LITERAL) {
    cmark_render_code_point(renderer, c);
    return;
  }

  const int normal = (escape == NORMAL);
  /* Fixed replacement text; NULL means "emit the code point itself". */
  const char *subst = NULL;

  switch (c) {
  case '{':
  case '}':
  case '#':
  case '%':
  case '&':
    /* Always backslash-escaped. */
    cmark_render_ascii(renderer, "\\");
    break;
  case '$':
  case '_':
    /* Backslash-escaped in running text only. */
    if (normal) {
      cmark_render_ascii(renderer, "\\");
    }
    break;
  case '-':
    subst = (nextc == '-') ? "-{}" : "-"; /* prevent ligature */
    break;
  case '~':
    if (normal) {
      subst = "\\textasciitilde{}";
    }
    break;
  case '^':
    subst = "\\^{}";
    break;
  case '\\':
    /* '/' acts as path separator even on windows */
    subst = (escape == URL) ? "/" : "\\textbackslash{}";
    break;
  case '|':
    subst = "\\textbar{}";
    break;
  case '<':
    subst = "\\textless{}";
    break;
  case '>':
    subst = "\\textgreater{}";
    break;
  case '[':
  case ']':
    /* Brackets are wrapped in a group. */
    cmark_render_ascii(renderer, "{");
    cmark_render_code_point(renderer, c);
    cmark_render_ascii(renderer, "}");
    return;
  case '"':
    subst = "\\textquotedbl{}"; /* requires \usepackage[T1]{fontenc} */
    break;
  case '\'':
    subst = "\\textquotesingle{}"; /* requires \usepackage{textcomp} */
    break;
  case 0xA0: /* nbsp */
    subst = "~";
    break;
  case 0x2026: /* hellip */
    subst = "\\ldots{}";
    break;
  case 0x2018: /* lsquo */
    if (normal) {
      subst = "`";
    }
    break;
  case 0x2019: /* rsquo */
    if (normal) {
      subst = "\'";
    }
    break;
  case 0x201C: /* ldquo */
    if (normal) {
      subst = "``";
    }
    break;
  case 0x201D: /* rdquo */
    if (normal) {
      subst = "''";
    }
    break;
  case 0x2014: /* emdash */
    if (normal) {
      subst = "---";
    }
    break;
  case 0x2013: /* endash */
    if (normal) {
      subst = "--";
    }
    break;
  default:
    break;
  }

  if (subst != NULL) {
    cmark_render_ascii(renderer, subst);
  } else {
    cmark_render_code_point(renderer, c);
  }
}
// Classification of a link node as computed by get_link_type.
typedef enum {
// Not a link node, or the URL is empty / has no scheme, or (with no title)
// the link has no child.
NO_LINK,
// Untitled link whose first child's literal equals the URL.
URL_AUTOLINK,
// Untitled link whose first child's literal equals the URL with a leading
// "mailto:" removed.
EMAIL_AUTOLINK,
// Any other link with a scheme.
NORMAL_LINK,
// URL starts with '#'.
INTERNAL_LINK
} link_type;
/* Classify a link node for LaTeX output.
 *
 * Returns
 *   NO_LINK        - node is not a link, its URL is missing/empty or has no
 *                    scheme, or it has neither title nor children;
 *   INTERNAL_LINK  - the URL starts with '#';
 *   URL_AUTOLINK   - no title and the (consolidated) first child's literal
 *                    equals the URL;
 *   EMAIL_AUTOLINK - as above, after stripping a leading "mailto:";
 *   NORMAL_LINK    - everything else.
 *
 * Side effect: for untitled links, adjacent text children are merged via
 * cmark_consolidate_text_nodes so the comparison sees the whole link text. */
static link_type get_link_type(cmark_node *node) {
  size_t title_len, url_len;
  cmark_node *link_text;
  const char *realurl;
  int realurllen;
  bool isemail = false;

  if (node->type != CMARK_NODE_LINK) {
    return NO_LINK;
  }

  const char *url = cmark_node_get_url(node);
  /* Guard every use of url, not just the '#' test. */
  if (url == NULL) {
    return NO_LINK;
  }
  if (*url == '#') {
    return INTERNAL_LINK;
  }

  cmark_chunk url_chunk = cmark_chunk_literal(url);
  url_len = strlen(url);
  if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
    return NO_LINK;
  }

  const char *title = cmark_node_get_title(node);
  title_len = title ? strlen(title) : 0;

  // if it has a title, we can't treat it as an autolink:
  if (title_len == 0) {
    link_text = node->first_child;
    if (!link_text)
      return NO_LINK;
    cmark_consolidate_text_nodes(link_text);

    realurl = url;
    realurllen = (int)url_len;
    if (strncmp(realurl, "mailto:", 7) == 0) {
      realurl += 7;
      realurllen -= 7;
      isemail = true;
    }

    // NOTE(review): as.literal is read without checking that link_text is a
    // text node; confirm this is intended for non-text first children.
    if (realurllen == link_text->as.literal.len &&
        strncmp(realurl, (char *)link_text->as.literal.data,
                link_text->as.literal.len) == 0) {
      return isemail ? EMAIL_AUTOLINK : URL_AUTOLINK;
    }
  }

  return NORMAL_LINK;
}
/* Return the enumerate nesting depth of node: the number of ordered lists
 * among node itself and its ancestors. Bullet lists are not counted, since
 * they do not use the enumi..enumv counters. */
static int S_get_enumlevel(cmark_node *node) {
  int enumlevel = 0;
  cmark_node *tmp = node;
  while (tmp) {
    /* Test the list type of the ancestor being visited, not of the start node. */
    if (tmp->type == CMARK_NODE_LIST &&
        cmark_node_get_list_type(tmp) == CMARK_ORDERED_LIST) {
      enumlevel++;
    }
    tmp = tmp->parent;
  }
  return enumlevel;
}
/* Emit the LaTeX for one iterator event.
 *
 * renderer - output sink (out / cr / blankline callbacks, wrap width)
 * node     - node being entered or exited
 * ev_type  - CMARK_EVENT_ENTER on entry, otherwise treated as exit
 * options  - CMARK_OPT_* flags; HARDBREAKS and NOBREAKS are consulted here
 *
 * Returns 1 normally, and 0 for URL autolinks and images, where this
 * function has already written the complete construct. */
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
                         cmark_event_type ev_type, int options) {
  int list_number;
  int enumlevel;
  char list_number_string[LIST_NUMBER_STRING_SIZE];
  bool entering = (ev_type == CMARK_EVENT_ENTER);
  cmark_list_type list_type;
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);

  switch (node->type) {
  case CMARK_NODE_DOCUMENT:
    break;

  case CMARK_NODE_BLOCK_QUOTE:
    if (entering) {
      LIT("\\begin{quote}");
      CR();
    } else {
      LIT("\\end{quote}");
      BLANKLINE();
    }
    break;

  case CMARK_NODE_LIST:
    list_type = cmark_node_get_list_type(node);
    if (entering) {
      LIT("\\begin{");
      LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
      LIT("}");
      CR();
      list_number = cmark_node_get_list_start(node);
      if (list_number > 1) {
        enumlevel = S_get_enumlevel(node);
        // latex normally supports only five levels
        if (enumlevel >= 1 && enumlevel <= 5) {
          // \item increments the counter before printing the label, so the
          // counter must be preset to one less than the first number wanted.
          snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",
                   list_number - 1);
          LIT("\\setcounter{enum");
          switch (enumlevel) {
          case 1: LIT("i"); break;
          case 2: LIT("ii"); break;
          case 3: LIT("iii"); break;
          case 4: LIT("iv"); break;
          case 5: LIT("v"); break;
          default: LIT("i"); break;
          }
          LIT("}{");
          OUT(list_number_string, false, NORMAL);
          LIT("}");
        }
        CR();
      }
    } else {
      LIT("\\end{");
      LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
      LIT("}");
      BLANKLINE();
    }
    break;

  case CMARK_NODE_ITEM:
    if (entering) {
      LIT("\\item ");
    } else {
      CR();
    }
    break;

  case CMARK_NODE_HEADING:
    if (entering) {
      // Levels other than 1-5 get no sectioning command, only a brace group.
      switch (cmark_node_get_heading_level(node)) {
      case 1:
        LIT("\\section");
        break;
      case 2:
        LIT("\\subsection");
        break;
      case 3:
        LIT("\\subsubsection");
        break;
      case 4:
        LIT("\\paragraph");
        break;
      case 5:
        LIT("\\subparagraph");
        break;
      }
      LIT("{");
    } else {
      LIT("}");
      BLANKLINE();
    }
    break;

  case CMARK_NODE_CODE_BLOCK:
    CR();
    LIT("\\begin{verbatim}");
    CR();
    OUT(cmark_node_get_literal(node), false, LITERAL);
    CR();
    LIT("\\end{verbatim}");
    BLANKLINE();
    break;

  case CMARK_NODE_HTML_BLOCK:
    // Raw HTML produces no LaTeX output.
    break;

  case CMARK_NODE_CUSTOM_BLOCK:
    CR();
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    CR();
    break;

  case CMARK_NODE_THEMATIC_BREAK:
    BLANKLINE();
    LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}");
    BLANKLINE();
    break;

  case CMARK_NODE_PARAGRAPH:
    if (!entering) {
      BLANKLINE();
    }
    break;

  case CMARK_NODE_TEXT:
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
    break;

  case CMARK_NODE_LINEBREAK:
    LIT("\\\\");
    CR();
    break;

  case CMARK_NODE_SOFTBREAK:
    if (options & CMARK_OPT_HARDBREAKS) {
      LIT("\\\\");
      CR();
    } else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
      CR();
    } else {
      OUT(" ", allow_wrap, NORMAL);
    }
    break;

  case CMARK_NODE_CODE:
    LIT("\\texttt{");
    OUT(cmark_node_get_literal(node), false, NORMAL);
    LIT("}");
    break;

  case CMARK_NODE_HTML_INLINE:
    // Raw HTML produces no LaTeX output.
    break;

  case CMARK_NODE_CUSTOM_INLINE:
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
        false, LITERAL);
    break;

  case CMARK_NODE_STRONG:
    if (entering) {
      LIT("\\textbf{");
    } else {
      LIT("}");
    }
    break;

  case CMARK_NODE_EMPH:
    if (entering) {
      LIT("\\emph{");
    } else {
      LIT("}");
    }
    break;

  case CMARK_NODE_LINK:
    if (entering) {
      const char *url = cmark_node_get_url(node);
      // requires \usepackage{hyperref}
      switch (get_link_type(node)) {
      case URL_AUTOLINK:
        LIT("\\url{");
        OUT(url, false, URL);
        LIT("}");
        return 0; // Don't process further nodes to avoid double-rendering artefacts
      case EMAIL_AUTOLINK:
        LIT("\\href{");
        OUT(url, false, URL);
        LIT("}\\nolinkurl{");
        break;
      case NORMAL_LINK:
        LIT("\\href{");
        OUT(url, false, URL);
        LIT("}{");
        break;
      case INTERNAL_LINK:
        LIT("\\protect\\hyperlink{");
        OUT(url + 1, false, URL); // skip the leading '#'
        LIT("}{");
        break;
      case NO_LINK:
        LIT("{"); // error?
      }
    } else {
      LIT("}");
    }
    break;

  case CMARK_NODE_IMAGE:
    if (entering) {
      LIT("\\protect\\includegraphics{");
      // requires \include{graphicx}
      OUT(cmark_node_get_url(node), false, URL);
      LIT("}");
      return 0;
    }
    break;

  default:
    assert(false);
    break;
  }

  return 1;
}
/* Render the tree rooted at root as LaTeX by running the generic cmark_render
 * driver with this file's character escaper (outc) and node renderer. */
char *cmark_render_latex(cmark_node *root, int options, int width) {
  char *latex = cmark_render(root, options, width, outc, S_render_node);
  return latex;
}

View File

@ -1,211 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include "config.h"
#include "memory.h"
#include "cmark.h"
#include "node.h"
#if defined(__OpenBSD__)
# include <sys/param.h>
# if OpenBSD >= 201605
# define USE_PLEDGE
# include <unistd.h>
# endif
#endif
#if defined(_WIN32) && !defined(__CYGWIN__)
#include <io.h>
#include <fcntl.h>
#endif
// Output format selected on the command line with --to / -t.
typedef enum {
// Has no renderer: print_document reports it as an unknown format.
FORMAT_NONE,
// Default when no format option is given.
FORMAT_HTML,
FORMAT_XML,
FORMAT_MAN,
FORMAT_COMMONMARK,
FORMAT_LATEX
} writer_format;
/* Print the command-line help text to standard output. */
void print_usage() {
  static const char *const help_lines[] = {
      "Usage: cmark [FILE*]\n",
      "Options:\n",
      " --to, -t FORMAT Specify output format (html, xml, man, "
      "commonmark, latex)\n",
      " --width WIDTH Specify wrap width (default 0 = nowrap)\n",
      " --sourcepos Include source position attribute\n",
      " --hardbreaks Treat newlines as hard line breaks\n",
      " --nobreaks Render soft line breaks as spaces\n",
      " --safe Suppress raw HTML and dangerous URLs\n",
      " --smart Use smart punctuation\n",
      " --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n",
      " --help, -h Print usage information\n",
      " --version Print version\n",
  };
  const unsigned count = sizeof(help_lines) / sizeof(help_lines[0]);
  for (unsigned idx = 0; idx < count; ++idx) {
    printf("%s", help_lines[idx]);
  }
}
/* Render document in the requested format, write the result to standard
 * output and release it with the document's allocator. An unsupported
 * format is reported on stderr and terminates the process with status 1. */
static void print_document(cmark_node *document, writer_format writer,
                           int options, int width) {
  char *rendered = NULL;

  if (writer == FORMAT_HTML) {
    rendered = cmark_render_html(document, options);
  } else if (writer == FORMAT_XML) {
    rendered = cmark_render_xml(document, options);
  } else if (writer == FORMAT_MAN) {
    rendered = cmark_render_man(document, options, width);
  } else if (writer == FORMAT_COMMONMARK) {
    rendered = cmark_render_commonmark(document, options, width);
  } else if (writer == FORMAT_LATEX) {
    rendered = cmark_render_latex(document, options, width);
  } else {
    fprintf(stderr, "Unknown format %d\n", writer);
    exit(1);
  }

  printf("%s", rendered);
  cmark_node_mem(document)->free(rendered);
}
/* Command-line entry point.
 *
 * Parses the options, feeds every FILE argument (or standard input when no
 * file is given) to a cmark parser, and prints the rendered document in the
 * selected format. Returns 0 on success; option and I/O errors terminate the
 * process with status 1. */
int main(int argc, char *argv[]) {
  int i, numfps = 0;
  int *files;
  char buffer[4096];
  cmark_parser *parser;
  size_t bytes;
  cmark_node *document;
  int width = 0;
  char *unparsed;
  writer_format writer = FORMAT_HTML;
  int options = CMARK_OPT_DEFAULT;

#ifdef USE_PLEDGE
  if (pledge("stdio rpath", NULL) != 0) {
    perror("pledge");
    return 1;
  }
#endif

#if defined(_WIN32) && !defined(__CYGWIN__)
  /* Avoid newline translation on Windows. */
  _setmode(_fileno(stdin), _O_BINARY);
  _setmode(_fileno(stdout), _O_BINARY);
#endif

  /* files[] collects the argv indices of the file arguments. */
  files = (int *)calloc(argc, sizeof(*files));
  if (files == NULL) {
    perror("calloc");
    return 1;
  }

  for (i = 1; i < argc; i++) {
    if (strcmp(argv[i], "--version") == 0) {
      printf("cmark %s", CMARK_VERSION_STRING);
      printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n");
      exit(0);
    } else if (strcmp(argv[i], "--sourcepos") == 0) {
      options |= CMARK_OPT_SOURCEPOS;
    } else if (strcmp(argv[i], "--hardbreaks") == 0) {
      options |= CMARK_OPT_HARDBREAKS;
    } else if (strcmp(argv[i], "--nobreaks") == 0) {
      options |= CMARK_OPT_NOBREAKS;
    } else if (strcmp(argv[i], "--smart") == 0) {
      options |= CMARK_OPT_SMART;
    } else if (strcmp(argv[i], "--safe") == 0) {
      options |= CMARK_OPT_SAFE;
    } else if (strcmp(argv[i], "--validate-utf8") == 0) {
      options |= CMARK_OPT_VALIDATE_UTF8;
    } else if ((strcmp(argv[i], "--help") == 0) ||
               (strcmp(argv[i], "-h") == 0)) {
      print_usage();
      exit(0);
    } else if (strcmp(argv[i], "--width") == 0) {
      i += 1;
      if (i < argc) {
        width = (int)strtol(argv[i], &unparsed, 10);
        /* Reject trailing characters after the number. */
        if (unparsed && strlen(unparsed) > 0) {
          fprintf(stderr, "failed parsing width '%s' at '%s'\n", argv[i],
                  unparsed);
          exit(1);
        }
      } else {
        fprintf(stderr, "--width requires an argument\n");
        exit(1);
      }
    } else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) {
      i += 1;
      if (i < argc) {
        if (strcmp(argv[i], "man") == 0) {
          writer = FORMAT_MAN;
        } else if (strcmp(argv[i], "html") == 0) {
          writer = FORMAT_HTML;
        } else if (strcmp(argv[i], "xml") == 0) {
          writer = FORMAT_XML;
        } else if (strcmp(argv[i], "commonmark") == 0) {
          writer = FORMAT_COMMONMARK;
        } else if (strcmp(argv[i], "latex") == 0) {
          writer = FORMAT_LATEX;
        } else {
          fprintf(stderr, "Unknown format %s\n", argv[i]);
          exit(1);
        }
      } else {
        fprintf(stderr, "No argument provided for %s\n", argv[i - 1]);
        exit(1);
      }
    } else if (*argv[i] == '-') {
      print_usage();
      exit(1);
    } else { // treat as file argument
      files[numfps++] = i;
    }
  }

  parser = cmark_parser_new(options);

  /* Feed every named file to the same parser, in order. */
  for (i = 0; i < numfps; i++) {
    FILE *fp = fopen(argv[files[i]], "rb");
    if (fp == NULL) {
      fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]],
              strerror(errno));
      exit(1);
    }
    while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
      cmark_parser_feed(parser, buffer, bytes);
      /* A short read ends this input. */
      if (bytes < sizeof(buffer)) {
        break;
      }
    }
    fclose(fp);
  }

  /* No file arguments: read from standard input. */
  if (numfps == 0) {
    while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
      cmark_parser_feed(parser, buffer, bytes);
      if (bytes < sizeof(buffer)) {
        break;
      }
    }
  }

#ifdef USE_PLEDGE
  if (pledge("stdio", NULL) != 0) {
    perror("pledge");
    return 1;
  }
#endif

  document = cmark_parser_finish(parser);
  cmark_parser_free(parser);
  print_document(document, writer, options, width);
  cmark_node_free(document);
  free(files);
  return 0;
}

View File

@ -1,252 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "config.h"
#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "utf8.h"
#include "render.h"
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
#define CR() renderer->cr(renderer)
#define BLANKLINE() renderer->blankline(renderer)
#define LIST_NUMBER_SIZE 20
// Functions to convert cmark_nodes to groff man strings.
/* Write one code point c to the groff man output.
 *
 * In LITERAL mode c is emitted unchanged. Otherwise '.' and '\'' are
 * protected with "\&" when renderer->begin_line is set, '-' and '\' are
 * escaped, and typographic quotes/dashes are replaced by groff special
 * character names. nextc is unused. */
static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c,
                   unsigned char nextc) {
  (void)(nextc);

  /* Replacement text, or NULL when the code point is written as is. */
  const char *subst = NULL;

  if (escape != LITERAL) {
    switch (c) {
    case '.':
      if (renderer->begin_line) {
        subst = "\\&.";
      }
      break;
    case '\'':
      if (renderer->begin_line) {
        subst = "\\&'";
      }
      break;
    case '-':
      subst = "\\-";
      break;
    case '\\':
      subst = "\\e";
      break;
    case 0x2018: /* left single quote */
      subst = "\\[oq]";
      break;
    case 0x2019: /* right single quote */
      subst = "\\[cq]";
      break;
    case 0x201C: /* left double quote */
      subst = "\\[lq]";
      break;
    case 0x201D: /* right double quote */
      subst = "\\[rq]";
      break;
    case 0x2014: /* em dash */
      subst = "\\[em]";
      break;
    case 0x2013: /* en dash */
      subst = "\\[en]";
      break;
    default:
      break;
    }
  }

  if (subst != NULL) {
    cmark_render_ascii(renderer, subst);
  } else {
    cmark_render_code_point(renderer, c);
  }
}
// Emit the groff man markup for one iterator event.
//
// renderer - output sink (out / cr callbacks, wrap width)
// node     - node being entered or exited
// ev_type  - CMARK_EVENT_ENTER on entry, otherwise treated as exit
// options  - CMARK_OPT_* flags; HARDBREAKS and NOBREAKS are consulted here
//
// Always returns 1.
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
cmark_event_type ev_type, int options) {
cmark_node *tmp;
int list_number;
bool entering = (ev_type == CMARK_EVENT_ENTER);
// Wrapping is allowed only with a positive width and without NOBREAKS.
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
// avoid unused parameter error:
(void)(options);
switch (node->type) {
case CMARK_NODE_DOCUMENT:
break;
case CMARK_NODE_BLOCK_QUOTE:
if (entering) {
CR();
LIT(".RS");
CR();
} else {
CR();
LIT(".RE");
CR();
}
break;
case CMARK_NODE_LIST:
// Lists produce no markup of their own; the items do the work.
break;
case CMARK_NODE_ITEM:
if (entering) {
CR();
LIT(".IP ");
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
LIT("\\[bu] 2");
} else {
// Item number = list start + number of preceding siblings.
list_number = cmark_node_get_list_start(node->parent);
tmp = node;
while (tmp->prev) {
tmp = tmp->prev;
list_number += 1;
}
char list_number_s[LIST_NUMBER_SIZE];
snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number);
LIT(list_number_s);
}
CR();
} else {
CR();
}
break;
case CMARK_NODE_HEADING:
if (entering) {
CR();
// Level 1 becomes .SH; every other level becomes .SS.
LIT(cmark_node_get_heading_level(node) == 1 ? ".SH" : ".SS");
CR();
} else {
CR();
}
break;
case CMARK_NODE_CODE_BLOCK:
CR();
LIT(".IP\n.nf\n\\f[C]\n");
OUT(cmark_node_get_literal(node), false, NORMAL);
CR();
LIT("\\f[]\n.fi");
CR();
break;
case CMARK_NODE_HTML_BLOCK:
// Raw HTML produces no man output.
break;
case CMARK_NODE_CUSTOM_BLOCK:
CR();
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
false, LITERAL);
CR();
break;
case CMARK_NODE_THEMATIC_BREAK:
CR();
LIT(".PP\n * * * * *");
CR();
break;
case CMARK_NODE_PARAGRAPH:
if (entering) {
// no blank line if first paragraph in list:
if (node->parent && node->parent->type == CMARK_NODE_ITEM &&
node->prev == NULL) {
// no blank line or .PP
} else {
CR();
LIT(".PP");
CR();
}
} else {
CR();
}
break;
case CMARK_NODE_TEXT:
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
break;
case CMARK_NODE_LINEBREAK:
LIT(".PD 0\n.P\n.PD");
CR();
break;
case CMARK_NODE_SOFTBREAK:
// HARDBREAKS: same markup as a hard line break. Otherwise a newline when
// not wrapping and NOBREAKS is off, else a space.
if (options & CMARK_OPT_HARDBREAKS) {
LIT(".PD 0\n.P\n.PD");
CR();
} else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
CR();
} else {
OUT(" ", allow_wrap, LITERAL);
}
break;
case CMARK_NODE_CODE:
LIT("\\f[C]");
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
LIT("\\f[]");
break;
case CMARK_NODE_HTML_INLINE:
// Raw HTML produces no man output.
break;
case CMARK_NODE_CUSTOM_INLINE:
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
false, LITERAL);
break;
case CMARK_NODE_STRONG:
if (entering) {
LIT("\\f[B]");
} else {
LIT("\\f[]");
}
break;
case CMARK_NODE_EMPH:
if (entering) {
LIT("\\f[I]");
} else {
LIT("\\f[]");
}
break;
case CMARK_NODE_LINK:
// The link text is rendered by the children; the URL follows in parentheses.
if (!entering) {
LIT(" (");
OUT(cmark_node_get_url(node), allow_wrap, URL);
LIT(")");
}
break;
case CMARK_NODE_IMAGE:
if (entering) {
LIT("[IMAGE: ");
} else {
LIT("]");
}
break;
default:
assert(false);
break;
}
return 1;
}
/* Render the tree rooted at root as a groff man page by running the generic
 * cmark_render driver with this file's character escaper and node renderer. */
char *cmark_render_man(cmark_node *root, int options, int width) {
  char *man_page = cmark_render(root, options, width, S_outc, S_render_node);
  return man_page;
}

View File

@ -1,858 +0,0 @@
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "node.h"
static void S_node_unlink(cmark_node *node);
#define NODE_MEM(node) cmark_node_mem(node)
/* True when node is non-NULL and its type lies in the block-type range. */
static CMARK_INLINE bool S_is_block(cmark_node *node) {
  return node != NULL && node->type >= CMARK_NODE_FIRST_BLOCK &&
         node->type <= CMARK_NODE_LAST_BLOCK;
}
/* True when node is non-NULL and its type lies in the inline-type range. */
static CMARK_INLINE bool S_is_inline(cmark_node *node) {
  return node != NULL && node->type >= CMARK_NODE_FIRST_INLINE &&
         node->type <= CMARK_NODE_LAST_INLINE;
}
/* Decide whether child may be inserted as a child of node.
 *
 * Rejects NULL arguments, a child that is node itself or one of its
 * ancestors, and document nodes as children. Otherwise the answer depends
 * on the container type:
 *   document / block quote / item  -> any block except an item
 *   list                           -> items only
 *   custom block                   -> anything
 *   paragraph, heading, emph, strong, link, image, custom inline
 *                                  -> inline nodes only
 *   everything else                -> nothing */
static bool S_can_contain(cmark_node *node, cmark_node *child) {
  cmark_node *ancestor;

  if (!node || !child) {
    return false;
  }
  if (child->type == CMARK_NODE_DOCUMENT) {
    return false;
  }

  /* child must not be node or any ancestor of node (that would form a cycle). */
  for (ancestor = node; ancestor != NULL; ancestor = ancestor->parent) {
    if (ancestor == child) {
      return false;
    }
  }

  switch (node->type) {
  case CMARK_NODE_LIST:
    return child->type == CMARK_NODE_ITEM;

  case CMARK_NODE_CUSTOM_BLOCK:
    return true;

  case CMARK_NODE_DOCUMENT:
  case CMARK_NODE_BLOCK_QUOTE:
  case CMARK_NODE_ITEM:
    if (child->type == CMARK_NODE_ITEM) {
      return false;
    }
    return S_is_block(child);

  case CMARK_NODE_PARAGRAPH:
  case CMARK_NODE_HEADING:
  case CMARK_NODE_EMPH:
  case CMARK_NODE_STRONG:
  case CMARK_NODE_LINK:
  case CMARK_NODE_IMAGE:
  case CMARK_NODE_CUSTOM_INLINE:
    return S_is_inline(child);

  default:
    return false;
  }
}
/* Allocate a zero-initialised node of the given type with allocator mem.
 * Headings start at level 1; lists start as loose bullet lists with
 * start number 0. */
cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) {
  cmark_node *created = (cmark_node *)mem->calloc(1, sizeof(cmark_node));

  cmark_strbuf_init(mem, &created->content, 0);
  created->type = (uint16_t)type;

  if (created->type == CMARK_NODE_HEADING) {
    created->as.heading.level = 1;
  } else if (created->type == CMARK_NODE_LIST) {
    created->as.list.list_type = CMARK_BULLET_LIST;
    created->as.list.start = 0;
    created->as.list.tight = false;
  }

  return created;
}
/* Allocate a node of the given type using DEFAULT_MEM_ALLOCATOR. */
cmark_node *cmark_node_new(cmark_node_type type) {
  extern cmark_mem DEFAULT_MEM_ALLOCATOR;
  cmark_mem *default_mem = &DEFAULT_MEM_ALLOCATOR;
  return cmark_node_new_with_mem(type, default_mem);
}
// Release `e`, every node reachable from it through `next`, and all of their
// descendants. Works iteratively: before a node is freed its children are
// spliced into the pending list right after it, so no recursion is needed.
static void S_free_nodes(cmark_node *e) {
  cmark_node *pending;

  for (; e != NULL; e = pending) {
    cmark_strbuf_free(&e->content);

    // Release the chunks owned by the type-specific payload.
    if (e->type == CMARK_NODE_CODE_BLOCK) {
      cmark_chunk_free(NODE_MEM(e), &e->as.code.info);
      cmark_chunk_free(NODE_MEM(e), &e->as.code.literal);
    } else if (e->type == CMARK_NODE_TEXT ||
               e->type == CMARK_NODE_HTML_INLINE ||
               e->type == CMARK_NODE_CODE ||
               e->type == CMARK_NODE_HTML_BLOCK) {
      cmark_chunk_free(NODE_MEM(e), &e->as.literal);
    } else if (e->type == CMARK_NODE_LINK || e->type == CMARK_NODE_IMAGE) {
      cmark_chunk_free(NODE_MEM(e), &e->as.link.url);
      cmark_chunk_free(NODE_MEM(e), &e->as.link.title);
    } else if (e->type == CMARK_NODE_CUSTOM_BLOCK ||
               e->type == CMARK_NODE_CUSTOM_INLINE) {
      cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_enter);
      cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_exit);
    }

    if (e->last_child) {
      // Queue the children: first_child..last_child go between e and e->next.
      e->last_child->next = e->next;
      e->next = e->first_child;
    }

    pending = e->next;
    NODE_MEM(e)->free(e);
  }
}
// Detach `node` from its tree and free it together with all descendants.
// A NULL `node` is a no-op: the previous version dereferenced it right after
// the NULL-tolerant unlink helper returned.
void cmark_node_free(cmark_node *node) {
  if (node == NULL) {
    return;
  }
  S_node_unlink(node);
  // Cut the sibling chain so only this subtree is released.
  node->next = NULL;
  S_free_nodes(node);
}
// Type of `node`, or CMARK_NODE_NONE when `node` is NULL.
cmark_node_type cmark_node_get_type(cmark_node *node) {
  if (node != NULL) {
    return (cmark_node_type)node->type;
  }
  return CMARK_NODE_NONE;
}
// Human-readable name of the node's type. Returns "NONE" for a NULL node and
// "<unknown>" for a type value that is not listed below.
const char *cmark_node_get_type_string(cmark_node *node) {
  const char *label = "<unknown>";

  if (node == NULL) {
    return "NONE";
  }

  switch (node->type) {
  case CMARK_NODE_NONE:           label = "none";           break;
  case CMARK_NODE_DOCUMENT:       label = "document";       break;
  case CMARK_NODE_BLOCK_QUOTE:    label = "block_quote";    break;
  case CMARK_NODE_LIST:           label = "list";           break;
  case CMARK_NODE_ITEM:           label = "item";           break;
  case CMARK_NODE_CODE_BLOCK:     label = "code_block";     break;
  case CMARK_NODE_HTML_BLOCK:     label = "html_block";     break;
  case CMARK_NODE_CUSTOM_BLOCK:   label = "custom_block";   break;
  case CMARK_NODE_PARAGRAPH:      label = "paragraph";      break;
  case CMARK_NODE_HEADING:        label = "heading";        break;
  case CMARK_NODE_THEMATIC_BREAK: label = "thematic_break"; break;
  case CMARK_NODE_TEXT:           label = "text";           break;
  case CMARK_NODE_SOFTBREAK:      label = "softbreak";      break;
  case CMARK_NODE_LINEBREAK:      label = "linebreak";      break;
  case CMARK_NODE_CODE:           label = "code";           break;
  case CMARK_NODE_HTML_INLINE:    label = "html_inline";    break;
  case CMARK_NODE_CUSTOM_INLINE:  label = "custom_inline";  break;
  case CMARK_NODE_EMPH:           label = "emph";           break;
  case CMARK_NODE_STRONG:         label = "strong";         break;
  case CMARK_NODE_LINK:           label = "link";           break;
  case CMARK_NODE_IMAGE:          label = "image";          break;
  }

  return label;
}
// Next sibling of `node`; NULL when `node` is NULL.
cmark_node *cmark_node_next(cmark_node *node) {
  return node ? node->next : NULL;
}
// Previous sibling of `node`; NULL when `node` is NULL.
cmark_node *cmark_node_previous(cmark_node *node) {
  return node ? node->prev : NULL;
}
// Parent of `node`; NULL when `node` is NULL.
cmark_node *cmark_node_parent(cmark_node *node) {
  return node ? node->parent : NULL;
}
// First child of `node`; NULL when `node` is NULL.
cmark_node *cmark_node_first_child(cmark_node *node) {
  return node ? node->first_child : NULL;
}
// Last child of `node`; NULL when `node` is NULL.
cmark_node *cmark_node_last_child(cmark_node *node) {
  return node ? node->last_child : NULL;
}
// Opaque pointer previously stored with cmark_node_set_user_data();
// NULL when `node` is NULL.
void *cmark_node_get_user_data(cmark_node *node) {
  return node ? node->user_data : NULL;
}
// Attach an opaque pointer to `node`. Returns 1 on success, 0 for a NULL node.
int cmark_node_set_user_data(cmark_node *node, void *user_data) {
  if (node != NULL) {
    node->user_data = user_data;
    return 1;
  }
  return 0;
}
// Literal text of an html_block, text, html_inline, code or code_block node
// as a C string; NULL for a NULL node or any other node type.
const char *cmark_node_get_literal(cmark_node *node) {
  cmark_chunk *text = NULL;

  if (node == NULL) {
    return NULL;
  }

  if (node->type == CMARK_NODE_CODE_BLOCK) {
    // Code blocks keep their text in the code payload.
    text = &node->as.code.literal;
  } else if (node->type == CMARK_NODE_HTML_BLOCK ||
             node->type == CMARK_NODE_TEXT ||
             node->type == CMARK_NODE_HTML_INLINE ||
             node->type == CMARK_NODE_CODE) {
    text = &node->as.literal;
  }

  return text ? cmark_chunk_to_cstr(NODE_MEM(node), text) : NULL;
}
// Replace the literal text of an html_block, text, html_inline, code or
// code_block node. Returns 1 on success, 0 for a NULL node or another type.
int cmark_node_set_literal(cmark_node *node, const char *content) {
  cmark_chunk *target = NULL;

  if (node == NULL) {
    return 0;
  }

  if (node->type == CMARK_NODE_CODE_BLOCK) {
    target = &node->as.code.literal;
  } else if (node->type == CMARK_NODE_HTML_BLOCK ||
             node->type == CMARK_NODE_TEXT ||
             node->type == CMARK_NODE_HTML_INLINE ||
             node->type == CMARK_NODE_CODE) {
    target = &node->as.literal;
  }

  if (target == NULL) {
    return 0;
  }
  cmark_chunk_set_cstr(NODE_MEM(node), target, content);
  return 1;
}
// Level of a heading node; 0 for a NULL node or a non-heading node.
int cmark_node_get_heading_level(cmark_node *node) {
  if (node != NULL && node->type == CMARK_NODE_HEADING) {
    return node->as.heading.level;
  }
  return 0;
}
// Set the level (1..6) of a heading node. Returns 1 on success; 0 when the
// node is NULL, the level is out of range, or the node is not a heading.
int cmark_node_set_heading_level(cmark_node *node, int level) {
  if (node == NULL || level < 1 || level > 6) {
    return 0;
  }
  if (node->type != CMARK_NODE_HEADING) {
    return 0;
  }
  node->as.heading.level = level;
  return 1;
}
// List type of a list node; CMARK_NO_LIST for NULL or a non-list node.
cmark_list_type cmark_node_get_list_type(cmark_node *node) {
  if (node != NULL && node->type == CMARK_NODE_LIST) {
    return node->as.list.list_type;
  }
  return CMARK_NO_LIST;
}
// Set the list type of a list node. Only CMARK_BULLET_LIST and
// CMARK_ORDERED_LIST are accepted. Returns 1 on success, 0 otherwise.
int cmark_node_set_list_type(cmark_node *node, cmark_list_type type) {
  if (type != CMARK_BULLET_LIST && type != CMARK_ORDERED_LIST) {
    return 0;
  }
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return 0;
  }
  node->as.list.list_type = type;
  return 1;
}
// Delimiter type of a list node; CMARK_NO_DELIM for NULL or a non-list node.
cmark_delim_type cmark_node_get_list_delim(cmark_node *node) {
  if (node != NULL && node->type == CMARK_NODE_LIST) {
    return node->as.list.delimiter;
  }
  return CMARK_NO_DELIM;
}
// Set the delimiter type of a list node. Only CMARK_PERIOD_DELIM and
// CMARK_PAREN_DELIM are accepted. Returns 1 on success, 0 otherwise.
int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim) {
  if (delim != CMARK_PERIOD_DELIM && delim != CMARK_PAREN_DELIM) {
    return 0;
  }
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return 0;
  }
  node->as.list.delimiter = delim;
  return 1;
}
// Start number of a list node; 0 for NULL or a non-list node.
int cmark_node_get_list_start(cmark_node *node) {
  if (node != NULL && node->type == CMARK_NODE_LIST) {
    return node->as.list.start;
  }
  return 0;
}
// Set the start number of a list node. Negative values are rejected.
// Returns 1 on success, 0 otherwise.
int cmark_node_set_list_start(cmark_node *node, int start) {
  if (node == NULL || start < 0) {
    return 0;
  }
  if (node->type != CMARK_NODE_LIST) {
    return 0;
  }
  node->as.list.start = start;
  return 1;
}
// Tight flag of a list node; 0 for NULL or a non-list node.
int cmark_node_get_list_tight(cmark_node *node) {
  if (node != NULL && node->type == CMARK_NODE_LIST) {
    return node->as.list.tight;
  }
  return 0;
}
// Set the tight flag of a list node. The list becomes tight only when
// `tight` is exactly 1; any other value makes it loose.
// Returns 1 on success, 0 for NULL or a non-list node.
int cmark_node_set_list_tight(cmark_node *node, int tight) {
  if (node == NULL || node->type != CMARK_NODE_LIST) {
    return 0;
  }
  node->as.list.tight = (tight == 1);
  return 1;
}
// Info string of a code block as a C string; NULL for a NULL node or any
// other node type.
const char *cmark_node_get_fence_info(cmark_node *node) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK) {
    return NULL;
  }
  return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.info);
}
// Replace the info string of a code block. Returns 1 on success, 0 for a
// NULL node or any other node type.
int cmark_node_set_fence_info(cmark_node *node, const char *info) {
  if (node == NULL || node->type != CMARK_NODE_CODE_BLOCK) {
    return 0;
  }
  cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.info, info);
  return 1;
}
// URL of a link or image node as a C string; NULL for a NULL node or any
// other node type.
const char *cmark_node_get_url(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE) {
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.url);
  }
  return NULL;
}
// Replace the URL of a link or image node. Returns 1 on success, 0 for a
// NULL node or any other node type.
int cmark_node_set_url(cmark_node *node, const char *url) {
  if (node == NULL) {
    return 0;
  }
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE) {
    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.url, url);
    return 1;
  }
  return 0;
}
// Title of a link or image node as a C string; NULL for a NULL node or any
// other node type.
const char *cmark_node_get_title(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE) {
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.title);
  }
  return NULL;
}
// Replace the title of a link or image node. Returns 1 on success, 0 for a
// NULL node or any other node type.
int cmark_node_set_title(cmark_node *node, const char *title) {
  if (node == NULL) {
    return 0;
  }
  if (node->type == CMARK_NODE_LINK || node->type == CMARK_NODE_IMAGE) {
    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.title, title);
    return 1;
  }
  return 0;
}
// "on enter" text of a custom inline/block node as a C string; NULL for a
// NULL node or any other node type.
const char *cmark_node_get_on_enter(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  if (node->type == CMARK_NODE_CUSTOM_INLINE ||
      node->type == CMARK_NODE_CUSTOM_BLOCK) {
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_enter);
  }
  return NULL;
}
// Replace the "on enter" text of a custom inline/block node. Returns 1 on
// success, 0 for a NULL node or any other node type.
int cmark_node_set_on_enter(cmark_node *node, const char *on_enter) {
  if (node == NULL) {
    return 0;
  }
  if (node->type == CMARK_NODE_CUSTOM_INLINE ||
      node->type == CMARK_NODE_CUSTOM_BLOCK) {
    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_enter, on_enter);
    return 1;
  }
  return 0;
}
// "on exit" text of a custom inline/block node as a C string; NULL for a
// NULL node or any other node type.
const char *cmark_node_get_on_exit(cmark_node *node) {
  if (node == NULL) {
    return NULL;
  }
  if (node->type == CMARK_NODE_CUSTOM_INLINE ||
      node->type == CMARK_NODE_CUSTOM_BLOCK) {
    return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_exit);
  }
  return NULL;
}
// Replace the "on exit" text of a custom inline/block node. Returns 1 on
// success, 0 for a NULL node or any other node type.
int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) {
  if (node == NULL) {
    return 0;
  }
  if (node->type == CMARK_NODE_CUSTOM_INLINE ||
      node->type == CMARK_NODE_CUSTOM_BLOCK) {
    cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_exit, on_exit);
    return 1;
  }
  return 0;
}
// Recorded start line of `node`; 0 when `node` is NULL.
int cmark_node_get_start_line(cmark_node *node) {
  return node ? node->start_line : 0;
}
// Recorded start column of `node`; 0 when `node` is NULL.
int cmark_node_get_start_column(cmark_node *node) {
  return node ? node->start_column : 0;
}
// Recorded end line of `node`; 0 when `node` is NULL.
int cmark_node_get_end_line(cmark_node *node) {
  return node ? node->end_line : 0;
}
// Recorded end column of `node`; 0 when `node` is NULL.
int cmark_node_get_end_column(cmark_node *node) {
  return node ? node->end_column : 0;
}
// Remove `node` from its sibling chain and from its parent's first/last child
// slots. The node's own next, prev and parent fields are left untouched, so
// callers must reset them if they need a fully detached node.
// A NULL `node` is ignored.
static void S_node_unlink(cmark_node *node) {
  cmark_node *before;
  cmark_node *after;
  cmark_node *owner;

  if (node == NULL) {
    return;
  }

  before = node->prev;
  after = node->next;
  owner = node->parent;

  // Bridge the neighbours over the removed node.
  if (before) {
    before->next = after;
  }
  if (after) {
    after->prev = before;
  }

  // Fix up the parent's boundary pointers.
  if (owner) {
    if (owner->first_child == node) {
      owner->first_child = after;
    }
    if (owner->last_child == node) {
      owner->last_child = before;
    }
  }
}
// Detach `node` from its tree and clear its next, prev and parent links.
// The node and its children are not freed.
// A NULL `node` is a no-op: the previous version dereferenced it right after
// the NULL-tolerant unlink helper returned.
void cmark_node_unlink(cmark_node *node) {
  if (node == NULL) {
    return;
  }
  S_node_unlink(node);
  node->next = NULL;
  node->prev = NULL;
  node->parent = NULL;
}
// Insert `sibling` immediately before `node`, detaching `sibling` from its
// current position first.
// Returns 1 on success. Returns 0 when either argument is NULL, when `node`
// has no parent, when the parent cannot contain `sibling`, or when `node` and
// `sibling` are the same node. Without that last check the relinking below
// would leave the node pointing at itself (next == prev == node).
int cmark_node_insert_before(cmark_node *node, cmark_node *sibling) {
  if (node == NULL || sibling == NULL || node == sibling) {
    return 0;
  }
  if (!node->parent || !S_can_contain(node->parent, sibling)) {
    return 0;
  }

  S_node_unlink(sibling);

  cmark_node *old_prev = node->prev;

  // Insert 'sibling' between 'old_prev' and 'node'.
  if (old_prev) {
    old_prev->next = sibling;
  }
  sibling->prev = old_prev;
  sibling->next = node;
  node->prev = sibling;

  // Set new parent.
  cmark_node *parent = node->parent;
  sibling->parent = parent;

  // Adjust first_child of parent if inserted as first child.
  if (parent && !old_prev) {
    parent->first_child = sibling;
  }

  return 1;
}
// Insert `sibling` immediately after `node`, detaching `sibling` from its
// current position first.
// Returns 1 on success. Returns 0 when either argument is NULL, when `node`
// has no parent, when the parent cannot contain `sibling`, or when `node` and
// `sibling` are the same node. Without that last check the relinking below
// would leave the node pointing at itself (next == prev == node).
int cmark_node_insert_after(cmark_node *node, cmark_node *sibling) {
  if (node == NULL || sibling == NULL || node == sibling) {
    return 0;
  }
  if (!node->parent || !S_can_contain(node->parent, sibling)) {
    return 0;
  }

  S_node_unlink(sibling);

  cmark_node *old_next = node->next;

  // Insert 'sibling' between 'node' and 'old_next'.
  if (old_next) {
    old_next->prev = sibling;
  }
  sibling->next = old_next;
  sibling->prev = node;
  node->next = sibling;

  // Set new parent.
  cmark_node *parent = node->parent;
  sibling->parent = parent;

  // Adjust last_child of parent if inserted as last child.
  if (parent && !old_next) {
    parent->last_child = sibling;
  }

  return 1;
}
// Put `newnode` in the tree position of `oldnode` and detach `oldnode`
// (which is not freed). Returns 1 on success, 0 when the insertion is
// rejected; in that case `oldnode` stays where it was.
int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode) {
  int inserted = cmark_node_insert_before(oldnode, newnode);
  if (inserted) {
    cmark_node_unlink(oldnode);
    return 1;
  }
  return 0;
}
// Make `child` the first child of `node`, detaching it from its current
// position first. Returns 1 on success, 0 when `node` cannot contain `child`.
int cmark_node_prepend_child(cmark_node *node, cmark_node *child) {
  cmark_node *former_head;

  if (!S_can_contain(node, child)) {
    return 0;
  }

  S_node_unlink(child);
  former_head = node->first_child;

  child->parent = node;
  child->prev = NULL;
  child->next = former_head;
  node->first_child = child;

  if (former_head == NULL) {
    // The node had no children: the new child is also the last one.
    node->last_child = child;
  } else {
    former_head->prev = child;
  }

  return 1;
}
// Make `child` the last child of `node`, detaching it from its current
// position first. Returns 1 on success, 0 when `node` cannot contain `child`.
int cmark_node_append_child(cmark_node *node, cmark_node *child) {
  cmark_node *former_tail;

  if (!S_can_contain(node, child)) {
    return 0;
  }

  S_node_unlink(child);
  former_tail = node->last_child;

  child->parent = node;
  child->prev = former_tail;
  child->next = NULL;
  node->last_child = child;

  if (former_tail == NULL) {
    // The node had no children: the new child is also the first one.
    node->first_child = child;
  } else {
    former_tail->next = child;
  }

  return 1;
}
// Report an inconsistent link field `elem` found on `node`, together with the
// node's type name and start position. Does nothing when `out` is NULL.
static void S_print_error(FILE *out, cmark_node *node, const char *elem) {
  if (out != NULL) {
    const char *type_name = cmark_node_get_type_string(node);
    fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem, type_name,
            node->start_line, node->start_column);
  }
}
// Walk the subtree rooted at `node`, verify that the prev/parent/last_child
// links agree with the first_child/next links, repair any that do not, and
// return the number of repairs made. Each problem is reported to `out`
// (nothing is printed when `out` is NULL). Returns 0 for a NULL `node`.
int cmark_node_check(cmark_node *node, FILE *out) {
cmark_node *cur;
int errors = 0;
if (!node) {
return 0;
}
cur = node;
// Depth-first traversal without recursion: descend through first_child,
// then advance through next, then climb back through parent.
for (;;) {
if (cur->first_child) {
// A first child must have no previous sibling.
if (cur->first_child->prev != NULL) {
S_print_error(out, cur->first_child, "prev");
cur->first_child->prev = NULL;
++errors;
}
if (cur->first_child->parent != cur) {
S_print_error(out, cur->first_child, "parent");
cur->first_child->parent = cur;
++errors;
}
cur = cur->first_child;
continue;
}
next_sibling:
// Back at the starting node: the whole subtree has been visited.
if (cur == node) {
break;
}
if (cur->next) {
// The next sibling must point back at cur and share its parent.
if (cur->next->prev != cur) {
S_print_error(out, cur->next, "prev");
cur->next->prev = cur;
++errors;
}
if (cur->next->parent != cur->parent) {
S_print_error(out, cur->next, "parent");
cur->next->parent = cur->parent;
++errors;
}
cur = cur->next;
continue;
}
// No next sibling: cur must be its parent's last child.
if (cur->parent->last_child != cur) {
S_print_error(out, cur->parent, "last_child");
cur->parent->last_child = cur;
++errors;
}
// Climb one level and look for the parent's next sibling.
cur = cur->parent;
goto next_sibling;
}
return errors;
}

View File

@ -1,93 +0,0 @@
#ifndef CMARK_NODE_H
#define CMARK_NODE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include <stdint.h>
#include "cmark.h"
#include "buffer.h"
#include "chunk.h"
// Payload of a list node (stored in cmark_node.as.list).
typedef struct {
cmark_list_type list_type; // bullet or ordered
int marker_offset; // NOTE(review): presumably the list marker's indentation — confirm in the block parser
int padding; // NOTE(review): presumably marker width plus following spaces — confirm
int start; // start number; new list nodes default to 0
cmark_delim_type delimiter; // period or paren delimiter
unsigned char bullet_char; // NOTE(review): presumably the bullet character used — confirm
bool tight; // true for a tight list
} cmark_list;
// Payload of a code block node (stored in cmark_node.as.code).
typedef struct {
cmark_chunk info; // info string
cmark_chunk literal; // code text
// NOTE(review): the fence_* fields and `fenced` look like parser bookkeeping
// for fenced code blocks — confirm in the block parser.
uint8_t fence_length;
uint8_t fence_offset;
unsigned char fence_char;
int8_t fenced;
} cmark_code;
// Payload of a heading node (stored in cmark_node.as.heading).
typedef struct {
int level; // heading level; the public setter accepts 1..6
bool setext; // NOTE(review): presumably true for setext-style headings — confirm
} cmark_heading;
// Payload shared by link and image nodes (stored in cmark_node.as.link).
typedef struct {
cmark_chunk url; // destination
cmark_chunk title; // title text
} cmark_link;
// Payload of custom inline/block nodes (stored in cmark_node.as.custom).
typedef struct {
cmark_chunk on_enter; // text exposed through cmark_node_get_on_enter()
cmark_chunk on_exit; // text exposed through cmark_node_get_on_exit()
} cmark_custom;
// Bit flags. NOTE(review): presumably stored in cmark_node.flags and used by
// the block parser — confirm against the parser sources.
enum cmark_node__internal_flags {
CMARK_NODE__OPEN = (1 << 0),
CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
};
// A node of the document tree.
struct cmark_node {
// Content buffer; its `mem` field is also the node's allocator
// (see cmark_node_mem below).
cmark_strbuf content;
// Sibling, parent and child links of the tree.
struct cmark_node *next;
struct cmark_node *prev;
struct cmark_node *parent;
struct cmark_node *first_child;
struct cmark_node *last_child;
// Opaque pointer owned by the API user.
void *user_data;
// Source position of the node.
int start_line;
int start_column;
int end_line;
int end_column;
int internal_offset; // NOTE(review): purpose not visible here — confirm in the parser
uint16_t type; // a cmark_node_type value
uint16_t flags; // NOTE(review): presumably cmark_node__internal_flags bits — confirm
// Type-specific payload; which member is valid depends on `type`.
union {
cmark_chunk literal;
cmark_list list;
cmark_code code;
cmark_heading heading;
cmark_link link;
cmark_custom custom;
int html_block_type;
} as;
};
// Allocator associated with `node`: the one recorded in its content buffer.
static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) {
  cmark_mem *allocator = node->content.mem;
  return allocator;
}
// Checks a node tree's link consistency, reporting problems to `out`;
// returns the number of errors found.
CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,39 +0,0 @@
#ifndef CMARK_AST_H
#define CMARK_AST_H
#include <stdio.h>
#include "node.h"
#include "buffer.h"
#include "memory.h"
#ifdef __cplusplus
extern "C" {
#endif
// Upper bound on the length of a link label accepted by reference lookup.
#define MAX_LINK_LABEL_LENGTH 1000
// Parser state. Only the declaration is visible in this header; the
// per-field notes below are inferred from names and types.
// NOTE(review): confirm them against the block parser.
struct cmark_parser {
struct cmark_mem *mem; // allocator
struct cmark_reference_map *refmap; // link reference definitions
struct cmark_node *root; // presumably the document root
struct cmark_node *current; // presumably the innermost open block
int line_number;
bufsize_t offset;
bufsize_t column;
bufsize_t first_nonspace;
bufsize_t first_nonspace_column;
int indent;
bool blank;
bool partially_consumed_tab;
cmark_strbuf curline;
bufsize_t last_line_length;
cmark_strbuf linebuf;
int options;
bool last_buffer_ended_with_cr;
};
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,146 +0,0 @@
#include "cmark.h"
#include "utf8.h"
#include "parser.h"
#include "references.h"
#include "inlines.h"
#include "chunk.h"
// Hash a NUL-terminated label: for every byte,
// hash = byte + (hash << 6) + (hash << 16) - hash, in unsigned arithmetic.
static unsigned int refhash(const unsigned char *link_ref) {
  unsigned int acc = 0;
  const unsigned char *cursor;

  for (cursor = link_ref; *cursor != '\0'; ++cursor) {
    acc = (unsigned int)*cursor + (acc << 6) + (acc << 16) - acc;
  }
  return acc;
}
// Free one reference entry (label, url, title and the entry itself) using the
// map's allocator. A NULL `ref` is ignored.
static void reference_free(cmark_reference_map *map, cmark_reference *ref) {
  cmark_mem *allocator = map->mem;

  if (ref == NULL) {
    return;
  }
  allocator->free(ref->label);
  cmark_chunk_free(allocator, &ref->url);
  cmark_chunk_free(allocator, &ref->title);
  allocator->free(ref);
}
// Produce the canonical form of a reference label: case-folded, trimmed, and
// with whitespace normalized. The result is a newly allocated string owned by
// the caller. Returns NULL when `ref` is NULL, empty, or normalizes to an
// empty string (i.e. it consisted solely of whitespace).
static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) {
  cmark_strbuf folded = CMARK_BUF_INIT(mem);
  unsigned char *label;

  if (ref == NULL || ref->len == 0) {
    return NULL;
  }

  cmark_utf8proc_case_fold(&folded, ref->data, ref->len);
  cmark_strbuf_trim(&folded);
  cmark_strbuf_normalize_whitespace(&folded);

  label = cmark_strbuf_detach(&folded);
  assert(label);

  if (label[0] != '\0') {
    return label;
  }
  mem->free(label);
  return NULL;
}
// Insert `ref` at the head of its hash bucket. If the bucket already holds an
// entry with the same hash and label, the existing entry wins and `ref` is
// freed instead.
static void add_reference(cmark_reference_map *map, cmark_reference *ref) {
  cmark_reference **bucket = &map->table[ref->hash % REFMAP_SIZE];
  cmark_reference *existing;

  ref->next = *bucket;
  for (existing = ref->next; existing != NULL; existing = existing->next) {
    if (existing->hash == ref->hash &&
        strcmp((char *)existing->label, (char *)ref->label) == 0) {
      reference_free(map, ref);
      return;
    }
  }
  *bucket = ref;
}
// Register a link reference definition in `map`. Nothing is stored when the
// label is empty or whitespace-only.
void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
                            cmark_chunk *url, cmark_chunk *title) {
  cmark_reference *entry;
  unsigned char *normalized = normalize_reference(map->mem, label);

  if (normalized == NULL) {
    /* empty reference name, or composed from only whitespace */
    return;
  }

  entry = (cmark_reference *)map->mem->calloc(1, sizeof(*entry));
  entry->label = normalized;
  entry->hash = refhash(entry->label);
  entry->url = cmark_clean_url(map->mem, url);
  entry->title = cmark_clean_title(map->mem, title);
  entry->next = NULL;

  add_reference(map, entry);
}
// Find the reference whose normalized label matches `label`.
// Returns NULL when the label is empty or longer than MAX_LINK_LABEL_LENGTH,
// when `map` is NULL, when the label normalizes to nothing, or when no entry
// matches.
cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
                                        cmark_chunk *label) {
  cmark_reference *candidate;
  unsigned char *key;
  unsigned int key_hash;

  if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH) {
    return NULL;
  }
  if (map == NULL) {
    return NULL;
  }

  key = normalize_reference(map->mem, label);
  if (key == NULL) {
    return NULL;
  }

  key_hash = refhash(key);
  for (candidate = map->table[key_hash % REFMAP_SIZE]; candidate != NULL;
       candidate = candidate->next) {
    if (candidate->hash == key_hash &&
        strcmp((char *)candidate->label, (char *)key) == 0) {
      break;
    }
  }

  map->mem->free(key);
  return candidate;
}
// Free every entry in every bucket, then the map itself.
// A NULL `map` is ignored.
void cmark_reference_map_free(cmark_reference_map *map) {
  unsigned int slot;

  if (map == NULL) {
    return;
  }

  for (slot = 0; slot < REFMAP_SIZE; ++slot) {
    cmark_reference *entry = map->table[slot];
    while (entry != NULL) {
      // Grab the successor before the entry is released.
      cmark_reference *successor = entry->next;
      reference_free(map, entry);
      entry = successor;
    }
  }

  map->mem->free(map);
}
// Allocate an empty (zero-filled) reference map that remembers `mem` as its
// allocator.
cmark_reference_map *cmark_reference_map_new(cmark_mem *mem) {
  cmark_reference_map *fresh =
      (cmark_reference_map *)mem->calloc(1, sizeof(cmark_reference_map));
  fresh->mem = mem;
  return fresh;
}

View File

@ -1,41 +0,0 @@
#ifndef CMARK_REFERENCES_H
#define CMARK_REFERENCES_H
#include "memory.h"
#include "chunk.h"
#ifdef __cplusplus
extern "C" {
#endif
// Number of hash buckets in a reference map.
#define REFMAP_SIZE 16
// One link reference definition; entries in the same bucket are chained
// through `next`.
struct cmark_reference {
struct cmark_reference *next; // next entry in the same bucket
unsigned char *label; // normalized label
cmark_chunk url;
cmark_chunk title;
unsigned int hash; // hash of `label`
};
typedef struct cmark_reference cmark_reference;
// Hash table of link reference definitions with chained buckets.
struct cmark_reference_map {
cmark_mem *mem; // allocator used for the map and its entries
cmark_reference *table[REFMAP_SIZE]; // bucket heads, indexed by hash % REFMAP_SIZE
};
typedef struct cmark_reference_map cmark_reference_map;
// Create an empty map that allocates through `mem`.
cmark_reference_map *cmark_reference_map_new(cmark_mem *mem);
// Free the map and all of its entries; NULL is accepted.
void cmark_reference_map_free(cmark_reference_map *map);
// Look up a label; returns NULL when there is no matching definition.
cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
cmark_chunk *label);
// Add a definition; an existing entry with the same normalized label is kept.
extern void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
cmark_chunk *url, cmark_chunk *title);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,186 +0,0 @@
#include <stdlib.h>
#include "buffer.h"
#include "chunk.h"
#include "cmark.h"
#include "utf8.h"
#include "render.h"
#include "node.h"
// Request at least one pending line break before the next output.
static CMARK_INLINE void S_cr(cmark_renderer *renderer) {
  if (renderer->need_cr >= 1) {
    return;
  }
  renderer->need_cr = 1;
}
// Request at least two pending line breaks (a blank line) before the next
// output.
static CMARK_INLINE void S_blankline(cmark_renderer *renderer) {
  if (renderer->need_cr >= 2) {
    return;
  }
  renderer->need_cr = 2;
}
// Append `source` to the renderer's output buffer.
//  - First emits any pending line breaks recorded in need_cr.
//  - Writes the prefix at the start of every line.
//  - When `wrap` is set (and line breaks are allowed), collapses runs of
//    spaces and remembers break opportunities so that over-long lines can be
//    re-broken at the last breakable position.
//  - Characters are written verbatim for LITERAL escaping and handed to the
//    renderer's outc callback otherwise.
// Rendering of the string stops silently at the first invalid UTF-8 sequence.
static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
cmark_escaping escape) {
int length = strlen(source);
unsigned char nextc;
int32_t c;
int i = 0;
int last_nonspace;
int len;
cmark_chunk remainder = cmark_chunk_literal("");
int k = renderer->buffer->size - 1;
wrap = wrap && !renderer->no_linebreaks;
// Inside a tight list item a blank line is reduced to a single break.
if (renderer->in_tight_list_item && renderer->need_cr > 1) {
renderer->need_cr = 1;
}
// Flush pending line breaks. Newlines already at the end of the buffer
// count towards the requested number (k scans backwards over them).
while (renderer->need_cr) {
if (k < 0 || renderer->buffer->ptr[k] == '\n') {
k -= 1;
} else {
cmark_strbuf_putc(renderer->buffer, '\n');
if (renderer->need_cr > 1) {
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
renderer->prefix->size);
}
}
renderer->column = 0;
renderer->last_breakable = 0;
renderer->begin_line = true;
renderer->begin_content = true;
renderer->need_cr -= 1;
}
while (i < length) {
if (renderer->begin_line) {
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
renderer->prefix->size);
// note: this assumes prefix is ascii:
renderer->column = renderer->prefix->size;
}
// Decode one code point; `len` is the number of bytes it occupies.
len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c);
if (len == -1) { // error condition
return; // return without rendering rest of string
}
// Byte following the current code point (the terminating NUL at the end).
nextc = source[i + len];
if (c == 32 && wrap) {
// A space in wrap mode: dropped at the start of a line, otherwise
// written once and recorded as a possible break position.
if (!renderer->begin_line) {
last_nonspace = renderer->buffer->size;
cmark_strbuf_putc(renderer->buffer, ' ');
renderer->column += 1;
renderer->begin_line = false;
renderer->begin_content = false;
// skip following spaces
while (source[i + 1] == ' ') {
i++;
}
// We don't allow breaks that make a digit the first character
// because this causes problems with commonmark output.
if (!cmark_isdigit(source[i + 1])) {
renderer->last_breakable = last_nonspace;
}
}
} else if (c == 10) {
// Explicit newline in the source string.
cmark_strbuf_putc(renderer->buffer, '\n');
renderer->column = 0;
renderer->begin_line = true;
renderer->begin_content = true;
renderer->last_breakable = 0;
} else if (escape == LITERAL) {
cmark_render_code_point(renderer, c);
renderer->begin_line = false;
// we don't set 'begin_content' to false til we've
// finished parsing a digit. Reason: in commonmark
// we need to escape a potential list marker after
// a digit:
renderer->begin_content =
renderer->begin_content && cmark_isdigit(c) == 1;
} else {
// Escaped output is delegated to the format-specific callback.
(renderer->outc)(renderer, escape, c, nextc);
renderer->begin_line = false;
renderer->begin_content =
renderer->begin_content && cmark_isdigit(c) == 1;
}
// If adding the character went beyond width, look for an
// earlier place where the line could be broken:
if (renderer->width > 0 && renderer->column > renderer->width &&
!renderer->begin_line && renderer->last_breakable > 0) {
// copy from last_breakable to remainder
cmark_chunk_set_cstr(renderer->mem, &remainder,
(char *)renderer->buffer->ptr +
renderer->last_breakable + 1);
// truncate at last_breakable
cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable);
// add newline, prefix, and remainder
cmark_strbuf_putc(renderer->buffer, '\n');
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
renderer->prefix->size);
cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len);
renderer->column = renderer->prefix->size + remainder.len;
cmark_chunk_free(renderer->mem, &remainder);
renderer->last_breakable = 0;
renderer->begin_line = false;
renderer->begin_content = false;
}
i += len;
}
}
// Append an ASCII string without escaping and advance the column by the
// number of bytes written. Assumes no newlines, assumes ascii content.
void cmark_render_ascii(cmark_renderer *renderer, const char *s) {
  int size_before = renderer->buffer->size;
  int written;

  cmark_strbuf_puts(renderer->buffer, s);
  written = renderer->buffer->size - size_before;
  renderer->column += written;
}
// Append one code point, UTF-8 encoded, and advance the column by one.
void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) {
  cmark_strbuf *sink = renderer->buffer;
  cmark_utf8proc_encode_char(c, sink);
  ++renderer->column;
}
// Render the tree rooted at `root` to a newly allocated string.
//   options     - passed through to `render_node`.
//   width       - wrap column for the renderer; 0 or less disables wrapping.
//   outc        - callback that writes one escaped code point.
//   render_node - callback invoked for every iterator event; returning 0
//                 skips the rest of the node's contents.
// The result always ends with a newline and is owned by the caller.
char *cmark_render(cmark_node *root, int options, int width,
                   void (*outc)(cmark_renderer *, cmark_escaping, int32_t,
                                unsigned char),
                   int (*render_node)(cmark_renderer *renderer,
                                      cmark_node *node,
                                      cmark_event_type ev_type, int options)) {
  cmark_mem *mem = cmark_node_mem(root);
  cmark_strbuf pref = CMARK_BUF_INIT(mem);
  cmark_strbuf buf = CMARK_BUF_INIT(mem);
  cmark_node *cur;
  cmark_event_type ev_type;
  char *result;
  cmark_iter *iter = cmark_iter_new(root);

  cmark_renderer renderer = {mem,   &buf, &pref, 0,           width,
                             0,     0,    true,  true,        false,
                             false, outc, S_cr,  S_blankline, S_out};

  while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
    cur = cmark_iter_get_node(iter);
    if (!render_node(&renderer, cur, ev_type, options)) {
      // a false value causes us to skip processing
      // the node's contents.  this is used for
      // autolinks.
      cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT);
    }
  }

  // ensure final newline. An empty buffer has no last byte to inspect:
  // indexing ptr[size - 1] with size == 0 would read out of bounds.
  if (renderer.buffer->size == 0 ||
      renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
    cmark_strbuf_putc(renderer.buffer, '\n');
  }

  result = (char *)cmark_strbuf_detach(renderer.buffer);

  cmark_iter_free(iter);
  cmark_strbuf_free(renderer.prefix);
  cmark_strbuf_free(renderer.buffer);

  return result;
}

View File

@ -1,50 +0,0 @@
#ifndef CMARK_RENDER_H
#define CMARK_RENDER_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdlib.h>
#include "buffer.h"
#include "chunk.h"
#include "memory.h"
// Escaping mode passed to the output routines. LITERAL text is written
// verbatim; the other modes are forwarded to the renderer's outc callback.
typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping;
// State shared by the generic renderer and a format-specific backend.
struct cmark_renderer {
cmark_mem *mem; // allocator
cmark_strbuf *buffer; // output accumulated so far
cmark_strbuf *prefix; // written at the start of every output line
int column; // current output column
int width; // wrap column; values <= 0 disable re-breaking
int need_cr; // pending line breaks to emit before the next output
bufsize_t last_breakable; // buffer offset of the last break opportunity (0 = none)
bool begin_line; // true while nothing has been written on the current line
bool begin_content; // true until something other than a digit has been written on the line
bool no_linebreaks; // suppresses wrapping when set
bool in_tight_list_item; // limits pending breaks to one when set
// Callbacks: write one escaped code point / request a line break /
// request a blank line / write a string.
void (*outc)(struct cmark_renderer *, cmark_escaping, int32_t, unsigned char);
void (*cr)(struct cmark_renderer *);
void (*blankline)(struct cmark_renderer *);
void (*out)(struct cmark_renderer *, const char *, bool, cmark_escaping);
};
typedef struct cmark_renderer cmark_renderer;
// Append an ASCII string (no newlines) without escaping.
void cmark_render_ascii(cmark_renderer *renderer, const char *s);
// Append a single code point, UTF-8 encoded.
void cmark_render_code_point(cmark_renderer *renderer, uint32_t c);
// Render the tree under `root` with the given callbacks; returns a newly
// allocated string.
char *cmark_render(cmark_node *root, int options, int width,
void (*outc)(cmark_renderer *, cmark_escaping, int32_t,
unsigned char),
int (*render_node)(cmark_renderer *renderer,
cmark_node *node,
cmark_event_type ev_type, int options));
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,55 +0,0 @@
#include "cmark.h"
#include "chunk.h"
#ifdef __cplusplus
extern "C" {
#endif
// Runs `scanner` on chunk `c` starting at `offset`, temporarily
// NUL-terminating the chunk. Returns the scanner's result, or 0 when the
// chunk has no data or `offset` is past its end.
bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
bufsize_t offset);
// Raw scanners: each takes a NUL-terminated string and returns 0 on no match.
// Use the scan_* macros below to apply them to a chunk.
bufsize_t _scan_scheme(const unsigned char *p);
bufsize_t _scan_autolink_uri(const unsigned char *p);
bufsize_t _scan_autolink_email(const unsigned char *p);
bufsize_t _scan_html_tag(const unsigned char *p);
bufsize_t _scan_html_block_start(const unsigned char *p);
bufsize_t _scan_html_block_start_7(const unsigned char *p);
bufsize_t _scan_html_block_end_1(const unsigned char *p);
bufsize_t _scan_html_block_end_2(const unsigned char *p);
bufsize_t _scan_html_block_end_3(const unsigned char *p);
bufsize_t _scan_html_block_end_4(const unsigned char *p);
bufsize_t _scan_html_block_end_5(const unsigned char *p);
bufsize_t _scan_link_title(const unsigned char *p);
bufsize_t _scan_spacechars(const unsigned char *p);
bufsize_t _scan_atx_heading_start(const unsigned char *p);
bufsize_t _scan_setext_heading_line(const unsigned char *p);
bufsize_t _scan_thematic_break(const unsigned char *p);
bufsize_t _scan_open_code_fence(const unsigned char *p);
bufsize_t _scan_close_code_fence(const unsigned char *p);
bufsize_t _scan_entity(const unsigned char *p);
bufsize_t _scan_dangerous_url(const unsigned char *p);
// Chunk-based wrappers: scan_X(c, n) applies _scan_X to chunk `c` starting at
// offset `n` via _scan_at().
#define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
#define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
#define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n)
#define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n)
#define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n)
#define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n)
#define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n)
#define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n)
#define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n)
#define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n)
#define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n)
#define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n)
#define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n)
#define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n)
#define scan_setext_heading_line(c, n) \
_scan_at(&_scan_setext_heading_line, c, n)
#define scan_thematic_break(c, n) _scan_at(&_scan_thematic_break, c, n)
#define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
#define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
#define scan_entity(c, n) _scan_at(&_scan_entity, c, n)
#define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n)
#ifdef __cplusplus
}
#endif

View File

@ -1,320 +0,0 @@
#include <stdlib.h>
#include "chunk.h"
#include "scanners.h"
// Run `scanner` on chunk `c` starting at `offset`.
// The byte just past the chunk is temporarily overwritten with NUL so the
// scanner sees a terminated string, and is restored afterwards.
// Returns the scanner's result, or 0 when the chunk has no data or `offset`
// lies beyond its length.
bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
{
  unsigned char *base = (unsigned char *)c->data;
  unsigned char saved;
  bufsize_t matched;

  if (base == NULL || offset > c->len) {
    return 0;
  }

  saved = base[c->len];
  base[c->len] = '\0';
  matched = scanner(base + offset);
  base[c->len] = saved;

  return matched;
}
// Shared re2c configuration and named patterns used by the scanner functions
// below. The scanners read through pointer `p`, use `marker` for
// backtracking, and never refill their input (yyfill is disabled).
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
re2c:define:YYMARKER = marker;
re2c:define:YYCTXMARKER = marker;
re2c:yyfill:enable = 0;
wordchar = [^\x00-\x20];
spacechar = [ \t\v\f\r\n];
reg_char = [^\\()\x00-\x20];
escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-];
tagname = [A-Za-z][A-Za-z0-9-]*;
blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul';
attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*;
unquotedvalue = [^ \t\r\n\v\f"'=<>`\x00]+;
singlequotedvalue = ['][^'\x00]*['];
doublequotedvalue = ["][^"\x00]*["];
attributevalue = unquotedvalue | singlequotedvalue | doublequotedvalue;
attributevaluespec = spacechar* [=] spacechar* attributevalue;
attribute = spacechar+ attributename attributevaluespec?;
opentag = tagname attribute* spacechar* [/]? [>];
closetag = [/] tagname spacechar* [>];
htmlcomment = "!---->" | ("!--" ([-]? [^\x00>-]) ([-]? [^\x00-])* "-->");
processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00] | [>])* "?>";
declaration = "!" [A-Z]+ spacechar+ [^>\x00]* ">";
cdata = "![CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])* "]]>";
htmltag = opentag | closetag | htmlcomment | processinginstruction |
declaration | cdata;
in_parens_nosp = [(] (reg_char|escaped_char|[\\])* [)];
in_double_quotes = ["] (escaped_char|[^"\x00])* ["];
in_single_quotes = ['] (escaped_char|[^'\x00])* ['];
in_parens = [(] (escaped_char|[^)\x00])* [)];
scheme = [A-Za-z][A-Za-z0-9.+-]{1,31};
*/
// Try to match a URI scheme followed by a colon at the start of `p`.
// Returns the number of bytes matched (colon included), or 0 on no match.
bufsize_t _scan_scheme(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
scheme [:] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match a URI autolink; `p` points just after the opening '<'.
// Returns the number of bytes matched (closing '>' included), or 0 on no
// match.
bufsize_t _scan_autolink_uri(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an email autolink; `p` points just after the opening '<'.
// Returns the number of bytes matched (closing '>' included), or 0 on no
// match.
bufsize_t _scan_autolink_email(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
[@]
[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
[>] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an inline HTML construct after the first '<'.
// `htmltag` covers an open tag, a closing tag, a comment, a processing
// instruction, a declaration or a CDATA section.
// Returns the number of bytes matched, or 0 if nothing matches.
bufsize_t _scan_html_tag(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
htmltag { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match the start line of an HTML block, returning an integer code
// for the type of block (1-6, matching the spec) or 0 if nothing matches:
//   1  '<' + script/pre/style followed by a space character or '>'
//   2  '<!--'
//   3  '<?'
//   4  '<!' followed by an uppercase ASCII letter
//   5  '<![CDATA['
//   6  '<' or '</' + a block tag name followed by a space character,
//      '>' or '/>'
// Single-quoted re2c literals match case-insensitively.
// Type 7 is handled by a separate function, _scan_html_block_start_7.
// Note that the return value is a type code, not a length.
bufsize_t _scan_html_block_start(const unsigned char *p)
{
const unsigned char *marker = NULL;
/*!re2c
[<] ('script'|'pre'|'style') (spacechar | [>]) { return 1; }
'<!--' { return 2; }
'<?' { return 3; }
'<!' [A-Z] { return 4; }
'<![CDATA[' { return 5; }
[<] [/]? blocktagname (spacechar | [/]? [>]) { return 6; }
* { return 0; }
*/
}
// Try to match an HTML block start line of type 7: '<' followed by a
// complete open or closing tag, then only tabs, newlines, form feeds or
// spaces up to a CR or LF.
// Returns 7 if successful, 0 if not (a type code, not a length).
bufsize_t _scan_html_block_start_7(const unsigned char *p)
{
const unsigned char *marker = NULL;
/*!re2c
[<] (opentag | closetag) [\t\n\f ]* [\r\n] { return 7; }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 1: any bytes other than
// LF and NUL, followed by a closing </script>, </pre> or </style> tag
// (tag name matched case-insensitively).
// Returns the number of bytes matched through the closing '>', or 0.
bufsize_t _scan_html_block_end_1(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* [<] [/] ('script'|'pre'|'style') [>] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 2 (comment): any bytes
// other than LF and NUL, followed by '-->'.
// Returns the number of bytes matched through '-->', or 0.
bufsize_t _scan_html_block_end_2(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* '-->' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 3 (processing instruction):
// any bytes other than LF and NUL, followed by '?>'.
// Returns the number of bytes matched through '?>', or 0.
bufsize_t _scan_html_block_end_3(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* '?>' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 4 (declaration): any bytes
// other than LF and NUL, followed by '>'.
// Returns the number of bytes matched through '>', or 0.
bufsize_t _scan_html_block_end_4(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* '>' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match an HTML block end line of type 5 (CDATA): any bytes other
// than LF and NUL, followed by ']]>'.
// Returns the number of bytes matched through ']]>', or 0.
bufsize_t _scan_html_block_end_5(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[^\n\x00]* ']]>' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Try to match a link title delimited by double quotes, single quotes or
// parentheses, returning the number of bytes matched (both delimiters
// included) or 0.
// Inside the title any byte except NUL and the closing delimiter is
// accepted literally; the closing delimiter can only appear via
// `escaped_char`. A double-quoted title may therefore contain unescaped
// single quotes, and vice versa.
bufsize_t _scan_link_title(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); }
['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); }
[(] (escaped_char|[^)\x00])* [)] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Match one or more space characters (space, tab, vertical tab, form feed,
// CR, LF), returning the number of bytes matched or 0.
// NOTE(review): the trailing backslash after the `start` declaration looks
// like a leftover from a macro-based version of this file; it only splices
// the next line onto this one. Confirm before removing.
bufsize_t _scan_spacechars(const unsigned char *p)
{
const unsigned char *start = p; \
/*!re2c
[ \t\v\f\r\n]+ { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Match an ATX heading start: 1-6 '#' characters followed either by one or
// more spaces/tabs or by a single CR or LF (empty heading).
// Returns the number of bytes matched, or 0.
bufsize_t _scan_atx_heading_start(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[#]{1,6} ([ \t]+|[\r\n]) { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Match a setext heading underline: a run of '=' or a run of '-', then
// optional spaces/tabs, then CR or LF.
// Returns 1 for a level-1 heading ('='), 2 for level-2 ('-'), 0 for no
// match. The return value is a heading level, not a length.
bufsize_t _scan_setext_heading_line(const unsigned char *p)
{
const unsigned char *marker = NULL;
/*!re2c
[=]+ [ \t]* [\r\n] { return 1; }
[-]+ [ \t]* [\r\n] { return 2; }
* { return 0; }
*/
}
// Scan a thematic break line: "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
// All markers on the line must be the same character; spaces and tabs may
// follow each marker, and the line must end with CR or LF.
// Returns the number of bytes matched (line-ending byte included), or 0.
bufsize_t _scan_thematic_break(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
([*][ \t]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
([_][ \t]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
([-][ \t]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Scan an opening code fence: three or more backticks or three or more
// tildes. The part after '/' is re2c trailing context, so the rest of the
// line must match but is not part of the reported length. For a backtick
// fence the rest of the line may not contain a backtick; a tilde fence has
// no such restriction. The line must end with CR or LF.
// Returns the length of the fence itself, or 0.
bufsize_t _scan_open_code_fence(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
[~]{3,} / [^\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Scan a closing code fence: three or more backticks or tildes followed
// only by spaces/tabs up to CR or LF (trailing context, not counted).
// Returns the length of the fence itself, or 0.
// NOTE(review): this function takes no length argument; comparing the
// result against the opening fence length is presumably left to the
// caller. Confirm.
bufsize_t _scan_close_code_fence(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
[~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Scans an entity reference starting at '&'. Accepted forms:
//   &#x<1-6 hex digits>;   (the 'x' may be upper or lower case)
//   &#<1-7 decimal digits>;
//   &<letter><1-31 letters or digits>;
// Only the syntax is checked here, not whether a named entity exists.
// Returns number of chars matched (through ';'), or 0.
bufsize_t _scan_entity(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
[&] ([#] ([Xx][A-Fa-f0-9]{1,6}|[0-9]{1,7}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
{ return (bufsize_t)(p - start); }
* { return 0; }
*/
}
// Returns a positive value (the length of the matched prefix) if a URL
// begins in a way that is potentially dangerous: javascript:, vbscript:,
// file:, or data:. Otherwise returns 0.
// Exception: data:image/png, data:image/gif, data:image/jpeg and
// data:image/webp are treated as safe and return 0.
// All prefixes are matched case-insensitively (single-quoted re2c
// literals).
bufsize_t _scan_dangerous_url(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
'data:image/' ('png'|'gif'|'jpeg'|'webp') { return 0; }
'javascript:' | 'vbscript:' | 'file:' | 'data:' { return (bufsize_t)(p - start); }
* { return 0; }
*/
}

View File

@ -1,317 +0,0 @@
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include "cmark_ctype.h"
#include "utf8.h"
// Maps the first byte of a UTF-8 sequence to the total sequence length:
//   0x00-0x7F -> 1 (ASCII)
//   0x80-0xBF -> 0 (continuation byte, invalid as a lead byte)
//   0xC0-0xDF -> 2
//   0xE0-0xEF -> 3
//   0xF0-0xF7 -> 4
//   0xF8-0xFF -> 0 (never valid)
static const int8_t utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
// Append the three bytes EF BF BD (the UTF-8 encoding of U+FFFD,
// REPLACEMENT CHARACTER) to buf.
static void encode_unknown(cmark_strbuf *buf) {
  static const uint8_t replacement_char[] = {0xEF, 0xBF, 0xBD};

  cmark_strbuf_put(buf, replacement_char, 3);
}
// Determine the length of the UTF-8 sequence starting at str.
// Returns:
//    0  if str_len is 0
//   >0  the sequence length when the lead byte is valid and every
//       continuation byte has the form 10xxxxxx
//   -1  if the lead byte is not a valid lead byte
//   -n  (n = str_len) if the sequence is truncated by the end of input
//   -i  if byte i is not a continuation byte
// A negative str_len disables the truncation check.
static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
  int expected;
  int pos;

  if (str_len == 0)
    return 0;

  expected = utf8proc_utf8class[str[0]];
  if (expected == 0)
    return -1;

  if (str_len >= 0 && str_len < (bufsize_t)expected)
    return -str_len;

  for (pos = 1; pos < expected; pos++) {
    if ((str[pos] & 0xC0) != 0x80)
      return -pos;
  }

  return expected;
}
// Validate a single UTF-8 character according to RFC 3629.
// Returns the sequence length (1-4) when valid. Otherwise returns a
// negative number whose magnitude is the count of bytes to treat as
// invalid: -1 for a bad lead byte, -str_len for a truncated sequence,
// -i when byte i is not a continuation byte, and -length for overlong
// encodings, surrogates and values above U+10FFFF.
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
  const uint8_t lead = str[0];
  const int length = utf8proc_utf8class[lead];
  int k;

  if (length == 0)
    return -1;

  if (str_len < (bufsize_t)length)
    return -str_len;

  // Every byte after the lead must look like 10xxxxxx.
  for (k = 1; k < length; k++) {
    if ((str[k] & 0xC0) != 0x80)
      return -k;
  }

  if (length == 2) {
    // Overlong: C0 and C1 can only encode values below 0x80.
    if (lead < 0xC2)
      return -length;
  } else if (length == 3) {
    // Overlong three-byte form.
    if (lead == 0xE0 && str[1] < 0xA0)
      return -length;
    // UTF-16 surrogate range U+D800..U+DFFF.
    if (lead == 0xED && str[1] >= 0xA0)
      return -length;
  } else if (length == 4) {
    // Overlong four-byte form.
    if (lead == 0xF0 && str[1] < 0x90)
      return -length;
    // Above 0x10FFFF.
    if (lead >= 0xF4 && (lead > 0xF4 || str[1] >= 0x90))
      return -length;
  }

  return length;
}
// Copy `size` bytes from `line` into `ob`, replacing every invalid UTF-8
// sequence and every NUL byte with U+FFFD (via encode_unknown).
// Valid input is copied in runs rather than byte by byte.
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
bufsize_t size) {
bufsize_t i = 0;
while (i < size) {
// org marks the start of the current run of valid bytes.
bufsize_t org = i;
// Number of bytes to skip after emitting a replacement character.
int charlen = 0;
while (i < size) {
if (line[i] < 0x80 && line[i] != 0) {
i++;
} else if (line[i] >= 0x80) {
charlen = utf8proc_valid(line + i, size - i);
if (charlen < 0) {
// utf8proc_valid reports the invalid byte count as a negative number.
charlen = -charlen;
break;
}
i += charlen;
} else if (line[i] == 0) {
// ASCII NUL is technically valid but rejected
// for security reasons.
charlen = 1;
break;
}
}
// Flush the valid run collected so far.
if (i > org) {
cmark_strbuf_put(ob, line + org, i - org);
}
if (i >= size) {
break;
} else {
// Invalid UTF-8
encode_unknown(ob);
i += charlen;
}
}
}
// Decode the UTF-8 sequence at the start of str into *dst.
// Returns the number of bytes consumed on success. On failure (malformed,
// truncated, overlong, surrogate, out-of-range or empty input) returns -1
// and leaves *dst set to -1.
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
                           int32_t *dst) {
  int32_t codepoint = -1;
  int nbytes;

  *dst = -1;

  nbytes = utf8proc_charlen(str, str_len);
  if (nbytes < 0)
    return -1;

  if (nbytes == 1) {
    codepoint = str[0];
  } else if (nbytes == 2) {
    codepoint = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
    if (codepoint < 0x80)
      codepoint = -1; // overlong
  } else if (nbytes == 3) {
    codepoint =
        ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
    // Reject overlong forms and surrogates.
    if (codepoint < 0x800 || (codepoint >= 0xD800 && codepoint < 0xE000))
      codepoint = -1;
  } else if (nbytes == 4) {
    codepoint = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +
                ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
    // Reject overlong forms and values beyond U+10FFFF.
    if (codepoint < 0x10000 || codepoint >= 0x110000)
      codepoint = -1;
  }

  // nbytes == 0 (empty input) leaves codepoint at -1 and fails here too.
  if (codepoint < 0)
    return -1;

  *dst = codepoint;
  return nbytes;
}
// Append the UTF-8 encoding of code point uc to buf.
// uc must be non-negative (asserted). Values of 0x110000 and above are
// written as U+FFFD.
// NOTE(review): U+FFFF and U+FFFE are written as the single bytes 0xFF and
// 0xFE, which are not valid UTF-8. Presumably intentional; confirm.
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
  uint8_t bytes[4];
  bufsize_t count;

  assert(uc >= 0);

  if (uc >= 0x110000) {
    encode_unknown(buf);
    return;
  }

  if (uc < 0x80) {
    bytes[0] = (uint8_t)uc;
    count = 1;
  } else if (uc < 0x800) {
    bytes[0] = (uint8_t)(0xC0 | (uc >> 6));
    bytes[1] = (uint8_t)(0x80 | (uc & 0x3F));
    count = 2;
  } else if (uc == 0xFFFF) {
    bytes[0] = 0xFF;
    count = 1;
  } else if (uc == 0xFFFE) {
    bytes[0] = 0xFE;
    count = 1;
  } else if (uc < 0x10000) {
    bytes[0] = (uint8_t)(0xE0 | (uc >> 12));
    bytes[1] = (uint8_t)(0x80 | ((uc >> 6) & 0x3F));
    bytes[2] = (uint8_t)(0x80 | (uc & 0x3F));
    count = 3;
  } else {
    bytes[0] = (uint8_t)(0xF0 | (uc >> 18));
    bytes[1] = (uint8_t)(0x80 | ((uc >> 12) & 0x3F));
    bytes[2] = (uint8_t)(0x80 | ((uc >> 6) & 0x3F));
    bytes[3] = (uint8_t)(0x80 | (uc & 0x3F));
    count = 4;
  }

  cmark_strbuf_put(buf, bytes, count);
}
// Append a case-folded copy of the `len` bytes of UTF-8 at `str` to `dest`.
// Each character is decoded with cmark_utf8proc_iterate; undecodable input
// is written as U+FFFD and skipped one byte at a time.
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
bufsize_t len) {
int32_t c;
// bufpush is the output hook for the included switch.
// NOTE(review): the macro is never #undef'd, so it stays defined for the
// rest of the translation unit.
#define bufpush(x) cmark_utf8proc_encode_char(x, dest)
while (len > 0) {
bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
if (char_len >= 0) {
// Presumably a generated `switch (c)` that calls bufpush() with the folded
// code point(s); its contents are not visible here.
#include "case_fold_switch.inc"
} else {
encode_unknown(dest);
// cmark_utf8proc_iterate fails with -1, so this advances by one byte.
char_len = -char_len;
}
str += char_len;
len -= char_len;
}
}
// Returns 1 if uc is in the Unicode Zs class or is one of TAB (9), LF (10),
// FF (12), CR (13); returns 0 otherwise.
int cmark_utf8proc_is_space(int32_t uc) {
  switch (uc) {
  case 9:     // TAB
  case 10:    // LF
  case 12:    // FF
  case 13:    // CR
  case 32:    // SPACE
  case 160:   // NO-BREAK SPACE
  case 5760:  // OGHAM SPACE MARK
  case 8239:  // NARROW NO-BREAK SPACE
  case 8287:  // MEDIUM MATHEMATICAL SPACE
  case 12288: // IDEOGRAPHIC SPACE
    return 1;
  default:
    // U+2000..U+200A: the fixed-width spaces.
    return uc >= 8192 && uc <= 8202;
  }
}
// Returns non-zero if uc is a punctuation character: anything in the
// Unicode P[cdefios] classes. For uc < 128 the decision is delegated to
// cmark_ispunct; above that, the code points and ranges are listed
// explicitly in ascending order.
// NOTE(review): the list looks generated from Unicode data; regenerate it
// rather than editing by hand.
int cmark_utf8proc_is_punctuation(int32_t uc) {
return (
(uc < 128 && cmark_ispunct((char)uc)) || uc == 161 || uc == 167 ||
uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 ||
uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 ||
uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 ||
uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 ||
uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 ||
(uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) ||
(uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 ||
uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 ||
uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) ||
uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 ||
(uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 ||
(uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) ||
uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 ||
(uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 ||
(uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) ||
(uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 ||
uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) ||
(uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) ||
(uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 ||
(uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) ||
(uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) ||
(uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 ||
uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 ||
(uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 ||
(uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) ||
(uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 ||
(uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 ||
uc == 11632 || (uc >= 11776 && uc <= 11822) ||
(uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) ||
(uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 ||
uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 ||
uc == 42622 || (uc >= 42738 && uc <= 42743) ||
(uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 ||
(uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 ||
uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 ||
uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 ||
uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 ||
uc == 64831 || (uc >= 65040 && uc <= 65049) ||
(uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) ||
uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 ||
(uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) ||
(uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 ||
uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) ||
uc == 65343 || uc == 65371 || uc == 65373 ||
(uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) ||
uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 ||
uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 ||
(uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) ||
(uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) ||
uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) ||
(uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 ||
(uc >= 70085 && uc <= 70088) || uc == 70093 ||
(uc >= 70200 && uc <= 70205) || uc == 70854 ||
(uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) ||
(uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 ||
uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
uc == 113823);
}

View File

@ -1,24 +0,0 @@
// UTF-8 helpers: validation, decoding, encoding, case folding and
// character classification.
#ifndef CMARK_UTF8_H
#define CMARK_UTF8_H
#include <stdint.h>
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
// Append a case-folded copy of the len bytes at str to dest; undecodable
// bytes are written as U+FFFD.
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
bufsize_t len);
// Append the UTF-8 encoding of code point uc (must be >= 0) to buf.
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
// Decode one code point from str into *dst. Returns the number of bytes
// consumed, or -1 (with *dst == -1) if the input is not a valid sequence.
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
// Copy size bytes from line to dest, replacing invalid UTF-8 and NUL bytes
// with U+FFFD.
void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
bufsize_t size);
// Non-zero if uc is in the Zs class or is TAB, LF, FF or CR.
int cmark_utf8proc_is_space(int32_t uc);
// Non-zero if uc is in one of the P[cdefios] punctuation classes.
int cmark_utf8proc_is_punctuation(int32_t uc);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,170 +0,0 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "config.h"
#include "cmark.h"
#include "node.h"
#include "buffer.h"
#include "houdini.h"
#define BUFFER_SIZE 100
// Functions to convert cmark_nodes to XML strings.
// Append `length` bytes of `source` to `dest`, escaped for use in XML text
// or attribute values. Delegates to houdini_escape_html0.
// NOTE(review): the final argument 0 is presumably houdini's "secure"
// flag (disabled); confirm against houdini.h.
static void escape_xml(cmark_strbuf *dest, const unsigned char *source,
bufsize_t length) {
houdini_escape_html0(dest, source, length, 0);
}
// State threaded through the XML renderer while walking the node tree.
struct render_state {
// Output buffer that receives the generated XML.
cmark_strbuf *xml;
// Current indentation in spaces; changes by 2 per nesting level.
int indent;
};
// Write state->indent space characters to the output buffer.
static CMARK_INLINE void indent(struct render_state *state) {
  int remaining = state->indent;

  while (remaining-- > 0) {
    cmark_strbuf_putc(state->xml, ' ');
  }
}
// Emit the XML for one iterator event on `node`.
// On ENTER: writes the indented opening tag, optional sourcepos attribute
// and type-specific attributes. Literal nodes (text, code, HTML, code
// block) are written in full, with escaped content and closing tag. Other
// childless nodes are self-closed with " />".
// On EXIT: writes the closing tag, but only for nodes that have children.
// Always returns 1.
static int S_render_node(cmark_node *node, cmark_event_type ev_type,
struct render_state *state, int options) {
cmark_strbuf *xml = state->xml;
// Set when the node's content and "</name" were already written below.
bool literal = false;
cmark_delim_type delim;
bool entering = (ev_type == CMARK_EVENT_ENTER);
// Scratch space for snprintf-formatted attributes.
char buffer[BUFFER_SIZE];
if (entering) {
indent(state);
cmark_strbuf_putc(xml, '<');
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
// Source positions are only emitted when requested and known.
if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
node->start_line, node->start_column, node->end_line,
node->end_column);
cmark_strbuf_puts(xml, buffer);
}
literal = false;
switch (node->type) {
case CMARK_NODE_DOCUMENT:
cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
break;
case CMARK_NODE_TEXT:
case CMARK_NODE_CODE:
case CMARK_NODE_HTML_BLOCK:
case CMARK_NODE_HTML_INLINE:
// Literal nodes: content goes inline, whitespace preserved. Only "</name"
// is written here; the shared ">\n" below completes the closing tag.
cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
escape_xml(xml, node->as.literal.data, node->as.literal.len);
cmark_strbuf_puts(xml, "</");
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
literal = true;
break;
case CMARK_NODE_LIST:
switch (cmark_node_get_list_type(node)) {
case CMARK_ORDERED_LIST:
cmark_strbuf_puts(xml, " type=\"ordered\"");
snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",
cmark_node_get_list_start(node));
cmark_strbuf_puts(xml, buffer);
delim = cmark_node_get_list_delim(node);
if (delim == CMARK_PAREN_DELIM) {
cmark_strbuf_puts(xml, " delim=\"paren\"");
} else if (delim == CMARK_PERIOD_DELIM) {
cmark_strbuf_puts(xml, " delim=\"period\"");
}
break;
case CMARK_BULLET_LIST:
cmark_strbuf_puts(xml, " type=\"bullet\"");
break;
default:
break;
}
// The tight attribute is written for every list type.
snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",
(cmark_node_get_list_tight(node) ? "true" : "false"));
cmark_strbuf_puts(xml, buffer);
break;
case CMARK_NODE_HEADING:
snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
cmark_strbuf_puts(xml, buffer);
break;
case CMARK_NODE_CODE_BLOCK:
// The info string is optional; the code itself is written like a literal.
if (node->as.code.info.len > 0) {
cmark_strbuf_puts(xml, " info=\"");
escape_xml(xml, node->as.code.info.data, node->as.code.info.len);
cmark_strbuf_putc(xml, '"');
}
cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
escape_xml(xml, node->as.code.literal.data, node->as.code.literal.len);
cmark_strbuf_puts(xml, "</");
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
literal = true;
break;
case CMARK_NODE_CUSTOM_BLOCK:
case CMARK_NODE_CUSTOM_INLINE:
cmark_strbuf_puts(xml, " on_enter=\"");
escape_xml(xml, node->as.custom.on_enter.data,
node->as.custom.on_enter.len);
cmark_strbuf_putc(xml, '"');
cmark_strbuf_puts(xml, " on_exit=\"");
escape_xml(xml, node->as.custom.on_exit.data,
node->as.custom.on_exit.len);
cmark_strbuf_putc(xml, '"');
break;
case CMARK_NODE_LINK:
case CMARK_NODE_IMAGE:
cmark_strbuf_puts(xml, " destination=\"");
escape_xml(xml, node->as.link.url.data, node->as.link.url.len);
cmark_strbuf_putc(xml, '"');
cmark_strbuf_puts(xml, " title=\"");
escape_xml(xml, node->as.link.title.data, node->as.link.title.len);
cmark_strbuf_putc(xml, '"');
break;
default:
break;
}
// Container with children: indent them. Empty non-literal node: self-close.
if (node->first_child) {
state->indent += 2;
} else if (!literal) {
cmark_strbuf_puts(xml, " /");
}
cmark_strbuf_puts(xml, ">\n");
} else if (node->first_child) {
// Leaving a container: undo the indent and write its closing tag.
state->indent -= 2;
indent(state);
cmark_strbuf_puts(xml, "</");
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
cmark_strbuf_puts(xml, ">\n");
}
return 1;
}
// Render the tree rooted at `root` as a CommonMark XML document.
// Writes the XML declaration and DOCTYPE, then walks the tree with an
// iterator and emits one element per node. `options` is passed through to
// the per-node renderer. Returns the detached buffer contents; the buffer
// is allocated with the root node's memory allocator.
char *cmark_render_xml(cmark_node *root, int options) {
  cmark_strbuf xml = CMARK_BUF_INIT(cmark_node_mem(root));
  struct render_state state = {&xml, 0};
  cmark_iter *iter = cmark_iter_new(root);
  cmark_event_type ev_type;
  char *rendered;

  cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  cmark_strbuf_puts(state.xml,
                    "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");

  for (ev_type = cmark_iter_next(iter); ev_type != CMARK_EVENT_DONE;
       ev_type = cmark_iter_next(iter)) {
    S_render_node(cmark_iter_get_node(iter), ev_type, &state, options);
  }

  rendered = (char *)cmark_strbuf_detach(&xml);
  cmark_iter_free(iter);
  return rendered;
}

281
include/hoedown/autolink.c Normal file
View File

@ -0,0 +1,281 @@
#include "autolink.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#ifndef _MSC_VER
#include <strings.h>
#else
#define strncasecmp _strnicmp
#endif
/* hoedown_autolink_is_safe: verify that a URL has a safe protocol.
 *
 * Returns 1 when `data` starts (case-insensitively) with one of the
 * whitelisted prefixes AND the byte right after the prefix is
 * alphanumeric; returns 0 otherwise. `size` is the number of bytes
 * available at `data`. */
int
hoedown_autolink_is_safe(const uint8_t *data, size_t size)
{
    static const char *const prefixes[] = {
        "http://", "https://", "/", "#", "ftp://", "mailto:"
    };
    static const size_t prefix_len[] = { 7, 8, 1, 1, 6, 7 };
    const size_t prefix_count = sizeof(prefixes) / sizeof(prefixes[0]);
    size_t k;

    for (k = 0; k < prefix_count; ++k) {
        const size_t len = prefix_len[k];

        /* need at least one byte after the prefix */
        if (size <= len)
            continue;
        if (strncasecmp((const char *)data, prefixes[k], len) != 0)
            continue;
        if (isalnum(data[len]))
            return 1;
    }

    return 0;
}
/* autolink_delim: trim trailing delimiters from a candidate link.
 *
 * data       - start of the candidate link
 * link_end   - tentative length of the link
 * max_rewind - unused
 * size       - unused
 *
 * The link is cut at the first '<', then trailing "?!.,:" characters and
 * trailing HTML entities ("&name;") are stripped. If the link then ends
 * in a closing quote or bracket that is unbalanced within the link, that
 * character is dropped as well.
 *
 * Returns the adjusted link length, or 0 if nothing is left. */
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
    uint8_t cclose, copen = 0;
    size_t i;

    (void)max_rewind;
    (void)size;

    for (i = 0; i < link_end; ++i)
        if (data[i] == '<') {
            link_end = i;
            break;
        }

    while (link_end > 0) {
        if (strchr("?!.,:", data[link_end - 1]) != NULL)
            link_end--;

        else if (data[link_end - 1] == ';') {
            size_t new_end;

            /* A lone ';' at the very start cannot close an entity.
             * Without this guard `link_end - 2` would wrap around and
             * the scan below would read outside the buffer. */
            if (link_end < 2) {
                link_end--;
                continue;
            }

            new_end = link_end - 2;

            while (new_end > 0 && isalpha(data[new_end]))
                new_end--;

            /* strip the whole "&name;" only if an '&' precedes the name */
            if (new_end < link_end - 2 && data[new_end] == '&')
                link_end = new_end;
            else
                link_end--;
        }
        else break;
    }

    if (link_end == 0)
        return 0;

    cclose = data[link_end - 1];

    switch (cclose) {
    case '"': copen = '"'; break;
    case '\'': copen = '\''; break;
    case ')': copen = '('; break;
    case ']': copen = '['; break;
    case '}': copen = '{'; break;
    }

    if (copen != 0) {
        size_t closing = 0;
        size_t opening = 0;

        /* Try to close the final punctuation sign in this same line;
         * if we managed to close it outside of the URL, that means that it's
         * not part of the URL. If it closes inside the URL, that means it
         * is part of the URL.
         *
         * Examples:
         *
         * foo http://www.pokemon.com/Pikachu_(Electric) bar
         * => http://www.pokemon.com/Pikachu_(Electric)
         *
         * foo (http://www.pokemon.com/Pikachu_(Electric)) bar
         * => http://www.pokemon.com/Pikachu_(Electric)
         *
         * foo http://www.pokemon.com/Pikachu_(Electric)) bar
         * => http://www.pokemon.com/Pikachu_(Electric))
         *
         * (foo http://www.pokemon.com/Pikachu_(Electric)) bar
         * => foo http://www.pokemon.com/Pikachu_(Electric)
         */

        for (i = 0; i < link_end; ++i) {
            if (data[i] == copen)
                opening++;
            else if (data[i] == cclose)
                closing++;
        }

        if (closing != opening)
            link_end--;
    }

    return link_end;
}
/* check_domain: measure the domain-like prefix of `data`.
 *
 * data        - candidate domain
 * size        - number of bytes available at `data`
 * allow_short - non-zero to accept domains without any '.' or ':'
 *
 * The first byte must be alphanumeric. Scanning then runs over
 * alphanumerics, '-', '.' and ':' but never examines the last byte of
 * the input.
 *
 * Returns the index where scanning stopped, or 0 if the input is not an
 * acceptable domain. */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
    size_t i, np = 0;

    /* Empty input: nothing to inspect. Also keeps `size - 1` below from
     * wrapping around. */
    if (size == 0)
        return 0;

    if (!isalnum(data[0]))
        return 0;

    for (i = 1; i < size - 1; ++i) {
        if (strchr(".:", data[i]) != NULL) np++;
        else if (!isalnum(data[i]) && data[i] != '-') break;
    }

    if (allow_short) {
        /* We don't need a valid domain in the strict sense (with at
         * least one dot), so just make sure it's composed of valid
         * domain characters and return the length of the valid
         * sequence. */
        return i;
    } else {
        /* A valid domain needs to have at least one dot.
         * That's as far as we get. */
        return np ? i : 0;
    }
}
/* hoedown_autolink__www: try to recognise a "www." link starting at data.
 *
 * rewind_p   - receives the number of bytes before `data` that belong to
 *              the link (always 0 for www links)
 * link       - buffer the link text is appended to
 * data       - points at the 'w' that triggered the check
 * max_rewind - number of bytes that may be read before `data`
 * size       - number of bytes available at `data`
 * flags      - unused
 *
 * The link must be preceded by punctuation or whitespace (or start the
 * text) and contain a domain with at least one dot. It extends to the
 * next whitespace, minus trailing delimiters.
 *
 * Returns the number of bytes consumed from `data`, or 0 if no link was
 * found. */
size_t
hoedown_autolink__www(
    size_t *rewind_p,
    hoedown_buffer *link,
    uint8_t *data,
    size_t max_rewind,
    size_t size,
    unsigned int flags)
{
    size_t link_end;

    (void)flags;

    if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
        return 0;

    if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
        return 0;

    link_end = check_domain(data, size, 0);

    if (link_end == 0)
        return 0;

    while (link_end < size && !isspace(data[link_end]))
        link_end++;

    link_end = autolink_delim(data, link_end, max_rewind, size);

    if (link_end == 0)
        return 0;

    hoedown_buffer_put(link, data, link_end);
    *rewind_p = 0;

    /* Return the size_t as-is. The previous (int) cast could truncate
     * very long links and did not match the email/url variants. */
    return link_end;
}
/* hoedown_autolink__email: try to recognise an email address around data.
 *
 * rewind_p   - receives the length of the local part found before `data`
 * link       - buffer the full address is appended to
 * data       - start of the text to scan forward from
 * max_rewind - number of bytes that may be read before `data`
 * size       - number of bytes available at `data`
 * flags      - unused
 *
 * Scans backwards over alphanumerics and ".+-_" for the local part, then
 * forwards over alphanumerics, '@', '.', '-' and '_'. The forward part
 * must contain exactly one '@' and at least one '.', and must end in a
 * letter.
 *
 * Returns the number of bytes consumed from `data`, or 0 if no address
 * was found. */
size_t
hoedown_autolink__email(
    size_t *rewind_p,
    hoedown_buffer *link,
    uint8_t *data,
    size_t max_rewind,
    size_t size,
    unsigned int flags)
{
    size_t link_end = 0, rewind = 0;
    int at_count = 0, dot_count = 0;

    /* walk backwards over the local part */
    while (rewind < max_rewind) {
        const uint8_t prev = data[-1 - rewind];

        if (!isalnum(prev) && strchr(".+-_", prev) == NULL)
            break;
        rewind++;
    }

    if (rewind == 0)
        return 0;

    /* walk forwards over '@' and the domain */
    while (link_end < size) {
        const uint8_t c = data[link_end];

        if (!isalnum(c)) {
            if (c == '@')
                at_count++;
            else if (c == '.' && link_end < size - 1)
                dot_count++;
            else if (c != '-' && c != '_')
                break;
        }
        link_end++;
    }

    if (link_end < 2 || at_count != 1 || dot_count == 0 ||
        !isalpha(data[link_end - 1]))
        return 0;

    link_end = autolink_delim(data, link_end, max_rewind, size);
    if (link_end == 0)
        return 0;

    hoedown_buffer_put(link, data - rewind, link_end + rewind);
    *rewind_p = rewind;

    return link_end;
}
/* hoedown_autolink__url: try to recognise a "scheme://..." URL around data.
 *
 * rewind_p   - receives the length of the scheme found before `data`
 * link       - buffer the full URL is appended to
 * data       - points at the first byte of "://"
 * max_rewind - number of bytes that may be read before `data`
 * size       - number of bytes available at `data`
 * flags      - HOEDOWN_AUTOLINK_SHORT_DOMAINS accepts dot-less domains
 *
 * Returns the number of bytes consumed from `data`, or 0 if no URL was
 * found. */
size_t
hoedown_autolink__url(
    size_t *rewind_p,
    hoedown_buffer *link,
    uint8_t *data,
    size_t max_rewind,
    size_t size,
    unsigned int flags)
{
    const size_t sep_len = strlen("://");
    size_t rewind = 0;
    size_t domain_len;
    size_t link_end;

    if (size < 4 || data[1] != '/' || data[2] != '/')
        return 0;

    /* the scheme is the run of letters right before "://" */
    for (; rewind < max_rewind; rewind++) {
        if (!isalpha(data[-1 - rewind]))
            break;
    }

    if (!hoedown_autolink_is_safe(data - rewind, size + rewind))
        return 0;

    domain_len = check_domain(
        data + sep_len,
        size - sep_len,
        flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS);
    if (domain_len == 0)
        return 0;

    /* extend to the next whitespace, then trim trailing delimiters */
    link_end = sep_len + domain_len;
    while (link_end < size && !isspace(data[link_end]))
        link_end++;

    link_end = autolink_delim(data, link_end, max_rewind, size);
    if (link_end == 0)
        return 0;

    hoedown_buffer_put(link, data - rewind, link_end + rewind);
    *rewind_p = rewind;

    return link_end;
}

View File

@ -0,0 +1,46 @@
/* autolink.h - versatile autolinker */
#ifndef HOEDOWN_AUTOLINK_H
#define HOEDOWN_AUTOLINK_H
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
/*************
* CONSTANTS *
*************/
/* Flags accepted by the hoedown_autolink__* functions.
 * HOEDOWN_AUTOLINK_SHORT_DOMAINS: accept domains without a dot; it is
 * only consulted by hoedown_autolink__url. */
typedef enum hoedown_autolink_flags {
HOEDOWN_AUTOLINK_SHORT_DOMAINS = (1 << 0)
} hoedown_autolink_flags;
/*************
* FUNCTIONS *
*************/
/* Common contract of the three hoedown_autolink__* functions below:
 *   rewind_p - receives the number of bytes BEFORE data that belong to
 *              the link
 *   link     - buffer the link text is appended to
 *   data     - current position in the text being scanned
 *   offset   - number of bytes that may be read before data (named
 *              max_rewind in the definitions)
 *   size     - number of bytes available at data
 * Each returns the number of bytes consumed from data, or 0 if no link
 * was recognised.
 * NOTE(review): the definitions declare `flags` as unsigned int rather
 * than hoedown_autolink_flags; confirm this is accepted by every
 * supported compiler. */
/* hoedown_autolink_is_safe: verify that a URL has a safe protocol */
int hoedown_autolink_is_safe(const uint8_t *data, size_t size);
/* hoedown_autolink__www: search for the next www link in data */
size_t hoedown_autolink__www(size_t *rewind_p, hoedown_buffer *link,
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
/* hoedown_autolink__email: search for the next email in data */
size_t hoedown_autolink__email(size_t *rewind_p, hoedown_buffer *link,
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
/* hoedown_autolink__url: search for the next URL in data */
size_t hoedown_autolink__url(size_t *rewind_p, hoedown_buffer *link,
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_AUTOLINK_H **/

308
include/hoedown/buffer.c Normal file
View File

@ -0,0 +1,308 @@
#include "buffer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* hoedown_malloc: malloc() wrapper that never returns NULL.
 * On allocation failure it prints a message to stderr and aborts. */
void *
hoedown_malloc(size_t size)
{
    void *mem = malloc(size);

    if (mem != NULL)
        return mem;

    fprintf(stderr, "Allocation failed.\n");
    abort();
}
/* hoedown_calloc: calloc() wrapper that never returns NULL.
 * On allocation failure it prints a message to stderr and aborts. */
void *
hoedown_calloc(size_t nmemb, size_t size)
{
    void *mem = calloc(nmemb, size);

    if (mem != NULL)
        return mem;

    fprintf(stderr, "Allocation failed.\n");
    abort();
}
/* hoedown_realloc: realloc() wrapper that never returns NULL.
 * On allocation failure it prints a message to stderr and aborts. */
void *
hoedown_realloc(void *ptr, size_t size)
{
    void *mem = realloc(ptr, size);

    if (mem != NULL)
        return mem;

    fprintf(stderr, "Allocation failed.\n");
    abort();
}
/* hoedown_buffer_init: initialize a caller-provided buffer as empty.
 *
 * unit         - reallocation step in bytes
 * data_realloc - used to (re)allocate the data area
 * data_free    - used to release the data area
 * buffer_free  - used by hoedown_buffer_free to release the struct
 *                itself; may be NULL */
void
hoedown_buffer_init(
    hoedown_buffer *buf,
    size_t unit,
    hoedown_realloc_callback data_realloc,
    hoedown_free_callback data_free,
    hoedown_free_callback buffer_free)
{
    assert(buf);

    /* allocator hooks */
    buf->data_realloc = data_realloc;
    buf->data_free = data_free;
    buf->buffer_free = buffer_free;

    /* no storage yet */
    buf->unit = unit;
    buf->data = NULL;
    buf->asize = 0;
    buf->size = 0;
}
/* hoedown_buffer_uninit: release the data area of a buffer that was set up
 * with hoedown_buffer_init. The struct itself is not freed, and its fields
 * are left untouched (data is not reset to NULL). */
void
hoedown_buffer_uninit(hoedown_buffer *buf)
{
assert(buf && buf->unit);
buf->data_free(buf->data);
}
/* hoedown_buffer_new: allocate and initialize an empty buffer that uses
 * hoedown_realloc/free for its data and free for the struct itself.
 * `unit` is the reallocation step in bytes. */
hoedown_buffer *
hoedown_buffer_new(size_t unit)
{
    hoedown_buffer *fresh = hoedown_malloc(sizeof *fresh);

    hoedown_buffer_init(fresh, unit, hoedown_realloc, free, free);
    return fresh;
}
/* hoedown_buffer_free: release the buffer's data and, when a buffer_free
 * callback is set, the buffer struct itself. A NULL buffer is ignored. */
void
hoedown_buffer_free(hoedown_buffer *buf)
{
    if (buf == NULL)
        return;

    assert(buf && buf->unit);

    buf->data_free(buf->data);

    if (buf->buffer_free != NULL)
        buf->buffer_free(buf);
}
/* hoedown_buffer_reset: free the buffer's data and return it to the empty
 * state (no storage, size 0). The buffer stays usable. */
void
hoedown_buffer_reset(hoedown_buffer *buf)
{
    assert(buf && buf->unit);

    buf->data_free(buf->data);

    buf->size = 0;
    buf->asize = 0;
    buf->data = NULL;
}
/* hoedown_buffer_grow: make sure at least `neosz` bytes are allocated.
 * Capacity grows in whole multiples of buf->unit; nothing happens when
 * the buffer is already large enough. */
void
hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz)
{
    size_t target;

    assert(buf && buf->unit);

    if (neosz <= buf->asize)
        return;

    /* add units until the request fits (always at least one) */
    target = buf->asize;
    do {
        target += buf->unit;
    } while (target < neosz);

    buf->data = buf->data_realloc(buf->data, target);
    buf->asize = target;
}
/* hoedown_buffer_put: append `size` raw bytes from `data` to the buffer,
 * growing it as needed. Appending zero bytes is a no-op. */
void
hoedown_buffer_put(hoedown_buffer *buf, const uint8_t *data, size_t size)
{
    assert(buf && buf->unit);

    /* Nothing to copy. Returning early also avoids calling memcpy with a
     * NULL destination when the buffer has no storage yet, which is
     * undefined behaviour even for a length of zero. */
    if (size == 0)
        return;

    if (buf->size + size > buf->asize)
        hoedown_buffer_grow(buf, buf->size + size);

    memcpy(buf->data + buf->size, data, size);
    buf->size += size;
}
/* hoedown_buffer_puts: append a NUL-terminated string (without its
 * terminator) to the buffer. */
void
hoedown_buffer_puts(hoedown_buffer *buf, const char *str)
{
    const size_t len = strlen(str);

    hoedown_buffer_put(buf, (const uint8_t *)str, len);
}
/* hoedown_buffer_putc: append a single byte to the buffer, growing it
 * when it is full. */
void
hoedown_buffer_putc(hoedown_buffer *buf, uint8_t c)
{
    assert(buf && buf->unit);

    if (buf->asize <= buf->size)
        hoedown_buffer_grow(buf, buf->size + 1);

    buf->data[buf->size++] = c;
}
/* hoedown_buffer_putf: append the remaining contents of `file` to the
 * buffer, reading buf->unit bytes at a time until end-of-file or a read
 * error. Returns ferror(file): zero on success, non-zero on error. */
int
hoedown_buffer_putf(hoedown_buffer *buf, FILE *file)
{
    size_t got;

    assert(buf && buf->unit);

    for (;;) {
        if (feof(file) || ferror(file))
            break;

        /* make room for one more chunk, then read into it */
        hoedown_buffer_grow(buf, buf->size + buf->unit);
        got = fread(buf->data + buf->size, 1, buf->unit, file);
        buf->size += got;
    }

    return ferror(file);
}
/* hoedown_buffer_set: replace the buffer contents with `size` bytes from
 * `data`, growing the buffer as needed. A size of 0 empties the buffer
 * but keeps its storage. */
void
hoedown_buffer_set(hoedown_buffer *buf, const uint8_t *data, size_t size)
{
    assert(buf && buf->unit);

    if (size > buf->asize)
        hoedown_buffer_grow(buf, size);

    /* Skip the copy for empty input: buf->data (or data) may be NULL, and
     * passing NULL to memcpy is undefined behaviour even for length 0. */
    if (size > 0)
        memcpy(buf->data, data, size);

    buf->size = size;
}
/* hoedown_buffer_sets: replace the buffer contents with a NUL-terminated
 * string (without its terminator). */
void
hoedown_buffer_sets(hoedown_buffer *buf, const char *str)
{
    const size_t len = strlen(str);

    hoedown_buffer_set(buf, (const uint8_t *)str, len);
}
/* hoedown_buffer_eq: compare the buffer contents with `size` bytes at
 * `data`. Returns 1 when both have the same length and bytes, 0
 * otherwise. */
int
hoedown_buffer_eq(const hoedown_buffer *buf, const uint8_t *data, size_t size)
{
    if (buf->size != size) return 0;

    /* Two empty ranges are equal. Deciding this here avoids handing a
     * NULL buf->data to memcmp, which is undefined behaviour even for
     * length 0. */
    if (size == 0) return 1;

    return memcmp(buf->data, data, size) == 0;
}
/* hoedown_buffer_eqs: compare the buffer contents with a NUL-terminated
 * string. Returns 1 when equal, 0 otherwise. */
int
hoedown_buffer_eqs(const hoedown_buffer *buf, const char *str)
{
    const size_t len = strlen(str);

    return hoedown_buffer_eq(buf, (const uint8_t *)str, len);
}
/* hoedown_buffer_prefix: compare the beginning of the buffer with
 * `prefix`.
 * Returns 0 when no mismatch is found before either the prefix or the
 * buffer is exhausted; otherwise returns the difference between the
 * first mismatching buffer byte and prefix character. */
int
hoedown_buffer_prefix(const hoedown_buffer *buf, const char *prefix)
{
    size_t pos = 0;

    while (pos < buf->size) {
        /* the whole prefix has been matched */
        if (prefix[pos] == 0)
            break;

        if (buf->data[pos] != prefix[pos])
            return buf->data[pos] - prefix[pos];

        pos++;
    }

    return 0;
}
/* hoedown_buffer_slurp: remove the first `size` bytes from the buffer,
 * moving the rest to the front. Removing at least as many bytes as the
 * buffer holds just empties it. */
void
hoedown_buffer_slurp(hoedown_buffer *buf, size_t size)
{
    assert(buf && buf->unit);

    if (size < buf->size) {
        buf->size -= size;
        memmove(buf->data, buf->data + size, buf->size);
    } else {
        buf->size = 0;
    }
}
/* hoedown_buffer_cstr: return the buffer contents as a NUL-terminated C
 * string. The terminator is stored just past the contents (growing the
 * buffer if necessary) and is not counted in buf->size. */
const char *
hoedown_buffer_cstr(hoedown_buffer *buf)
{
    assert(buf && buf->unit);

    /* only write a terminator when one is not already in place */
    if (!(buf->size < buf->asize && buf->data[buf->size] == 0)) {
        hoedown_buffer_grow(buf, buf->size + 1);
        buf->data[buf->size] = 0;
    }

    return (char *)buf->data;
}
/* hoedown_buffer_printf: append printf-style formatted output to the
 * buffer. The text is first formatted into the free space; if it does not
 * fit, the buffer is grown and the text is formatted a second time. On a
 * formatting error nothing is appended. */
void
hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...)
{
va_list ap;
int n;
assert(buf && buf->unit);
/* make sure there is at least one free byte to format into */
if (buf->size >= buf->asize)
hoedown_buffer_grow(buf, buf->size + 1);
va_start(ap, fmt);
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
if (n < 0) {
#ifndef _MSC_VER
return;
#else
/* Under MSVC a negative result is not treated as an error here;
 * _vscprintf is asked for the required length instead. */
va_start(ap, fmt);
n = _vscprintf(fmt, ap);
va_end(ap);
#endif
}
/* output was truncated: grow to fit the text plus terminator and retry */
if ((size_t)n >= buf->asize - buf->size) {
hoedown_buffer_grow(buf, buf->size + n + 1);
va_start(ap, fmt);
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
va_end(ap);
}
if (n < 0)
return;
buf->size += n;
}
/* hoedown_buffer_put_utf8: append the UTF-8 encoding of code point `c`.
 * Surrogates (U+D800..U+DFFF) and values of 0x110000 and above are
 * written as U+FFFD (EF BF BD). */
void hoedown_buffer_put_utf8(hoedown_buffer *buf, unsigned int c) {
    unsigned char bytes[4];

    assert(buf && buf->unit);

    if (c < 0x80) {
        /* one byte: plain ASCII */
        hoedown_buffer_putc(buf, c);
    }
    else if (c < 0x800) {
        /* two bytes: 110xxxxx 10xxxxxx */
        bytes[0] = 0xC0 | (c >> 6);
        bytes[1] = 0x80 | (c & 0x3F);
        hoedown_buffer_put(buf, bytes, 2);
    }
    else if (c - 0xd800u < 0x800) {
        /* UTF-16 surrogate: not encodable */
        HOEDOWN_BUFPUTSL(buf, "\xef\xbf\xbd");
    }
    else if (c < 0x10000) {
        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        bytes[0] = 0xE0 | (c >> 12);
        bytes[1] = 0x80 | ((c >> 6) & 0x3F);
        bytes[2] = 0x80 | (c & 0x3F);
        hoedown_buffer_put(buf, bytes, 3);
    }
    else if (c < 0x110000) {
        /* four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        bytes[0] = 0xF0 | (c >> 18);
        bytes[1] = 0x80 | ((c >> 12) & 0x3F);
        bytes[2] = 0x80 | ((c >> 6) & 0x3F);
        bytes[3] = 0x80 | (c & 0x3F);
        hoedown_buffer_put(buf, bytes, 4);
    }
    else {
        /* beyond the Unicode range */
        HOEDOWN_BUFPUTSL(buf, "\xef\xbf\xbd");
    }
}

134
include/hoedown/buffer.h Normal file
View File

@ -0,0 +1,134 @@
/* buffer.h - simple, fast buffers */
#ifndef HOEDOWN_BUFFER_H
#define HOEDOWN_BUFFER_H
#include <stdio.h>
#include <stddef.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* When _MSC_VER is defined, map the GCC-specific constructs used by this
 * library onto something the compiler accepts: attributes expand to nothing,
 * `inline` becomes `__inline`, and __builtin_expect(x, n) reduces to x. */
#if defined(_MSC_VER)
#define __attribute__(x)
#define inline __inline
#define __builtin_expect(x,n) x
#endif
/*********
* TYPES *
*********/
/* realloc-style callback: takes a pointer and a size, returns a pointer */
typedef void *(*hoedown_realloc_callback)(void *, size_t);
/* free-style callback: takes a pointer, returns nothing */
typedef void (*hoedown_free_callback)(void *);
/* hoedown_buffer: a byte buffer together with the callbacks used to manage
 * its storage */
struct hoedown_buffer {
uint8_t *data; /* actual character data */
size_t size; /* size of the string */
size_t asize; /* allocated size (0 = volatile buffer) */
size_t unit; /* reallocation unit size (0 = read-only buffer) */
/* NOTE(review): presumably data_realloc/data_free manage `data` and
 * buffer_free releases the struct itself; confirm against buffer.c */
hoedown_realloc_callback data_realloc;
hoedown_free_callback data_free;
hoedown_free_callback buffer_free;
};
typedef struct hoedown_buffer hoedown_buffer;
/*************
* FUNCTIONS *
*************/
/* allocation wrappers (defined outside this header; presumably thin wrappers
 * around malloc/calloc/realloc, confirm in buffer.c) */
void *hoedown_malloc(size_t size) __attribute__ ((malloc));
void *hoedown_calloc(size_t nmemb, size_t size) __attribute__ ((malloc));
/* No `malloc` attribute here: that attribute promises the returned storage
 * contains no pointers to existing objects, which a realloc-like function
 * cannot guarantee because the old contents are carried over. */
void *hoedown_realloc(void *ptr, size_t size);
/* hoedown_buffer_init: initialize a buffer with custom allocators */
void hoedown_buffer_init(
hoedown_buffer *buffer,
size_t unit,
hoedown_realloc_callback data_realloc,
hoedown_free_callback data_free,
hoedown_free_callback buffer_free
);
/* hoedown_buffer_uninit: uninitialize an existing buffer */
void hoedown_buffer_uninit(hoedown_buffer *buf);
/* hoedown_buffer_new: allocate a new buffer */
hoedown_buffer *hoedown_buffer_new(size_t unit) __attribute__ ((malloc));
/* hoedown_buffer_reset: free internal data of the buffer */
void hoedown_buffer_reset(hoedown_buffer *buf);
/* hoedown_buffer_grow: increase the allocated size to the given value */
void hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz);
/* hoedown_buffer_put: append raw data to a buffer */
void hoedown_buffer_put(hoedown_buffer *buf, const uint8_t *data, size_t size);
/* hoedown_buffer_puts: append a NUL-terminated string to a buffer */
void hoedown_buffer_puts(hoedown_buffer *buf, const char *str);
/* hoedown_buffer_putc: append a single char to a buffer */
void hoedown_buffer_putc(hoedown_buffer *buf, uint8_t c);
/* hoedown_buffer_putf: read from a file and append to a buffer, until EOF or error */
int hoedown_buffer_putf(hoedown_buffer *buf, FILE* file);
/* hoedown_buffer_set: replace the buffer's contents with raw data */
void hoedown_buffer_set(hoedown_buffer *buf, const uint8_t *data, size_t size);
/* hoedown_buffer_sets: replace the buffer's contents with a NUL-terminated string */
void hoedown_buffer_sets(hoedown_buffer *buf, const char *str);
/* hoedown_buffer_eq: compare a buffer's data with other data for equality */
int hoedown_buffer_eq(const hoedown_buffer *buf, const uint8_t *data, size_t size);
/* hoedown_buffer_eqs: compare a buffer's data with a NUL-terminated string for equality */
int hoedown_buffer_eqs(const hoedown_buffer *buf, const char *str);
/* hoedown_buffer_prefix: compare the beginning of a buffer with a string */
int hoedown_buffer_prefix(const hoedown_buffer *buf, const char *prefix);
/* hoedown_buffer_slurp: remove a given number of bytes from the head of the buffer */
void hoedown_buffer_slurp(hoedown_buffer *buf, size_t size);
/* hoedown_buffer_cstr: NUL-termination of the string array (making a C-string);
 * the terminator is not counted in the buffer's size */
const char *hoedown_buffer_cstr(hoedown_buffer *buf);
/* hoedown_buffer_printf: formatted printing to a buffer (appends; the buffer
 * is grown when the formatted text does not fit) */
void hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
/* hoedown_buffer_put_utf8: put a Unicode character encoded as UTF-8;
 * surrogates and values of 0x110000 or above are written as U+FFFD */
void hoedown_buffer_put_utf8(hoedown_buffer *buf, unsigned int codepoint);
/* hoedown_buffer_free: free the buffer */
void hoedown_buffer_free(hoedown_buffer *buf);
/* The three macros below take the length from sizeof(literal) - 1, so
 * `literal` must be a string literal (or char array), never a char pointer. */
/* HOEDOWN_BUFPUTSL: optimized hoedown_buffer_puts of a string literal */
#define HOEDOWN_BUFPUTSL(output, literal) \
hoedown_buffer_put(output, (const uint8_t *)literal, sizeof(literal) - 1)
/* HOEDOWN_BUFSETSL: optimized hoedown_buffer_sets of a string literal */
#define HOEDOWN_BUFSETSL(output, literal) \
hoedown_buffer_set(output, (const uint8_t *)literal, sizeof(literal) - 1)
/* HOEDOWN_BUFEQSL: optimized hoedown_buffer_eqs of a string literal */
#define HOEDOWN_BUFEQSL(output, literal) \
hoedown_buffer_eq(output, (const uint8_t *)literal, sizeof(literal) - 1)
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_BUFFER_H **/

3012
include/hoedown/document.c Normal file

File diff suppressed because it is too large Load Diff

172
include/hoedown/document.h Normal file
View File

@ -0,0 +1,172 @@
/* document.h - generic markdown parser */
#ifndef HOEDOWN_DOCUMENT_H
#define HOEDOWN_DOCUMENT_H
#include "buffer.h"
#include "autolink.h"
#ifdef __cplusplus
extern "C" {
#endif
/*************
* CONSTANTS *
*************/
/* hoedown_extensions: bit flags selecting parser extensions; values are
 * meant to be OR-ed together. Bit 10 is not assigned to any flag. */
typedef enum hoedown_extensions {
/* block-level extensions */
HOEDOWN_EXT_TABLES = (1 << 0),
HOEDOWN_EXT_FENCED_CODE = (1 << 1),
HOEDOWN_EXT_FOOTNOTES = (1 << 2),
/* span-level extensions */
HOEDOWN_EXT_AUTOLINK = (1 << 3),
HOEDOWN_EXT_STRIKETHROUGH = (1 << 4),
HOEDOWN_EXT_UNDERLINE = (1 << 5),
HOEDOWN_EXT_HIGHLIGHT = (1 << 6),
HOEDOWN_EXT_QUOTE = (1 << 7),
HOEDOWN_EXT_SUPERSCRIPT = (1 << 8),
HOEDOWN_EXT_MATH = (1 << 9),
/* other flags */
HOEDOWN_EXT_NO_INTRA_EMPHASIS = (1 << 11),
HOEDOWN_EXT_SPACE_HEADERS = (1 << 12),
HOEDOWN_EXT_MATH_EXPLICIT = (1 << 13),
/* negative flags */
HOEDOWN_EXT_DISABLE_INDENTED_CODE = (1 << 14)
} hoedown_extensions;
#define HOEDOWN_EXT_BLOCK (\
HOEDOWN_EXT_TABLES |\
HOEDOWN_EXT_FENCED_CODE |\
HOEDOWN_EXT_FOOTNOTES )
#define HOEDOWN_EXT_SPAN (\
HOEDOWN_EXT_AUTOLINK |\
HOEDOWN_EXT_STRIKETHROUGH |\
HOEDOWN_EXT_UNDERLINE |\
HOEDOWN_EXT_HIGHLIGHT |\
HOEDOWN_EXT_QUOTE |\
HOEDOWN_EXT_SUPERSCRIPT |\
HOEDOWN_EXT_MATH )
#define HOEDOWN_EXT_FLAGS (\
HOEDOWN_EXT_NO_INTRA_EMPHASIS |\
HOEDOWN_EXT_SPACE_HEADERS |\
HOEDOWN_EXT_MATH_EXPLICIT )
#define HOEDOWN_EXT_NEGATIVE (\
HOEDOWN_EXT_DISABLE_INDENTED_CODE )
/* hoedown_list_flags: bit flags passed to the list and listitem callbacks */
typedef enum hoedown_list_flags {
HOEDOWN_LIST_ORDERED = (1 << 0),
HOEDOWN_LI_BLOCK = (1 << 1) /* <li> containing block data */
} hoedown_list_flags;
/* hoedown_table_flags: flags passed to the table_cell callback. The low two
 * bits (HOEDOWN_TABLE_ALIGNMASK) hold the alignment; HOEDOWN_TABLE_HEADER is
 * a separate bit. */
typedef enum hoedown_table_flags {
HOEDOWN_TABLE_ALIGN_LEFT = 1,
HOEDOWN_TABLE_ALIGN_RIGHT = 2,
HOEDOWN_TABLE_ALIGN_CENTER = 3,
HOEDOWN_TABLE_ALIGNMASK = 3,
HOEDOWN_TABLE_HEADER = 4
} hoedown_table_flags;
/* hoedown_autolink_type: kind of autolink handed to the autolink callback */
typedef enum hoedown_autolink_type {
HOEDOWN_AUTOLINK_NONE, /* used internally when it is not an autolink */
HOEDOWN_AUTOLINK_NORMAL, /* normal http/https/ftp/mailto/etc link */
HOEDOWN_AUTOLINK_EMAIL /* e-mail link without explicit mailto: */
} hoedown_autolink_type;
/*********
* TYPES *
*********/
/* hoedown_document: opaque type; only declared in this header */
struct hoedown_document;
typedef struct hoedown_document hoedown_document;
/* hoedown_renderer_data: passed as the last argument of every renderer
 * callback; `opaque` is an untyped pointer for renderer-specific state */
struct hoedown_renderer_data {
void *opaque;
};
typedef struct hoedown_renderer_data hoedown_renderer_data;
/* hoedown_renderer - functions for rendering parsed data
 *
 * Every callback receives the output buffer `ob` as its first argument and a
 * hoedown_renderer_data pointer as its last. Block-level and low-level
 * callbacks return nothing; span-level callbacks return an int (see the
 * per-group notes below for how NULL and 0 are interpreted). */
struct hoedown_renderer {
/* state object */
void *opaque;
/* block level callbacks - NULL skips the block */
void (*blockcode)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_buffer *lang, const hoedown_renderer_data *data);
void (*blockquote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*header)(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data);
void (*hrule)(hoedown_buffer *ob, const hoedown_renderer_data *data);
void (*list)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
void (*listitem)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
void (*paragraph)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_header)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_body)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_row)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*table_cell)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_table_flags flags, const hoedown_renderer_data *data);
void (*footnotes)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
void (*footnote_def)(hoedown_buffer *ob, const hoedown_buffer *content, unsigned int num, const hoedown_renderer_data *data);
void (*blockhtml)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* span level callbacks - NULL or return 0 prints the span verbatim */
int (*autolink)(hoedown_buffer *ob, const hoedown_buffer *link, hoedown_autolink_type type, const hoedown_renderer_data *data);
int (*codespan)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
int (*double_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*underline)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*highlight)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*quote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*image)(hoedown_buffer *ob, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_buffer *alt, const hoedown_renderer_data *data);
int (*linebreak)(hoedown_buffer *ob, const hoedown_renderer_data *data);
int (*link)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data);
int (*triple_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*strikethrough)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*superscript)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
int (*footnote_ref)(hoedown_buffer *ob, unsigned int num, const hoedown_renderer_data *data);
int (*math)(hoedown_buffer *ob, const hoedown_buffer *text, int displaymode, const hoedown_renderer_data *data);
int (*raw_html)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* low level callbacks - NULL copies input directly into the output */
void (*entity)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
void (*normal_text)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
/* miscellaneous callbacks */
void (*doc_header)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
void (*doc_footer)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
};
typedef struct hoedown_renderer hoedown_renderer;
/*************
* FUNCTIONS *
*************/
/* hoedown_document_new: allocate a new document processor instance
 * renderer: callback table used to produce the output
 * extensions: OR-ed hoedown_extensions flags
 * max_nesting: nesting limit (NOTE(review): exact semantics live in
 * document.c, which is not shown here) */
hoedown_document *hoedown_document_new(
const hoedown_renderer *renderer,
hoedown_extensions extensions,
size_t max_nesting
) __attribute__ ((malloc));
/* hoedown_document_render: render regular Markdown using the document processor;
 * reads `size` bytes from `data` and writes the result to `ob` */
void hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_document_render_inline: render inline Markdown using the document processor;
 * same arguments as hoedown_document_render */
void hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_document_free: deallocate a document processor instance */
void hoedown_document_free(hoedown_document *doc);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_DOCUMENT_H **/

188
include/hoedown/escape.c Normal file
View File

@ -0,0 +1,188 @@
#include "escape.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
/* Branch-prediction hints: tell the compiler the condition is expected to be
 * true (likely) or false (unlikely). Both evaluate to the condition itself. */
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
/*
 * The following characters will not be escaped:
 *
 * -_.+!*'(),%#@?=;:/,+&$ alphanum
 *
 * Note that this character set is the addition of:
 *
 * - The characters which are safe to be in an URL
 * - The characters which are *not* safe to be in
 * an URL because they are RESERVED characters.
 *
 * We assume (lazily) that any RESERVED char that
 * appears inside an URL is actually meant to
 * have its native function (i.e. as an URL
 * component/separator) and hence needs no escaping.
 *
 * There are two exceptions: the characters & (amp)
 * and ' (single quote) do not appear in the table.
 * They are meant to appear in the URL as components,
 * yet they require special HTML-entity escaping
 * to generate valid HTML markup.
 *
 * All other characters will be escaped to %XX.
 *
 * The table is indexed by byte value, 16 entries per
 * row; a 1 marks a byte that is copied verbatim.
 *
 */
static const uint8_t HREF_SAFE[UINT8_MAX+1] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* hoedown_escape_href: append `size` bytes of `data` to `ob`, escaped for use
 * inside an HTML href attribute. Bytes marked in HREF_SAFE are copied as-is,
 * '&' and '\'' become HTML entities, and every other byte (including the
 * space, which is deliberately not turned into '+') is written as %XX with
 * uppercase hex digits. */
void
hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size)
{
	static const char hex_chars[] = "0123456789ABCDEF";
	size_t pos = 0, run_start;
	char pct[3];
	uint8_t ch;

	pct[0] = '%';

	while (pos < size) {
		/* collect the longest run of bytes that need no escaping */
		run_start = pos;
		while (pos < size && HREF_SAFE[data[pos]])
			pos++;

		/* fast path: the whole input is safe, copy it in one go */
		if (run_start == 0 && pos >= size) {
			hoedown_buffer_put(ob, data, size);
			return;
		}

		if (likely(pos > run_start)) {
			hoedown_buffer_put(ob, data + run_start, pos - run_start);
		}

		if (pos >= size)
			break;

		/* data[pos] is the byte that stopped the run: escape it */
		ch = data[pos++];

		if (ch == '&') {
			/* amp is common in URLs but must be an entity inside href */
			HOEDOWN_BUFPUTSL(ob, "&amp;");
		} else if (ch == '\'') {
			/* valid URL character, still needs HTML-entity escaping */
			HOEDOWN_BUFPUTSL(ob, "&#x27;");
		} else {
			/* generic %XX escape */
			pct[1] = hex_chars[(ch >> 4) & 0xF];
			pct[2] = hex_chars[ch & 0xF];
			hoedown_buffer_put(ob, (uint8_t *)pct, 3);
		}
	}
}
/**
 * According to the OWASP rules:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#x27; &apos; is not recommended
 * / --> &#x2F; forward slash is included as it helps end an HTML entity
 *
 * NOTE: the strings actually emitted come from HTML_ESCAPES, which uses the
 * decimal forms &#39; and &#47; for ' and / rather than the hex forms above.
 *
 * The table is indexed by byte value, 16 entries per row. A zero means the
 * byte is copied verbatim; a non-zero entry is an index into HTML_ESCAPES.
 *
 */
static const uint8_t HTML_ESCAPE_TABLE[UINT8_MAX+1] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Replacement strings, indexed by the values stored in HTML_ESCAPE_TABLE:
 * 1 = ", 2 = &, 3 = ', 4 = /, 5 = <, 6 = >. Index 0 is an empty placeholder. */
static const char *HTML_ESCAPES[] = {
"",
"&quot;",
"&amp;",
"&#39;",
"&#47;",
"&lt;",
"&gt;"
};
/* hoedown_escape_html: append `size` bytes of `data` to `ob` with the HTML
 * special characters listed in HTML_ESCAPE_TABLE replaced by entities. The
 * forward slash is only replaced when `secure` is non-zero. */
void
hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure)
{
	size_t pos = 0, run_start;
	uint8_t ch;

	for (;;) {
		/* collect the longest run of bytes that need no escaping */
		run_start = pos;
		while (pos < size && !HTML_ESCAPE_TABLE[data[pos]])
			pos++;

		/* fast path: nothing in the input needs escaping */
		if (run_start == 0 && pos >= size) {
			hoedown_buffer_put(ob, data, size);
			return;
		}

		if (likely(pos > run_start))
			hoedown_buffer_put(ob, data + run_start, pos - run_start);

		if (pos >= size)
			return;

		/* data[pos] is the byte that stopped the run */
		ch = data[pos++];

		if (ch == '/' && !secure)
			hoedown_buffer_putc(ob, '/');
		else
			hoedown_buffer_puts(ob, HTML_ESCAPES[HTML_ESCAPE_TABLE[ch]]);
	}
}

28
include/hoedown/escape.h Normal file
View File

@ -0,0 +1,28 @@
/* escape.h - escape utilities */
#ifndef HOEDOWN_ESCAPE_H
#define HOEDOWN_ESCAPE_H
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
/*************
* FUNCTIONS *
*************/
/* hoedown_escape_href: escape (part of) a URL inside HTML; the escaped form
 * of the `size` bytes at `data` is appended to `ob` */
void hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_escape_html: escape HTML; the escaped form of the `size` bytes at
 * `data` is appended to `ob`. The forward slash is only escaped when
 * `secure` is non-zero. */
void hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_ESCAPE_H **/

754
include/hoedown/html.c Normal file
View File

@ -0,0 +1,754 @@
#include "html.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include "escape.h"
/* Non-zero when the renderer state pointed to by `opt` has the
 * HOEDOWN_HTML_USE_XHTML flag set. `opt` is not parenthesized in the
 * expansion, so pass a plain pointer identifier. */
#define USE_XHTML(opt) (opt->flags & HOEDOWN_HTML_USE_XHTML)
/* hoedown_html_is_tag: check whether `data` starts with an opening or closing
 * HTML tag named `tagname`. The name must match byte-for-byte and must be
 * followed by whitespace or '>'. Returns HOEDOWN_HTML_TAG_OPEN,
 * HOEDOWN_HTML_TAG_CLOSE or HOEDOWN_HTML_TAG_NONE. */
hoedown_html_tag
hoedown_html_is_tag(const uint8_t *data, size_t size, const char *tagname)
{
	size_t pos = 1;
	int closing;

	if (size < 3 || data[0] != '<')
		return HOEDOWN_HTML_TAG_NONE;

	/* a slash right after '<' marks a closing tag */
	closing = (data[1] == '/');
	if (closing)
		pos = 2;

	/* the tag name must match until `tagname` is exhausted */
	while (pos < size && *tagname != 0) {
		if (data[pos] != *tagname)
			return HOEDOWN_HTML_TAG_NONE;
		pos++;
		tagname++;
	}

	/* input ended before a delimiter could follow the name */
	if (pos == size)
		return HOEDOWN_HTML_TAG_NONE;

	if (data[pos] != '>' && !isspace(data[pos]))
		return HOEDOWN_HTML_TAG_NONE;

	return closing ? HOEDOWN_HTML_TAG_CLOSE : HOEDOWN_HTML_TAG_OPEN;
}
/* Append `source` to `ob` HTML-escaped, with the `secure` argument of
 * hoedown_escape_html set to 0 (forward slashes are left as-is). */
static void escape_html(hoedown_buffer *ob, const uint8_t *source, size_t length)
{
hoedown_escape_html(ob, source, length, 0);
}
/* Append `source` to `ob` escaped for use inside an href/src attribute. */
static void escape_href(hoedown_buffer *ob, const uint8_t *source, size_t length)
{
hoedown_escape_href(ob, source, length);
}
/********************
* GENERIC RENDERER *
********************/
/* Render an autolink as <a href="...">text</a>.
 * Returns 0 (nothing written) when the link is missing or empty, 1 otherwise.
 * E-mail autolinks get a "mailto:" scheme prepended to the href; a link that
 * already starts with "mailto:" is displayed without that prefix. */
static int
rndr_autolink(hoedown_buffer *ob, const hoedown_buffer *link, hoedown_autolink_type type, const hoedown_renderer_data *data)
{
	static const char mailto[] = "mailto:";
	const size_t mailto_len = sizeof(mailto) - 1;
	hoedown_html_renderer_state *state = data->opaque;

	if (!link || !link->size)
		return 0;

	HOEDOWN_BUFPUTSL(ob, "<a href=\"");
	if (type == HOEDOWN_AUTOLINK_EMAIL)
		HOEDOWN_BUFPUTSL(ob, "mailto:");
	escape_href(ob, link->data, link->size);

	/* the optional hook may append extra attributes after the href */
	if (state->link_attributes) {
		hoedown_buffer_putc(ob, '\"');
		state->link_attributes(ob, link, data);
		hoedown_buffer_putc(ob, '>');
	} else {
		HOEDOWN_BUFPUTSL(ob, "\">");
	}

	/*
	 * Pretty printing: if we get an email address as
	 * an actual URI, e.g. `mailto:foo@bar.com`, we don't
	 * want to print the `mailto:` prefix.
	 *
	 * The explicit length check keeps `link->size - mailto_len` from
	 * wrapping around should the prefix comparison report a match for a
	 * link that is shorter than the prefix itself.
	 */
	if (link->size >= mailto_len && hoedown_buffer_prefix(link, mailto) == 0) {
		escape_html(ob, link->data + mailto_len, link->size - mailto_len);
	} else {
		escape_html(ob, link->data, link->size);
	}

	HOEDOWN_BUFPUTSL(ob, "</a>");
	return 1;
}
/* Render a code block as <pre><code>...</code></pre>. When `lang` is given,
 * the <code> element gets a class of "language-" plus the escaped language
 * name. The code text is HTML-escaped. */
static void
rndr_blockcode(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_buffer *lang, const hoedown_renderer_data *data)
{
	/* separate from whatever was rendered before */
	if (ob->size)
		hoedown_buffer_putc(ob, '\n');

	if (!lang) {
		HOEDOWN_BUFPUTSL(ob, "<pre><code>");
	} else {
		HOEDOWN_BUFPUTSL(ob, "<pre><code class=\"language-");
		escape_html(ob, lang->data, lang->size);
		HOEDOWN_BUFPUTSL(ob, "\">");
	}

	if (text) {
		escape_html(ob, text->data, text->size);
	}

	HOEDOWN_BUFPUTSL(ob, "</code></pre>\n");
}
/* Wrap the already-rendered `content` in a <blockquote> element. */
static void
rndr_blockquote(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	HOEDOWN_BUFPUTSL(ob, "<blockquote>\n");

	if (content) {
		hoedown_buffer_put(ob, content->data, content->size);
	}

	HOEDOWN_BUFPUTSL(ob, "</blockquote>\n");
}
/* Render inline code as <code>...</code> with the text HTML-escaped.
 * Always returns 1; a missing `text` yields an empty element. */
static int
rndr_codespan(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)
{
	HOEDOWN_BUFPUTSL(ob, "<code>");

	if (text) {
		escape_html(ob, text->data, text->size);
	}

	HOEDOWN_BUFPUTSL(ob, "</code>");
	return 1;
}
/* Wrap `content` in <del>. Returns 0 without writing anything when the
 * content is missing or empty, 1 otherwise. */
static int
rndr_strikethrough(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (content && content->size) {
		HOEDOWN_BUFPUTSL(ob, "<del>");
		hoedown_buffer_put(ob, content->data, content->size);
		HOEDOWN_BUFPUTSL(ob, "</del>");
		return 1;
	}

	return 0;
}
/* Wrap `content` in <strong>. Returns 0 without writing anything when the
 * content is missing or empty, 1 otherwise. */
static int
rndr_double_emphasis(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (content && content->size) {
		HOEDOWN_BUFPUTSL(ob, "<strong>");
		hoedown_buffer_put(ob, content->data, content->size);
		HOEDOWN_BUFPUTSL(ob, "</strong>");
		return 1;
	}

	return 0;
}
/* Wrap `content` in <em>. Returns 0 without writing anything when the
 * content is missing or empty, 1 otherwise. */
static int
rndr_emphasis(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (content && content->size) {
		HOEDOWN_BUFPUTSL(ob, "<em>");
		hoedown_buffer_put(ob, content->data, content->size);
		HOEDOWN_BUFPUTSL(ob, "</em>");
		return 1;
	}

	return 0;
}
/* Wrap `content` in <u>. Returns 0 without writing anything when the
 * content is missing or empty, 1 otherwise. */
static int
rndr_underline(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (content && content->size) {
		HOEDOWN_BUFPUTSL(ob, "<u>");
		hoedown_buffer_put(ob, content->data, content->size);
		HOEDOWN_BUFPUTSL(ob, "</u>");
		return 1;
	}

	return 0;
}
/* Wrap `content` in <mark>. Returns 0 without writing anything when the
 * content is missing or empty, 1 otherwise. */
static int
rndr_highlight(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (content && content->size) {
		HOEDOWN_BUFPUTSL(ob, "<mark>");
		hoedown_buffer_put(ob, content->data, content->size);
		HOEDOWN_BUFPUTSL(ob, "</mark>");
		return 1;
	}

	return 0;
}
/* Wrap `content` in <q>. Returns 0 without writing anything when the
 * content is missing or empty, 1 otherwise. */
static int
rndr_quote(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (content && content->size) {
		HOEDOWN_BUFPUTSL(ob, "<q>");
		hoedown_buffer_put(ob, content->data, content->size);
		HOEDOWN_BUFPUTSL(ob, "</q>");
		return 1;
	}

	return 0;
}
/* Emit a line break followed by a newline: "<br/>" when the XHTML flag is
 * set on the renderer state, "<br>" otherwise. Always returns 1. */
static int
rndr_linebreak(hoedown_buffer *ob, const hoedown_renderer_data *data)
{
	hoedown_html_renderer_state *state = data->opaque;

	if (USE_XHTML(state)) {
		hoedown_buffer_puts(ob, "<br/>\n");
	} else {
		hoedown_buffer_puts(ob, "<br>\n");
	}

	return 1;
}
/* Render a header as <hN>...</hN>. Headers whose level does not exceed the
 * TOC nesting level get an id of the form "toc_<n>", where n is the running
 * header counter kept in the renderer state (incremented after use). */
static void
rndr_header(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data)
{
	hoedown_html_renderer_state *state = data->opaque;

	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	if (level > state->toc_data.nesting_level) {
		hoedown_buffer_printf(ob, "<h%d>", level);
	} else {
		hoedown_buffer_printf(ob, "<h%d id=\"toc_%d\">", level, state->toc_data.header_count);
		state->toc_data.header_count++;
	}

	if (content) {
		hoedown_buffer_put(ob, content->data, content->size);
	}

	hoedown_buffer_printf(ob, "</h%d>\n", level);
}
/* Render a link as <a href="..." title="...">content</a>. The href is
 * href-escaped and the title HTML-escaped; the title attribute is omitted
 * when there is no title. The optional link_attributes hook can add more
 * attributes. Always returns 1. */
static int
rndr_link(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data)
{
	hoedown_html_renderer_state *state = data->opaque;
	const int has_link = link && link->size;
	const int has_title = title && title->size;

	HOEDOWN_BUFPUTSL(ob, "<a href=\"");
	if (has_link) {
		escape_href(ob, link->data, link->size);
	}

	if (has_title) {
		HOEDOWN_BUFPUTSL(ob, "\" title=\"");
		escape_html(ob, title->data, title->size);
	}

	/* close the last attribute value, then the opening tag */
	if (!state->link_attributes) {
		HOEDOWN_BUFPUTSL(ob, "\">");
	} else {
		hoedown_buffer_putc(ob, '\"');
		state->link_attributes(ob, link, data);
		hoedown_buffer_putc(ob, '>');
	}

	if (content && content->size) {
		hoedown_buffer_put(ob, content->data, content->size);
	}

	HOEDOWN_BUFPUTSL(ob, "</a>");
	return 1;
}
/* Wrap the rendered list items in <ol> (when HOEDOWN_LIST_ORDERED is set in
 * `flags`) or <ul>. */
static void
rndr_list(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data)
{
	const int ordered = (flags & HOEDOWN_LIST_ORDERED) != 0;

	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	if (ordered) {
		HOEDOWN_BUFPUTSL(ob, "<ol>\n");
	} else {
		HOEDOWN_BUFPUTSL(ob, "<ul>\n");
	}

	if (content) {
		hoedown_buffer_put(ob, content->data, content->size);
	}

	if (ordered) {
		HOEDOWN_BUFPUTSL(ob, "</ol>\n");
	} else {
		HOEDOWN_BUFPUTSL(ob, "</ul>\n");
	}
}
/* Render a list item as <li>...</li>, dropping any newlines at the end of
 * the content so the closing tag follows the text directly. */
static void
rndr_listitem(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data)
{
	size_t len;

	HOEDOWN_BUFPUTSL(ob, "<li>");

	if (content) {
		/* shrink the length past every trailing newline */
		for (len = content->size; len > 0 && content->data[len - 1] == '\n'; len--)
			;
		hoedown_buffer_put(ob, content->data, len);
	}

	HOEDOWN_BUFPUTSL(ob, "</li>\n");
}
/* Render a paragraph as <p>...</p>. Leading whitespace is skipped and a
 * paragraph consisting only of whitespace produces no element. With
 * HOEDOWN_HTML_HARD_WRAP set, every newline inside the paragraph (except one
 * that is its last character) becomes a line break. */
static void
rndr_paragraph(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	hoedown_html_renderer_state *state = data->opaque;
	size_t pos = 0;
	size_t line_start;

	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	if (!content || !content->size)
		return;

	/* skip leading whitespace; give up if nothing else is left */
	while (pos < content->size && isspace(content->data[pos]))
		pos++;
	if (pos == content->size)
		return;

	HOEDOWN_BUFPUTSL(ob, "<p>");

	if (!(state->flags & HOEDOWN_HTML_HARD_WRAP)) {
		hoedown_buffer_put(ob, content->data + pos, content->size - pos);
	} else {
		while (pos < content->size) {
			/* copy one line, up to but not including the newline */
			line_start = pos;
			while (pos < content->size && content->data[pos] != '\n')
				pos++;
			if (pos > line_start)
				hoedown_buffer_put(ob, content->data + line_start, pos - line_start);

			/*
			 * do not insert a line break if this newline
			 * is the last character on the paragraph
			 */
			if (pos >= content->size - 1)
				break;

			rndr_linebreak(ob, data);
			pos++;
		}
	}

	HOEDOWN_BUFPUTSL(ob, "</p>\n");
}
/* Copy a raw HTML block to the output with leading and trailing newlines
 * trimmed, followed by a single newline. Nothing is written when the block
 * is missing or consists only of newlines. */
static void
rndr_raw_block(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)
{
	size_t first, end;

	if (!text)
		return;

	/* FIXME: Do we *really* need to trim the HTML? How does that make a difference? */
	for (end = text->size; end > 0 && text->data[end - 1] == '\n'; end--)
		;
	for (first = 0; first < end && text->data[first] == '\n'; first++)
		;

	if (first >= end)
		return;

	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	hoedown_buffer_put(ob, text->data + first, end - first);
	hoedown_buffer_putc(ob, '\n');
}
/* Wrap `content` in <strong><em>. Returns 0 without writing anything when
 * the content is missing or empty, 1 otherwise. */
static int
rndr_triple_emphasis(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (content && content->size) {
		HOEDOWN_BUFPUTSL(ob, "<strong><em>");
		hoedown_buffer_put(ob, content->data, content->size);
		HOEDOWN_BUFPUTSL(ob, "</em></strong>");
		return 1;
	}

	return 0;
}
/* Emit a horizontal rule: "<hr/>" when the XHTML flag is set on the renderer
 * state, "<hr>" otherwise, each followed by a newline. */
static void
rndr_hrule(hoedown_buffer *ob, const hoedown_renderer_data *data)
{
	hoedown_html_renderer_state *state = data->opaque;

	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	if (USE_XHTML(state)) {
		hoedown_buffer_puts(ob, "<hr/>\n");
	} else {
		hoedown_buffer_puts(ob, "<hr>\n");
	}
}
/* Render an image as <img src="..." alt="..." title="...">. Returns 0
 * without writing anything when the link is missing or empty, 1 otherwise.
 * The alt attribute is always present (possibly empty); the title attribute
 * only when a title is given. The tag is self-closed in XHTML mode. */
static int
rndr_image(hoedown_buffer *ob, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_buffer *alt, const hoedown_renderer_data *data)
{
	hoedown_html_renderer_state *state = data->opaque;

	if (!(link && link->size))
		return 0;

	HOEDOWN_BUFPUTSL(ob, "<img src=\"");
	escape_href(ob, link->data, link->size);

	HOEDOWN_BUFPUTSL(ob, "\" alt=\"");
	if (alt && alt->size) {
		escape_html(ob, alt->data, alt->size);
	}

	if (title && title->size) {
		HOEDOWN_BUFPUTSL(ob, "\" title=\"");
		escape_html(ob, title->data, title->size);
	}

	/* close the last attribute value and the tag itself */
	if (USE_XHTML(state)) {
		hoedown_buffer_puts(ob, "\"/>");
	} else {
		hoedown_buffer_puts(ob, "\">");
	}

	return 1;
}
/* Handle inline raw HTML according to the renderer flags. With
 * HOEDOWN_HTML_ESCAPE the text is written HTML-escaped; otherwise with
 * HOEDOWN_HTML_SKIP_HTML it is dropped; otherwise it is copied verbatim.
 * Always returns 1. */
static int
rndr_raw_html(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)
{
	hoedown_html_renderer_state *state = data->opaque;

	/* ESCAPE overrides SKIP_HTML. It doesn't look to see if
	 * there are any valid tags, just escapes all of them. */
	if (state->flags & HOEDOWN_HTML_ESCAPE) {
		escape_html(ob, text->data, text->size);
	} else if (!(state->flags & HOEDOWN_HTML_SKIP_HTML)) {
		hoedown_buffer_put(ob, text->data, text->size);
	}

	return 1;
}
/* Wrap the rendered table sections in a <table> element. `content` is
 * dereferenced unconditionally, so it must not be NULL. */
static void
rndr_table(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	HOEDOWN_BUFPUTSL(ob, "<table>\n");
	hoedown_buffer_put(ob, content->data, content->size);
	HOEDOWN_BUFPUTSL(ob, "</table>\n");
}
/* Wrap the rendered header rows in a <thead> element. `content` is
 * dereferenced unconditionally, so it must not be NULL. */
static void
rndr_table_header(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	HOEDOWN_BUFPUTSL(ob, "<thead>\n");
	hoedown_buffer_put(ob, content->data, content->size);
	HOEDOWN_BUFPUTSL(ob, "</thead>\n");
}
/* Wrap the rendered body rows in a <tbody> element. `content` is
 * dereferenced unconditionally, so it must not be NULL. */
static void
rndr_table_body(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	HOEDOWN_BUFPUTSL(ob, "<tbody>\n");
	hoedown_buffer_put(ob, content->data, content->size);
	HOEDOWN_BUFPUTSL(ob, "</tbody>\n");
}
/* Wrap the rendered cells in a <tr> element; a NULL `content` yields an
 * empty row. */
static void
rndr_tablerow(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	HOEDOWN_BUFPUTSL(ob, "<tr>\n");

	if (content) {
		hoedown_buffer_put(ob, content->data, content->size);
	}

	HOEDOWN_BUFPUTSL(ob, "</tr>\n");
}
/* Render a table cell as <th> (when HOEDOWN_TABLE_HEADER is set in `flags`)
 * or <td>. The alignment bits of `flags` select an inline text-align style;
 * no style attribute is written when no alignment is set. */
static void
rndr_tablecell(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_table_flags flags, const hoedown_renderer_data *data)
{
	const int is_header = (flags & HOEDOWN_TABLE_HEADER) != 0;
	const int align = flags & HOEDOWN_TABLE_ALIGNMASK;

	if (is_header) {
		HOEDOWN_BUFPUTSL(ob, "<th");
	} else {
		HOEDOWN_BUFPUTSL(ob, "<td");
	}

	if (align == HOEDOWN_TABLE_ALIGN_CENTER) {
		HOEDOWN_BUFPUTSL(ob, " style=\"text-align: center\">");
	} else if (align == HOEDOWN_TABLE_ALIGN_LEFT) {
		HOEDOWN_BUFPUTSL(ob, " style=\"text-align: left\">");
	} else if (align == HOEDOWN_TABLE_ALIGN_RIGHT) {
		HOEDOWN_BUFPUTSL(ob, " style=\"text-align: right\">");
	} else {
		HOEDOWN_BUFPUTSL(ob, ">");
	}

	if (content) {
		hoedown_buffer_put(ob, content->data, content->size);
	}

	if (is_header) {
		HOEDOWN_BUFPUTSL(ob, "</th>\n");
	} else {
		HOEDOWN_BUFPUTSL(ob, "</td>\n");
	}
}
/* Wrap `content` in <sup>. Returns 0 without writing anything when the
 * content is missing or empty, 1 otherwise. */
static int
rndr_superscript(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (content && content->size) {
		HOEDOWN_BUFPUTSL(ob, "<sup>");
		hoedown_buffer_put(ob, content->data, content->size);
		HOEDOWN_BUFPUTSL(ob, "</sup>");
		return 1;
	}

	return 0;
}
/* Write plain text to the output, HTML-escaped. A NULL `content` writes
 * nothing. */
static void
rndr_normal_text(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	if (!content)
		return;

	escape_html(ob, content->data, content->size);
}
/* Render the footnotes section: a <div class="footnotes"> holding a
 * horizontal rule and an ordered list with the rendered definitions. */
static void
rndr_footnotes(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
{
	hoedown_html_renderer_state *state = data->opaque;

	if (ob->size) {
		hoedown_buffer_putc(ob, '\n');
	}

	HOEDOWN_BUFPUTSL(ob, "<div class=\"footnotes\">\n");

	if (USE_XHTML(state)) {
		hoedown_buffer_puts(ob, "<hr/>\n");
	} else {
		hoedown_buffer_puts(ob, "<hr>\n");
	}

	HOEDOWN_BUFPUTSL(ob, "<ol>\n");

	if (content) {
		hoedown_buffer_put(ob, content->data, content->size);
	}

	HOEDOWN_BUFPUTSL(ob, "\n</ol>\n</div>\n");
}
/* Render one footnote definition as <li id="fnN">...</li>.
 * A back-reference link to "#fnrefN" is inserted just before the first
 * closing paragraph tag ("</p>" or "</P>") found in `content`; when there is
 * no such tag the content is written unchanged. */
static void
rndr_footnote_def(hoedown_buffer *ob, const hoedown_buffer *content, unsigned int num, const hoedown_renderer_data *data)
{
	size_t i = 0;
	int pfound = 0;

	/* insert anchor at the end of first paragraph block.
	 * Each candidate position is tested as a whole, so a partial match
	 * never skips bytes and both 'p' and 'P' are checked at the same
	 * offset. On success `i` is the index of the '<'. */
	if (content) {
		for (i = 0; i + 3 < content->size; i++) {
			if (content->data[i] == '<' &&
			    content->data[i + 1] == '/' &&
			    (content->data[i + 2] == 'p' || content->data[i + 2] == 'P') &&
			    content->data[i + 3] == '>') {
				pfound = 1;
				break;
			}
		}
	}

	/* `num` is unsigned, hence %u */
	hoedown_buffer_printf(ob, "\n<li id=\"fn%u\">\n", num);

	if (pfound) {
		/* text before </p>, the back-reference, then the rest */
		hoedown_buffer_put(ob, content->data, i);
		hoedown_buffer_printf(ob, "&nbsp;<a href=\"#fnref%u\" rev=\"footnote\">&#8617;</a>", num);
		hoedown_buffer_put(ob, content->data + i, content->size - i);
	} else if (content) {
		hoedown_buffer_put(ob, content->data, content->size);
	}

	HOEDOWN_BUFPUTSL(ob, "</li>\n");
}
/* Render a footnote reference: a superscript with id "fnrefN" that links to
 * "#fnN" and shows the footnote number. Always returns 1. */
static int
rndr_footnote_ref(hoedown_buffer *ob, unsigned int num, const hoedown_renderer_data *data)
{
	/* `num` is unsigned, hence %u rather than %d */
	hoedown_buffer_printf(ob, "<sup id=\"fnref%u\"><a href=\"#fn%u\" rel=\"footnote\">%u</a></sup>", num, num, num);
	return 1;
}
/* Render a math span: the HTML-escaped text is wrapped in \[ ... \] for
 * display mode or \( ... \) otherwise. Always returns 1. */
static int
rndr_math(hoedown_buffer *ob, const hoedown_buffer *text, int displaymode, const hoedown_renderer_data *data)
{
	if (displaymode) {
		HOEDOWN_BUFPUTSL(ob, "\\[");
		escape_html(ob, text->data, text->size);
		HOEDOWN_BUFPUTSL(ob, "\\]");
	} else {
		HOEDOWN_BUFPUTSL(ob, "\\(");
		escape_html(ob, text->data, text->size);
		HOEDOWN_BUFPUTSL(ob, "\\)");
	}

	return 1;
}
/* Header callback of the TOC renderer: appends one entry to the nested
 * <ul>/<li> table of contents and links it to "#toc_<n>", where <n> is the
 * running header counter.  Headers deeper than the configured nesting level
 * are ignored. */
static void
toc_header(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data)
{
	hoedown_html_renderer_state *state = data->opaque;

	if (level > state->toc_data.nesting_level)
		return;

	/* the first header seen defines which level counts as depth 1 */
	if (state->toc_data.current_level == 0)
		state->toc_data.level_offset = level - 1;
	level -= state->toc_data.level_offset;

	if (level == state->toc_data.current_level) {
		/* sibling entry: close the previous item, open a new one */
		HOEDOWN_BUFPUTSL(ob,"</li>\n<li>\n");
	} else if (level > state->toc_data.current_level) {
		/* deeper: open one list (and item) per level gained */
		do {
			HOEDOWN_BUFPUTSL(ob, "<ul>\n<li>\n");
			state->toc_data.current_level++;
		} while (level > state->toc_data.current_level);
	} else {
		/* shallower: close the current item, then one list per level lost */
		HOEDOWN_BUFPUTSL(ob, "</li>\n");
		do {
			HOEDOWN_BUFPUTSL(ob, "</ul>\n</li>\n");
			state->toc_data.current_level--;
		} while (level < state->toc_data.current_level);
		HOEDOWN_BUFPUTSL(ob,"<li>\n");
	}

	hoedown_buffer_printf(ob, "<a href=\"#toc_%d\">", state->toc_data.header_count++);
	if (content != NULL)
		hoedown_buffer_put(ob, content->data, content->size);
	HOEDOWN_BUFPUTSL(ob, "</a>\n");
}
/* Link callback of the TOC renderer: keeps only the link text, dropping the
 * target and title, so TOC entries do not contain nested anchors.
 * Always returns 1. */
static int
toc_link(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data)
{
	if (content == NULL || content->size == 0)
		return 1;

	hoedown_buffer_put(ob, content->data, content->size);
	return 1;
}
/* Final callback of the TOC renderer: closes every list that is still open
 * and resets the header counter.  Does nothing for inline renders. */
static void
toc_finalize(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data)
{
	if (!inline_render) {
		hoedown_html_renderer_state *state = data->opaque;

		/* one "</li></ul>" pair per nesting level still open */
		for (; state->toc_data.current_level > 0; state->toc_data.current_level--)
			HOEDOWN_BUFPUTSL(ob, "</li>\n</ul>\n");

		state->toc_data.header_count = 0;
	}
}
/* Allocates a renderer that produces only the table of contents.
 *
 * The callback table is positional (36 slots).  Only the header callback,
 * the inline span callbacks, the link callback, the normal-text callback and
 * the final slot (toc_finalize) are populated; every other slot is NULL.
 * The zero-initialised state records `nesting_level` and is stored in the
 * renderer's opaque pointer.  Release with hoedown_html_renderer_free(). */
hoedown_renderer *
hoedown_html_toc_renderer_new(int nesting_level)
{
	static const hoedown_renderer cb_default = {
		NULL, NULL, NULL,                   /* 3 unused slots */
		toc_header,
		NULL, NULL, NULL, NULL, NULL,       /* 13 unused slots */
		NULL, NULL, NULL, NULL, NULL,
		NULL, NULL, NULL,

		rndr_codespan,
		rndr_double_emphasis,
		rndr_emphasis,
		rndr_underline,
		rndr_highlight,
		rndr_quote,
		NULL, NULL,                         /* 2 unused slots */
		toc_link,
		rndr_triple_emphasis,
		rndr_strikethrough,
		rndr_superscript,
		NULL, NULL, NULL, NULL,             /* 4 unused slots */

		rndr_normal_text,

		NULL,
		toc_finalize
	};

	hoedown_html_renderer_state *state;
	hoedown_renderer *renderer;

	/* state: all zero except for the requested nesting level */
	state = hoedown_malloc(sizeof(*state));
	memset(state, 0x0, sizeof(*state));
	state->toc_data.nesting_level = nesting_level;

	/* renderer: a private copy of the default table, pointing at the state */
	renderer = hoedown_malloc(sizeof(*renderer));
	*renderer = cb_default;
	renderer->opaque = state;

	return renderer;
}
/* Allocates the regular HTML renderer.
 *
 * The callback table is positional; the grouping comments below are inferred
 * from the callback names - the authoritative slot order is the
 * hoedown_renderer declaration in document.h.  The zero-initialised state
 * records `render_flags` and `nesting_level` and is stored in the renderer's
 * opaque pointer.  When HOEDOWN_HTML_SKIP_HTML or HOEDOWN_HTML_ESCAPE is set
 * the blockhtml callback is cleared.  Release with
 * hoedown_html_renderer_free(). */
hoedown_renderer *
hoedown_html_renderer_new(hoedown_html_flags render_flags, int nesting_level)
{
	static const hoedown_renderer cb_default = {
		NULL,

		/* block-level renderers */
		rndr_blockcode,
		rndr_blockquote,
		rndr_header,
		rndr_hrule,
		rndr_list,
		rndr_listitem,
		rndr_paragraph,
		rndr_table,
		rndr_table_header,
		rndr_table_body,
		rndr_tablerow,
		rndr_tablecell,
		rndr_footnotes,
		rndr_footnote_def,
		rndr_raw_block,

		/* span-level renderers */
		rndr_autolink,
		rndr_codespan,
		rndr_double_emphasis,
		rndr_emphasis,
		rndr_underline,
		rndr_highlight,
		rndr_quote,
		rndr_image,
		rndr_linebreak,
		rndr_link,
		rndr_triple_emphasis,
		rndr_strikethrough,
		rndr_superscript,
		rndr_footnote_ref,
		rndr_math,
		rndr_raw_html,

		/* remaining slots */
		NULL,
		rndr_normal_text,
		NULL,
		NULL
	};

	hoedown_html_renderer_state *state;
	hoedown_renderer *renderer;

	/* state: all zero except for the caller's options */
	state = hoedown_malloc(sizeof(*state));
	memset(state, 0x0, sizeof(*state));
	state->flags = render_flags;
	state->toc_data.nesting_level = nesting_level;

	/* renderer: a private copy of the default table */
	renderer = hoedown_malloc(sizeof(*renderer));
	*renderer = cb_default;

	/* either flag disables the dedicated raw HTML block callback */
	if ((render_flags & (HOEDOWN_HTML_SKIP_HTML | HOEDOWN_HTML_ESCAPE)) != 0)
		renderer->blockhtml = NULL;

	renderer->opaque = state;
	return renderer;
}
/* Releases a renderer created by hoedown_html_renderer_new() or
 * hoedown_html_toc_renderer_new(), together with the state stored in its
 * opaque pointer.  Passing NULL is a no-op, mirroring free(). */
void
hoedown_html_renderer_free(hoedown_renderer *renderer)
{
	if (renderer == NULL)
		return;

	free(renderer->opaque);
	free(renderer);
}

84
include/hoedown/html.h Normal file
View File

@ -0,0 +1,84 @@
/* html.h - HTML renderer and utilities */
#ifndef HOEDOWN_HTML_H
#define HOEDOWN_HTML_H
#include "document.h"
#include "buffer.h"
#ifdef __cplusplus
extern "C" {
#endif
/*************
* CONSTANTS *
*************/
typedef enum hoedown_html_flags {
HOEDOWN_HTML_SKIP_HTML = (1 << 0),
HOEDOWN_HTML_ESCAPE = (1 << 1),
HOEDOWN_HTML_HARD_WRAP = (1 << 2),
HOEDOWN_HTML_USE_XHTML = (1 << 3)
} hoedown_html_flags;
/* Result of hoedown_html_is_tag(): whether the data starts with the requested
 * tag and, if so, whether it is the opening or the closing form. */
typedef enum hoedown_html_tag {
/* the data does not start with the tag */
HOEDOWN_HTML_TAG_NONE = 0,
HOEDOWN_HTML_TAG_OPEN,
HOEDOWN_HTML_TAG_CLOSE
} hoedown_html_tag;
/*********
* TYPES *
*********/
/* State shared by the HTML and TOC renderer callbacks.  The renderer
 * constructors allocate one instance, zero it and store it in the renderer's
 * opaque pointer; hoedown_html_renderer_free() releases it. */
struct hoedown_html_renderer_state {
/* left NULL by the constructors; presumably reserved for caller data - confirm */
void *opaque;
/* bookkeeping for table-of-contents generation */
struct {
/* running index used for the "#toc_<n>" link targets; reset after a TOC render */
int header_count;
/* number of TOC lists currently open */
int current_level;
/* level of the first header seen minus 1; subtracted from later header levels */
int level_offset;
/* headers with a level above this value are left out of the TOC */
int nesting_level;
} toc_data;
/* render flags passed to hoedown_html_renderer_new() */
hoedown_html_flags flags;
/* extra callbacks */
/* NOTE(review): left NULL by the constructors; presumably lets the caller
 * append extra attributes to generated links - confirm in html.c */
void (*link_attributes)(hoedown_buffer *ob, const hoedown_buffer *url, const hoedown_renderer_data *data);
};
typedef struct hoedown_html_renderer_state hoedown_html_renderer_state;
/*************
* FUNCTIONS *
*************/
/* hoedown_html_smartypants: process an HTML snippet using SmartyPants for smart punctuation */
void hoedown_html_smartypants(hoedown_buffer *ob, const uint8_t *data, size_t size);
/* hoedown_html_is_tag: checks if data starts with a specific tag, returns the tag type or NONE */
hoedown_html_tag hoedown_html_is_tag(const uint8_t *data, size_t size, const char *tagname);
/* hoedown_html_renderer_new: allocates a regular HTML renderer */
hoedown_renderer *hoedown_html_renderer_new(
hoedown_html_flags render_flags,
int nesting_level
) __attribute__ ((malloc));
/* hoedown_html_toc_renderer_new: like hoedown_html_renderer_new, but the returned renderer produces the Table of Contents */
hoedown_renderer *hoedown_html_toc_renderer_new(
int nesting_level
) __attribute__ ((malloc));
/* hoedown_html_renderer_free: deallocate an HTML renderer */
void hoedown_html_renderer_free(hoedown_renderer *renderer);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_HTML_H **/

View File

@ -0,0 +1,240 @@
/* ANSI-C code produced by gperf version 3.0.3 */
/* Command-line: gperf -L ANSI-C -N hoedown_find_block_tag -c -C -E -S 1 --ignore-case -m100 html_block_names.gperf */
/* Computed positions: -k'1-2' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
/* maximum key range = 24, duplicates = 0 */
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
/* gperf-generated ASCII case-folding table, indexed by byte value: entries
 * 65..90 ('A'..'Z') hold 97..122 ('a'..'z'); every other entry holds its own
 * index.  Used by gperf_case_strncmp for case-insensitive comparison. */
static unsigned char gperf_downcase[256] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255
};
#endif
#ifndef GPERF_CASE_STRNCMP
#define GPERF_CASE_STRNCMP 1
/* Compares at most n bytes of s1 and s2, folding ASCII upper case to lower
 * case through gperf_downcase.  Returns 0 when the first n bytes match;
 * otherwise the difference of the first pair of folded bytes that differ.
 * A NUL in s1 also stops the comparison and yields that difference. */
static int
gperf_case_strncmp (register const char *s1, register const char *s2, register unsigned int n)
{
	while (n > 0) {
		const unsigned char folded1 = gperf_downcase[(unsigned char)*s1++];
		const unsigned char folded2 = gperf_downcase[(unsigned char)*s2++];

		/* stop at the end of s1 or at the first mismatch */
		if (folded1 == 0 || folded1 != folded2)
			return (int)folded1 - (int)folded2;

		n--;
	}

	return 0;
}
#endif
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* gperf hash over the first two bytes of a tag name: the length, plus the
 * association value of the first byte, plus - for names longer than one
 * byte - the association value of (second byte + 1).  Upper- and lower-case
 * letters share association values.  The caller must pass len >= 1. */
static unsigned int
hash (register const char *str, register unsigned int len)
{
	static const unsigned char asso_values[] =
	{
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		22, 21, 19, 18, 16, 0, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 1, 25, 0, 25,
		1, 0, 0, 13, 0, 25, 25, 11, 2, 1,
		0, 25, 25, 5, 0, 2, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 1, 25,
		0, 25, 1, 0, 0, 13, 0, 25, 25, 11,
		2, 1, 0, 25, 25, 5, 0, 2, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
		25, 25, 25, 25, 25, 25, 25
	};
	register int hval = (int)len;

	/* the second byte only exists for names longer than one character */
	if (hval != 1)
		hval += asso_values[(unsigned char)str[1]+1];

	hval += asso_values[(unsigned char)str[0]];
	return hval;
}
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
/* Case-insensitive lookup of an HTML block-level tag name.
 * str/len describe the candidate name (it need not be NUL-terminated).
 * Returns the canonical lower-case keyword when str names one of the 24
 * known block tags, and 0 otherwise. */
const char *
hoedown_find_block_tag (register const char *str, register unsigned int len)
{
	enum
	{
		TOTAL_KEYWORDS = 24,
		MIN_WORD_LENGTH = 1,
		MAX_WORD_LENGTH = 10,
		MIN_HASH_VALUE = 1,
		MAX_HASH_VALUE = 24
	};

	/* keyword for each hash value, in order MIN_HASH_VALUE..MAX_HASH_VALUE */
	static const char *const wordlist[TOTAL_KEYWORDS] =
	{
		"p", "h6", "div", "del", "form", "table",
		"figure", "pre", "fieldset", "noscript", "script", "style",
		"dl", "ol", "ul", "math", "ins", "h5",
		"iframe", "h4", "h3", "blockquote", "h2", "h1"
	};

	register const char *resword;
	register int key;

	if (len > MAX_WORD_LENGTH || len < MIN_WORD_LENGTH)
		return 0;

	key = hash (str, len);
	if (key > MAX_HASH_VALUE || key < MIN_HASH_VALUE)
		return 0;

	resword = wordlist[key - MIN_HASH_VALUE];

	/* cheap first-byte check (ignoring the ASCII case bit), then the full
	 * case-insensitive compare, then make sure the keyword is not longer */
	if ((((unsigned char)*str ^ (unsigned char)*resword) & ~32) == 0 && !gperf_case_strncmp (str, resword, len) && resword[len] == '\0')
		return resword;

	return 0;
}

View File

@ -0,0 +1,435 @@
#include "html.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#ifdef _MSC_VER
#define snprintf _snprintf
#endif
/* Quote state carried across one hoedown_html_smartypants() run. */
struct smartypants_data {
/* non-zero while a single quote has been opened and not yet closed */
int in_squote;
/* non-zero while a double quote has been opened and not yet closed */
int in_dquote;
};
static size_t smartypants_cb__ltag(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dash(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
/* Handler dispatch table, indexed by the action code that
 * smartypants_cb_chars assigns to the current input byte.  Each handler
 * receives the output buffer, the quote state, the previous input byte
 * (0 at the start) and the remaining input starting at the trigger byte; it
 * returns how many bytes beyond the trigger byte it consumed. */
static size_t (*smartypants_cb_ptrs[])
(hoedown_buffer *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
{
NULL, /* 0: no action, never dispatched */
smartypants_cb__dash, /* 1 */
smartypants_cb__parens, /* 2 */
smartypants_cb__squote, /* 3 */
smartypants_cb__dquote, /* 4 */
smartypants_cb__amp, /* 5 */
smartypants_cb__period, /* 6 */
smartypants_cb__number, /* 7 */
smartypants_cb__ltag, /* 8 */
smartypants_cb__backtick, /* 9 */
smartypants_cb__escape, /* 10 */
};
/* Maps every input byte to an index into smartypants_cb_ptrs (0 = copy the
 * byte through unchanged).  Non-zero entries, 16 bytes per row:
 *   row 0x20: '"' -> 4, '&' -> 5, '\'' -> 3, '(' -> 2, '-' -> 1, '.' -> 6
 *   row 0x30: '1' -> 7, '3' -> 7, '<' -> 8
 *   row 0x50: '\\' -> 10
 *   row 0x60: '`' -> 9 */
static const uint8_t smartypants_cb_chars[UINT8_MAX+1] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,
0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Returns 1 when c can delimit a word - the NUL byte (used for "no
 * neighbouring character"), whitespace or punctuation - and 0 otherwise. */
static int
word_boundary(uint8_t c)
{
	if (c == 0)
		return 1;

	return isspace(c) || ispunct(c);
}
/*
	If 'text' begins with any spelling of a single quote - the literal "'"
	or one of the entities "&#39;", "&#x27;", "&apos;" - returns the length
	in bytes of that spelling.  Otherwise, or when fewer than that many
	bytes are available in 'size', returns zero.  The comparison is exact
	(case-sensitive).
*/
static size_t
squote_len(const uint8_t *text, size_t size)
{
	/* NULL-terminated list; the literals are read-only, so keep them const */
	static const char *const single_quote_list[] = { "'", "&#39;", "&#x27;", "&apos;", NULL };
	const char *const *p;

	for (p = single_quote_list; *p; ++p) {
		size_t len = strlen(*p);
		if (size >= len && memcmp(text, *p, len) == 0) {
			return len;
		}
	}

	return 0;
}
/* Emits the curly-quote entity for a quote found at the edge of a word.
 * 'quote' is the entity letter ('s' for single, 'd' for double) and *is_open
 * tracks whether a quote of that kind is currently open.  A closing quote
 * requires a word boundary after it, an opening quote one before it.
 * Returns 1 and toggles *is_open when an entity was written; returns 0 and
 * writes nothing otherwise. */
static int
smartypants_quotes(hoedown_buffer *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	char ent[8];
	const int closing = (*is_open != 0);
	const uint8_t neighbour = closing ? next_char : previous_char;

	if (!word_boundary(neighbour))
		return 0;

	/* "&rsquo;"/"&rdquo;" when closing, "&lsquo;"/"&ldquo;" when opening */
	snprintf(ent, sizeof(ent), "&%c%cquo;", closing ? 'r' : 'l', quote);
	*is_open = !closing;
	hoedown_buffer_puts(ob, ent);

	return 1;
}
/*
	Converts ' to a left or right single quote; the initial ' might be in
	different forms, e.g. &apos; or &#39; or &#x27;.
	'squote_text' points to the original single quote, and 'squote_size' is
	its length.  'text' points at the last character of the single quote,
	e.g. ' or ; and 'size' counts the bytes available from there, so text[1]
	(when size >= 2) is the first character after the quote.

	Returns the number of bytes consumed beyond the quote itself (non-zero
	only when a second single quote was merged into a double quote).

	Every read of text[k] is guarded by size > k: a contraction suffix that
	ends exactly at the end of the input counts as ending on a word
	boundary, and nothing past the input is inspected.
*/
static size_t
smartypants_squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
				   const uint8_t *squote_text, size_t squote_size)
{
	if (size >= 2) {
		uint8_t t1 = tolower(text[1]);
		size_t next_squote_len = squote_len(text+1, size-1);

		/* convert '' to &ldquo; or &rdquo; */
		if (next_squote_len > 0) {
			uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
			if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
				return next_squote_len;
		}

		/* Tom's, isn't, I'm, I'd: one letter, then end of input or a boundary */
		if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
			(size == 2 || word_boundary(text[2]))) {
			HOEDOWN_BUFPUTSL(ob, "&rsquo;");
			return 0;
		}

		/* you're, you'll, you've: two letters, then end of input or a boundary */
		if (size >= 3) {
			uint8_t t2 = tolower(text[2]);

			if (((t1 == 'r' && t2 == 'e') ||
				(t1 == 'l' && t2 == 'l') ||
				(t1 == 'v' && t2 == 'e')) &&
				(size == 3 || word_boundary(text[3]))) {
				HOEDOWN_BUFPUTSL(ob, "&rsquo;");
				return 0;
			}
		}
	}

	/* text[1] only exists when at least two bytes remain */
	if (smartypants_quotes(ob, previous_char, size > 1 ? text[1] : 0, 's', &smrt->in_squote))
		return 0;

	/* not convertible: copy the original spelling of the quote through */
	hoedown_buffer_put(ob, squote_text, squote_size);
	return 0;
}
/* Handler for a literal ': converts it to a left or right single quote.
 * Forwards to smartypants_squote with the quote itself (one byte at 'text')
 * as the original spelling, and returns its result. */
static size_t
smartypants_cb__squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
return smartypants_squote(ob, smrt, previous_char, text, size, text, 1);
}
/* Handler for '(': converts (c), (r) and (tm) - letters in either case - to
 * &copy;, &reg; and &trade;.  Returns the number of bytes consumed after the
 * '('; any other sequence just copies the '(' through and returns 0. */
static size_t
smartypants_cb__parens(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 3) {
		const uint8_t first = tolower(text[1]);
		const uint8_t second = tolower(text[2]);

		switch (first) {
		case 'c':
			if (second == ')') {
				HOEDOWN_BUFPUTSL(ob, "&copy;");
				return 2;
			}
			break;

		case 'r':
			if (second == ')') {
				HOEDOWN_BUFPUTSL(ob, "&reg;");
				return 2;
			}
			break;

		case 't':
			if (size >= 4 && second == 'm' && text[3] == ')') {
				HOEDOWN_BUFPUTSL(ob, "&trade;");
				return 3;
			}
			break;

		default:
			break;
		}
	}

	hoedown_buffer_putc(ob, text[0]);
	return 0;
}
/* Handler for '-': converts "---" to &mdash; and "--" to &ndash;.  Returns
 * the number of extra dashes consumed; a lone dash is copied through. */
static size_t
smartypants_cb__dash(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const int second_dash = (size >= 2 && text[1] == '-');
	const int third_dash = (size >= 3 && text[2] == '-');

	if (second_dash && third_dash) {
		HOEDOWN_BUFPUTSL(ob, "&mdash;");
		return 2;
	}

	if (second_dash) {
		HOEDOWN_BUFPUTSL(ob, "&ndash;");
		return 1;
	}

	hoedown_buffer_putc(ob, text[0]);
	return 0;
}
/* Handler for '&': recognises the entities that stand for quotes.
 *   - "&quot;" is turned into a curly double quote when it sits on a word edge;
 *   - an entity spelling of the single quote is handed to smartypants_squote;
 *   - "&#0;" is swallowed without output;
 *   - anything else copies the '&' through.
 * Returns the number of bytes consumed after the '&'. */
static size_t
smartypants_cb__amp(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	static const char quot_entity[] = "&quot;";
	static const char nul_entity[] = "&#0;";
	const size_t quot_len = sizeof(quot_entity) - 1;
	const size_t nul_len = sizeof(nul_entity) - 1;
	size_t quote_len;

	if (size >= quot_len && memcmp(text, quot_entity, quot_len) == 0) {
		const uint8_t after = (size > quot_len) ? text[quot_len] : 0;

		if (smartypants_quotes(ob, previous_char, after, 'd', &smrt->in_dquote))
			return quot_len - 1;
	}

	quote_len = squote_len(text, size);
	if (quote_len > 0) {
		/* hand over from the entity's last byte, as smartypants_squote expects */
		const size_t skip = quote_len - 1;

		return skip + smartypants_squote(ob, smrt, previous_char, text + skip, size - skip, text, quote_len);
	}

	if (size >= nul_len && memcmp(text, nul_entity, nul_len) == 0)
		return nul_len - 1;

	hoedown_buffer_putc(ob, '&');
	return 0;
}
/* Handler for '.': converts "..." and ". . ." to &hellip;.  Returns the
 * number of bytes consumed after the first period; a lone period is copied
 * through. */
static size_t
smartypants_cb__period(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const int tight = (size >= 3 && text[1] == '.' && text[2] == '.');
	const int spaced = (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.');

	/* the two spellings cannot both match: they differ in text[1] */
	if (tight || spaced) {
		HOEDOWN_BUFPUTSL(ob, "&hellip;");
		return tight ? 2 : 4;
	}

	hoedown_buffer_putc(ob, text[0]);
	return 0;
}
/* Handler for '`': a doubled backtick on a word edge becomes a curly double
 * quote (one extra byte consumed).  Anything else copies the backtick
 * through. */
static size_t
smartypants_cb__backtick(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	const int doubled = (size >= 2 && text[1] == '`');

	/* the quote conversion is only attempted for a doubled backtick */
	if (doubled && smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
		return 1;

	hoedown_buffer_putc(ob, text[0]);
	return 0;
}
/* Handler for '1' and '3': converts the fractions 1/2, 1/4 and 3/4 to
 * &frac12;, &frac14; and &frac34;.  The fraction must start on a word
 * boundary and be followed by the end of input or a word boundary; "1/4th"
 * and "3/4ths" are accepted as well (the suffix is left in the input).
 * Returns 2 when a fraction was converted, otherwise copies the digit and
 * returns 0. */
static size_t
smartypants_cb__number(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (word_boundary(previous_char) && size >= 3 && text[1] == '/') {
		const uint8_t numerator = text[0];
		const uint8_t denominator = text[2];
		const int ends_here = (size == 3) || word_boundary(text[3]);

		if (numerator == '1' && denominator == '2') {
			if (ends_here) {
				HOEDOWN_BUFPUTSL(ob, "&frac12;");
				return 2;
			}
		} else if (numerator == '1' && denominator == '4') {
			if (ends_here ||
				(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
				HOEDOWN_BUFPUTSL(ob, "&frac14;");
				return 2;
			}
		} else if (numerator == '3' && denominator == '4') {
			if (ends_here ||
				(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
				HOEDOWN_BUFPUTSL(ob, "&frac34;");
				return 2;
			}
		}
	}

	hoedown_buffer_putc(ob, text[0]);
	return 0;
}
/* Handler for a literal '"': converts it to a left or right double quote
 * when it sits on a word edge, and to "&quot;" otherwise.  Consumes nothing
 * beyond the quote itself, so it always returns 0. */
static size_t
smartypants_cb__dquote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	/* text[1] only exists when at least two bytes remain; a quote that ends
	 * the input is followed by "nothing", which counts as a word boundary */
	const uint8_t next_char = (size > 1) ? text[1] : 0;

	if (!smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
		HOEDOWN_BUFPUTSL(ob, "&quot;");

	return 0;
}
/* Handler for '<': copies markup through verbatim so that no smart
 * punctuation is applied inside it.
 *   - An HTML comment is copied up to and including "-->" plus the byte that
 *     follows it.
 *   - An opening pre/code/var/samp/kbd/math/script/style tag is copied
 *     together with everything up to the '>' of its matching closing tag.
 *   - Any other tag is copied up to its '>'.
 * When the comment or tag is not terminated, everything up to the end of the
 * input is copied; the copy is clamped to 'size' so that no byte past the
 * input is ever read.  Returns the number of bytes consumed after the '<'. */
static size_t
smartypants_cb__ltag(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	static const char *skip_tags[] = {
		"pre", "code", "var", "samp", "kbd", "math", "script", "style"
	};
	static const size_t skip_tags_count = 8;

	size_t tag, i = 0;
	size_t end;

	/* This is a comment. Copy everything verbatim until --> or EOF is seen. */
	if (i + 4 < size && memcmp(text + i, "<!--", 4) == 0) {
		i += 4;
		while (i + 3 < size && memcmp(text + i, "-->", 3) != 0)
			i++;
		i += 3;

		/* i may now be at or beyond the end of the input: clamp */
		end = (i + 1 < size) ? i + 1 : size;
		hoedown_buffer_put(ob, text, end);
		return end - 1;
	}

	while (i < size && text[i] != '>')
		i++;

	for (tag = 0; tag < skip_tags_count; ++tag) {
		if (hoedown_html_is_tag(text, size, skip_tags[tag]) == HOEDOWN_HTML_TAG_OPEN)
			break;
	}

	if (tag < skip_tags_count) {
		/* skip ahead to the matching closing tag, or to the end of input */
		for (;;) {
			while (i < size && text[i] != '<')
				i++;

			if (i == size)
				break;

			if (hoedown_html_is_tag(text + i, size - i, skip_tags[tag]) == HOEDOWN_HTML_TAG_CLOSE)
				break;

			i++;
		}

		while (i < size && text[i] != '>')
			i++;
	}

	/* i == size when no closing '>' was found: clamp instead of copying
	 * one byte past the input */
	end = (i + 1 < size) ? i + 1 : size;
	hoedown_buffer_put(ob, text, end);
	return end - 1;
}
/* Handler for '\': a backslash followed by one of \ " ' . - ` emits that
 * character literally (protecting it from conversion) and consumes it,
 * returning 1.  In every other case - including a backslash that is the
 * last byte of the input - the backslash itself is copied through and 0 is
 * returned, so no input byte is ever lost. */
static size_t
smartypants_cb__escape(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
{
	if (size >= 2) {
		switch (text[1]) {
		case '\\':
		case '"':
		case '\'':
		case '.':
		case '-':
		case '`':
			hoedown_buffer_putc(ob, text[1]);
			return 1;

		default:
			break;
		}
	}

	hoedown_buffer_putc(ob, '\\');
	return 0;
}
#if 0
static struct {
uint8_t c0;
const uint8_t *pattern;
const uint8_t *entity;
int skip;
} smartypants_subs[] = {
{ '\'', "'s>", "&rsquo;", 0 },
{ '\'', "'t>", "&rsquo;", 0 },
{ '\'', "'re>", "&rsquo;", 0 },
{ '\'', "'ll>", "&rsquo;", 0 },
{ '\'', "'ve>", "&rsquo;", 0 },
{ '\'', "'m>", "&rsquo;", 0 },
{ '\'', "'d>", "&rsquo;", 0 },
{ '-', "--", "&mdash;", 1 },
{ '-', "<->", "&ndash;", 0 },
{ '.', "...", "&hellip;", 2 },
{ '.', ". . .", "&hellip;", 4 },
{ '(', "(c)", "&copy;", 2 },
{ '(', "(r)", "&reg;", 2 },
{ '(', "(tm)", "&trade;", 3 },
{ '3', "<3/4>", "&frac34;", 2 },
{ '3', "<3/4ths>", "&frac34;", 2 },
{ '1', "<1/2>", "&frac12;", 2 },
{ '1', "<1/4>", "&frac14;", 2 },
{ '1', "<1/4th>", "&frac14;", 2 },
{ '&', "&#0;", 0, 3 },
};
#endif
/* Applies smart punctuation to the HTML snippet text/size and appends the
 * result to ob.  Runs of bytes without an associated action are copied
 * unchanged; every other byte is handed to its handler from
 * smartypants_cb_ptrs, which reports how many following bytes it consumed.
 * A NULL text is ignored. */
void
hoedown_html_smartypants(hoedown_buffer *ob, const uint8_t *text, size_t size)
{
	struct smartypants_data smrt = {0, 0};
	size_t pos = 0;

	if (!text)
		return;

	hoedown_buffer_grow(ob, size);

	while (pos < size) {
		const size_t run_start = pos;
		uint8_t action = 0;

		/* find the next byte that has a handler */
		while (pos < size) {
			action = smartypants_cb_chars[text[pos]];
			if (action != 0)
				break;
			pos++;
		}

		/* flush the plain run that precedes it */
		if (pos > run_start)
			hoedown_buffer_put(ob, text + run_start, pos - run_start);

		if (pos < size) {
			const uint8_t previous = pos ? text[pos - 1] : 0;

			pos += smartypants_cb_ptrs[(int)action]
				(ob, &smrt, previous, text + pos, size - pos);
		}

		/* step over the trigger byte itself */
		pos++;
	}
}

79
include/hoedown/stack.c Normal file
View File

@ -0,0 +1,79 @@
#include "stack.h"
#include "buffer.h"
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* Initialises an empty stack with room for initial_size items; a request
 * for 0 items falls back to a capacity of 8. */
void
hoedown_stack_init(hoedown_stack *st, size_t initial_size)
{
	assert(st);

	st->item = NULL;
	st->size = 0;
	st->asize = 0;

	hoedown_stack_grow(st, initial_size ? initial_size : 8);
}
/* Frees the item array owned by the stack (not the items themselves) and
 * resets the stack to the empty, zero-capacity state, so that a repeated
 * uninit is harmless and no dangling pointer is left behind. */
void
hoedown_stack_uninit(hoedown_stack *st)
{
	assert(st);

	free(st->item);
	st->item = NULL;
	st->size = st->asize = 0;
}
/* Makes sure the stack can hold at least neosz items.  Does nothing when
 * the capacity is already sufficient; otherwise reallocates the item array
 * and zero-fills the newly added slots. */
void
hoedown_stack_grow(hoedown_stack *st, size_t neosz)
{
	size_t old_capacity;

	assert(st);

	old_capacity = st->asize;
	if (neosz <= old_capacity)
		return;

	st->item = hoedown_realloc(st->item, neosz * sizeof(void *));

	/* clear only the slots beyond the old capacity */
	memset(st->item + old_capacity, 0x0, (neosz - old_capacity) * sizeof(void *));
	st->asize = neosz;

	if (st->size > neosz)
		st->size = neosz;
}
/* Pushes item on top of the stack, doubling the capacity when it is full.
 * A stack without any capacity (for example one that was zero-initialised
 * instead of going through hoedown_stack_init) starts at 8 slots, the same
 * default hoedown_stack_init uses, instead of "doubling" 0 to 0 and writing
 * through a NULL item array. */
void
hoedown_stack_push(hoedown_stack *st, void *item)
{
	assert(st);

	if (st->size >= st->asize)
		hoedown_stack_grow(st, st->size ? st->size * 2 : 8);

	st->item[st->size++] = item;
}
/* Removes and returns the top item, or returns NULL when the stack is
 * empty. */
void *
hoedown_stack_pop(hoedown_stack *st)
{
	assert(st);

	return st->size ? st->item[--st->size] : NULL;
}
/* Returns the top item without removing it, or NULL when the stack is
 * empty. */
void *
hoedown_stack_top(const hoedown_stack *st)
{
	assert(st);

	return st->size ? st->item[st->size - 1] : NULL;
}

52
include/hoedown/stack.h Normal file
View File

@ -0,0 +1,52 @@
/* stack.h - simple stacking */
#ifndef HOEDOWN_STACK_H
#define HOEDOWN_STACK_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/*********
* TYPES *
*********/
/* Growable LIFO stack of untyped pointers.  The stack owns the item array
 * but not the objects the items point to. */
struct hoedown_stack {
/* item array; slots [0, size) are in use, item[size - 1] is the top */
void **item;
/* number of items currently on the stack */
size_t size;
/* number of slots allocated in item */
size_t asize;
};
typedef struct hoedown_stack hoedown_stack;
/*************
* FUNCTIONS *
*************/
/* hoedown_stack_init: initialize a stack */
void hoedown_stack_init(hoedown_stack *st, size_t initial_size);
/* hoedown_stack_uninit: free internal data of the stack */
void hoedown_stack_uninit(hoedown_stack *st);
/* hoedown_stack_grow: increase the allocated size to the given value */
void hoedown_stack_grow(hoedown_stack *st, size_t neosz);
/* hoedown_stack_push: push an item to the top of the stack */
void hoedown_stack_push(hoedown_stack *st, void *item);
/* hoedown_stack_pop: retrieve and remove the item at the top of the stack */
void *hoedown_stack_pop(hoedown_stack *st);
/* hoedown_stack_top: retrieve the item at the top of the stack */
void *hoedown_stack_top(const hoedown_stack *st);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_STACK_H **/

View File

@ -0,0 +1,9 @@
#include "version.h"
/* Stores Hoedown's version numbers (HOEDOWN_VERSION_MAJOR/MINOR/REVISION)
 * through the given pointers.  Any of the pointers may be NULL when the
 * caller is not interested in that component. */
void
hoedown_version(int *major, int *minor, int *revision)
{
	if (major)
		*major = HOEDOWN_VERSION_MAJOR;
	if (minor)
		*minor = HOEDOWN_VERSION_MINOR;
	if (revision)
		*revision = HOEDOWN_VERSION_REVISION;
}

33
include/hoedown/version.h Normal file
View File

@ -0,0 +1,33 @@
/* version.h - holds Hoedown's version */
#ifndef HOEDOWN_VERSION_H
#define HOEDOWN_VERSION_H
#ifdef __cplusplus
extern "C" {
#endif
/*************
* CONSTANTS *
*************/
#define HOEDOWN_VERSION "3.0.7"
#define HOEDOWN_VERSION_MAJOR 3
#define HOEDOWN_VERSION_MINOR 0
#define HOEDOWN_VERSION_REVISION 7
/*************
* FUNCTIONS *
*************/
/* hoedown_version: retrieve Hoedown's version numbers */
void hoedown_version(int *major, int *minor, int *revision);
#ifdef __cplusplus
}
#endif
#endif /** HOEDOWN_VERSION_H **/

View File

@ -20,9 +20,6 @@ isEmpty(USE_SYSTEM_SORTFILTERPROXYMODEL) {
isEmpty(USE_SYSTEM_QMATRIXCLIENT) {
USE_SYSTEM_QMATRIXCLIENT = false
}
isEmpty(USE_SYSTEM_CMARK) {
USE_SYSTEM_CMARK = false
}
isEmpty(BUNDLE_FONT) {
BUNDLE_FONT = false
}
@ -39,50 +36,27 @@ $$USE_SYSTEM_SORTFILTERPROXYMODEL {
message("Falling back to built-in SortFilterProxyModel.")
include(include/SortFilterProxyModel/SortFilterProxyModel.pri)
}
$$USE_SYSTEM_CMARK {
PKGCONFIG += libcmark
} else {
message("Falling back to built-in CMark.")
INCLUDEPATH += include/cmark
HEADERS += \
include/cmark/buffer.h \
include/cmark/chunk.h \
include/cmark/cmark.h \
include/cmark/cmark_ctype.h \
include/cmark/cmark_export.h \
include/cmark/config.h \
include/cmark/houdini.h \
include/cmark/inlines.h \
include/cmark/iterator.h \
include/cmark/node.h \
include/cmark/parser.h \
include/cmark/references.h \
include/cmark/render.h \
include/cmark/scanners.h \
include/cmark/utf8.h
SOURCES += \
include/cmark/blocks.c \
include/cmark/buffer.c \
include/cmark/cmark.c \
include/cmark/cmark_ctype.c \
include/cmark/commonmark.c \
include/cmark/entities.inc \
include/cmark/houdini_href_e.c \
include/cmark/houdini_html_e.c \
include/cmark/houdini_html_u.c \
include/cmark/html.c \
include/cmark/inlines.c \
include/cmark/iterator.c \
include/cmark/latex.c \
include/cmark/man.c \
include/cmark/node.c \
include/cmark/references.c \
include/cmark/render.c \
include/cmark/scanners.c \
include/cmark/utf8.c \
include/cmark/xml.c
}
INCLUDEPATH += include/hoedown
HEADERS += \
include/hoedown/autolink.h \
include/hoedown/buffer.h \
include/hoedown/document.h \
include/hoedown/escape.h \
include/hoedown/html.h \
include/hoedown/stack.h \
include/hoedown/version.h
SOURCES += \
include/hoedown/autolink.c \
include/hoedown/buffer.c \
include/hoedown/document.c \
include/hoedown/escape.c \
include/hoedown/html.c \
include/hoedown/html_blocks.c \
include/hoedown/html_smartypants.c \
include/hoedown/stack.c \
include/hoedown/version.c
# The following define makes your compiler emit warnings if you use
# any feature of Qt which has been marked deprecated (the exact warnings

View File

@ -14,7 +14,7 @@
#include <QMetaObject>
#include <QMimeDatabase>
#include "cmark.h"
#include "html.h"
#include "utils.h"
@ -205,8 +205,20 @@ QVariantList SpectralRoom::getUsers(const QString& prefix) {
}
// Renders `markdown` to HTML with Hoedown and posts it through postHtmlText(),
// passing the Markdown source along unchanged.
//
// The UTF-8 input is held in a QByteArray for the duration of the render, so
// there is no manually managed copy to release.  (A qstrdup() copy must be
// released with delete[], not free().)  The whole byte array is rendered,
// including anything after an embedded NUL character.
QString SpectralRoom::postMarkdownText(const QString& markdown) {
  const QByteArray utf8 = markdown.toUtf8();
  const uint8_t* input = reinterpret_cast<const uint8_t*>(utf8.constData());
  const size_t length = static_cast<size_t>(utf8.size());

  hoedown_renderer* renderer =
      hoedown_html_renderer_new(HOEDOWN_HTML_USE_XHTML, 32);
  // All block and span extensions plus explicit math, minus the quote
  // extension.
  const hoedown_extensions extensions = static_cast<hoedown_extensions>(
      (HOEDOWN_EXT_BLOCK | HOEDOWN_EXT_SPAN | HOEDOWN_EXT_MATH_EXPLICIT) &
      ~HOEDOWN_EXT_QUOTE);
  hoedown_document* document = hoedown_document_new(renderer, extensions, 32);

  // The argument is the buffer's growth unit, not a fixed capacity.
  // NOTE(review): a unit of 0 is avoided for empty input because Hoedown
  // buffers presumably grow in multiples of the unit - confirm in buffer.c.
  hoedown_buffer* html = hoedown_buffer_new(length > 0 ? length : 64);

  hoedown_document_render(document, html, input, length);
  const QString result =
      QString::fromUtf8(reinterpret_cast<const char*>(html->data),
                        static_cast<int>(html->size));

  hoedown_buffer_free(html);
  hoedown_document_free(document);
  hoedown_html_renderer_free(renderer);

  return postHtmlText(markdown, result);
}