Change markdown parser.
This commit is contained in:
parent
b71359e92a
commit
1d1cd3410a
|
@ -3,13 +3,13 @@ image: Visual Studio 2017
|
|||
environment:
|
||||
DEPLOY_DIR: Spectral-%APPVEYOR_BUILD_VERSION%
|
||||
matrix:
|
||||
- QTDIR: C:\Qt\5.12.1\mingw73_64
|
||||
- QTDIR: C:\Qt\5.12.1\msvc2017_64
|
||||
VCVARS: "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Auxiliary\\Build\\vcvars64.bat"
|
||||
PLATFORM:
|
||||
|
||||
init:
|
||||
- call "%QTDIR%\bin\qtenv2.bat"
|
||||
- set PATH=%PATH%;C:\Qt\Tools\QtCreator\bin;%QT_DIR%\bin;C:\MinGW\bin
|
||||
- set PATH=%PATH%;C:\Qt\Tools\QtCreator\bin
|
||||
- call "%VCVARS%" %platform%
|
||||
- cd /D "%APPVEYOR_BUILD_FOLDER%"
|
||||
|
||||
|
@ -18,10 +18,10 @@ before_build:
|
|||
|
||||
build_script:
|
||||
- qmake spectral.pro CONFIG+=release CONFIG+=qtquickcompiler PREFIX="%DEPLOY_DIR%"
|
||||
- mingw32-make
|
||||
- nmake
|
||||
|
||||
after_build:
|
||||
- mingw32-make install
|
||||
- nmake install
|
||||
- windeployqt --release --qmldir qml --qmldir imports "%DEPLOY_DIR%\spectral.exe"
|
||||
- 7z a spectral.zip "%DEPLOY_DIR%\"
|
||||
|
||||
|
|
|
@ -1,170 +0,0 @@
|
|||
Copyright (c) 2014, John MacFarlane
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
-----
|
||||
|
||||
houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c
|
||||
|
||||
derive from https://github.com/vmg/houdini (with some modifications)
|
||||
|
||||
Copyright (C) 2012 Vicent Martí
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
-----
|
||||
|
||||
buffer.h, buffer.c, chunk.h
|
||||
|
||||
are derived from code (C) 2012 Github, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
-----
|
||||
|
||||
utf8.c and utf8.c
|
||||
|
||||
are derived from utf8proc
|
||||
(<http://www.public-software-group.org/utf8proc>),
|
||||
(C) 2009 Public Software Group e. V., Berlin, Germany.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
-----
|
||||
|
||||
The normalization code in normalize.py was derived from the
|
||||
markdowntest project, Copyright 2013 Karl Dubost:
|
||||
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2013 Karl Dubost
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
-----
|
||||
|
||||
The CommonMark spec (test/spec.txt) is
|
||||
|
||||
Copyright (C) 2014-15 John MacFarlane
|
||||
|
||||
Released under the Creative Commons CC-BY-SA 4.0 license:
|
||||
<http://creativecommons.org/licenses/by-sa/4.0/>.
|
||||
|
||||
-----
|
||||
|
||||
The test software in test/ is
|
||||
|
||||
Copyright (c) 2014, John MacFarlane
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following
|
||||
disclaimer in the documentation and/or other materials provided
|
||||
with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
File diff suppressed because it is too large
Load Diff
|
@ -1,279 +0,0 @@
|
|||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "cmark_ctype.h"
|
||||
#include "buffer.h"
|
||||
#include "memory.h"
|
||||
|
||||
/* Used as default value for cmark_strbuf->ptr so that people can always
|
||||
* assume ptr is non-NULL and zero terminated even for new cmark_strbufs.
|
||||
*/
|
||||
unsigned char cmark_strbuf__initbuf[1];
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(x, y) ((x < y) ? x : y)
|
||||
#endif
|
||||
|
||||
void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
|
||||
bufsize_t initial_size) {
|
||||
buf->mem = mem;
|
||||
buf->asize = 0;
|
||||
buf->size = 0;
|
||||
buf->ptr = cmark_strbuf__initbuf;
|
||||
|
||||
if (initial_size > 0)
|
||||
cmark_strbuf_grow(buf, initial_size);
|
||||
}
|
||||
|
||||
static CMARK_INLINE void S_strbuf_grow_by(cmark_strbuf *buf, bufsize_t add) {
|
||||
cmark_strbuf_grow(buf, buf->size + add);
|
||||
}
|
||||
|
||||
void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) {
|
||||
assert(target_size > 0);
|
||||
|
||||
if (target_size < buf->asize)
|
||||
return;
|
||||
|
||||
if (target_size > (bufsize_t)(INT32_MAX / 2)) {
|
||||
fprintf(stderr,
|
||||
"[cmark] cmark_strbuf_grow requests buffer with size > %d, aborting\n",
|
||||
(INT32_MAX / 2));
|
||||
abort();
|
||||
}
|
||||
|
||||
/* Oversize the buffer by 50% to guarantee amortized linear time
|
||||
* complexity on append operations. */
|
||||
bufsize_t new_size = target_size + target_size / 2;
|
||||
new_size += 1;
|
||||
new_size = (new_size + 7) & ~7;
|
||||
|
||||
buf->ptr = (unsigned char *)buf->mem->realloc(buf->asize ? buf->ptr : NULL,
|
||||
new_size);
|
||||
buf->asize = new_size;
|
||||
}
|
||||
|
||||
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; }
|
||||
|
||||
void cmark_strbuf_free(cmark_strbuf *buf) {
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
if (buf->ptr != cmark_strbuf__initbuf)
|
||||
buf->mem->free(buf->ptr);
|
||||
|
||||
cmark_strbuf_init(buf->mem, buf, 0);
|
||||
}
|
||||
|
||||
void cmark_strbuf_clear(cmark_strbuf *buf) {
|
||||
buf->size = 0;
|
||||
|
||||
if (buf->asize > 0)
|
||||
buf->ptr[0] = '\0';
|
||||
}
|
||||
|
||||
void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
|
||||
bufsize_t len) {
|
||||
if (len <= 0 || data == NULL) {
|
||||
cmark_strbuf_clear(buf);
|
||||
} else {
|
||||
if (data != buf->ptr) {
|
||||
if (len >= buf->asize)
|
||||
cmark_strbuf_grow(buf, len);
|
||||
memmove(buf->ptr, data, len);
|
||||
}
|
||||
buf->size = len;
|
||||
buf->ptr[buf->size] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) {
|
||||
cmark_strbuf_set(buf, (const unsigned char *)string,
|
||||
string ? strlen(string) : 0);
|
||||
}
|
||||
|
||||
void cmark_strbuf_putc(cmark_strbuf *buf, int c) {
|
||||
S_strbuf_grow_by(buf, 1);
|
||||
buf->ptr[buf->size++] = (unsigned char)(c & 0xFF);
|
||||
buf->ptr[buf->size] = '\0';
|
||||
}
|
||||
|
||||
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
|
||||
bufsize_t len) {
|
||||
if (len <= 0)
|
||||
return;
|
||||
|
||||
S_strbuf_grow_by(buf, len);
|
||||
memmove(buf->ptr + buf->size, data, len);
|
||||
buf->size += len;
|
||||
buf->ptr[buf->size] = '\0';
|
||||
}
|
||||
|
||||
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) {
|
||||
cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string));
|
||||
}
|
||||
|
||||
void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
|
||||
const cmark_strbuf *buf) {
|
||||
bufsize_t copylen;
|
||||
|
||||
assert(buf);
|
||||
if (!data || datasize <= 0)
|
||||
return;
|
||||
|
||||
data[0] = '\0';
|
||||
|
||||
if (buf->size == 0 || buf->asize <= 0)
|
||||
return;
|
||||
|
||||
copylen = buf->size;
|
||||
if (copylen > datasize - 1)
|
||||
copylen = datasize - 1;
|
||||
memmove(data, buf->ptr, copylen);
|
||||
data[copylen] = '\0';
|
||||
}
|
||||
|
||||
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) {
|
||||
cmark_strbuf t = *buf_a;
|
||||
*buf_a = *buf_b;
|
||||
*buf_b = t;
|
||||
}
|
||||
|
||||
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) {
|
||||
unsigned char *data = buf->ptr;
|
||||
|
||||
if (buf->asize == 0) {
|
||||
/* return an empty string */
|
||||
return (unsigned char *)buf->mem->calloc(1, 1);
|
||||
}
|
||||
|
||||
cmark_strbuf_init(buf->mem, buf, 0);
|
||||
return data;
|
||||
}
|
||||
|
||||
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) {
|
||||
int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
|
||||
return (result != 0) ? result
|
||||
: (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
|
||||
}
|
||||
|
||||
bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
|
||||
if (pos >= buf->size)
|
||||
return -1;
|
||||
if (pos < 0)
|
||||
pos = 0;
|
||||
|
||||
const unsigned char *p =
|
||||
(unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos);
|
||||
if (!p)
|
||||
return -1;
|
||||
|
||||
return (bufsize_t)(p - (const unsigned char *)buf->ptr);
|
||||
}
|
||||
|
||||
bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) {
|
||||
if (pos < 0 || buf->size == 0)
|
||||
return -1;
|
||||
if (pos >= buf->size)
|
||||
pos = buf->size - 1;
|
||||
|
||||
bufsize_t i;
|
||||
for (i = pos; i >= 0; i--) {
|
||||
if (buf->ptr[i] == (unsigned char)c)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) {
|
||||
if (len < 0)
|
||||
len = 0;
|
||||
|
||||
if (len < buf->size) {
|
||||
buf->size = len;
|
||||
buf->ptr[buf->size] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) {
|
||||
if (n > 0) {
|
||||
if (n > buf->size)
|
||||
n = buf->size;
|
||||
buf->size = buf->size - n;
|
||||
if (buf->size)
|
||||
memmove(buf->ptr, buf->ptr + n, buf->size);
|
||||
|
||||
buf->ptr[buf->size] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
void cmark_strbuf_rtrim(cmark_strbuf *buf) {
|
||||
if (!buf->size)
|
||||
return;
|
||||
|
||||
while (buf->size > 0) {
|
||||
if (!cmark_isspace(buf->ptr[buf->size - 1]))
|
||||
break;
|
||||
|
||||
buf->size--;
|
||||
}
|
||||
|
||||
buf->ptr[buf->size] = '\0';
|
||||
}
|
||||
|
||||
void cmark_strbuf_trim(cmark_strbuf *buf) {
|
||||
bufsize_t i = 0;
|
||||
|
||||
if (!buf->size)
|
||||
return;
|
||||
|
||||
while (i < buf->size && cmark_isspace(buf->ptr[i]))
|
||||
i++;
|
||||
|
||||
cmark_strbuf_drop(buf, i);
|
||||
|
||||
cmark_strbuf_rtrim(buf);
|
||||
}
|
||||
|
||||
// Destructively modify string, collapsing consecutive
|
||||
// space and newline characters into a single space.
|
||||
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) {
|
||||
bool last_char_was_space = false;
|
||||
bufsize_t r, w;
|
||||
|
||||
for (r = 0, w = 0; r < s->size; ++r) {
|
||||
if (cmark_isspace(s->ptr[r])) {
|
||||
if (!last_char_was_space) {
|
||||
s->ptr[w++] = ' ';
|
||||
last_char_was_space = true;
|
||||
}
|
||||
} else {
|
||||
s->ptr[w++] = s->ptr[r];
|
||||
last_char_was_space = false;
|
||||
}
|
||||
}
|
||||
|
||||
cmark_strbuf_truncate(s, w);
|
||||
}
|
||||
|
||||
// Destructively unescape a string: remove backslashes before punctuation chars.
|
||||
extern void cmark_strbuf_unescape(cmark_strbuf *buf) {
|
||||
bufsize_t r, w;
|
||||
|
||||
for (r = 0, w = 0; r < buf->size; ++r) {
|
||||
if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1]))
|
||||
r++;
|
||||
|
||||
buf->ptr[w++] = buf->ptr[r];
|
||||
}
|
||||
|
||||
cmark_strbuf_truncate(buf, w);
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
#ifndef CMARK_BUFFER_H
|
||||
#define CMARK_BUFFER_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "cmark.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef int32_t bufsize_t;
|
||||
|
||||
typedef struct {
|
||||
cmark_mem *mem;
|
||||
unsigned char *ptr;
|
||||
bufsize_t asize, size;
|
||||
} cmark_strbuf;
|
||||
|
||||
extern unsigned char cmark_strbuf__initbuf[];
|
||||
|
||||
#define CMARK_BUF_INIT(mem) \
|
||||
{ mem, cmark_strbuf__initbuf, 0, 0 }
|
||||
|
||||
/**
|
||||
* Initialize a cmark_strbuf structure.
|
||||
*
|
||||
* For the cases where CMARK_BUF_INIT cannot be used to do static
|
||||
* initialization.
|
||||
*/
|
||||
void cmark_strbuf_init(cmark_mem *mem, cmark_strbuf *buf,
|
||||
bufsize_t initial_size);
|
||||
|
||||
/**
|
||||
* Grow the buffer to hold at least `target_size` bytes.
|
||||
*/
|
||||
void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size);
|
||||
|
||||
void cmark_strbuf_free(cmark_strbuf *buf);
|
||||
void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b);
|
||||
|
||||
bufsize_t cmark_strbuf_len(const cmark_strbuf *buf);
|
||||
|
||||
int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b);
|
||||
|
||||
unsigned char *cmark_strbuf_detach(cmark_strbuf *buf);
|
||||
void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize,
|
||||
const cmark_strbuf *buf);
|
||||
|
||||
static CMARK_INLINE const char *cmark_strbuf_cstr(const cmark_strbuf *buf) {
|
||||
return (char *)buf->ptr;
|
||||
}
|
||||
|
||||
#define cmark_strbuf_at(buf, n) ((buf)->ptr[n])
|
||||
|
||||
void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data,
|
||||
bufsize_t len);
|
||||
void cmark_strbuf_sets(cmark_strbuf *buf, const char *string);
|
||||
void cmark_strbuf_putc(cmark_strbuf *buf, int c);
|
||||
void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data,
|
||||
bufsize_t len);
|
||||
void cmark_strbuf_puts(cmark_strbuf *buf, const char *string);
|
||||
void cmark_strbuf_clear(cmark_strbuf *buf);
|
||||
|
||||
bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos);
|
||||
bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos);
|
||||
void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n);
|
||||
void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len);
|
||||
void cmark_strbuf_rtrim(cmark_strbuf *buf);
|
||||
void cmark_strbuf_trim(cmark_strbuf *buf);
|
||||
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);
|
||||
void cmark_strbuf_unescape(cmark_strbuf *s);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -1,120 +0,0 @@
|
|||
#ifndef CMARK_CHUNK_H
|
||||
#define CMARK_CHUNK_H
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "cmark.h"
|
||||
#include "buffer.h"
|
||||
#include "memory.h"
|
||||
#include "cmark_ctype.h"
|
||||
|
||||
#define CMARK_CHUNK_EMPTY \
|
||||
{ NULL, 0, 0 }
|
||||
|
||||
typedef struct {
|
||||
unsigned char *data;
|
||||
bufsize_t len;
|
||||
bufsize_t alloc; // also implies a NULL-terminated string
|
||||
} cmark_chunk;
|
||||
|
||||
static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
|
||||
if (c->alloc)
|
||||
mem->free(c->data);
|
||||
|
||||
c->data = NULL;
|
||||
c->alloc = 0;
|
||||
c->len = 0;
|
||||
}
|
||||
|
||||
static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) {
|
||||
assert(!c->alloc);
|
||||
|
||||
while (c->len && cmark_isspace(c->data[0])) {
|
||||
c->data++;
|
||||
c->len--;
|
||||
}
|
||||
}
|
||||
|
||||
static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) {
|
||||
assert(!c->alloc);
|
||||
|
||||
while (c->len > 0) {
|
||||
if (!cmark_isspace(c->data[c->len - 1]))
|
||||
break;
|
||||
|
||||
c->len--;
|
||||
}
|
||||
}
|
||||
|
||||
static CMARK_INLINE void cmark_chunk_trim(cmark_chunk *c) {
|
||||
cmark_chunk_ltrim(c);
|
||||
cmark_chunk_rtrim(c);
|
||||
}
|
||||
|
||||
static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c,
|
||||
bufsize_t offset) {
|
||||
const unsigned char *p =
|
||||
(unsigned char *)memchr(ch->data + offset, c, ch->len - offset);
|
||||
return p ? (bufsize_t)(p - ch->data) : ch->len;
|
||||
}
|
||||
|
||||
static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_mem *mem,
|
||||
cmark_chunk *c) {
|
||||
unsigned char *str;
|
||||
|
||||
if (c->alloc) {
|
||||
return (char *)c->data;
|
||||
}
|
||||
str = (unsigned char *)mem->calloc(c->len + 1, 1);
|
||||
if (c->len > 0) {
|
||||
memcpy(str, c->data, c->len);
|
||||
}
|
||||
str[c->len] = 0;
|
||||
c->data = str;
|
||||
c->alloc = 1;
|
||||
|
||||
return (char *)str;
|
||||
}
|
||||
|
||||
static CMARK_INLINE void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c,
|
||||
const char *str) {
|
||||
unsigned char *old = c->alloc ? c->data : NULL;
|
||||
if (str == NULL) {
|
||||
c->len = 0;
|
||||
c->data = NULL;
|
||||
c->alloc = 0;
|
||||
} else {
|
||||
c->len = (bufsize_t)strlen(str);
|
||||
c->data = (unsigned char *)mem->calloc(c->len + 1, 1);
|
||||
c->alloc = 1;
|
||||
memcpy(c->data, str, c->len + 1);
|
||||
}
|
||||
if (old != NULL) {
|
||||
mem->free(old);
|
||||
}
|
||||
}
|
||||
|
||||
static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) {
|
||||
bufsize_t len = data ? (bufsize_t)strlen(data) : 0;
|
||||
cmark_chunk c = {(unsigned char *)data, len, 0};
|
||||
return c;
|
||||
}
|
||||
|
||||
static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch,
|
||||
bufsize_t pos, bufsize_t len) {
|
||||
cmark_chunk c = {ch->data + pos, len, 0};
|
||||
return c;
|
||||
}
|
||||
|
||||
static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) {
|
||||
cmark_chunk c;
|
||||
|
||||
c.len = buf->size;
|
||||
c.data = cmark_strbuf_detach(buf);
|
||||
c.alloc = 1;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,43 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include "node.h"
|
||||
#include "houdini.h"
|
||||
#include "cmark.h"
|
||||
#include "buffer.h"
|
||||
|
||||
int cmark_version() { return CMARK_VERSION; }
|
||||
|
||||
const char *cmark_version_string() { return CMARK_VERSION_STRING; }
|
||||
|
||||
static void *xcalloc(size_t nmem, size_t size) {
|
||||
void *ptr = calloc(nmem, size);
|
||||
if (!ptr) {
|
||||
fprintf(stderr, "[cmark] calloc returned null pointer, aborting\n");
|
||||
abort();
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void *xrealloc(void *ptr, size_t size) {
|
||||
void *new_ptr = realloc(ptr, size);
|
||||
if (!new_ptr) {
|
||||
fprintf(stderr, "[cmark] realloc returned null pointer, aborting\n");
|
||||
abort();
|
||||
}
|
||||
return new_ptr;
|
||||
}
|
||||
|
||||
cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
|
||||
|
||||
char *cmark_markdown_to_html(const char *text, size_t len, int options) {
|
||||
cmark_node *doc;
|
||||
char *result;
|
||||
|
||||
doc = cmark_parse_document(text, len, options);
|
||||
|
||||
result = cmark_render_html(doc, options);
|
||||
cmark_node_free(doc);
|
||||
|
||||
return result;
|
||||
}
|
|
@ -1,644 +0,0 @@
|
|||
#ifndef CMARK_H
|
||||
#define CMARK_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cmark_export.h>
|
||||
#include <cmark_version.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** # NAME
|
||||
*
|
||||
* **cmark** - CommonMark parsing, manipulating, and rendering
|
||||
*/
|
||||
|
||||
/** # DESCRIPTION
|
||||
*
|
||||
* ## Simple Interface
|
||||
*/
|
||||
|
||||
/** Convert 'text' (assumed to be a UTF-8 encoded string with length
|
||||
* 'len') from CommonMark Markdown to HTML, returning a null-terminated,
|
||||
* UTF-8-encoded string. It is the caller's responsibility
|
||||
* to free the returned buffer.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
char *cmark_markdown_to_html(const char *text, size_t len, int options);
|
||||
|
||||
/** ## Node Structure
|
||||
*/
|
||||
|
||||
typedef enum {
|
||||
/* Error status */
|
||||
CMARK_NODE_NONE,
|
||||
|
||||
/* Block */
|
||||
CMARK_NODE_DOCUMENT,
|
||||
CMARK_NODE_BLOCK_QUOTE,
|
||||
CMARK_NODE_LIST,
|
||||
CMARK_NODE_ITEM,
|
||||
CMARK_NODE_CODE_BLOCK,
|
||||
CMARK_NODE_HTML_BLOCK,
|
||||
CMARK_NODE_CUSTOM_BLOCK,
|
||||
CMARK_NODE_PARAGRAPH,
|
||||
CMARK_NODE_HEADING,
|
||||
CMARK_NODE_THEMATIC_BREAK,
|
||||
|
||||
CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
|
||||
CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK,
|
||||
|
||||
/* Inline */
|
||||
CMARK_NODE_TEXT,
|
||||
CMARK_NODE_SOFTBREAK,
|
||||
CMARK_NODE_LINEBREAK,
|
||||
CMARK_NODE_CODE,
|
||||
CMARK_NODE_HTML_INLINE,
|
||||
CMARK_NODE_CUSTOM_INLINE,
|
||||
CMARK_NODE_EMPH,
|
||||
CMARK_NODE_STRONG,
|
||||
CMARK_NODE_LINK,
|
||||
CMARK_NODE_IMAGE,
|
||||
|
||||
CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
|
||||
CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
|
||||
} cmark_node_type;
|
||||
|
||||
/* For backwards compatibility: */
|
||||
#define CMARK_NODE_HEADER CMARK_NODE_HEADING
|
||||
#define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK
|
||||
#define CMARK_NODE_HTML CMARK_NODE_HTML_BLOCK
|
||||
#define CMARK_NODE_INLINE_HTML CMARK_NODE_HTML_INLINE
|
||||
|
||||
typedef enum {
|
||||
CMARK_NO_LIST,
|
||||
CMARK_BULLET_LIST,
|
||||
CMARK_ORDERED_LIST
|
||||
} cmark_list_type;
|
||||
|
||||
typedef enum {
|
||||
CMARK_NO_DELIM,
|
||||
CMARK_PERIOD_DELIM,
|
||||
CMARK_PAREN_DELIM
|
||||
} cmark_delim_type;
|
||||
|
||||
typedef struct cmark_node cmark_node;
|
||||
typedef struct cmark_parser cmark_parser;
|
||||
typedef struct cmark_iter cmark_iter;
|
||||
|
||||
/**
|
||||
* ## Custom memory allocator support
|
||||
*/
|
||||
|
||||
/** Defines the memory allocation functions to be used by CMark
|
||||
* when parsing and allocating a document tree
|
||||
*/
|
||||
typedef struct cmark_mem {
|
||||
void *(*calloc)(size_t, size_t);
|
||||
void *(*realloc)(void *, size_t);
|
||||
void (*free)(void *);
|
||||
} cmark_mem;
|
||||
|
||||
/**
|
||||
* ## Creating and Destroying Nodes
|
||||
*/
|
||||
|
||||
/** Creates a new node of type 'type'. Note that the node may have
|
||||
* other required properties, which it is the caller's responsibility
|
||||
* to assign.
|
||||
*/
|
||||
CMARK_EXPORT cmark_node *cmark_node_new(cmark_node_type type);
|
||||
|
||||
/** Same as `cmark_node_new`, but explicitly listing the memory
|
||||
* allocator used to allocate the node. Note: be sure to use the same
|
||||
* allocator for every node in a tree, or bad things can happen.
|
||||
*/
|
||||
CMARK_EXPORT cmark_node *cmark_node_new_with_mem(cmark_node_type type,
|
||||
cmark_mem *mem);
|
||||
|
||||
/** Frees the memory allocated for a node and any children.
|
||||
*/
|
||||
CMARK_EXPORT void cmark_node_free(cmark_node *node);
|
||||
|
||||
/**
|
||||
* ## Tree Traversal
|
||||
*/
|
||||
|
||||
/** Returns the next node in the sequence after 'node', or NULL if
|
||||
* there is none.
|
||||
*/
|
||||
CMARK_EXPORT cmark_node *cmark_node_next(cmark_node *node);
|
||||
|
||||
/** Returns the previous node in the sequence after 'node', or NULL if
|
||||
* there is none.
|
||||
*/
|
||||
CMARK_EXPORT cmark_node *cmark_node_previous(cmark_node *node);
|
||||
|
||||
/** Returns the parent of 'node', or NULL if there is none.
|
||||
*/
|
||||
CMARK_EXPORT cmark_node *cmark_node_parent(cmark_node *node);
|
||||
|
||||
/** Returns the first child of 'node', or NULL if 'node' has no children.
|
||||
*/
|
||||
CMARK_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
|
||||
|
||||
/** Returns the last child of 'node', or NULL if 'node' has no children.
|
||||
*/
|
||||
CMARK_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
|
||||
|
||||
/**
|
||||
* ## Iterator
|
||||
*
|
||||
* An iterator will walk through a tree of nodes, starting from a root
|
||||
* node, returning one node at a time, together with information about
|
||||
* whether the node is being entered or exited. The iterator will
|
||||
* first descend to a child node, if there is one. When there is no
|
||||
* child, the iterator will go to the next sibling. When there is no
|
||||
* next sibling, the iterator will return to the parent (but with
|
||||
* a 'cmark_event_type' of `CMARK_EVENT_EXIT`). The iterator will
|
||||
* return `CMARK_EVENT_DONE` when it reaches the root node again.
|
||||
* One natural application is an HTML renderer, where an `ENTER` event
|
||||
* outputs an open tag and an `EXIT` event outputs a close tag.
|
||||
* An iterator might also be used to transform an AST in some systematic
|
||||
* way, for example, turning all level-3 headings into regular paragraphs.
|
||||
*
|
||||
* void
|
||||
* usage_example(cmark_node *root) {
|
||||
* cmark_event_type ev_type;
|
||||
* cmark_iter *iter = cmark_iter_new(root);
|
||||
*
|
||||
* while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
|
||||
* cmark_node *cur = cmark_iter_get_node(iter);
|
||||
* // Do something with `cur` and `ev_type`
|
||||
* }
|
||||
*
|
||||
* cmark_iter_free(iter);
|
||||
* }
|
||||
*
|
||||
* Iterators will never return `EXIT` events for leaf nodes, which are nodes
|
||||
* of type:
|
||||
*
|
||||
* * CMARK_NODE_HTML_BLOCK
|
||||
* * CMARK_NODE_THEMATIC_BREAK
|
||||
* * CMARK_NODE_CODE_BLOCK
|
||||
* * CMARK_NODE_TEXT
|
||||
* * CMARK_NODE_SOFTBREAK
|
||||
* * CMARK_NODE_LINEBREAK
|
||||
* * CMARK_NODE_CODE
|
||||
* * CMARK_NODE_HTML_INLINE
|
||||
*
|
||||
* Nodes must only be modified after an `EXIT` event, or an `ENTER` event for
|
||||
* leaf nodes.
|
||||
*/
|
||||
|
||||
typedef enum {
|
||||
CMARK_EVENT_NONE,
|
||||
CMARK_EVENT_DONE,
|
||||
CMARK_EVENT_ENTER,
|
||||
CMARK_EVENT_EXIT
|
||||
} cmark_event_type;
|
||||
|
||||
/** Creates a new iterator starting at 'root'. The current node and event
|
||||
* type are undefined until 'cmark_iter_next' is called for the first time.
|
||||
* The memory allocated for the iterator should be released using
|
||||
* 'cmark_iter_free' when it is no longer needed.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_iter *cmark_iter_new(cmark_node *root);
|
||||
|
||||
/** Frees the memory allocated for an iterator.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
void cmark_iter_free(cmark_iter *iter);
|
||||
|
||||
/** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`,
|
||||
* `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`).
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_event_type cmark_iter_next(cmark_iter *iter);
|
||||
|
||||
/** Returns the current node.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_node *cmark_iter_get_node(cmark_iter *iter);
|
||||
|
||||
/** Returns the current event type.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_event_type cmark_iter_get_event_type(cmark_iter *iter);
|
||||
|
||||
/** Returns the root node.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_node *cmark_iter_get_root(cmark_iter *iter);
|
||||
|
||||
/** Resets the iterator so that the current node is 'current' and
|
||||
* the event type is 'event_type'. The new current node must be a
|
||||
* descendant of the root node or the root node itself.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
|
||||
cmark_event_type event_type);
|
||||
|
||||
/**
|
||||
* ## Accessors
|
||||
*/
|
||||
|
||||
/** Returns the user data of 'node'.
|
||||
*/
|
||||
CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node);
|
||||
|
||||
/** Sets arbitrary user data for 'node'. Returns 1 on success,
|
||||
* 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data);
|
||||
|
||||
/** Returns the type of 'node', or `CMARK_NODE_NONE` on error.
|
||||
*/
|
||||
CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node);
|
||||
|
||||
/** Like 'cmark_node_get_type', but returns a string representation
|
||||
of the type, or `"<unknown>"`.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
const char *cmark_node_get_type_string(cmark_node *node);
|
||||
|
||||
/** Returns the string contents of 'node', or an empty
|
||||
string if none is set. Returns NULL if called on a
|
||||
node that does not have string content.
|
||||
*/
|
||||
CMARK_EXPORT const char *cmark_node_get_literal(cmark_node *node);
|
||||
|
||||
/** Sets the string contents of 'node'. Returns 1 on success,
|
||||
* 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_literal(cmark_node *node, const char *content);
|
||||
|
||||
/** Returns the heading level of 'node', or 0 if 'node' is not a heading.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_get_heading_level(cmark_node *node);
|
||||
|
||||
/* For backwards compatibility */
|
||||
#define cmark_node_get_header_level cmark_node_get_heading_level
|
||||
#define cmark_node_set_header_level cmark_node_set_heading_level
|
||||
|
||||
/** Sets the heading level of 'node', returning 1 on success and 0 on error.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_heading_level(cmark_node *node, int level);
|
||||
|
||||
/** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node'
|
||||
* is not a list.
|
||||
*/
|
||||
CMARK_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node);
|
||||
|
||||
/** Sets the list type of 'node', returning 1 on success and 0 on error.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_list_type(cmark_node *node,
|
||||
cmark_list_type type);
|
||||
|
||||
/** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node'
|
||||
* is not a list.
|
||||
*/
|
||||
CMARK_EXPORT cmark_delim_type cmark_node_get_list_delim(cmark_node *node);
|
||||
|
||||
/** Sets the list delimiter type of 'node', returning 1 on success and 0
|
||||
* on error.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_list_delim(cmark_node *node,
|
||||
cmark_delim_type delim);
|
||||
|
||||
/** Returns starting number of 'node', if it is an ordered list, otherwise 0.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_get_list_start(cmark_node *node);
|
||||
|
||||
/** Sets starting number of 'node', if it is an ordered list. Returns 1
|
||||
* on success, 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_list_start(cmark_node *node, int start);
|
||||
|
||||
/** Returns 1 if 'node' is a tight list, 0 otherwise.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_get_list_tight(cmark_node *node);
|
||||
|
||||
/** Sets the "tightness" of a list. Returns 1 on success, 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
|
||||
|
||||
/** Returns the info string from a fenced code block.
|
||||
*/
|
||||
CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);
|
||||
|
||||
/** Sets the info string in a fenced code block, returning 1 on
|
||||
* success and 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info);
|
||||
|
||||
/** Returns the URL of a link or image 'node', or an empty string
|
||||
if no URL is set. Returns NULL if called on a node that is
|
||||
not a link or image.
|
||||
*/
|
||||
CMARK_EXPORT const char *cmark_node_get_url(cmark_node *node);
|
||||
|
||||
/** Sets the URL of a link or image 'node'. Returns 1 on success,
|
||||
* 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_url(cmark_node *node, const char *url);
|
||||
|
||||
/** Returns the title of a link or image 'node', or an empty
|
||||
string if no title is set. Returns NULL if called on a node
|
||||
that is not a link or image.
|
||||
*/
|
||||
CMARK_EXPORT const char *cmark_node_get_title(cmark_node *node);
|
||||
|
||||
/** Sets the title of a link or image 'node'. Returns 1 on success,
|
||||
* 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_title(cmark_node *node, const char *title);
|
||||
|
||||
/** Returns the literal "on enter" text for a custom 'node', or
|
||||
an empty string if no on_enter is set. Returns NULL if called
|
||||
on a non-custom node.
|
||||
*/
|
||||
CMARK_EXPORT const char *cmark_node_get_on_enter(cmark_node *node);
|
||||
|
||||
/** Sets the literal text to render "on enter" for a custom 'node'.
|
||||
Any children of the node will be rendered after this text.
|
||||
Returns 1 on success 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_on_enter(cmark_node *node,
|
||||
const char *on_enter);
|
||||
|
||||
/** Returns the literal "on exit" text for a custom 'node', or
|
||||
an empty string if no on_exit is set. Returns NULL if
|
||||
called on a non-custom node.
|
||||
*/
|
||||
CMARK_EXPORT const char *cmark_node_get_on_exit(cmark_node *node);
|
||||
|
||||
/** Sets the literal text to render "on exit" for a custom 'node'.
|
||||
Any children of the node will be rendered before this text.
|
||||
Returns 1 on success 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_set_on_exit(cmark_node *node, const char *on_exit);
|
||||
|
||||
/** Returns the line on which 'node' begins.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_get_start_line(cmark_node *node);
|
||||
|
||||
/** Returns the column at which 'node' begins.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_get_start_column(cmark_node *node);
|
||||
|
||||
/** Returns the line on which 'node' ends.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node);
|
||||
|
||||
/** Returns the column at which 'node' ends.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_get_end_column(cmark_node *node);
|
||||
|
||||
/**
|
||||
* ## Tree Manipulation
|
||||
*/
|
||||
|
||||
/** Unlinks a 'node', removing it from the tree, but not freeing its
|
||||
* memory. (Use 'cmark_node_free' for that.)
|
||||
*/
|
||||
CMARK_EXPORT void cmark_node_unlink(cmark_node *node);
|
||||
|
||||
/** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_insert_before(cmark_node *node,
|
||||
cmark_node *sibling);
|
||||
|
||||
/** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
|
||||
|
||||
/** Replaces 'oldnode' with 'newnode' and unlinks 'oldnode' (but does
|
||||
* not free its memory).
|
||||
* Returns 1 on success, 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode);
|
||||
|
||||
/** Adds 'child' to the beginning of the children of 'node'.
|
||||
* Returns 1 on success, 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child);
|
||||
|
||||
/** Adds 'child' to the end of the children of 'node'.
|
||||
* Returns 1 on success, 0 on failure.
|
||||
*/
|
||||
CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child);
|
||||
|
||||
/** Consolidates adjacent text nodes.
|
||||
*/
|
||||
CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root);
|
||||
|
||||
/**
|
||||
* ## Parsing
|
||||
*
|
||||
* Simple interface:
|
||||
*
|
||||
* cmark_node *document = cmark_parse_document("Hello *world*", 13,
|
||||
* CMARK_OPT_DEFAULT);
|
||||
*
|
||||
* Streaming interface:
|
||||
*
|
||||
* cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
|
||||
* FILE *fp = fopen("myfile.md", "rb");
|
||||
* while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
|
||||
* cmark_parser_feed(parser, buffer, bytes);
|
||||
* if (bytes < sizeof(buffer)) {
|
||||
* break;
|
||||
* }
|
||||
* }
|
||||
* document = cmark_parser_finish(parser);
|
||||
* cmark_parser_free(parser);
|
||||
*/
|
||||
|
||||
/** Creates a new parser object.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_parser *cmark_parser_new(int options);
|
||||
|
||||
/** Creates a new parser object with the given memory allocator
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem);
|
||||
|
||||
/** Frees memory allocated for a parser object.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
void cmark_parser_free(cmark_parser *parser);
|
||||
|
||||
/** Feeds a string of length 'len' to 'parser'.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
|
||||
|
||||
/** Finish parsing and return a pointer to a tree of nodes.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_node *cmark_parser_finish(cmark_parser *parser);
|
||||
|
||||
/** Parse a CommonMark document in 'buffer' of length 'len'.
|
||||
* Returns a pointer to a tree of nodes. The memory allocated for
|
||||
* the node tree should be released using 'cmark_node_free'
|
||||
* when it is no longer needed.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
|
||||
|
||||
/** Parse a CommonMark document in file 'f', returning a pointer to
|
||||
* a tree of nodes. The memory allocated for the node tree should be
|
||||
* released using 'cmark_node_free' when it is no longer needed.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
cmark_node *cmark_parse_file(FILE *f, int options);
|
||||
|
||||
/**
|
||||
* ## Rendering
|
||||
*/
|
||||
|
||||
/** Render a 'node' tree as XML. It is the caller's responsibility
|
||||
* to free the returned buffer.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
char *cmark_render_xml(cmark_node *root, int options);
|
||||
|
||||
/** Render a 'node' tree as an HTML fragment. It is up to the user
|
||||
* to add an appropriate header and footer. It is the caller's
|
||||
* responsibility to free the returned buffer.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
char *cmark_render_html(cmark_node *root, int options);
|
||||
|
||||
/** Render a 'node' tree as a groff man page, without the header.
|
||||
* It is the caller's responsibility to free the returned buffer.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
char *cmark_render_man(cmark_node *root, int options, int width);
|
||||
|
||||
/** Render a 'node' tree as a commonmark document.
|
||||
* It is the caller's responsibility to free the returned buffer.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
char *cmark_render_commonmark(cmark_node *root, int options, int width);
|
||||
|
||||
/** Render a 'node' tree as a LaTeX document.
|
||||
* It is the caller's responsibility to free the returned buffer.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
char *cmark_render_latex(cmark_node *root, int options, int width);
|
||||
|
||||
/**
|
||||
* ## Options
|
||||
*/
|
||||
|
||||
/** Default options.
|
||||
*/
|
||||
#define CMARK_OPT_DEFAULT 0
|
||||
|
||||
/**
|
||||
* ### Options affecting rendering
|
||||
*/
|
||||
|
||||
/** Include a `data-sourcepos` attribute on all block elements.
|
||||
*/
|
||||
#define CMARK_OPT_SOURCEPOS (1 << 1)
|
||||
|
||||
/** Render `softbreak` elements as hard line breaks.
|
||||
*/
|
||||
#define CMARK_OPT_HARDBREAKS (1 << 2)
|
||||
|
||||
/** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`,
|
||||
* `file:`, and `data:`, except for `image/png`, `image/gif`,
|
||||
* `image/jpeg`, or `image/webp` mime types). Raw HTML is replaced
|
||||
* by a placeholder HTML comment. Unsafe links are replaced by
|
||||
* empty strings.
|
||||
*/
|
||||
#define CMARK_OPT_SAFE (1 << 3)
|
||||
|
||||
/** Render `softbreak` elements as spaces.
|
||||
*/
|
||||
#define CMARK_OPT_NOBREAKS (1 << 4)
|
||||
|
||||
/**
|
||||
* ### Options affecting parsing
|
||||
*/
|
||||
|
||||
/** Legacy option (no effect).
|
||||
*/
|
||||
#define CMARK_OPT_NORMALIZE (1 << 8)
|
||||
|
||||
/** Validate UTF-8 in the input before parsing, replacing illegal
|
||||
* sequences with the replacement character U+FFFD.
|
||||
*/
|
||||
#define CMARK_OPT_VALIDATE_UTF8 (1 << 9)
|
||||
|
||||
/** Convert straight quotes to curly, --- to em dashes, -- to en dashes.
|
||||
*/
|
||||
#define CMARK_OPT_SMART (1 << 10)
|
||||
|
||||
/**
|
||||
* ## Version information
|
||||
*/
|
||||
|
||||
/** The library version as integer for runtime checks. Also available as
|
||||
* macro CMARK_VERSION for compile time checks.
|
||||
*
|
||||
* * Bits 16-23 contain the major version.
|
||||
* * Bits 8-15 contain the minor version.
|
||||
* * Bits 0-7 contain the patchlevel.
|
||||
*
|
||||
* In hexadecimal format, the number 0x010203 represents version 1.2.3.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
int cmark_version(void);
|
||||
|
||||
/** The library version string for runtime checks. Also available as
|
||||
* macro CMARK_VERSION_STRING for compile time checks.
|
||||
*/
|
||||
CMARK_EXPORT
|
||||
const char *cmark_version_string(void);
|
||||
|
||||
/** # AUTHORS
|
||||
*
|
||||
* John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
|
||||
*/
|
||||
|
||||
#ifndef CMARK_NO_SHORT_NAMES
|
||||
#define NODE_DOCUMENT CMARK_NODE_DOCUMENT
|
||||
#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE
|
||||
#define NODE_LIST CMARK_NODE_LIST
|
||||
#define NODE_ITEM CMARK_NODE_ITEM
|
||||
#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK
|
||||
#define NODE_HTML_BLOCK CMARK_NODE_HTML_BLOCK
|
||||
#define NODE_CUSTOM_BLOCK CMARK_NODE_CUSTOM_BLOCK
|
||||
#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH
|
||||
#define NODE_HEADING CMARK_NODE_HEADING
|
||||
#define NODE_HEADER CMARK_NODE_HEADER
|
||||
#define NODE_THEMATIC_BREAK CMARK_NODE_THEMATIC_BREAK
|
||||
#define NODE_HRULE CMARK_NODE_HRULE
|
||||
#define NODE_TEXT CMARK_NODE_TEXT
|
||||
#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK
|
||||
#define NODE_LINEBREAK CMARK_NODE_LINEBREAK
|
||||
#define NODE_CODE CMARK_NODE_CODE
|
||||
#define NODE_HTML_INLINE CMARK_NODE_HTML_INLINE
|
||||
#define NODE_CUSTOM_INLINE CMARK_NODE_CUSTOM_INLINE
|
||||
#define NODE_EMPH CMARK_NODE_EMPH
|
||||
#define NODE_STRONG CMARK_NODE_STRONG
|
||||
#define NODE_LINK CMARK_NODE_LINK
|
||||
#define NODE_IMAGE CMARK_NODE_IMAGE
|
||||
#define BULLET_LIST CMARK_BULLET_LIST
|
||||
#define ORDERED_LIST CMARK_ORDERED_LIST
|
||||
#define PERIOD_DELIM CMARK_PERIOD_DELIM
|
||||
#define PAREN_DELIM CMARK_PAREN_DELIM
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,44 +0,0 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include "cmark_ctype.h"
|
||||
|
||||
/** 1 = space, 2 = punct, 3 = digit, 4 = alpha, 0 = other
|
||||
*/
|
||||
static const uint8_t cmark_ctype_class[256] = {
|
||||
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
||||
/* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
|
||||
/* 1 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* 2 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
/* 3 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,
|
||||
/* 4 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
/* 5 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2,
|
||||
/* 6 */ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
/* 7 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0,
|
||||
/* 8 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* 9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* a */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* b */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* c */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* d */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* e */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
/**
|
||||
* Returns 1 if c is a "whitespace" character as defined by the spec.
|
||||
*/
|
||||
int cmark_isspace(char c) { return cmark_ctype_class[(uint8_t)c] == 1; }
|
||||
|
||||
/**
|
||||
* Returns 1 if c is an ascii punctuation character.
|
||||
*/
|
||||
int cmark_ispunct(char c) { return cmark_ctype_class[(uint8_t)c] == 2; }
|
||||
|
||||
int cmark_isalnum(char c) {
|
||||
uint8_t result;
|
||||
result = cmark_ctype_class[(uint8_t)c];
|
||||
return (result == 3 || result == 4);
|
||||
}
|
||||
|
||||
int cmark_isdigit(char c) { return cmark_ctype_class[(uint8_t)c] == 3; }
|
||||
|
||||
int cmark_isalpha(char c) { return cmark_ctype_class[(uint8_t)c] == 4; }
|
|
@ -1,26 +0,0 @@
|
|||
#ifndef CMARK_CMARK_CTYPE_H
|
||||
#define CMARK_CMARK_CTYPE_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** Locale-independent versions of functions from ctype.h.
|
||||
* We want cmark to behave the same no matter what the system locale.
|
||||
*/
|
||||
|
||||
int cmark_isspace(char c);
|
||||
|
||||
int cmark_ispunct(char c);
|
||||
|
||||
int cmark_isalnum(char c);
|
||||
|
||||
int cmark_isdigit(char c);
|
||||
|
||||
int cmark_isalpha(char c);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,42 +0,0 @@
|
|||
|
||||
#ifndef CMARK_EXPORT_H
|
||||
#define CMARK_EXPORT_H
|
||||
|
||||
#ifdef CMARK_STATIC_DEFINE
|
||||
# define CMARK_EXPORT
|
||||
# define CMARK_NO_EXPORT
|
||||
#else
|
||||
# ifndef CMARK_EXPORT
|
||||
# ifdef libcmark_EXPORTS
|
||||
/* We are building this library */
|
||||
# define CMARK_EXPORT __attribute__((visibility("default")))
|
||||
# else
|
||||
/* We are using this library */
|
||||
# define CMARK_EXPORT __attribute__((visibility("default")))
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifndef CMARK_NO_EXPORT
|
||||
# define CMARK_NO_EXPORT __attribute__((visibility("hidden")))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef CMARK_DEPRECATED
|
||||
# define CMARK_DEPRECATED __attribute__ ((__deprecated__))
|
||||
#endif
|
||||
|
||||
#ifndef CMARK_DEPRECATED_EXPORT
|
||||
# define CMARK_DEPRECATED_EXPORT CMARK_EXPORT CMARK_DEPRECATED
|
||||
#endif
|
||||
|
||||
#ifndef CMARK_DEPRECATED_NO_EXPORT
|
||||
# define CMARK_DEPRECATED_NO_EXPORT CMARK_NO_EXPORT CMARK_DEPRECATED
|
||||
#endif
|
||||
|
||||
#if 0 /* DEFINE_NO_DEPRECATED */
|
||||
# ifndef CMARK_NO_DEPRECATED
|
||||
# define CMARK_NO_DEPRECATED
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* CMARK_EXPORT_H */
|
|
@ -1,7 +0,0 @@
|
|||
#ifndef CMARK_VERSION_H
|
||||
#define CMARK_VERSION_H
|
||||
|
||||
#define CMARK_VERSION ((0 << 16) | (28 << 8) | 3)
|
||||
#define CMARK_VERSION_STRING "0.28.3"
|
||||
|
||||
#endif
|
|
@ -1,481 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "cmark.h"
|
||||
#include "node.h"
|
||||
#include "buffer.h"
|
||||
#include "utf8.h"
|
||||
#include "scanners.h"
|
||||
#include "render.h"
|
||||
|
||||
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
|
||||
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
|
||||
#define CR() renderer->cr(renderer)
|
||||
#define BLANKLINE() renderer->blankline(renderer)
|
||||
#define ENCODED_SIZE 20
|
||||
#define LISTMARKER_SIZE 20
|
||||
|
||||
// Functions to convert cmark_nodes to commonmark strings.
|
||||
|
||||
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
|
||||
int32_t c, unsigned char nextc) {
|
||||
bool needs_escaping = false;
|
||||
bool follows_digit =
|
||||
renderer->buffer->size > 0 &&
|
||||
cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
|
||||
char encoded[ENCODED_SIZE];
|
||||
|
||||
needs_escaping =
|
||||
c < 0x80 && escape != LITERAL &&
|
||||
((escape == NORMAL &&
|
||||
(c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
|
||||
c == '>' || c == '\\' || c == '`' || c == '!' ||
|
||||
(c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') ||
|
||||
(renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
|
||||
// begin_content doesn't get set to false til we've passed digits
|
||||
// at the beginning of line, so...
|
||||
!follows_digit) ||
|
||||
(renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
|
||||
(nextc == 0 || cmark_isspace(nextc))))) ||
|
||||
(escape == URL &&
|
||||
(c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' ||
|
||||
c == ')' || c == '(')) ||
|
||||
(escape == TITLE &&
|
||||
(c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));
|
||||
|
||||
if (needs_escaping) {
|
||||
if (cmark_isspace(c)) {
|
||||
// use percent encoding for spaces
|
||||
snprintf(encoded, ENCODED_SIZE, "%%%2x", c);
|
||||
cmark_strbuf_puts(renderer->buffer, encoded);
|
||||
renderer->column += 3;
|
||||
} else {
|
||||
cmark_render_ascii(renderer, "\\");
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
}
|
||||
|
||||
static int longest_backtick_sequence(const char *code) {
|
||||
int longest = 0;
|
||||
int current = 0;
|
||||
size_t i = 0;
|
||||
size_t code_len = strlen(code);
|
||||
while (i <= code_len) {
|
||||
if (code[i] == '`') {
|
||||
current++;
|
||||
} else {
|
||||
if (current > longest) {
|
||||
longest = current;
|
||||
}
|
||||
current = 0;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return longest;
|
||||
}
|
||||
|
||||
static int shortest_unused_backtick_sequence(const char *code) {
|
||||
// note: if the shortest sequence is >= 32, this returns 32
|
||||
// so as not to overflow the bit array.
|
||||
uint32_t used = 1;
|
||||
int current = 0;
|
||||
size_t i = 0;
|
||||
size_t code_len = strlen(code);
|
||||
while (i <= code_len) {
|
||||
if (code[i] == '`') {
|
||||
current++;
|
||||
} else {
|
||||
if (current > 0 && current < 32) {
|
||||
used |= (1U << current);
|
||||
}
|
||||
current = 0;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
// return number of first bit that is 0:
|
||||
i = 0;
|
||||
while (i < 32 && used & 1) {
|
||||
used = used >> 1;
|
||||
i++;
|
||||
}
|
||||
return (int)i;
|
||||
}
|
||||
|
||||
static bool is_autolink(cmark_node *node) {
|
||||
cmark_chunk *title;
|
||||
cmark_chunk *url;
|
||||
cmark_node *link_text;
|
||||
char *realurl;
|
||||
int realurllen;
|
||||
|
||||
if (node->type != CMARK_NODE_LINK) {
|
||||
return false;
|
||||
}
|
||||
|
||||
url = &node->as.link.url;
|
||||
if (url->len == 0 || scan_scheme(url, 0) == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
title = &node->as.link.title;
|
||||
// if it has a title, we can't treat it as an autolink:
|
||||
if (title->len > 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
link_text = node->first_child;
|
||||
if (link_text == NULL) {
|
||||
return false;
|
||||
}
|
||||
cmark_consolidate_text_nodes(link_text);
|
||||
realurl = (char *)url->data;
|
||||
realurllen = url->len;
|
||||
if (strncmp(realurl, "mailto:", 7) == 0) {
|
||||
realurl += 7;
|
||||
realurllen -= 7;
|
||||
}
|
||||
return (realurllen == link_text->as.literal.len &&
|
||||
strncmp(realurl, (char *)link_text->as.literal.data,
|
||||
link_text->as.literal.len) == 0);
|
||||
}
|
||||
|
||||
// if node is a block node, returns node.
|
||||
// otherwise returns first block-level node that is an ancestor of node.
|
||||
// if there is no block-level ancestor, returns NULL.
|
||||
static cmark_node *get_containing_block(cmark_node *node) {
|
||||
while (node) {
|
||||
if (node->type >= CMARK_NODE_FIRST_BLOCK &&
|
||||
node->type <= CMARK_NODE_LAST_BLOCK) {
|
||||
return node;
|
||||
} else {
|
||||
node = node->parent;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
|
||||
cmark_event_type ev_type, int options) {
|
||||
cmark_node *tmp;
|
||||
int list_number;
|
||||
cmark_delim_type list_delim;
|
||||
int numticks;
|
||||
bool extra_spaces;
|
||||
int i;
|
||||
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
||||
const char *info, *code, *title;
|
||||
char fencechar[2] = {'\0', '\0'};
|
||||
size_t info_len, code_len;
|
||||
char listmarker[LISTMARKER_SIZE];
|
||||
char *emph_delim;
|
||||
bool first_in_list_item;
|
||||
bufsize_t marker_width;
|
||||
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
|
||||
!(CMARK_OPT_HARDBREAKS & options);
|
||||
|
||||
// Don't adjust tight list status til we've started the list.
|
||||
// Otherwise we loose the blank line between a paragraph and
|
||||
// a following list.
|
||||
if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
|
||||
tmp = get_containing_block(node);
|
||||
renderer->in_tight_list_item =
|
||||
tmp && // tmp might be NULL if there is no containing block
|
||||
((tmp->type == CMARK_NODE_ITEM &&
|
||||
cmark_node_get_list_tight(tmp->parent)) ||
|
||||
(tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
|
||||
cmark_node_get_list_tight(tmp->parent->parent)));
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_DOCUMENT:
|
||||
break;
|
||||
|
||||
case CMARK_NODE_BLOCK_QUOTE:
|
||||
if (entering) {
|
||||
LIT("> ");
|
||||
renderer->begin_content = true;
|
||||
cmark_strbuf_puts(renderer->prefix, "> ");
|
||||
} else {
|
||||
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
|
||||
BLANKLINE();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LIST:
|
||||
if (!entering && node->next && (node->next->type == CMARK_NODE_CODE_BLOCK ||
|
||||
node->next->type == CMARK_NODE_LIST)) {
|
||||
// this ensures that a following indented code block or list will be
|
||||
// inteprereted correctly.
|
||||
CR();
|
||||
LIT("<!-- end list -->");
|
||||
BLANKLINE();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_ITEM:
|
||||
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
|
||||
marker_width = 4;
|
||||
} else {
|
||||
list_number = cmark_node_get_list_start(node->parent);
|
||||
list_delim = cmark_node_get_list_delim(node->parent);
|
||||
tmp = node;
|
||||
while (tmp->prev) {
|
||||
tmp = tmp->prev;
|
||||
list_number += 1;
|
||||
}
|
||||
// we ensure a width of at least 4 so
|
||||
// we get nice transition from single digits
|
||||
// to double
|
||||
snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
|
||||
list_delim == CMARK_PAREN_DELIM ? ")" : ".",
|
||||
list_number < 10 ? " " : " ");
|
||||
marker_width = strlen(listmarker);
|
||||
}
|
||||
if (entering) {
|
||||
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
|
||||
LIT(" - ");
|
||||
renderer->begin_content = true;
|
||||
} else {
|
||||
LIT(listmarker);
|
||||
renderer->begin_content = true;
|
||||
}
|
||||
for (i = marker_width; i--;) {
|
||||
cmark_strbuf_putc(renderer->prefix, ' ');
|
||||
}
|
||||
} else {
|
||||
cmark_strbuf_truncate(renderer->prefix,
|
||||
renderer->prefix->size - marker_width);
|
||||
CR();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HEADING:
|
||||
if (entering) {
|
||||
for (i = cmark_node_get_heading_level(node); i > 0; i--) {
|
||||
LIT("#");
|
||||
}
|
||||
LIT(" ");
|
||||
renderer->begin_content = true;
|
||||
renderer->no_linebreaks = true;
|
||||
} else {
|
||||
renderer->no_linebreaks = false;
|
||||
BLANKLINE();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CODE_BLOCK:
|
||||
first_in_list_item = node->prev == NULL && node->parent &&
|
||||
node->parent->type == CMARK_NODE_ITEM;
|
||||
|
||||
if (!first_in_list_item) {
|
||||
BLANKLINE();
|
||||
}
|
||||
info = cmark_node_get_fence_info(node);
|
||||
info_len = strlen(info);
|
||||
fencechar[0] = strchr(info, '`') == NULL ? '`' : '~';
|
||||
code = cmark_node_get_literal(node);
|
||||
code_len = strlen(code);
|
||||
// use indented form if no info, and code doesn't
|
||||
// begin or end with a blank line, and code isn't
|
||||
// first thing in a list item
|
||||
if (info_len == 0 && (code_len > 2 && !cmark_isspace(code[0]) &&
|
||||
!(cmark_isspace(code[code_len - 1]) &&
|
||||
cmark_isspace(code[code_len - 2]))) &&
|
||||
!first_in_list_item) {
|
||||
LIT(" ");
|
||||
cmark_strbuf_puts(renderer->prefix, " ");
|
||||
OUT(cmark_node_get_literal(node), false, LITERAL);
|
||||
cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 4);
|
||||
} else {
|
||||
numticks = longest_backtick_sequence(code) + 1;
|
||||
if (numticks < 3) {
|
||||
numticks = 3;
|
||||
}
|
||||
for (i = 0; i < numticks; i++) {
|
||||
LIT(fencechar);
|
||||
}
|
||||
LIT(" ");
|
||||
OUT(info, false, LITERAL);
|
||||
CR();
|
||||
OUT(cmark_node_get_literal(node), false, LITERAL);
|
||||
CR();
|
||||
for (i = 0; i < numticks; i++) {
|
||||
LIT(fencechar);
|
||||
}
|
||||
}
|
||||
BLANKLINE();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HTML_BLOCK:
|
||||
BLANKLINE();
|
||||
OUT(cmark_node_get_literal(node), false, LITERAL);
|
||||
BLANKLINE();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
BLANKLINE();
|
||||
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
|
||||
false, LITERAL);
|
||||
BLANKLINE();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_THEMATIC_BREAK:
|
||||
BLANKLINE();
|
||||
LIT("-----");
|
||||
BLANKLINE();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_PARAGRAPH:
|
||||
if (!entering) {
|
||||
BLANKLINE();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_TEXT:
|
||||
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LINEBREAK:
|
||||
if (!(CMARK_OPT_HARDBREAKS & options)) {
|
||||
LIT(" ");
|
||||
}
|
||||
CR();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_SOFTBREAK:
|
||||
if (CMARK_OPT_HARDBREAKS & options) {
|
||||
LIT(" ");
|
||||
CR();
|
||||
} else if (!renderer->no_linebreaks && renderer->width == 0 &&
|
||||
!(CMARK_OPT_HARDBREAKS & options) &&
|
||||
!(CMARK_OPT_NOBREAKS & options)) {
|
||||
CR();
|
||||
} else {
|
||||
OUT(" ", allow_wrap, LITERAL);
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CODE:
|
||||
code = cmark_node_get_literal(node);
|
||||
code_len = strlen(code);
|
||||
numticks = shortest_unused_backtick_sequence(code);
|
||||
extra_spaces = code_len == 0 ||
|
||||
code[0] == '`' || code[code_len - 1] == '`' ||
|
||||
code[0] == ' ' || code[code_len - 1] == ' ';
|
||||
for (i = 0; i < numticks; i++) {
|
||||
LIT("`");
|
||||
}
|
||||
if (extra_spaces) {
|
||||
LIT(" ");
|
||||
}
|
||||
OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
|
||||
if (extra_spaces) {
|
||||
LIT(" ");
|
||||
}
|
||||
for (i = 0; i < numticks; i++) {
|
||||
LIT("`");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
OUT(cmark_node_get_literal(node), false, LITERAL);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
|
||||
false, LITERAL);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_STRONG:
|
||||
if (entering) {
|
||||
LIT("**");
|
||||
} else {
|
||||
LIT("**");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_EMPH:
|
||||
// If we have EMPH(EMPH(x)), we need to use *_x_*
|
||||
// because **x** is STRONG(x):
|
||||
if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
|
||||
node->next == NULL && node->prev == NULL) {
|
||||
emph_delim = "_";
|
||||
} else {
|
||||
emph_delim = "*";
|
||||
}
|
||||
if (entering) {
|
||||
LIT(emph_delim);
|
||||
} else {
|
||||
LIT(emph_delim);
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LINK:
|
||||
if (is_autolink(node)) {
|
||||
if (entering) {
|
||||
LIT("<");
|
||||
if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
|
||||
LIT((const char *)cmark_node_get_url(node) + 7);
|
||||
} else {
|
||||
LIT((const char *)cmark_node_get_url(node));
|
||||
}
|
||||
LIT(">");
|
||||
// return signal to skip contents of node...
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
if (entering) {
|
||||
LIT("[");
|
||||
} else {
|
||||
LIT("](");
|
||||
OUT(cmark_node_get_url(node), false, URL);
|
||||
title = cmark_node_get_title(node);
|
||||
if (strlen(title) > 0) {
|
||||
LIT(" \"");
|
||||
OUT(title, false, TITLE);
|
||||
LIT("\"");
|
||||
}
|
||||
LIT(")");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_IMAGE:
|
||||
if (entering) {
|
||||
LIT("![");
|
||||
} else {
|
||||
LIT("](");
|
||||
OUT(cmark_node_get_url(node), false, URL);
|
||||
title = cmark_node_get_title(node);
|
||||
if (strlen(title) > 0) {
|
||||
OUT(" \"", allow_wrap, LITERAL);
|
||||
OUT(title, false, TITLE);
|
||||
LIT("\"");
|
||||
}
|
||||
LIT(")");
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
|
||||
if (options & CMARK_OPT_HARDBREAKS) {
|
||||
// disable breaking on width, since it has
|
||||
// a different meaning with OPT_HARDBREAKS
|
||||
width = 0;
|
||||
}
|
||||
return cmark_render(root, options, width, outc, S_render_node);
|
||||
}
|
|
@ -1,76 +0,0 @@
|
|||
#ifndef CMARK_CONFIG_H
|
||||
#define CMARK_CONFIG_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define HAVE_STDBOOL_H
|
||||
|
||||
#ifdef HAVE_STDBOOL_H
|
||||
#include <stdbool.h>
|
||||
#elif !defined(__cplusplus)
|
||||
typedef char bool;
|
||||
#endif
|
||||
|
||||
#define HAVE___BUILTIN_EXPECT
|
||||
|
||||
#define HAVE___ATTRIBUTE__
|
||||
|
||||
#ifdef HAVE___ATTRIBUTE__
|
||||
#define CMARK_ATTRIBUTE(list) __attribute__ (list)
|
||||
#else
|
||||
#define CMARK_ATTRIBUTE(list)
|
||||
#endif
|
||||
|
||||
#ifndef CMARK_INLINE
|
||||
#if defined(_MSC_VER) && !defined(__cplusplus)
|
||||
#define CMARK_INLINE __inline
|
||||
#else
|
||||
#define CMARK_INLINE inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* snprintf and vsnprintf fallbacks for MSVC before 2015,
|
||||
due to Valentin Milea http://stackoverflow.com/questions/2915672/
|
||||
*/
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1900
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#define snprintf c99_snprintf
|
||||
#define vsnprintf c99_vsnprintf
|
||||
|
||||
CMARK_INLINE int c99_vsnprintf(char *outBuf, size_t size, const char *format, va_list ap)
|
||||
{
|
||||
int count = -1;
|
||||
|
||||
if (size != 0)
|
||||
count = _vsnprintf_s(outBuf, size, _TRUNCATE, format, ap);
|
||||
if (count == -1)
|
||||
count = _vscprintf(format, ap);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ...)
|
||||
{
|
||||
int count;
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, format);
|
||||
count = c99_vsnprintf(outBuf, size, format, ap);
|
||||
va_end(ap);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -1,51 +0,0 @@
|
|||
#ifndef CMARK_HOUDINI_H
|
||||
#define CMARK_HOUDINI_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef HAVE___BUILTIN_EXPECT
|
||||
#define likely(x) __builtin_expect((x), 1)
|
||||
#define unlikely(x) __builtin_expect((x), 0)
|
||||
#else
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif
|
||||
|
||||
#ifdef HOUDINI_USE_LOCALE
|
||||
#define _isxdigit(c) isxdigit(c)
|
||||
#define _isdigit(c) isdigit(c)
|
||||
#else
|
||||
/*
|
||||
* Helper _isdigit methods -- do not trust the current locale
|
||||
* */
|
||||
#define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
|
||||
#define _isdigit(c) ((c) >= '0' && (c) <= '9')
|
||||
#endif
|
||||
|
||||
#define HOUDINI_ESCAPED_SIZE(x) (((x)*12) / 10)
|
||||
#define HOUDINI_UNESCAPED_SIZE(x) (x)
|
||||
|
||||
extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
|
||||
bufsize_t size);
|
||||
extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src,
|
||||
bufsize_t size);
|
||||
extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src,
|
||||
bufsize_t size, int secure);
|
||||
extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
||||
bufsize_t size);
|
||||
extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
||||
bufsize_t size);
|
||||
extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src,
|
||||
bufsize_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,100 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
/*
|
||||
* The following characters will not be escaped:
|
||||
*
|
||||
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
||||
*
|
||||
* Note that this character set is the addition of:
|
||||
*
|
||||
* - The characters which are safe to be in an URL
|
||||
* - The characters which are *not* safe to be in
|
||||
* an URL because they are RESERVED characters.
|
||||
*
|
||||
* We assume (lazily) that any RESERVED char that
|
||||
* appears inside an URL is actually meant to
|
||||
* have its native function (i.e. as an URL
|
||||
* component/separator) and hence needs no escaping.
|
||||
*
|
||||
* There are two exceptions: the chacters & (amp)
|
||||
* and ' (single quote) do not appear in the table.
|
||||
* They are meant to appear in the URL as components,
|
||||
* yet they require special HTML-entity escaping
|
||||
* to generate valid HTML markup.
|
||||
*
|
||||
* All other characters will be escaped to %XX.
|
||||
*
|
||||
*/
|
||||
static const char HREF_SAFE[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
|
||||
static const uint8_t hex_chars[] = "0123456789ABCDEF";
|
||||
bufsize_t i = 0, org;
|
||||
uint8_t hex_str[3];
|
||||
|
||||
hex_str[0] = '%';
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && HREF_SAFE[src[i]] != 0)
|
||||
i++;
|
||||
|
||||
if (likely(i > org))
|
||||
cmark_strbuf_put(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
switch (src[i]) {
|
||||
/* amp appears all the time in URLs, but needs
|
||||
* HTML-entity escaping to be inside an href */
|
||||
case '&':
|
||||
cmark_strbuf_puts(ob, "&");
|
||||
break;
|
||||
|
||||
/* the single quote is a valid URL character
|
||||
* according to the standard; it needs HTML
|
||||
* entity escaping too */
|
||||
case '\'':
|
||||
cmark_strbuf_puts(ob, "'");
|
||||
break;
|
||||
|
||||
/* the space can be escaped to %20 or a plus
|
||||
* sign. we're going with the generic escape
|
||||
* for now. the plus thing is more commonly seen
|
||||
* when building GET strings */
|
||||
#if 0
|
||||
case ' ':
|
||||
cmark_strbuf_putc(ob, '+');
|
||||
break;
|
||||
#endif
|
||||
|
||||
/* every other character goes with a %XX escaping */
|
||||
default:
|
||||
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
||||
hex_str[2] = hex_chars[src[i] & 0xF];
|
||||
cmark_strbuf_put(ob, hex_str, 3);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
|
@ -1,66 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "houdini.h"
|
||||
|
||||
/**
|
||||
* According to the OWASP rules:
|
||||
*
|
||||
* & --> &
|
||||
* < --> <
|
||||
* > --> >
|
||||
* " --> "
|
||||
* ' --> ' ' is not recommended
|
||||
* / --> / forward slash is included as it helps end an HTML entity
|
||||
*
|
||||
*/
|
||||
static const char HTML_ESCAPE_TABLE[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
static const char *HTML_ESCAPES[] = {"", """, "&", "'",
|
||||
"/", "<", ">"};
|
||||
|
||||
int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size,
|
||||
int secure) {
|
||||
bufsize_t i = 0, org, esc = 0;
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
cmark_strbuf_put(ob, src + org, i - org);
|
||||
|
||||
/* escaping */
|
||||
if (unlikely(i >= size))
|
||||
break;
|
||||
|
||||
/* The forward slash is only escaped in secure mode */
|
||||
if ((src[i] == '/' || src[i] == '\'') && !secure) {
|
||||
cmark_strbuf_putc(ob, src[i]);
|
||||
} else {
|
||||
cmark_strbuf_puts(ob, HTML_ESCAPES[esc]);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) {
|
||||
return houdini_escape_html0(ob, src, size, 1);
|
||||
}
|
|
@ -1,149 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "buffer.h"
|
||||
#include "houdini.h"
|
||||
#include "utf8.h"
|
||||
#include "entities.inc"
|
||||
|
||||
/* Binary tree lookup code for entities added by JGM */
|
||||
|
||||
static const unsigned char *S_lookup(int i, int low, int hi,
|
||||
const unsigned char *s, int len) {
|
||||
int j;
|
||||
int cmp =
|
||||
strncmp((const char *)s, (const char *)cmark_entities[i].entity, len);
|
||||
if (cmp == 0 && cmark_entities[i].entity[len] == 0) {
|
||||
return (const unsigned char *)cmark_entities[i].bytes;
|
||||
} else if (cmp <= 0 && i > low) {
|
||||
j = i - ((i - low) / 2);
|
||||
if (j == i)
|
||||
j -= 1;
|
||||
return S_lookup(j, low, i - 1, s, len);
|
||||
} else if (cmp > 0 && i < hi) {
|
||||
j = i + ((hi - i) / 2);
|
||||
if (j == i)
|
||||
j += 1;
|
||||
return S_lookup(j, i + 1, hi, s, len);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static const unsigned char *S_lookup_entity(const unsigned char *s, int len) {
|
||||
return S_lookup(CMARK_NUM_ENTITIES / 2, 0, CMARK_NUM_ENTITIES - 1, s, len);
|
||||
}
|
||||
|
||||
bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src,
|
||||
bufsize_t size) {
|
||||
bufsize_t i = 0;
|
||||
|
||||
if (size >= 3 && src[0] == '#') {
|
||||
int codepoint = 0;
|
||||
int num_digits = 0;
|
||||
|
||||
if (_isdigit(src[1])) {
|
||||
for (i = 1; i < size && _isdigit(src[i]); ++i) {
|
||||
codepoint = (codepoint * 10) + (src[i] - '0');
|
||||
|
||||
if (codepoint >= 0x110000) {
|
||||
// Keep counting digits but
|
||||
// avoid integer overflow.
|
||||
codepoint = 0x110000;
|
||||
}
|
||||
}
|
||||
|
||||
num_digits = i - 1;
|
||||
}
|
||||
|
||||
else if (src[1] == 'x' || src[1] == 'X') {
|
||||
for (i = 2; i < size && _isxdigit(src[i]); ++i) {
|
||||
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
|
||||
|
||||
if (codepoint >= 0x110000) {
|
||||
// Keep counting digits but
|
||||
// avoid integer overflow.
|
||||
codepoint = 0x110000;
|
||||
}
|
||||
}
|
||||
|
||||
num_digits = i - 2;
|
||||
}
|
||||
|
||||
if (num_digits >= 1 && num_digits <= 8 && i < size && src[i] == ';') {
|
||||
if (codepoint == 0 || (codepoint >= 0xD800 && codepoint < 0xE000) ||
|
||||
codepoint >= 0x110000) {
|
||||
codepoint = 0xFFFD;
|
||||
}
|
||||
cmark_utf8proc_encode_char(codepoint, ob);
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
else {
|
||||
if (size > CMARK_ENTITY_MAX_LENGTH)
|
||||
size = CMARK_ENTITY_MAX_LENGTH;
|
||||
|
||||
for (i = CMARK_ENTITY_MIN_LENGTH; i < size; ++i) {
|
||||
if (src[i] == ' ')
|
||||
break;
|
||||
|
||||
if (src[i] == ';') {
|
||||
const unsigned char *entity = S_lookup_entity(src, i);
|
||||
|
||||
if (entity != NULL) {
|
||||
cmark_strbuf_puts(ob, (const char *)entity);
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src,
|
||||
bufsize_t size) {
|
||||
bufsize_t i = 0, org, ent;
|
||||
|
||||
while (i < size) {
|
||||
org = i;
|
||||
while (i < size && src[i] != '&')
|
||||
i++;
|
||||
|
||||
if (likely(i > org)) {
|
||||
if (unlikely(org == 0)) {
|
||||
if (i >= size)
|
||||
return 0;
|
||||
|
||||
cmark_strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
|
||||
}
|
||||
|
||||
cmark_strbuf_put(ob, src + org, i - org);
|
||||
}
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
i++;
|
||||
|
||||
ent = houdini_unescape_ent(ob, src + i, size - i);
|
||||
i += ent;
|
||||
|
||||
/* not really an entity */
|
||||
if (ent == 0)
|
||||
cmark_strbuf_putc(ob, '&');
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src,
|
||||
bufsize_t size) {
|
||||
if (!houdini_unescape_html(ob, src, size))
|
||||
cmark_strbuf_put(ob, src, size);
|
||||
}
|
|
@ -1,341 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "cmark_ctype.h"
|
||||
#include "config.h"
|
||||
#include "cmark.h"
|
||||
#include "node.h"
|
||||
#include "buffer.h"
|
||||
#include "houdini.h"
|
||||
#include "scanners.h"
|
||||
|
||||
#define BUFFER_SIZE 100
|
||||
|
||||
// Functions to convert cmark_nodes to HTML strings.
|
||||
|
||||
static void escape_html(cmark_strbuf *dest, const unsigned char *source,
|
||||
bufsize_t length) {
|
||||
houdini_escape_html0(dest, source, length, 0);
|
||||
}
|
||||
|
||||
static CMARK_INLINE void cr(cmark_strbuf *html) {
|
||||
if (html->size && html->ptr[html->size - 1] != '\n')
|
||||
cmark_strbuf_putc(html, '\n');
|
||||
}
|
||||
|
||||
struct render_state {
|
||||
cmark_strbuf *html;
|
||||
cmark_node *plain;
|
||||
};
|
||||
|
||||
static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html,
|
||||
int options) {
|
||||
char buffer[BUFFER_SIZE];
|
||||
if (CMARK_OPT_SOURCEPOS & options) {
|
||||
snprintf(buffer, BUFFER_SIZE, " data-sourcepos=\"%d:%d-%d:%d\"",
|
||||
cmark_node_get_start_line(node), cmark_node_get_start_column(node),
|
||||
cmark_node_get_end_line(node), cmark_node_get_end_column(node));
|
||||
cmark_strbuf_puts(html, buffer);
|
||||
}
|
||||
}
|
||||
|
||||
static int S_render_node(cmark_node *node, cmark_event_type ev_type,
|
||||
struct render_state *state, int options) {
|
||||
cmark_node *parent;
|
||||
cmark_node *grandparent;
|
||||
cmark_strbuf *html = state->html;
|
||||
char start_heading[] = "<h0";
|
||||
char end_heading[] = "</h0";
|
||||
bool tight;
|
||||
char buffer[BUFFER_SIZE];
|
||||
|
||||
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
||||
|
||||
if (state->plain == node) { // back at original node
|
||||
state->plain = NULL;
|
||||
}
|
||||
|
||||
if (state->plain != NULL) {
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_TEXT:
|
||||
case CMARK_NODE_CODE:
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
escape_html(html, node->as.literal.data, node->as.literal.len);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LINEBREAK:
|
||||
case CMARK_NODE_SOFTBREAK:
|
||||
cmark_strbuf_putc(html, ' ');
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_DOCUMENT:
|
||||
break;
|
||||
|
||||
case CMARK_NODE_BLOCK_QUOTE:
|
||||
if (entering) {
|
||||
cr(html);
|
||||
cmark_strbuf_puts(html, "<blockquote");
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_puts(html, ">\n");
|
||||
} else {
|
||||
cr(html);
|
||||
cmark_strbuf_puts(html, "</blockquote>\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LIST: {
|
||||
cmark_list_type list_type = node->as.list.list_type;
|
||||
int start = node->as.list.start;
|
||||
|
||||
if (entering) {
|
||||
cr(html);
|
||||
if (list_type == CMARK_BULLET_LIST) {
|
||||
cmark_strbuf_puts(html, "<ul");
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_puts(html, ">\n");
|
||||
} else if (start == 1) {
|
||||
cmark_strbuf_puts(html, "<ol");
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_puts(html, ">\n");
|
||||
} else {
|
||||
snprintf(buffer, BUFFER_SIZE, "<ol start=\"%d\"", start);
|
||||
cmark_strbuf_puts(html, buffer);
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_puts(html, ">\n");
|
||||
}
|
||||
} else {
|
||||
cmark_strbuf_puts(html,
|
||||
list_type == CMARK_BULLET_LIST ? "</ul>\n" : "</ol>\n");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case CMARK_NODE_ITEM:
|
||||
if (entering) {
|
||||
cr(html);
|
||||
cmark_strbuf_puts(html, "<li");
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_putc(html, '>');
|
||||
} else {
|
||||
cmark_strbuf_puts(html, "</li>\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HEADING:
|
||||
if (entering) {
|
||||
cr(html);
|
||||
start_heading[2] = (char)('0' + node->as.heading.level);
|
||||
cmark_strbuf_puts(html, start_heading);
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_putc(html, '>');
|
||||
} else {
|
||||
end_heading[3] = (char)('0' + node->as.heading.level);
|
||||
cmark_strbuf_puts(html, end_heading);
|
||||
cmark_strbuf_puts(html, ">\n");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CODE_BLOCK:
|
||||
cr(html);
|
||||
|
||||
if (node->as.code.info.len == 0) {
|
||||
cmark_strbuf_puts(html, "<pre");
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_puts(html, "><code>");
|
||||
} else {
|
||||
bufsize_t first_tag = 0;
|
||||
while (first_tag < node->as.code.info.len &&
|
||||
!cmark_isspace(node->as.code.info.data[first_tag])) {
|
||||
first_tag += 1;
|
||||
}
|
||||
|
||||
cmark_strbuf_puts(html, "<pre");
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_puts(html, "><code class=\"language-");
|
||||
escape_html(html, node->as.code.info.data, first_tag);
|
||||
cmark_strbuf_puts(html, "\">");
|
||||
}
|
||||
|
||||
escape_html(html, node->as.code.literal.data, node->as.code.literal.len);
|
||||
cmark_strbuf_puts(html, "</code></pre>\n");
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HTML_BLOCK:
|
||||
cr(html);
|
||||
if (options & CMARK_OPT_SAFE) {
|
||||
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
|
||||
} else {
|
||||
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
|
||||
}
|
||||
cr(html);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
cr(html);
|
||||
if (entering) {
|
||||
cmark_strbuf_put(html, node->as.custom.on_enter.data,
|
||||
node->as.custom.on_enter.len);
|
||||
} else {
|
||||
cmark_strbuf_put(html, node->as.custom.on_exit.data,
|
||||
node->as.custom.on_exit.len);
|
||||
}
|
||||
cr(html);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_THEMATIC_BREAK:
|
||||
cr(html);
|
||||
cmark_strbuf_puts(html, "<hr");
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_puts(html, " />\n");
|
||||
break;
|
||||
|
||||
case CMARK_NODE_PARAGRAPH:
|
||||
parent = cmark_node_parent(node);
|
||||
grandparent = cmark_node_parent(parent);
|
||||
if (grandparent != NULL && grandparent->type == CMARK_NODE_LIST) {
|
||||
tight = grandparent->as.list.tight;
|
||||
} else {
|
||||
tight = false;
|
||||
}
|
||||
if (!tight) {
|
||||
if (entering) {
|
||||
cr(html);
|
||||
cmark_strbuf_puts(html, "<p");
|
||||
S_render_sourcepos(node, html, options);
|
||||
cmark_strbuf_putc(html, '>');
|
||||
} else {
|
||||
cmark_strbuf_puts(html, "</p>\n");
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_TEXT:
|
||||
escape_html(html, node->as.literal.data, node->as.literal.len);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LINEBREAK:
|
||||
cmark_strbuf_puts(html, "<br />\n");
|
||||
break;
|
||||
|
||||
case CMARK_NODE_SOFTBREAK:
|
||||
if (options & CMARK_OPT_HARDBREAKS) {
|
||||
cmark_strbuf_puts(html, "<br />\n");
|
||||
} else if (options & CMARK_OPT_NOBREAKS) {
|
||||
cmark_strbuf_putc(html, ' ');
|
||||
} else {
|
||||
cmark_strbuf_putc(html, '\n');
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CODE:
|
||||
cmark_strbuf_puts(html, "<code>");
|
||||
escape_html(html, node->as.literal.data, node->as.literal.len);
|
||||
cmark_strbuf_puts(html, "</code>");
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
if (options & CMARK_OPT_SAFE) {
|
||||
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
|
||||
} else {
|
||||
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
if (entering) {
|
||||
cmark_strbuf_put(html, node->as.custom.on_enter.data,
|
||||
node->as.custom.on_enter.len);
|
||||
} else {
|
||||
cmark_strbuf_put(html, node->as.custom.on_exit.data,
|
||||
node->as.custom.on_exit.len);
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_STRONG:
|
||||
if (entering) {
|
||||
cmark_strbuf_puts(html, "<strong>");
|
||||
} else {
|
||||
cmark_strbuf_puts(html, "</strong>");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_EMPH:
|
||||
if (entering) {
|
||||
cmark_strbuf_puts(html, "<em>");
|
||||
} else {
|
||||
cmark_strbuf_puts(html, "</em>");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LINK:
|
||||
if (entering) {
|
||||
cmark_strbuf_puts(html, "<a href=\"");
|
||||
if (!((options & CMARK_OPT_SAFE) &&
|
||||
scan_dangerous_url(&node->as.link.url, 0))) {
|
||||
houdini_escape_href(html, node->as.link.url.data,
|
||||
node->as.link.url.len);
|
||||
}
|
||||
if (node->as.link.title.len) {
|
||||
cmark_strbuf_puts(html, "\" title=\"");
|
||||
escape_html(html, node->as.link.title.data, node->as.link.title.len);
|
||||
}
|
||||
cmark_strbuf_puts(html, "\">");
|
||||
} else {
|
||||
cmark_strbuf_puts(html, "</a>");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_IMAGE:
|
||||
if (entering) {
|
||||
cmark_strbuf_puts(html, "<img src=\"");
|
||||
if (!((options & CMARK_OPT_SAFE) &&
|
||||
scan_dangerous_url(&node->as.link.url, 0))) {
|
||||
houdini_escape_href(html, node->as.link.url.data,
|
||||
node->as.link.url.len);
|
||||
}
|
||||
cmark_strbuf_puts(html, "\" alt=\"");
|
||||
state->plain = node;
|
||||
} else {
|
||||
if (node->as.link.title.len) {
|
||||
cmark_strbuf_puts(html, "\" title=\"");
|
||||
escape_html(html, node->as.link.title.data, node->as.link.title.len);
|
||||
}
|
||||
|
||||
cmark_strbuf_puts(html, "\" />");
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
// cmark_strbuf_putc(html, 'x');
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *cmark_render_html(cmark_node *root, int options) {
|
||||
char *result;
|
||||
cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root));
|
||||
cmark_event_type ev_type;
|
||||
cmark_node *cur;
|
||||
struct render_state state = {&html, NULL};
|
||||
cmark_iter *iter = cmark_iter_new(root);
|
||||
|
||||
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
|
||||
cur = cmark_iter_get_node(iter);
|
||||
S_render_node(cur, ev_type, &state, options);
|
||||
}
|
||||
result = (char *)cmark_strbuf_detach(&html);
|
||||
|
||||
cmark_iter_free(iter);
|
||||
return result;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,21 +0,0 @@
|
|||
#ifndef CMARK_INLINES_H
|
||||
#define CMARK_INLINES_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
|
||||
cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
|
||||
|
||||
void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
|
||||
cmark_reference_map *refmap, int options);
|
||||
|
||||
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
|
||||
cmark_reference_map *refmap);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,121 +0,0 @@
|
|||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "node.h"
|
||||
#include "cmark.h"
|
||||
#include "iterator.h"
|
||||
|
||||
static const int S_leaf_mask =
|
||||
(1 << CMARK_NODE_HTML_BLOCK) | (1 << CMARK_NODE_THEMATIC_BREAK) |
|
||||
(1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_TEXT) |
|
||||
(1 << CMARK_NODE_SOFTBREAK) | (1 << CMARK_NODE_LINEBREAK) |
|
||||
(1 << CMARK_NODE_CODE) | (1 << CMARK_NODE_HTML_INLINE);
|
||||
|
||||
cmark_iter *cmark_iter_new(cmark_node *root) {
|
||||
if (root == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
cmark_mem *mem = root->content.mem;
|
||||
cmark_iter *iter = (cmark_iter *)mem->calloc(1, sizeof(cmark_iter));
|
||||
iter->mem = mem;
|
||||
iter->root = root;
|
||||
iter->cur.ev_type = CMARK_EVENT_NONE;
|
||||
iter->cur.node = NULL;
|
||||
iter->next.ev_type = CMARK_EVENT_ENTER;
|
||||
iter->next.node = root;
|
||||
return iter;
|
||||
}
|
||||
|
||||
void cmark_iter_free(cmark_iter *iter) { iter->mem->free(iter); }
|
||||
|
||||
static bool S_is_leaf(cmark_node *node) {
|
||||
return ((1 << node->type) & S_leaf_mask) != 0;
|
||||
}
|
||||
|
||||
cmark_event_type cmark_iter_next(cmark_iter *iter) {
|
||||
cmark_event_type ev_type = iter->next.ev_type;
|
||||
cmark_node *node = iter->next.node;
|
||||
|
||||
iter->cur.ev_type = ev_type;
|
||||
iter->cur.node = node;
|
||||
|
||||
if (ev_type == CMARK_EVENT_DONE) {
|
||||
return ev_type;
|
||||
}
|
||||
|
||||
/* roll forward to next item, setting both fields */
|
||||
if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) {
|
||||
if (node->first_child == NULL) {
|
||||
/* stay on this node but exit */
|
||||
iter->next.ev_type = CMARK_EVENT_EXIT;
|
||||
} else {
|
||||
iter->next.ev_type = CMARK_EVENT_ENTER;
|
||||
iter->next.node = node->first_child;
|
||||
}
|
||||
} else if (node == iter->root) {
|
||||
/* don't move past root */
|
||||
iter->next.ev_type = CMARK_EVENT_DONE;
|
||||
iter->next.node = NULL;
|
||||
} else if (node->next) {
|
||||
iter->next.ev_type = CMARK_EVENT_ENTER;
|
||||
iter->next.node = node->next;
|
||||
} else if (node->parent) {
|
||||
iter->next.ev_type = CMARK_EVENT_EXIT;
|
||||
iter->next.node = node->parent;
|
||||
} else {
|
||||
assert(false);
|
||||
iter->next.ev_type = CMARK_EVENT_DONE;
|
||||
iter->next.node = NULL;
|
||||
}
|
||||
|
||||
return ev_type;
|
||||
}
|
||||
|
||||
void cmark_iter_reset(cmark_iter *iter, cmark_node *current,
|
||||
cmark_event_type event_type) {
|
||||
iter->next.ev_type = event_type;
|
||||
iter->next.node = current;
|
||||
cmark_iter_next(iter);
|
||||
}
|
||||
|
||||
cmark_node *cmark_iter_get_node(cmark_iter *iter) { return iter->cur.node; }
|
||||
|
||||
cmark_event_type cmark_iter_get_event_type(cmark_iter *iter) {
|
||||
return iter->cur.ev_type;
|
||||
}
|
||||
|
||||
cmark_node *cmark_iter_get_root(cmark_iter *iter) { return iter->root; }
|
||||
|
||||
void cmark_consolidate_text_nodes(cmark_node *root) {
|
||||
if (root == NULL) {
|
||||
return;
|
||||
}
|
||||
cmark_iter *iter = cmark_iter_new(root);
|
||||
cmark_strbuf buf = CMARK_BUF_INIT(iter->mem);
|
||||
cmark_event_type ev_type;
|
||||
cmark_node *cur, *tmp, *next;
|
||||
|
||||
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
|
||||
cur = cmark_iter_get_node(iter);
|
||||
if (ev_type == CMARK_EVENT_ENTER && cur->type == CMARK_NODE_TEXT &&
|
||||
cur->next && cur->next->type == CMARK_NODE_TEXT) {
|
||||
cmark_strbuf_clear(&buf);
|
||||
cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
|
||||
tmp = cur->next;
|
||||
while (tmp && tmp->type == CMARK_NODE_TEXT) {
|
||||
cmark_iter_next(iter); // advance pointer
|
||||
cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
|
||||
cur->end_column = tmp->end_column;
|
||||
next = tmp->next;
|
||||
cmark_node_free(tmp);
|
||||
tmp = next;
|
||||
}
|
||||
cmark_chunk_free(iter->mem, &cur->as.literal);
|
||||
cur->as.literal = cmark_chunk_buf_detach(&buf);
|
||||
}
|
||||
}
|
||||
|
||||
cmark_strbuf_free(&buf);
|
||||
cmark_iter_free(iter);
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
#ifndef CMARK_ITERATOR_H
|
||||
#define CMARK_ITERATOR_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "cmark.h"
|
||||
#include "memory.h"
|
||||
|
||||
typedef struct {
|
||||
cmark_event_type ev_type;
|
||||
cmark_node *node;
|
||||
} cmark_iter_state;
|
||||
|
||||
struct cmark_iter {
|
||||
cmark_mem *mem;
|
||||
cmark_node *root;
|
||||
cmark_iter_state cur;
|
||||
cmark_iter_state next;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,453 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "cmark.h"
|
||||
#include "node.h"
|
||||
#include "buffer.h"
|
||||
#include "utf8.h"
|
||||
#include "scanners.h"
|
||||
#include "render.h"
|
||||
|
||||
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
|
||||
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
|
||||
#define CR() renderer->cr(renderer)
|
||||
#define BLANKLINE() renderer->blankline(renderer)
|
||||
#define LIST_NUMBER_STRING_SIZE 20
|
||||
|
||||
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
|
||||
int32_t c, unsigned char nextc) {
|
||||
if (escape == LITERAL) {
|
||||
cmark_render_code_point(renderer, c);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case 123: // '{'
|
||||
case 125: // '}'
|
||||
case 35: // '#'
|
||||
case 37: // '%'
|
||||
case 38: // '&'
|
||||
cmark_render_ascii(renderer, "\\");
|
||||
cmark_render_code_point(renderer, c);
|
||||
break;
|
||||
case 36: // '$'
|
||||
case 95: // '_'
|
||||
if (escape == NORMAL) {
|
||||
cmark_render_ascii(renderer, "\\");
|
||||
}
|
||||
cmark_render_code_point(renderer, c);
|
||||
break;
|
||||
case 45: // '-'
|
||||
if (nextc == 45) { // prevent ligature
|
||||
cmark_render_ascii(renderer, "-{}");
|
||||
} else {
|
||||
cmark_render_ascii(renderer, "-");
|
||||
}
|
||||
break;
|
||||
case 126: // '~'
|
||||
if (escape == NORMAL) {
|
||||
cmark_render_ascii(renderer, "\\textasciitilde{}");
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
break;
|
||||
case 94: // '^'
|
||||
cmark_render_ascii(renderer, "\\^{}");
|
||||
break;
|
||||
case 92: // '\\'
|
||||
if (escape == URL) {
|
||||
// / acts as path sep even on windows:
|
||||
cmark_render_ascii(renderer, "/");
|
||||
} else {
|
||||
cmark_render_ascii(renderer, "\\textbackslash{}");
|
||||
}
|
||||
break;
|
||||
case 124: // '|'
|
||||
cmark_render_ascii(renderer, "\\textbar{}");
|
||||
break;
|
||||
case 60: // '<'
|
||||
cmark_render_ascii(renderer, "\\textless{}");
|
||||
break;
|
||||
case 62: // '>'
|
||||
cmark_render_ascii(renderer, "\\textgreater{}");
|
||||
break;
|
||||
case 91: // '['
|
||||
case 93: // ']'
|
||||
cmark_render_ascii(renderer, "{");
|
||||
cmark_render_code_point(renderer, c);
|
||||
cmark_render_ascii(renderer, "}");
|
||||
break;
|
||||
case 34: // '"'
|
||||
cmark_render_ascii(renderer, "\\textquotedbl{}");
|
||||
// requires \usepackage[T1]{fontenc}
|
||||
break;
|
||||
case 39: // '\''
|
||||
cmark_render_ascii(renderer, "\\textquotesingle{}");
|
||||
// requires \usepackage{textcomp}
|
||||
break;
|
||||
case 160: // nbsp
|
||||
cmark_render_ascii(renderer, "~");
|
||||
break;
|
||||
case 8230: // hellip
|
||||
cmark_render_ascii(renderer, "\\ldots{}");
|
||||
break;
|
||||
case 8216: // lsquo
|
||||
if (escape == NORMAL) {
|
||||
cmark_render_ascii(renderer, "`");
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
break;
|
||||
case 8217: // rsquo
|
||||
if (escape == NORMAL) {
|
||||
cmark_render_ascii(renderer, "\'");
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
break;
|
||||
case 8220: // ldquo
|
||||
if (escape == NORMAL) {
|
||||
cmark_render_ascii(renderer, "``");
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
break;
|
||||
case 8221: // rdquo
|
||||
if (escape == NORMAL) {
|
||||
cmark_render_ascii(renderer, "''");
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
break;
|
||||
case 8212: // emdash
|
||||
if (escape == NORMAL) {
|
||||
cmark_render_ascii(renderer, "---");
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
break;
|
||||
case 8211: // endash
|
||||
if (escape == NORMAL) {
|
||||
cmark_render_ascii(renderer, "--");
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
NO_LINK,
|
||||
URL_AUTOLINK,
|
||||
EMAIL_AUTOLINK,
|
||||
NORMAL_LINK,
|
||||
INTERNAL_LINK
|
||||
} link_type;
|
||||
|
||||
static link_type get_link_type(cmark_node *node) {
|
||||
size_t title_len, url_len;
|
||||
cmark_node *link_text;
|
||||
char *realurl;
|
||||
int realurllen;
|
||||
bool isemail = false;
|
||||
|
||||
if (node->type != CMARK_NODE_LINK) {
|
||||
return NO_LINK;
|
||||
}
|
||||
|
||||
const char *url = cmark_node_get_url(node);
|
||||
cmark_chunk url_chunk = cmark_chunk_literal(url);
|
||||
|
||||
if (url && *url == '#') {
|
||||
return INTERNAL_LINK;
|
||||
}
|
||||
|
||||
url_len = strlen(url);
|
||||
if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
|
||||
return NO_LINK;
|
||||
}
|
||||
|
||||
const char *title = cmark_node_get_title(node);
|
||||
title_len = strlen(title);
|
||||
// if it has a title, we can't treat it as an autolink:
|
||||
if (title_len == 0) {
|
||||
|
||||
link_text = node->first_child;
|
||||
cmark_consolidate_text_nodes(link_text);
|
||||
|
||||
if (!link_text)
|
||||
return NO_LINK;
|
||||
|
||||
realurl = (char *)url;
|
||||
realurllen = (int)url_len;
|
||||
if (strncmp(realurl, "mailto:", 7) == 0) {
|
||||
realurl += 7;
|
||||
realurllen -= 7;
|
||||
isemail = true;
|
||||
}
|
||||
if (realurllen == link_text->as.literal.len &&
|
||||
strncmp(realurl, (char *)link_text->as.literal.data,
|
||||
link_text->as.literal.len) == 0) {
|
||||
if (isemail) {
|
||||
return EMAIL_AUTOLINK;
|
||||
} else {
|
||||
return URL_AUTOLINK;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return NORMAL_LINK;
|
||||
}
|
||||
|
||||
static int S_get_enumlevel(cmark_node *node) {
|
||||
int enumlevel = 0;
|
||||
cmark_node *tmp = node;
|
||||
while (tmp) {
|
||||
if (tmp->type == CMARK_NODE_LIST &&
|
||||
cmark_node_get_list_type(node) == CMARK_ORDERED_LIST) {
|
||||
enumlevel++;
|
||||
}
|
||||
tmp = tmp->parent;
|
||||
}
|
||||
return enumlevel;
|
||||
}
|
||||
|
||||
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
|
||||
cmark_event_type ev_type, int options) {
|
||||
int list_number;
|
||||
int enumlevel;
|
||||
char list_number_string[LIST_NUMBER_STRING_SIZE];
|
||||
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
||||
cmark_list_type list_type;
|
||||
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
|
||||
|
||||
// avoid warning about unused parameter:
|
||||
(void)(options);
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_DOCUMENT:
|
||||
break;
|
||||
|
||||
case CMARK_NODE_BLOCK_QUOTE:
|
||||
if (entering) {
|
||||
LIT("\\begin{quote}");
|
||||
CR();
|
||||
} else {
|
||||
LIT("\\end{quote}");
|
||||
BLANKLINE();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LIST:
|
||||
list_type = cmark_node_get_list_type(node);
|
||||
if (entering) {
|
||||
LIT("\\begin{");
|
||||
LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
|
||||
LIT("}");
|
||||
CR();
|
||||
list_number = cmark_node_get_list_start(node);
|
||||
if (list_number > 1) {
|
||||
enumlevel = S_get_enumlevel(node);
|
||||
// latex normally supports only five levels
|
||||
if (enumlevel >= 1 && enumlevel <= 5) {
|
||||
snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",
|
||||
list_number);
|
||||
LIT("\\setcounter{enum");
|
||||
switch (enumlevel) {
|
||||
case 1: LIT("i"); break;
|
||||
case 2: LIT("ii"); break;
|
||||
case 3: LIT("iii"); break;
|
||||
case 4: LIT("iv"); break;
|
||||
case 5: LIT("v"); break;
|
||||
default: LIT("i"); break;
|
||||
}
|
||||
LIT("}{");
|
||||
OUT(list_number_string, false, NORMAL);
|
||||
LIT("}");
|
||||
}
|
||||
CR();
|
||||
}
|
||||
} else {
|
||||
LIT("\\end{");
|
||||
LIT(list_type == CMARK_ORDERED_LIST ? "enumerate" : "itemize");
|
||||
LIT("}");
|
||||
BLANKLINE();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_ITEM:
|
||||
if (entering) {
|
||||
LIT("\\item ");
|
||||
} else {
|
||||
CR();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HEADING:
|
||||
if (entering) {
|
||||
switch (cmark_node_get_heading_level(node)) {
|
||||
case 1:
|
||||
LIT("\\section");
|
||||
break;
|
||||
case 2:
|
||||
LIT("\\subsection");
|
||||
break;
|
||||
case 3:
|
||||
LIT("\\subsubsection");
|
||||
break;
|
||||
case 4:
|
||||
LIT("\\paragraph");
|
||||
break;
|
||||
case 5:
|
||||
LIT("\\subparagraph");
|
||||
break;
|
||||
}
|
||||
LIT("{");
|
||||
} else {
|
||||
LIT("}");
|
||||
BLANKLINE();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CODE_BLOCK:
|
||||
CR();
|
||||
LIT("\\begin{verbatim}");
|
||||
CR();
|
||||
OUT(cmark_node_get_literal(node), false, LITERAL);
|
||||
CR();
|
||||
LIT("\\end{verbatim}");
|
||||
BLANKLINE();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HTML_BLOCK:
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
CR();
|
||||
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
|
||||
false, LITERAL);
|
||||
CR();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_THEMATIC_BREAK:
|
||||
BLANKLINE();
|
||||
LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}");
|
||||
BLANKLINE();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_PARAGRAPH:
|
||||
if (!entering) {
|
||||
BLANKLINE();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_TEXT:
|
||||
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LINEBREAK:
|
||||
LIT("\\\\");
|
||||
CR();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_SOFTBREAK:
|
||||
if (options & CMARK_OPT_HARDBREAKS) {
|
||||
LIT("\\\\");
|
||||
CR();
|
||||
} else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
|
||||
CR();
|
||||
} else {
|
||||
OUT(" ", allow_wrap, NORMAL);
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CODE:
|
||||
LIT("\\texttt{");
|
||||
OUT(cmark_node_get_literal(node), false, NORMAL);
|
||||
LIT("}");
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
|
||||
false, LITERAL);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_STRONG:
|
||||
if (entering) {
|
||||
LIT("\\textbf{");
|
||||
} else {
|
||||
LIT("}");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_EMPH:
|
||||
if (entering) {
|
||||
LIT("\\emph{");
|
||||
} else {
|
||||
LIT("}");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LINK:
|
||||
if (entering) {
|
||||
const char *url = cmark_node_get_url(node);
|
||||
// requires \usepackage{hyperref}
|
||||
switch (get_link_type(node)) {
|
||||
case URL_AUTOLINK:
|
||||
LIT("\\url{");
|
||||
OUT(url, false, URL);
|
||||
LIT("}");
|
||||
return 0; // Don't process further nodes to avoid double-rendering artefacts
|
||||
case EMAIL_AUTOLINK:
|
||||
LIT("\\href{");
|
||||
OUT(url, false, URL);
|
||||
LIT("}\\nolinkurl{");
|
||||
break;
|
||||
case NORMAL_LINK:
|
||||
LIT("\\href{");
|
||||
OUT(url, false, URL);
|
||||
LIT("}{");
|
||||
break;
|
||||
case INTERNAL_LINK:
|
||||
LIT("\\protect\\hyperlink{");
|
||||
OUT(url + 1, false, URL);
|
||||
LIT("}{");
|
||||
break;
|
||||
case NO_LINK:
|
||||
LIT("{"); // error?
|
||||
}
|
||||
} else {
|
||||
LIT("}");
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case CMARK_NODE_IMAGE:
|
||||
if (entering) {
|
||||
LIT("\\protect\\includegraphics{");
|
||||
// requires \include{graphicx}
|
||||
OUT(cmark_node_get_url(node), false, URL);
|
||||
LIT("}");
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *cmark_render_latex(cmark_node *root, int options, int width) {
|
||||
return cmark_render(root, options, width, outc, S_render_node);
|
||||
}
|
|
@ -1,211 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include "config.h"
|
||||
#include "memory.h"
|
||||
#include "cmark.h"
|
||||
#include "node.h"
|
||||
|
||||
#if defined(__OpenBSD__)
|
||||
# include <sys/param.h>
|
||||
# if OpenBSD >= 201605
|
||||
# define USE_PLEDGE
|
||||
# include <unistd.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) && !defined(__CYGWIN__)
|
||||
#include <io.h>
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
FORMAT_NONE,
|
||||
FORMAT_HTML,
|
||||
FORMAT_XML,
|
||||
FORMAT_MAN,
|
||||
FORMAT_COMMONMARK,
|
||||
FORMAT_LATEX
|
||||
} writer_format;
|
||||
|
||||
void print_usage() {
|
||||
printf("Usage: cmark [FILE*]\n");
|
||||
printf("Options:\n");
|
||||
printf(" --to, -t FORMAT Specify output format (html, xml, man, "
|
||||
"commonmark, latex)\n");
|
||||
printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n");
|
||||
printf(" --sourcepos Include source position attribute\n");
|
||||
printf(" --hardbreaks Treat newlines as hard line breaks\n");
|
||||
printf(" --nobreaks Render soft line breaks as spaces\n");
|
||||
printf(" --safe Suppress raw HTML and dangerous URLs\n");
|
||||
printf(" --smart Use smart punctuation\n");
|
||||
printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n");
|
||||
printf(" --help, -h Print usage information\n");
|
||||
printf(" --version Print version\n");
|
||||
}
|
||||
|
||||
static void print_document(cmark_node *document, writer_format writer,
|
||||
int options, int width) {
|
||||
char *result;
|
||||
|
||||
switch (writer) {
|
||||
case FORMAT_HTML:
|
||||
result = cmark_render_html(document, options);
|
||||
break;
|
||||
case FORMAT_XML:
|
||||
result = cmark_render_xml(document, options);
|
||||
break;
|
||||
case FORMAT_MAN:
|
||||
result = cmark_render_man(document, options, width);
|
||||
break;
|
||||
case FORMAT_COMMONMARK:
|
||||
result = cmark_render_commonmark(document, options, width);
|
||||
break;
|
||||
case FORMAT_LATEX:
|
||||
result = cmark_render_latex(document, options, width);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unknown format %d\n", writer);
|
||||
exit(1);
|
||||
}
|
||||
printf("%s", result);
|
||||
cmark_node_mem(document)->free(result);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int i, numfps = 0;
|
||||
int *files;
|
||||
char buffer[4096];
|
||||
cmark_parser *parser;
|
||||
size_t bytes;
|
||||
cmark_node *document;
|
||||
int width = 0;
|
||||
char *unparsed;
|
||||
writer_format writer = FORMAT_HTML;
|
||||
int options = CMARK_OPT_DEFAULT;
|
||||
|
||||
#ifdef USE_PLEDGE
|
||||
if (pledge("stdio rpath", NULL) != 0) {
|
||||
perror("pledge");
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) && !defined(__CYGWIN__)
|
||||
_setmode(_fileno(stdin), _O_BINARY);
|
||||
_setmode(_fileno(stdout), _O_BINARY);
|
||||
#endif
|
||||
|
||||
files = (int *)calloc(argc, sizeof(*files));
|
||||
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "--version") == 0) {
|
||||
printf("cmark %s", CMARK_VERSION_STRING);
|
||||
printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n");
|
||||
exit(0);
|
||||
} else if (strcmp(argv[i], "--sourcepos") == 0) {
|
||||
options |= CMARK_OPT_SOURCEPOS;
|
||||
} else if (strcmp(argv[i], "--hardbreaks") == 0) {
|
||||
options |= CMARK_OPT_HARDBREAKS;
|
||||
} else if (strcmp(argv[i], "--nobreaks") == 0) {
|
||||
options |= CMARK_OPT_NOBREAKS;
|
||||
} else if (strcmp(argv[i], "--smart") == 0) {
|
||||
options |= CMARK_OPT_SMART;
|
||||
} else if (strcmp(argv[i], "--safe") == 0) {
|
||||
options |= CMARK_OPT_SAFE;
|
||||
} else if (strcmp(argv[i], "--validate-utf8") == 0) {
|
||||
options |= CMARK_OPT_VALIDATE_UTF8;
|
||||
} else if ((strcmp(argv[i], "--help") == 0) ||
|
||||
(strcmp(argv[i], "-h") == 0)) {
|
||||
print_usage();
|
||||
exit(0);
|
||||
} else if (strcmp(argv[i], "--width") == 0) {
|
||||
i += 1;
|
||||
if (i < argc) {
|
||||
width = (int)strtol(argv[i], &unparsed, 10);
|
||||
if (unparsed && strlen(unparsed) > 0) {
|
||||
fprintf(stderr, "failed parsing width '%s' at '%s'\n", argv[i],
|
||||
unparsed);
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "--width requires an argument\n");
|
||||
exit(1);
|
||||
}
|
||||
} else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) {
|
||||
i += 1;
|
||||
if (i < argc) {
|
||||
if (strcmp(argv[i], "man") == 0) {
|
||||
writer = FORMAT_MAN;
|
||||
} else if (strcmp(argv[i], "html") == 0) {
|
||||
writer = FORMAT_HTML;
|
||||
} else if (strcmp(argv[i], "xml") == 0) {
|
||||
writer = FORMAT_XML;
|
||||
} else if (strcmp(argv[i], "commonmark") == 0) {
|
||||
writer = FORMAT_COMMONMARK;
|
||||
} else if (strcmp(argv[i], "latex") == 0) {
|
||||
writer = FORMAT_LATEX;
|
||||
} else {
|
||||
fprintf(stderr, "Unknown format %s\n", argv[i]);
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "No argument provided for %s\n", argv[i - 1]);
|
||||
exit(1);
|
||||
}
|
||||
} else if (*argv[i] == '-') {
|
||||
print_usage();
|
||||
exit(1);
|
||||
} else { // treat as file argument
|
||||
files[numfps++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
parser = cmark_parser_new(options);
|
||||
for (i = 0; i < numfps; i++) {
|
||||
FILE *fp = fopen(argv[files[i]], "rb");
|
||||
if (fp == NULL) {
|
||||
fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]],
|
||||
strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
|
||||
cmark_parser_feed(parser, buffer, bytes);
|
||||
if (bytes < sizeof(buffer)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
if (numfps == 0) {
|
||||
|
||||
while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
|
||||
cmark_parser_feed(parser, buffer, bytes);
|
||||
if (bytes < sizeof(buffer)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_PLEDGE
|
||||
if (pledge("stdio", NULL) != 0) {
|
||||
perror("pledge");
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
document = cmark_parser_finish(parser);
|
||||
cmark_parser_free(parser);
|
||||
|
||||
print_document(document, writer, options, width);
|
||||
|
||||
cmark_node_free(document);
|
||||
|
||||
free(files);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,252 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "cmark.h"
|
||||
#include "node.h"
|
||||
#include "buffer.h"
|
||||
#include "utf8.h"
|
||||
#include "render.h"
|
||||
|
||||
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
|
||||
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
|
||||
#define CR() renderer->cr(renderer)
|
||||
#define BLANKLINE() renderer->blankline(renderer)
|
||||
#define LIST_NUMBER_SIZE 20
|
||||
|
||||
// Functions to convert cmark_nodes to groff man strings.
|
||||
static void S_outc(cmark_renderer *renderer, cmark_escaping escape, int32_t c,
|
||||
unsigned char nextc) {
|
||||
(void)(nextc);
|
||||
|
||||
if (escape == LITERAL) {
|
||||
cmark_render_code_point(renderer, c);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case 46:
|
||||
if (renderer->begin_line) {
|
||||
cmark_render_ascii(renderer, "\\&.");
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
break;
|
||||
case 39:
|
||||
if (renderer->begin_line) {
|
||||
cmark_render_ascii(renderer, "\\&'");
|
||||
} else {
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
break;
|
||||
case 45:
|
||||
cmark_render_ascii(renderer, "\\-");
|
||||
break;
|
||||
case 92:
|
||||
cmark_render_ascii(renderer, "\\e");
|
||||
break;
|
||||
case 8216: // left single quote
|
||||
cmark_render_ascii(renderer, "\\[oq]");
|
||||
break;
|
||||
case 8217: // right single quote
|
||||
cmark_render_ascii(renderer, "\\[cq]");
|
||||
break;
|
||||
case 8220: // left double quote
|
||||
cmark_render_ascii(renderer, "\\[lq]");
|
||||
break;
|
||||
case 8221: // right double quote
|
||||
cmark_render_ascii(renderer, "\\[rq]");
|
||||
break;
|
||||
case 8212: // em dash
|
||||
cmark_render_ascii(renderer, "\\[em]");
|
||||
break;
|
||||
case 8211: // en dash
|
||||
cmark_render_ascii(renderer, "\\[en]");
|
||||
break;
|
||||
default:
|
||||
cmark_render_code_point(renderer, c);
|
||||
}
|
||||
}
|
||||
|
||||
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
|
||||
cmark_event_type ev_type, int options) {
|
||||
cmark_node *tmp;
|
||||
int list_number;
|
||||
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
||||
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options);
|
||||
|
||||
// avoid unused parameter error:
|
||||
(void)(options);
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_DOCUMENT:
|
||||
break;
|
||||
|
||||
case CMARK_NODE_BLOCK_QUOTE:
|
||||
if (entering) {
|
||||
CR();
|
||||
LIT(".RS");
|
||||
CR();
|
||||
} else {
|
||||
CR();
|
||||
LIT(".RE");
|
||||
CR();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LIST:
|
||||
break;
|
||||
|
||||
case CMARK_NODE_ITEM:
|
||||
if (entering) {
|
||||
CR();
|
||||
LIT(".IP ");
|
||||
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
|
||||
LIT("\\[bu] 2");
|
||||
} else {
|
||||
list_number = cmark_node_get_list_start(node->parent);
|
||||
tmp = node;
|
||||
while (tmp->prev) {
|
||||
tmp = tmp->prev;
|
||||
list_number += 1;
|
||||
}
|
||||
char list_number_s[LIST_NUMBER_SIZE];
|
||||
snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number);
|
||||
LIT(list_number_s);
|
||||
}
|
||||
CR();
|
||||
} else {
|
||||
CR();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HEADING:
|
||||
if (entering) {
|
||||
CR();
|
||||
LIT(cmark_node_get_heading_level(node) == 1 ? ".SH" : ".SS");
|
||||
CR();
|
||||
} else {
|
||||
CR();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CODE_BLOCK:
|
||||
CR();
|
||||
LIT(".IP\n.nf\n\\f[C]\n");
|
||||
OUT(cmark_node_get_literal(node), false, NORMAL);
|
||||
CR();
|
||||
LIT("\\f[]\n.fi");
|
||||
CR();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HTML_BLOCK:
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
CR();
|
||||
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
|
||||
false, LITERAL);
|
||||
CR();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_THEMATIC_BREAK:
|
||||
CR();
|
||||
LIT(".PP\n * * * * *");
|
||||
CR();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_PARAGRAPH:
|
||||
if (entering) {
|
||||
// no blank line if first paragraph in list:
|
||||
if (node->parent && node->parent->type == CMARK_NODE_ITEM &&
|
||||
node->prev == NULL) {
|
||||
// no blank line or .PP
|
||||
} else {
|
||||
CR();
|
||||
LIT(".PP");
|
||||
CR();
|
||||
}
|
||||
} else {
|
||||
CR();
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_TEXT:
|
||||
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LINEBREAK:
|
||||
LIT(".PD 0\n.P\n.PD");
|
||||
CR();
|
||||
break;
|
||||
|
||||
case CMARK_NODE_SOFTBREAK:
|
||||
if (options & CMARK_OPT_HARDBREAKS) {
|
||||
LIT(".PD 0\n.P\n.PD");
|
||||
CR();
|
||||
} else if (renderer->width == 0 && !(CMARK_OPT_NOBREAKS & options)) {
|
||||
CR();
|
||||
} else {
|
||||
OUT(" ", allow_wrap, LITERAL);
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CODE:
|
||||
LIT("\\f[C]");
|
||||
OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
|
||||
LIT("\\f[]");
|
||||
break;
|
||||
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
break;
|
||||
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
|
||||
false, LITERAL);
|
||||
break;
|
||||
|
||||
case CMARK_NODE_STRONG:
|
||||
if (entering) {
|
||||
LIT("\\f[B]");
|
||||
} else {
|
||||
LIT("\\f[]");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_EMPH:
|
||||
if (entering) {
|
||||
LIT("\\f[I]");
|
||||
} else {
|
||||
LIT("\\f[]");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LINK:
|
||||
if (!entering) {
|
||||
LIT(" (");
|
||||
OUT(cmark_node_get_url(node), allow_wrap, URL);
|
||||
LIT(")");
|
||||
}
|
||||
break;
|
||||
|
||||
case CMARK_NODE_IMAGE:
|
||||
if (entering) {
|
||||
LIT("[IMAGE: ");
|
||||
} else {
|
||||
LIT("]");
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *cmark_render_man(cmark_node *root, int options, int width) {
|
||||
return cmark_render(root, options, width, S_outc, S_render_node);
|
||||
}
|
|
@ -1,858 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "node.h"
|
||||
|
||||
static void S_node_unlink(cmark_node *node);
|
||||
|
||||
#define NODE_MEM(node) cmark_node_mem(node)
|
||||
|
||||
static CMARK_INLINE bool S_is_block(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return false;
|
||||
}
|
||||
return node->type >= CMARK_NODE_FIRST_BLOCK &&
|
||||
node->type <= CMARK_NODE_LAST_BLOCK;
|
||||
}
|
||||
|
||||
static CMARK_INLINE bool S_is_inline(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return false;
|
||||
}
|
||||
return node->type >= CMARK_NODE_FIRST_INLINE &&
|
||||
node->type <= CMARK_NODE_LAST_INLINE;
|
||||
}
|
||||
|
||||
static bool S_can_contain(cmark_node *node, cmark_node *child) {
|
||||
cmark_node *cur;
|
||||
|
||||
if (node == NULL || child == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify that child is not an ancestor of node or equal to node.
|
||||
cur = node;
|
||||
do {
|
||||
if (cur == child) {
|
||||
return false;
|
||||
}
|
||||
cur = cur->parent;
|
||||
} while (cur != NULL);
|
||||
|
||||
if (child->type == CMARK_NODE_DOCUMENT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_DOCUMENT:
|
||||
case CMARK_NODE_BLOCK_QUOTE:
|
||||
case CMARK_NODE_ITEM:
|
||||
return S_is_block(child) && child->type != CMARK_NODE_ITEM;
|
||||
|
||||
case CMARK_NODE_LIST:
|
||||
return child->type == CMARK_NODE_ITEM;
|
||||
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
return true;
|
||||
|
||||
case CMARK_NODE_PARAGRAPH:
|
||||
case CMARK_NODE_HEADING:
|
||||
case CMARK_NODE_EMPH:
|
||||
case CMARK_NODE_STRONG:
|
||||
case CMARK_NODE_LINK:
|
||||
case CMARK_NODE_IMAGE:
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
return S_is_inline(child);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
cmark_node *cmark_node_new_with_mem(cmark_node_type type, cmark_mem *mem) {
|
||||
cmark_node *node = (cmark_node *)mem->calloc(1, sizeof(*node));
|
||||
cmark_strbuf_init(mem, &node->content, 0);
|
||||
node->type = (uint16_t)type;
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_HEADING:
|
||||
node->as.heading.level = 1;
|
||||
break;
|
||||
|
||||
case CMARK_NODE_LIST: {
|
||||
cmark_list *list = &node->as.list;
|
||||
list->list_type = CMARK_BULLET_LIST;
|
||||
list->start = 0;
|
||||
list->tight = false;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
cmark_node *cmark_node_new(cmark_node_type type) {
|
||||
extern cmark_mem DEFAULT_MEM_ALLOCATOR;
|
||||
return cmark_node_new_with_mem(type, &DEFAULT_MEM_ALLOCATOR);
|
||||
}
|
||||
|
||||
// Free a cmark_node list and any children.
|
||||
static void S_free_nodes(cmark_node *e) {
|
||||
cmark_node *next;
|
||||
while (e != NULL) {
|
||||
cmark_strbuf_free(&e->content);
|
||||
switch (e->type) {
|
||||
case CMARK_NODE_CODE_BLOCK:
|
||||
cmark_chunk_free(NODE_MEM(e), &e->as.code.info);
|
||||
cmark_chunk_free(NODE_MEM(e), &e->as.code.literal);
|
||||
break;
|
||||
case CMARK_NODE_TEXT:
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
case CMARK_NODE_CODE:
|
||||
case CMARK_NODE_HTML_BLOCK:
|
||||
cmark_chunk_free(NODE_MEM(e), &e->as.literal);
|
||||
break;
|
||||
case CMARK_NODE_LINK:
|
||||
case CMARK_NODE_IMAGE:
|
||||
cmark_chunk_free(NODE_MEM(e), &e->as.link.url);
|
||||
cmark_chunk_free(NODE_MEM(e), &e->as.link.title);
|
||||
break;
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_enter);
|
||||
cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_exit);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (e->last_child) {
|
||||
// Splice children into list
|
||||
e->last_child->next = e->next;
|
||||
e->next = e->first_child;
|
||||
}
|
||||
next = e->next;
|
||||
NODE_MEM(e)->free(e);
|
||||
e = next;
|
||||
}
|
||||
}
|
||||
|
||||
void cmark_node_free(cmark_node *node) {
|
||||
S_node_unlink(node);
|
||||
node->next = NULL;
|
||||
S_free_nodes(node);
|
||||
}
|
||||
|
||||
cmark_node_type cmark_node_get_type(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return CMARK_NODE_NONE;
|
||||
} else {
|
||||
return (cmark_node_type)node->type;
|
||||
}
|
||||
}
|
||||
|
||||
const char *cmark_node_get_type_string(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return "NONE";
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_NONE:
|
||||
return "none";
|
||||
case CMARK_NODE_DOCUMENT:
|
||||
return "document";
|
||||
case CMARK_NODE_BLOCK_QUOTE:
|
||||
return "block_quote";
|
||||
case CMARK_NODE_LIST:
|
||||
return "list";
|
||||
case CMARK_NODE_ITEM:
|
||||
return "item";
|
||||
case CMARK_NODE_CODE_BLOCK:
|
||||
return "code_block";
|
||||
case CMARK_NODE_HTML_BLOCK:
|
||||
return "html_block";
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
return "custom_block";
|
||||
case CMARK_NODE_PARAGRAPH:
|
||||
return "paragraph";
|
||||
case CMARK_NODE_HEADING:
|
||||
return "heading";
|
||||
case CMARK_NODE_THEMATIC_BREAK:
|
||||
return "thematic_break";
|
||||
case CMARK_NODE_TEXT:
|
||||
return "text";
|
||||
case CMARK_NODE_SOFTBREAK:
|
||||
return "softbreak";
|
||||
case CMARK_NODE_LINEBREAK:
|
||||
return "linebreak";
|
||||
case CMARK_NODE_CODE:
|
||||
return "code";
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
return "html_inline";
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
return "custom_inline";
|
||||
case CMARK_NODE_EMPH:
|
||||
return "emph";
|
||||
case CMARK_NODE_STRONG:
|
||||
return "strong";
|
||||
case CMARK_NODE_LINK:
|
||||
return "link";
|
||||
case CMARK_NODE_IMAGE:
|
||||
return "image";
|
||||
}
|
||||
|
||||
return "<unknown>";
|
||||
}
|
||||
|
||||
cmark_node *cmark_node_next(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
} else {
|
||||
return node->next;
|
||||
}
|
||||
}
|
||||
|
||||
cmark_node *cmark_node_previous(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
} else {
|
||||
return node->prev;
|
||||
}
|
||||
}
|
||||
|
||||
cmark_node *cmark_node_parent(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
} else {
|
||||
return node->parent;
|
||||
}
|
||||
}
|
||||
|
||||
cmark_node *cmark_node_first_child(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
} else {
|
||||
return node->first_child;
|
||||
}
|
||||
}
|
||||
|
||||
cmark_node *cmark_node_last_child(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
} else {
|
||||
return node->last_child;
|
||||
}
|
||||
}
|
||||
|
||||
void *cmark_node_get_user_data(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
} else {
|
||||
return node->user_data;
|
||||
}
|
||||
}
|
||||
|
||||
int cmark_node_set_user_data(cmark_node *node, void *user_data) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
node->user_data = user_data;
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char *cmark_node_get_literal(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_HTML_BLOCK:
|
||||
case CMARK_NODE_TEXT:
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
case CMARK_NODE_CODE:
|
||||
return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.literal);
|
||||
|
||||
case CMARK_NODE_CODE_BLOCK:
|
||||
return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.literal);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int cmark_node_set_literal(cmark_node *node, const char *content) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_HTML_BLOCK:
|
||||
case CMARK_NODE_TEXT:
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
case CMARK_NODE_CODE:
|
||||
cmark_chunk_set_cstr(NODE_MEM(node), &node->as.literal, content);
|
||||
return 1;
|
||||
|
||||
case CMARK_NODE_CODE_BLOCK:
|
||||
cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.literal, content);
|
||||
return 1;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmark_node_get_heading_level(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_HEADING:
|
||||
return node->as.heading.level;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmark_node_set_heading_level(cmark_node *node, int level) {
|
||||
if (node == NULL || level < 1 || level > 6) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_HEADING:
|
||||
node->as.heading.level = level;
|
||||
return 1;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
cmark_list_type cmark_node_get_list_type(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return CMARK_NO_LIST;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_LIST) {
|
||||
return node->as.list.list_type;
|
||||
} else {
|
||||
return CMARK_NO_LIST;
|
||||
}
|
||||
}
|
||||
|
||||
int cmark_node_set_list_type(cmark_node *node, cmark_list_type type) {
|
||||
if (!(type == CMARK_BULLET_LIST || type == CMARK_ORDERED_LIST)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_LIST) {
|
||||
node->as.list.list_type = type;
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
cmark_delim_type cmark_node_get_list_delim(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return CMARK_NO_DELIM;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_LIST) {
|
||||
return node->as.list.delimiter;
|
||||
} else {
|
||||
return CMARK_NO_DELIM;
|
||||
}
|
||||
}
|
||||
|
||||
int cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim) {
|
||||
if (!(delim == CMARK_PERIOD_DELIM || delim == CMARK_PAREN_DELIM)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_LIST) {
|
||||
node->as.list.delimiter = delim;
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int cmark_node_get_list_start(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_LIST) {
|
||||
return node->as.list.start;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int cmark_node_set_list_start(cmark_node *node, int start) {
|
||||
if (node == NULL || start < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_LIST) {
|
||||
node->as.list.start = start;
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int cmark_node_get_list_tight(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_LIST) {
|
||||
return node->as.list.tight;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int cmark_node_set_list_tight(cmark_node *node, int tight) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_LIST) {
|
||||
node->as.list.tight = tight == 1;
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
const char *cmark_node_get_fence_info(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_CODE_BLOCK) {
|
||||
return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.code.info);
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int cmark_node_set_fence_info(cmark_node *node, const char *info) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (node->type == CMARK_NODE_CODE_BLOCK) {
|
||||
cmark_chunk_set_cstr(NODE_MEM(node), &node->as.code.info, info);
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
const char *cmark_node_get_url(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_LINK:
|
||||
case CMARK_NODE_IMAGE:
|
||||
return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.url);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int cmark_node_set_url(cmark_node *node, const char *url) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_LINK:
|
||||
case CMARK_NODE_IMAGE:
|
||||
cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.url, url);
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *cmark_node_get_title(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_LINK:
|
||||
case CMARK_NODE_IMAGE:
|
||||
return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.link.title);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int cmark_node_set_title(cmark_node *node, const char *title) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_LINK:
|
||||
case CMARK_NODE_IMAGE:
|
||||
cmark_chunk_set_cstr(NODE_MEM(node), &node->as.link.title, title);
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *cmark_node_get_on_enter(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_enter);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int cmark_node_set_on_enter(cmark_node *node, const char *on_enter) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_enter, on_enter);
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *cmark_node_get_on_exit(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
return cmark_chunk_to_cstr(NODE_MEM(node), &node->as.custom.on_exit);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
cmark_chunk_set_cstr(NODE_MEM(node), &node->as.custom.on_exit, on_exit);
|
||||
return 1;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmark_node_get_start_line(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
return node->start_line;
|
||||
}
|
||||
|
||||
int cmark_node_get_start_column(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
return node->start_column;
|
||||
}
|
||||
|
||||
int cmark_node_get_end_line(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
return node->end_line;
|
||||
}
|
||||
|
||||
int cmark_node_get_end_column(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return 0;
|
||||
}
|
||||
return node->end_column;
|
||||
}
|
||||
|
||||
// Unlink a node without adjusting its next, prev, and parent pointers.
|
||||
static void S_node_unlink(cmark_node *node) {
|
||||
if (node == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (node->prev) {
|
||||
node->prev->next = node->next;
|
||||
}
|
||||
if (node->next) {
|
||||
node->next->prev = node->prev;
|
||||
}
|
||||
|
||||
// Adjust first_child and last_child of parent.
|
||||
cmark_node *parent = node->parent;
|
||||
if (parent) {
|
||||
if (parent->first_child == node) {
|
||||
parent->first_child = node->next;
|
||||
}
|
||||
if (parent->last_child == node) {
|
||||
parent->last_child = node->prev;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cmark_node_unlink(cmark_node *node) {
|
||||
S_node_unlink(node);
|
||||
|
||||
node->next = NULL;
|
||||
node->prev = NULL;
|
||||
node->parent = NULL;
|
||||
}
|
||||
|
||||
int cmark_node_insert_before(cmark_node *node, cmark_node *sibling) {
|
||||
if (node == NULL || sibling == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!node->parent || !S_can_contain(node->parent, sibling)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
S_node_unlink(sibling);
|
||||
|
||||
cmark_node *old_prev = node->prev;
|
||||
|
||||
// Insert 'sibling' between 'old_prev' and 'node'.
|
||||
if (old_prev) {
|
||||
old_prev->next = sibling;
|
||||
}
|
||||
sibling->prev = old_prev;
|
||||
sibling->next = node;
|
||||
node->prev = sibling;
|
||||
|
||||
// Set new parent.
|
||||
cmark_node *parent = node->parent;
|
||||
sibling->parent = parent;
|
||||
|
||||
// Adjust first_child of parent if inserted as first child.
|
||||
if (parent && !old_prev) {
|
||||
parent->first_child = sibling;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int cmark_node_insert_after(cmark_node *node, cmark_node *sibling) {
|
||||
if (node == NULL || sibling == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!node->parent || !S_can_contain(node->parent, sibling)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
S_node_unlink(sibling);
|
||||
|
||||
cmark_node *old_next = node->next;
|
||||
|
||||
// Insert 'sibling' between 'node' and 'old_next'.
|
||||
if (old_next) {
|
||||
old_next->prev = sibling;
|
||||
}
|
||||
sibling->next = old_next;
|
||||
sibling->prev = node;
|
||||
node->next = sibling;
|
||||
|
||||
// Set new parent.
|
||||
cmark_node *parent = node->parent;
|
||||
sibling->parent = parent;
|
||||
|
||||
// Adjust last_child of parent if inserted as last child.
|
||||
if (parent && !old_next) {
|
||||
parent->last_child = sibling;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int cmark_node_replace(cmark_node *oldnode, cmark_node *newnode) {
|
||||
if (!cmark_node_insert_before(oldnode, newnode)) {
|
||||
return 0;
|
||||
}
|
||||
cmark_node_unlink(oldnode);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int cmark_node_prepend_child(cmark_node *node, cmark_node *child) {
|
||||
if (!S_can_contain(node, child)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
S_node_unlink(child);
|
||||
|
||||
cmark_node *old_first_child = node->first_child;
|
||||
|
||||
child->next = old_first_child;
|
||||
child->prev = NULL;
|
||||
child->parent = node;
|
||||
node->first_child = child;
|
||||
|
||||
if (old_first_child) {
|
||||
old_first_child->prev = child;
|
||||
} else {
|
||||
// Also set last_child if node previously had no children.
|
||||
node->last_child = child;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int cmark_node_append_child(cmark_node *node, cmark_node *child) {
|
||||
if (!S_can_contain(node, child)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
S_node_unlink(child);
|
||||
|
||||
cmark_node *old_last_child = node->last_child;
|
||||
|
||||
child->next = NULL;
|
||||
child->prev = old_last_child;
|
||||
child->parent = node;
|
||||
node->last_child = child;
|
||||
|
||||
if (old_last_child) {
|
||||
old_last_child->next = child;
|
||||
} else {
|
||||
// Also set first_child if node previously had no children.
|
||||
node->first_child = child;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void S_print_error(FILE *out, cmark_node *node, const char *elem) {
|
||||
if (out == NULL) {
|
||||
return;
|
||||
}
|
||||
fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem,
|
||||
cmark_node_get_type_string(node), node->start_line,
|
||||
node->start_column);
|
||||
}
|
||||
|
||||
int cmark_node_check(cmark_node *node, FILE *out) {
|
||||
cmark_node *cur;
|
||||
int errors = 0;
|
||||
|
||||
if (!node) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
cur = node;
|
||||
for (;;) {
|
||||
if (cur->first_child) {
|
||||
if (cur->first_child->prev != NULL) {
|
||||
S_print_error(out, cur->first_child, "prev");
|
||||
cur->first_child->prev = NULL;
|
||||
++errors;
|
||||
}
|
||||
if (cur->first_child->parent != cur) {
|
||||
S_print_error(out, cur->first_child, "parent");
|
||||
cur->first_child->parent = cur;
|
||||
++errors;
|
||||
}
|
||||
cur = cur->first_child;
|
||||
continue;
|
||||
}
|
||||
|
||||
next_sibling:
|
||||
if (cur == node) {
|
||||
break;
|
||||
}
|
||||
if (cur->next) {
|
||||
if (cur->next->prev != cur) {
|
||||
S_print_error(out, cur->next, "prev");
|
||||
cur->next->prev = cur;
|
||||
++errors;
|
||||
}
|
||||
if (cur->next->parent != cur->parent) {
|
||||
S_print_error(out, cur->next, "parent");
|
||||
cur->next->parent = cur->parent;
|
||||
++errors;
|
||||
}
|
||||
cur = cur->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cur->parent->last_child != cur) {
|
||||
S_print_error(out, cur->parent, "last_child");
|
||||
cur->parent->last_child = cur;
|
||||
++errors;
|
||||
}
|
||||
cur = cur->parent;
|
||||
goto next_sibling;
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
|
@ -1,93 +0,0 @@
|
|||
#ifndef CMARK_NODE_H
|
||||
#define CMARK_NODE_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "cmark.h"
|
||||
#include "buffer.h"
|
||||
#include "chunk.h"
|
||||
|
||||
typedef struct {
|
||||
cmark_list_type list_type;
|
||||
int marker_offset;
|
||||
int padding;
|
||||
int start;
|
||||
cmark_delim_type delimiter;
|
||||
unsigned char bullet_char;
|
||||
bool tight;
|
||||
} cmark_list;
|
||||
|
||||
typedef struct {
|
||||
cmark_chunk info;
|
||||
cmark_chunk literal;
|
||||
uint8_t fence_length;
|
||||
uint8_t fence_offset;
|
||||
unsigned char fence_char;
|
||||
int8_t fenced;
|
||||
} cmark_code;
|
||||
|
||||
typedef struct {
|
||||
int level;
|
||||
bool setext;
|
||||
} cmark_heading;
|
||||
|
||||
typedef struct {
|
||||
cmark_chunk url;
|
||||
cmark_chunk title;
|
||||
} cmark_link;
|
||||
|
||||
typedef struct {
|
||||
cmark_chunk on_enter;
|
||||
cmark_chunk on_exit;
|
||||
} cmark_custom;
|
||||
|
||||
enum cmark_node__internal_flags {
|
||||
CMARK_NODE__OPEN = (1 << 0),
|
||||
CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
|
||||
};
|
||||
|
||||
struct cmark_node {
|
||||
cmark_strbuf content;
|
||||
|
||||
struct cmark_node *next;
|
||||
struct cmark_node *prev;
|
||||
struct cmark_node *parent;
|
||||
struct cmark_node *first_child;
|
||||
struct cmark_node *last_child;
|
||||
|
||||
void *user_data;
|
||||
|
||||
int start_line;
|
||||
int start_column;
|
||||
int end_line;
|
||||
int end_column;
|
||||
int internal_offset;
|
||||
uint16_t type;
|
||||
uint16_t flags;
|
||||
|
||||
union {
|
||||
cmark_chunk literal;
|
||||
cmark_list list;
|
||||
cmark_code code;
|
||||
cmark_heading heading;
|
||||
cmark_link link;
|
||||
cmark_custom custom;
|
||||
int html_block_type;
|
||||
} as;
|
||||
};
|
||||
|
||||
static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) {
|
||||
return node->content.mem;
|
||||
}
|
||||
CMARK_EXPORT int cmark_node_check(cmark_node *node, FILE *out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,39 +0,0 @@
|
|||
#ifndef CMARK_AST_H
|
||||
#define CMARK_AST_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "node.h"
|
||||
#include "buffer.h"
|
||||
#include "memory.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_LINK_LABEL_LENGTH 1000
|
||||
|
||||
struct cmark_parser {
|
||||
struct cmark_mem *mem;
|
||||
struct cmark_reference_map *refmap;
|
||||
struct cmark_node *root;
|
||||
struct cmark_node *current;
|
||||
int line_number;
|
||||
bufsize_t offset;
|
||||
bufsize_t column;
|
||||
bufsize_t first_nonspace;
|
||||
bufsize_t first_nonspace_column;
|
||||
int indent;
|
||||
bool blank;
|
||||
bool partially_consumed_tab;
|
||||
cmark_strbuf curline;
|
||||
bufsize_t last_line_length;
|
||||
cmark_strbuf linebuf;
|
||||
int options;
|
||||
bool last_buffer_ended_with_cr;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,146 +0,0 @@
|
|||
#include "cmark.h"
|
||||
#include "utf8.h"
|
||||
#include "parser.h"
|
||||
#include "references.h"
|
||||
#include "inlines.h"
|
||||
#include "chunk.h"
|
||||
|
||||
static unsigned int refhash(const unsigned char *link_ref) {
|
||||
unsigned int hash = 0;
|
||||
|
||||
while (*link_ref)
|
||||
hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash;
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
static void reference_free(cmark_reference_map *map, cmark_reference *ref) {
|
||||
cmark_mem *mem = map->mem;
|
||||
if (ref != NULL) {
|
||||
mem->free(ref->label);
|
||||
cmark_chunk_free(mem, &ref->url);
|
||||
cmark_chunk_free(mem, &ref->title);
|
||||
mem->free(ref);
|
||||
}
|
||||
}
|
||||
|
||||
// normalize reference: collapse internal whitespace to single space,
|
||||
// remove leading/trailing whitespace, case fold
|
||||
// Return NULL if the reference name is actually empty (i.e. composed
|
||||
// solely from whitespace)
|
||||
static unsigned char *normalize_reference(cmark_mem *mem, cmark_chunk *ref) {
|
||||
cmark_strbuf normalized = CMARK_BUF_INIT(mem);
|
||||
unsigned char *result;
|
||||
|
||||
if (ref == NULL)
|
||||
return NULL;
|
||||
|
||||
if (ref->len == 0)
|
||||
return NULL;
|
||||
|
||||
cmark_utf8proc_case_fold(&normalized, ref->data, ref->len);
|
||||
cmark_strbuf_trim(&normalized);
|
||||
cmark_strbuf_normalize_whitespace(&normalized);
|
||||
|
||||
result = cmark_strbuf_detach(&normalized);
|
||||
assert(result);
|
||||
|
||||
if (result[0] == '\0') {
|
||||
mem->free(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void add_reference(cmark_reference_map *map, cmark_reference *ref) {
|
||||
cmark_reference *t = ref->next = map->table[ref->hash % REFMAP_SIZE];
|
||||
|
||||
while (t) {
|
||||
if (t->hash == ref->hash && !strcmp((char *)t->label, (char *)ref->label)) {
|
||||
reference_free(map, ref);
|
||||
return;
|
||||
}
|
||||
|
||||
t = t->next;
|
||||
}
|
||||
|
||||
map->table[ref->hash % REFMAP_SIZE] = ref;
|
||||
}
|
||||
|
||||
void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
|
||||
cmark_chunk *url, cmark_chunk *title) {
|
||||
cmark_reference *ref;
|
||||
unsigned char *reflabel = normalize_reference(map->mem, label);
|
||||
|
||||
/* empty reference name, or composed from only whitespace */
|
||||
if (reflabel == NULL)
|
||||
return;
|
||||
|
||||
ref = (cmark_reference *)map->mem->calloc(1, sizeof(*ref));
|
||||
ref->label = reflabel;
|
||||
ref->hash = refhash(ref->label);
|
||||
ref->url = cmark_clean_url(map->mem, url);
|
||||
ref->title = cmark_clean_title(map->mem, title);
|
||||
ref->next = NULL;
|
||||
|
||||
add_reference(map, ref);
|
||||
}
|
||||
|
||||
// Returns reference if refmap contains a reference with matching
|
||||
// label, otherwise NULL.
|
||||
cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
|
||||
cmark_chunk *label) {
|
||||
cmark_reference *ref = NULL;
|
||||
unsigned char *norm;
|
||||
unsigned int hash;
|
||||
|
||||
if (label->len < 1 || label->len > MAX_LINK_LABEL_LENGTH)
|
||||
return NULL;
|
||||
|
||||
if (map == NULL)
|
||||
return NULL;
|
||||
|
||||
norm = normalize_reference(map->mem, label);
|
||||
if (norm == NULL)
|
||||
return NULL;
|
||||
|
||||
hash = refhash(norm);
|
||||
ref = map->table[hash % REFMAP_SIZE];
|
||||
|
||||
while (ref) {
|
||||
if (ref->hash == hash && !strcmp((char *)ref->label, (char *)norm))
|
||||
break;
|
||||
ref = ref->next;
|
||||
}
|
||||
|
||||
map->mem->free(norm);
|
||||
return ref;
|
||||
}
|
||||
|
||||
void cmark_reference_map_free(cmark_reference_map *map) {
|
||||
unsigned int i;
|
||||
|
||||
if (map == NULL)
|
||||
return;
|
||||
|
||||
for (i = 0; i < REFMAP_SIZE; ++i) {
|
||||
cmark_reference *ref = map->table[i];
|
||||
cmark_reference *next;
|
||||
|
||||
while (ref) {
|
||||
next = ref->next;
|
||||
reference_free(map, ref);
|
||||
ref = next;
|
||||
}
|
||||
}
|
||||
|
||||
map->mem->free(map);
|
||||
}
|
||||
|
||||
cmark_reference_map *cmark_reference_map_new(cmark_mem *mem) {
|
||||
cmark_reference_map *map =
|
||||
(cmark_reference_map *)mem->calloc(1, sizeof(cmark_reference_map));
|
||||
map->mem = mem;
|
||||
return map;
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
#ifndef CMARK_REFERENCES_H
|
||||
#define CMARK_REFERENCES_H
|
||||
|
||||
#include "memory.h"
|
||||
#include "chunk.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define REFMAP_SIZE 16
|
||||
|
||||
struct cmark_reference {
|
||||
struct cmark_reference *next;
|
||||
unsigned char *label;
|
||||
cmark_chunk url;
|
||||
cmark_chunk title;
|
||||
unsigned int hash;
|
||||
};
|
||||
|
||||
typedef struct cmark_reference cmark_reference;
|
||||
|
||||
struct cmark_reference_map {
|
||||
cmark_mem *mem;
|
||||
cmark_reference *table[REFMAP_SIZE];
|
||||
};
|
||||
|
||||
typedef struct cmark_reference_map cmark_reference_map;
|
||||
|
||||
cmark_reference_map *cmark_reference_map_new(cmark_mem *mem);
|
||||
void cmark_reference_map_free(cmark_reference_map *map);
|
||||
cmark_reference *cmark_reference_lookup(cmark_reference_map *map,
|
||||
cmark_chunk *label);
|
||||
extern void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label,
|
||||
cmark_chunk *url, cmark_chunk *title);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,186 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include "buffer.h"
|
||||
#include "chunk.h"
|
||||
#include "cmark.h"
|
||||
#include "utf8.h"
|
||||
#include "render.h"
|
||||
#include "node.h"
|
||||
|
||||
static CMARK_INLINE void S_cr(cmark_renderer *renderer) {
|
||||
if (renderer->need_cr < 1) {
|
||||
renderer->need_cr = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static CMARK_INLINE void S_blankline(cmark_renderer *renderer) {
|
||||
if (renderer->need_cr < 2) {
|
||||
renderer->need_cr = 2;
|
||||
}
|
||||
}
|
||||
|
||||
static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
|
||||
cmark_escaping escape) {
|
||||
int length = strlen(source);
|
||||
unsigned char nextc;
|
||||
int32_t c;
|
||||
int i = 0;
|
||||
int last_nonspace;
|
||||
int len;
|
||||
cmark_chunk remainder = cmark_chunk_literal("");
|
||||
int k = renderer->buffer->size - 1;
|
||||
|
||||
wrap = wrap && !renderer->no_linebreaks;
|
||||
|
||||
if (renderer->in_tight_list_item && renderer->need_cr > 1) {
|
||||
renderer->need_cr = 1;
|
||||
}
|
||||
while (renderer->need_cr) {
|
||||
if (k < 0 || renderer->buffer->ptr[k] == '\n') {
|
||||
k -= 1;
|
||||
} else {
|
||||
cmark_strbuf_putc(renderer->buffer, '\n');
|
||||
if (renderer->need_cr > 1) {
|
||||
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
|
||||
renderer->prefix->size);
|
||||
}
|
||||
}
|
||||
renderer->column = 0;
|
||||
renderer->last_breakable = 0;
|
||||
renderer->begin_line = true;
|
||||
renderer->begin_content = true;
|
||||
renderer->need_cr -= 1;
|
||||
}
|
||||
|
||||
while (i < length) {
|
||||
if (renderer->begin_line) {
|
||||
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
|
||||
renderer->prefix->size);
|
||||
// note: this assumes prefix is ascii:
|
||||
renderer->column = renderer->prefix->size;
|
||||
}
|
||||
|
||||
len = cmark_utf8proc_iterate((const uint8_t *)source + i, length - i, &c);
|
||||
if (len == -1) { // error condition
|
||||
return; // return without rendering rest of string
|
||||
}
|
||||
nextc = source[i + len];
|
||||
if (c == 32 && wrap) {
|
||||
if (!renderer->begin_line) {
|
||||
last_nonspace = renderer->buffer->size;
|
||||
cmark_strbuf_putc(renderer->buffer, ' ');
|
||||
renderer->column += 1;
|
||||
renderer->begin_line = false;
|
||||
renderer->begin_content = false;
|
||||
// skip following spaces
|
||||
while (source[i + 1] == ' ') {
|
||||
i++;
|
||||
}
|
||||
// We don't allow breaks that make a digit the first character
|
||||
// because this causes problems with commonmark output.
|
||||
if (!cmark_isdigit(source[i + 1])) {
|
||||
renderer->last_breakable = last_nonspace;
|
||||
}
|
||||
}
|
||||
|
||||
} else if (c == 10) {
|
||||
cmark_strbuf_putc(renderer->buffer, '\n');
|
||||
renderer->column = 0;
|
||||
renderer->begin_line = true;
|
||||
renderer->begin_content = true;
|
||||
renderer->last_breakable = 0;
|
||||
} else if (escape == LITERAL) {
|
||||
cmark_render_code_point(renderer, c);
|
||||
renderer->begin_line = false;
|
||||
// we don't set 'begin_content' to false til we've
|
||||
// finished parsing a digit. Reason: in commonmark
|
||||
// we need to escape a potential list marker after
|
||||
// a digit:
|
||||
renderer->begin_content =
|
||||
renderer->begin_content && cmark_isdigit(c) == 1;
|
||||
} else {
|
||||
(renderer->outc)(renderer, escape, c, nextc);
|
||||
renderer->begin_line = false;
|
||||
renderer->begin_content =
|
||||
renderer->begin_content && cmark_isdigit(c) == 1;
|
||||
}
|
||||
|
||||
// If adding the character went beyond width, look for an
|
||||
// earlier place where the line could be broken:
|
||||
if (renderer->width > 0 && renderer->column > renderer->width &&
|
||||
!renderer->begin_line && renderer->last_breakable > 0) {
|
||||
|
||||
// copy from last_breakable to remainder
|
||||
cmark_chunk_set_cstr(renderer->mem, &remainder,
|
||||
(char *)renderer->buffer->ptr +
|
||||
renderer->last_breakable + 1);
|
||||
// truncate at last_breakable
|
||||
cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable);
|
||||
// add newline, prefix, and remainder
|
||||
cmark_strbuf_putc(renderer->buffer, '\n');
|
||||
cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
|
||||
renderer->prefix->size);
|
||||
cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len);
|
||||
renderer->column = renderer->prefix->size + remainder.len;
|
||||
cmark_chunk_free(renderer->mem, &remainder);
|
||||
renderer->last_breakable = 0;
|
||||
renderer->begin_line = false;
|
||||
renderer->begin_content = false;
|
||||
}
|
||||
|
||||
i += len;
|
||||
}
|
||||
}
|
||||
|
||||
// Assumes no newlines, assumes ascii content:
|
||||
void cmark_render_ascii(cmark_renderer *renderer, const char *s) {
|
||||
int origsize = renderer->buffer->size;
|
||||
cmark_strbuf_puts(renderer->buffer, s);
|
||||
renderer->column += renderer->buffer->size - origsize;
|
||||
}
|
||||
|
||||
void cmark_render_code_point(cmark_renderer *renderer, uint32_t c) {
|
||||
cmark_utf8proc_encode_char(c, renderer->buffer);
|
||||
renderer->column += 1;
|
||||
}
|
||||
|
||||
char *cmark_render(cmark_node *root, int options, int width,
|
||||
void (*outc)(cmark_renderer *, cmark_escaping, int32_t,
|
||||
unsigned char),
|
||||
int (*render_node)(cmark_renderer *renderer,
|
||||
cmark_node *node,
|
||||
cmark_event_type ev_type, int options)) {
|
||||
cmark_mem *mem = cmark_node_mem(root);
|
||||
cmark_strbuf pref = CMARK_BUF_INIT(mem);
|
||||
cmark_strbuf buf = CMARK_BUF_INIT(mem);
|
||||
cmark_node *cur;
|
||||
cmark_event_type ev_type;
|
||||
char *result;
|
||||
cmark_iter *iter = cmark_iter_new(root);
|
||||
|
||||
cmark_renderer renderer = {mem, &buf, &pref, 0, width,
|
||||
0, 0, true, true, false,
|
||||
false, outc, S_cr, S_blankline, S_out};
|
||||
|
||||
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
|
||||
cur = cmark_iter_get_node(iter);
|
||||
if (!render_node(&renderer, cur, ev_type, options)) {
|
||||
// a false value causes us to skip processing
|
||||
// the node's contents. this is used for
|
||||
// autolinks.
|
||||
cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT);
|
||||
}
|
||||
}
|
||||
|
||||
// ensure final newline
|
||||
if (renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
|
||||
cmark_strbuf_putc(renderer.buffer, '\n');
|
||||
}
|
||||
|
||||
result = (char *)cmark_strbuf_detach(renderer.buffer);
|
||||
|
||||
cmark_iter_free(iter);
|
||||
cmark_strbuf_free(renderer.prefix);
|
||||
cmark_strbuf_free(renderer.buffer);
|
||||
|
||||
return result;
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
#ifndef CMARK_RENDER_H
|
||||
#define CMARK_RENDER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "buffer.h"
|
||||
#include "chunk.h"
|
||||
#include "memory.h"
|
||||
|
||||
typedef enum { LITERAL, NORMAL, TITLE, URL } cmark_escaping;
|
||||
|
||||
struct cmark_renderer {
|
||||
cmark_mem *mem;
|
||||
cmark_strbuf *buffer;
|
||||
cmark_strbuf *prefix;
|
||||
int column;
|
||||
int width;
|
||||
int need_cr;
|
||||
bufsize_t last_breakable;
|
||||
bool begin_line;
|
||||
bool begin_content;
|
||||
bool no_linebreaks;
|
||||
bool in_tight_list_item;
|
||||
void (*outc)(struct cmark_renderer *, cmark_escaping, int32_t, unsigned char);
|
||||
void (*cr)(struct cmark_renderer *);
|
||||
void (*blankline)(struct cmark_renderer *);
|
||||
void (*out)(struct cmark_renderer *, const char *, bool, cmark_escaping);
|
||||
};
|
||||
|
||||
typedef struct cmark_renderer cmark_renderer;
|
||||
|
||||
void cmark_render_ascii(cmark_renderer *renderer, const char *s);
|
||||
|
||||
void cmark_render_code_point(cmark_renderer *renderer, uint32_t c);
|
||||
|
||||
char *cmark_render(cmark_node *root, int options, int width,
|
||||
void (*outc)(cmark_renderer *, cmark_escaping, int32_t,
|
||||
unsigned char),
|
||||
int (*render_node)(cmark_renderer *renderer,
|
||||
cmark_node *node,
|
||||
cmark_event_type ev_type, int options));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
13190
include/cmark/scanners.c
13190
include/cmark/scanners.c
File diff suppressed because it is too large
Load Diff
|
@ -1,55 +0,0 @@
|
|||
#include "cmark.h"
|
||||
#include "chunk.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
|
||||
bufsize_t offset);
|
||||
bufsize_t _scan_scheme(const unsigned char *p);
|
||||
bufsize_t _scan_autolink_uri(const unsigned char *p);
|
||||
bufsize_t _scan_autolink_email(const unsigned char *p);
|
||||
bufsize_t _scan_html_tag(const unsigned char *p);
|
||||
bufsize_t _scan_html_block_start(const unsigned char *p);
|
||||
bufsize_t _scan_html_block_start_7(const unsigned char *p);
|
||||
bufsize_t _scan_html_block_end_1(const unsigned char *p);
|
||||
bufsize_t _scan_html_block_end_2(const unsigned char *p);
|
||||
bufsize_t _scan_html_block_end_3(const unsigned char *p);
|
||||
bufsize_t _scan_html_block_end_4(const unsigned char *p);
|
||||
bufsize_t _scan_html_block_end_5(const unsigned char *p);
|
||||
bufsize_t _scan_link_title(const unsigned char *p);
|
||||
bufsize_t _scan_spacechars(const unsigned char *p);
|
||||
bufsize_t _scan_atx_heading_start(const unsigned char *p);
|
||||
bufsize_t _scan_setext_heading_line(const unsigned char *p);
|
||||
bufsize_t _scan_thematic_break(const unsigned char *p);
|
||||
bufsize_t _scan_open_code_fence(const unsigned char *p);
|
||||
bufsize_t _scan_close_code_fence(const unsigned char *p);
|
||||
bufsize_t _scan_entity(const unsigned char *p);
|
||||
bufsize_t _scan_dangerous_url(const unsigned char *p);
|
||||
|
||||
#define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
|
||||
#define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
|
||||
#define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n)
|
||||
#define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n)
|
||||
#define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n)
|
||||
#define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n)
|
||||
#define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n)
|
||||
#define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n)
|
||||
#define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n)
|
||||
#define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n)
|
||||
#define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n)
|
||||
#define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n)
|
||||
#define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n)
|
||||
#define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n)
|
||||
#define scan_setext_heading_line(c, n) \
|
||||
_scan_at(&_scan_setext_heading_line, c, n)
|
||||
#define scan_thematic_break(c, n) _scan_at(&_scan_thematic_break, c, n)
|
||||
#define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
|
||||
#define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
|
||||
#define scan_entity(c, n) _scan_at(&_scan_entity, c, n)
|
||||
#define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -1,320 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include "chunk.h"
|
||||
#include "scanners.h"
|
||||
|
||||
bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset)
|
||||
{
|
||||
bufsize_t res;
|
||||
unsigned char *ptr = (unsigned char *)c->data;
|
||||
|
||||
if (ptr == NULL || offset > c->len) {
|
||||
return 0;
|
||||
} else {
|
||||
unsigned char lim = ptr[c->len];
|
||||
|
||||
ptr[c->len] = '\0';
|
||||
res = scanner(ptr + offset);
|
||||
ptr[c->len] = lim;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*!re2c
|
||||
re2c:define:YYCTYPE = "unsigned char";
|
||||
re2c:define:YYCURSOR = p;
|
||||
re2c:define:YYMARKER = marker;
|
||||
re2c:define:YYCTXMARKER = marker;
|
||||
re2c:yyfill:enable = 0;
|
||||
|
||||
wordchar = [^\x00-\x20];
|
||||
|
||||
spacechar = [ \t\v\f\r\n];
|
||||
|
||||
reg_char = [^\\()\x00-\x20];
|
||||
|
||||
escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-];
|
||||
|
||||
tagname = [A-Za-z][A-Za-z0-9-]*;
|
||||
|
||||
blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'head'|'header'|'hr'|'html'|'iframe'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul';
|
||||
|
||||
attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*;
|
||||
|
||||
unquotedvalue = [^ \t\r\n\v\f"'=<>`\x00]+;
|
||||
singlequotedvalue = ['][^'\x00]*['];
|
||||
doublequotedvalue = ["][^"\x00]*["];
|
||||
|
||||
attributevalue = unquotedvalue | singlequotedvalue | doublequotedvalue;
|
||||
|
||||
attributevaluespec = spacechar* [=] spacechar* attributevalue;
|
||||
|
||||
attribute = spacechar+ attributename attributevaluespec?;
|
||||
|
||||
opentag = tagname attribute* spacechar* [/]? [>];
|
||||
closetag = [/] tagname spacechar* [>];
|
||||
|
||||
htmlcomment = "!---->" | ("!--" ([-]? [^\x00>-]) ([-]? [^\x00-])* "-->");
|
||||
|
||||
processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00] | [>])* "?>";
|
||||
|
||||
declaration = "!" [A-Z]+ spacechar+ [^>\x00]* ">";
|
||||
|
||||
cdata = "![CDATA[" ([^\]\x00]+ | "]" [^\]\x00] | "]]" [^>\x00])* "]]>";
|
||||
|
||||
htmltag = opentag | closetag | htmlcomment | processinginstruction |
|
||||
declaration | cdata;
|
||||
|
||||
in_parens_nosp = [(] (reg_char|escaped_char|[\\])* [)];
|
||||
|
||||
in_double_quotes = ["] (escaped_char|[^"\x00])* ["];
|
||||
in_single_quotes = ['] (escaped_char|[^'\x00])* ['];
|
||||
in_parens = [(] (escaped_char|[^)\x00])* [)];
|
||||
|
||||
scheme = [A-Za-z][A-Za-z0-9.+-]{1,31};
|
||||
*/
|
||||
|
||||
// Try to match a scheme including colon.
|
||||
bufsize_t _scan_scheme(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
scheme [:] { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match URI autolink after first <, returning number of chars matched.
|
||||
bufsize_t _scan_autolink_uri(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match email autolink after first <, returning num of chars matched.
|
||||
bufsize_t _scan_autolink_email(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
|
||||
[@]
|
||||
[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
|
||||
([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*
|
||||
[>] { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match an HTML tag after first <, returning num of chars matched.
|
||||
bufsize_t _scan_html_tag(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
htmltag { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match an HTML block tag start line, returning
|
||||
// an integer code for the type of block (1-6, matching the spec).
|
||||
// #7 is handled by a separate function, below.
|
||||
bufsize_t _scan_html_block_start(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
/*!re2c
|
||||
[<] ('script'|'pre'|'style') (spacechar | [>]) { return 1; }
|
||||
'<!--' { return 2; }
|
||||
'<?' { return 3; }
|
||||
'<!' [A-Z] { return 4; }
|
||||
'<![CDATA[' { return 5; }
|
||||
[<] [/]? blocktagname (spacechar | [/]? [>]) { return 6; }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match an HTML block tag start line of type 7, returning
|
||||
// 7 if successful, 0 if not.
|
||||
bufsize_t _scan_html_block_start_7(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
/*!re2c
|
||||
[<] (opentag | closetag) [\t\n\f ]* [\r\n] { return 7; }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match an HTML block end line of type 1
|
||||
bufsize_t _scan_html_block_end_1(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[^\n\x00]* [<] [/] ('script'|'pre'|'style') [>] { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match an HTML block end line of type 2
|
||||
bufsize_t _scan_html_block_end_2(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[^\n\x00]* '-->' { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match an HTML block end line of type 3
|
||||
bufsize_t _scan_html_block_end_3(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[^\n\x00]* '?>' { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match an HTML block end line of type 4
|
||||
bufsize_t _scan_html_block_end_4(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[^\n\x00]* '>' { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match an HTML block end line of type 5
|
||||
bufsize_t _scan_html_block_end_5(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[^\n\x00]* ']]>' { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Try to match a link title (in single quotes, in double quotes, or
|
||||
// in parentheses), returning number of chars matched. Allow one
|
||||
// level of internal nesting (quotes within quotes).
|
||||
bufsize_t _scan_link_title(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); }
|
||||
['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); }
|
||||
[(] (escaped_char|[^)\x00])* [)] { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Match space characters, including newlines.
|
||||
bufsize_t _scan_spacechars(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *start = p; \
|
||||
/*!re2c
|
||||
[ \t\v\f\r\n]+ { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Match ATX heading start.
|
||||
bufsize_t _scan_atx_heading_start(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[#]{1,6} ([ \t]+|[\r\n]) { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Match setext heading line. Return 1 for level-1 heading,
|
||||
// 2 for level-2, 0 for no match.
|
||||
bufsize_t _scan_setext_heading_line(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
/*!re2c
|
||||
[=]+ [ \t]* [\r\n] { return 1; }
|
||||
[-]+ [ \t]* [\r\n] { return 2; }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Scan a thematic break line: "...three or more hyphens, asterisks,
|
||||
// or underscores on a line by themselves. If you wish, you may use
|
||||
// spaces between the hyphens or asterisks."
|
||||
bufsize_t _scan_thematic_break(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
([*][ \t]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
|
||||
([_][ \t]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
|
||||
([-][ \t]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Scan an opening code fence.
|
||||
bufsize_t _scan_open_code_fence(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
|
||||
[~]{3,} / [^\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Scan a closing code fence with length at least len.
|
||||
bufsize_t _scan_close_code_fence(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
|
||||
[~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Scans an entity.
|
||||
// Returns number of chars matched.
|
||||
bufsize_t _scan_entity(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
[&] ([#] ([Xx][A-Fa-f0-9]{1,6}|[0-9]{1,7}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
|
||||
{ return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
||||
// Returns positive value if a URL begins in a way that is potentially
|
||||
// dangerous, with javascript:, vbscript:, file:, or data:, otherwise 0.
|
||||
bufsize_t _scan_dangerous_url(const unsigned char *p)
|
||||
{
|
||||
const unsigned char *marker = NULL;
|
||||
const unsigned char *start = p;
|
||||
/*!re2c
|
||||
'data:image/' ('png'|'gif'|'jpeg'|'webp') { return 0; }
|
||||
'javascript:' | 'vbscript:' | 'file:' | 'data:' { return (bufsize_t)(p - start); }
|
||||
* { return 0; }
|
||||
*/
|
||||
}
|
||||
|
|
@ -1,317 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "cmark_ctype.h"
|
||||
#include "utf8.h"
|
||||
|
||||
static const int8_t utf8proc_utf8class[256] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
static void encode_unknown(cmark_strbuf *buf) {
|
||||
static const uint8_t repl[] = {239, 191, 189};
|
||||
cmark_strbuf_put(buf, repl, 3);
|
||||
}
|
||||
|
||||
static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) {
|
||||
int length, i;
|
||||
|
||||
if (!str_len)
|
||||
return 0;
|
||||
|
||||
length = utf8proc_utf8class[str[0]];
|
||||
|
||||
if (!length)
|
||||
return -1;
|
||||
|
||||
if (str_len >= 0 && (bufsize_t)length > str_len)
|
||||
return -str_len;
|
||||
|
||||
for (i = 1; i < length; i++) {
|
||||
if ((str[i] & 0xC0) != 0x80)
|
||||
return -i;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
// Validate a single UTF-8 character according to RFC 3629.
|
||||
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
|
||||
int length = utf8proc_utf8class[str[0]];
|
||||
|
||||
if (!length)
|
||||
return -1;
|
||||
|
||||
if ((bufsize_t)length > str_len)
|
||||
return -str_len;
|
||||
|
||||
switch (length) {
|
||||
case 2:
|
||||
if ((str[1] & 0xC0) != 0x80)
|
||||
return -1;
|
||||
if (str[0] < 0xC2) {
|
||||
// Overlong
|
||||
return -length;
|
||||
}
|
||||
break;
|
||||
|
||||
case 3:
|
||||
if ((str[1] & 0xC0) != 0x80)
|
||||
return -1;
|
||||
if ((str[2] & 0xC0) != 0x80)
|
||||
return -2;
|
||||
if (str[0] == 0xE0) {
|
||||
if (str[1] < 0xA0) {
|
||||
// Overlong
|
||||
return -length;
|
||||
}
|
||||
} else if (str[0] == 0xED) {
|
||||
if (str[1] >= 0xA0) {
|
||||
// Surrogate
|
||||
return -length;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 4:
|
||||
if ((str[1] & 0xC0) != 0x80)
|
||||
return -1;
|
||||
if ((str[2] & 0xC0) != 0x80)
|
||||
return -2;
|
||||
if ((str[3] & 0xC0) != 0x80)
|
||||
return -3;
|
||||
if (str[0] == 0xF0) {
|
||||
if (str[1] < 0x90) {
|
||||
// Overlong
|
||||
return -length;
|
||||
}
|
||||
} else if (str[0] >= 0xF4) {
|
||||
if (str[0] > 0xF4 || str[1] >= 0x90) {
|
||||
// Above 0x10FFFF
|
||||
return -length;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
|
||||
bufsize_t size) {
|
||||
bufsize_t i = 0;
|
||||
|
||||
while (i < size) {
|
||||
bufsize_t org = i;
|
||||
int charlen = 0;
|
||||
|
||||
while (i < size) {
|
||||
if (line[i] < 0x80 && line[i] != 0) {
|
||||
i++;
|
||||
} else if (line[i] >= 0x80) {
|
||||
charlen = utf8proc_valid(line + i, size - i);
|
||||
if (charlen < 0) {
|
||||
charlen = -charlen;
|
||||
break;
|
||||
}
|
||||
i += charlen;
|
||||
} else if (line[i] == 0) {
|
||||
// ASCII NUL is technically valid but rejected
|
||||
// for security reasons.
|
||||
charlen = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i > org) {
|
||||
cmark_strbuf_put(ob, line + org, i - org);
|
||||
}
|
||||
|
||||
if (i >= size) {
|
||||
break;
|
||||
} else {
|
||||
// Invalid UTF-8
|
||||
encode_unknown(ob);
|
||||
i += charlen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
|
||||
int32_t *dst) {
|
||||
int length;
|
||||
int32_t uc = -1;
|
||||
|
||||
*dst = -1;
|
||||
length = utf8proc_charlen(str, str_len);
|
||||
if (length < 0)
|
||||
return -1;
|
||||
|
||||
switch (length) {
|
||||
case 1:
|
||||
uc = str[0];
|
||||
break;
|
||||
case 2:
|
||||
uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
|
||||
if (uc < 0x80)
|
||||
uc = -1;
|
||||
break;
|
||||
case 3:
|
||||
uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F);
|
||||
if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000))
|
||||
uc = -1;
|
||||
break;
|
||||
case 4:
|
||||
uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) +
|
||||
((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
|
||||
if (uc < 0x10000 || uc >= 0x110000)
|
||||
uc = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (uc < 0)
|
||||
return -1;
|
||||
|
||||
*dst = uc;
|
||||
return length;
|
||||
}
|
||||
|
||||
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
|
||||
uint8_t dst[4];
|
||||
bufsize_t len = 0;
|
||||
|
||||
assert(uc >= 0);
|
||||
|
||||
if (uc < 0x80) {
|
||||
dst[0] = (uint8_t)(uc);
|
||||
len = 1;
|
||||
} else if (uc < 0x800) {
|
||||
dst[0] = (uint8_t)(0xC0 + (uc >> 6));
|
||||
dst[1] = 0x80 + (uc & 0x3F);
|
||||
len = 2;
|
||||
} else if (uc == 0xFFFF) {
|
||||
dst[0] = 0xFF;
|
||||
len = 1;
|
||||
} else if (uc == 0xFFFE) {
|
||||
dst[0] = 0xFE;
|
||||
len = 1;
|
||||
} else if (uc < 0x10000) {
|
||||
dst[0] = (uint8_t)(0xE0 + (uc >> 12));
|
||||
dst[1] = 0x80 + ((uc >> 6) & 0x3F);
|
||||
dst[2] = 0x80 + (uc & 0x3F);
|
||||
len = 3;
|
||||
} else if (uc < 0x110000) {
|
||||
dst[0] = (uint8_t)(0xF0 + (uc >> 18));
|
||||
dst[1] = 0x80 + ((uc >> 12) & 0x3F);
|
||||
dst[2] = 0x80 + ((uc >> 6) & 0x3F);
|
||||
dst[3] = 0x80 + (uc & 0x3F);
|
||||
len = 4;
|
||||
} else {
|
||||
encode_unknown(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
cmark_strbuf_put(buf, dst, len);
|
||||
}
|
||||
|
||||
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
||||
bufsize_t len) {
|
||||
int32_t c;
|
||||
|
||||
#define bufpush(x) cmark_utf8proc_encode_char(x, dest)
|
||||
|
||||
while (len > 0) {
|
||||
bufsize_t char_len = cmark_utf8proc_iterate(str, len, &c);
|
||||
|
||||
if (char_len >= 0) {
|
||||
#include "case_fold_switch.inc"
|
||||
} else {
|
||||
encode_unknown(dest);
|
||||
char_len = -char_len;
|
||||
}
|
||||
|
||||
str += char_len;
|
||||
len -= char_len;
|
||||
}
|
||||
}
|
||||
|
||||
// matches anything in the Zs class, plus LF, CR, TAB, FF.
|
||||
int cmark_utf8proc_is_space(int32_t uc) {
|
||||
return (uc == 9 || uc == 10 || uc == 12 || uc == 13 || uc == 32 ||
|
||||
uc == 160 || uc == 5760 || (uc >= 8192 && uc <= 8202) || uc == 8239 ||
|
||||
uc == 8287 || uc == 12288);
|
||||
}
|
||||
|
||||
// matches anything in the P[cdefios] classes.
|
||||
int cmark_utf8proc_is_punctuation(int32_t uc) {
|
||||
return (
|
||||
(uc < 128 && cmark_ispunct((char)uc)) || uc == 161 || uc == 167 ||
|
||||
uc == 171 || uc == 182 || uc == 183 || uc == 187 || uc == 191 ||
|
||||
uc == 894 || uc == 903 || (uc >= 1370 && uc <= 1375) || uc == 1417 ||
|
||||
uc == 1418 || uc == 1470 || uc == 1472 || uc == 1475 || uc == 1478 ||
|
||||
uc == 1523 || uc == 1524 || uc == 1545 || uc == 1546 || uc == 1548 ||
|
||||
uc == 1549 || uc == 1563 || uc == 1566 || uc == 1567 ||
|
||||
(uc >= 1642 && uc <= 1645) || uc == 1748 || (uc >= 1792 && uc <= 1805) ||
|
||||
(uc >= 2039 && uc <= 2041) || (uc >= 2096 && uc <= 2110) || uc == 2142 ||
|
||||
uc == 2404 || uc == 2405 || uc == 2416 || uc == 2800 || uc == 3572 ||
|
||||
uc == 3663 || uc == 3674 || uc == 3675 || (uc >= 3844 && uc <= 3858) ||
|
||||
uc == 3860 || (uc >= 3898 && uc <= 3901) || uc == 3973 ||
|
||||
(uc >= 4048 && uc <= 4052) || uc == 4057 || uc == 4058 ||
|
||||
(uc >= 4170 && uc <= 4175) || uc == 4347 || (uc >= 4960 && uc <= 4968) ||
|
||||
uc == 5120 || uc == 5741 || uc == 5742 || uc == 5787 || uc == 5788 ||
|
||||
(uc >= 5867 && uc <= 5869) || uc == 5941 || uc == 5942 ||
|
||||
(uc >= 6100 && uc <= 6102) || (uc >= 6104 && uc <= 6106) ||
|
||||
(uc >= 6144 && uc <= 6154) || uc == 6468 || uc == 6469 || uc == 6686 ||
|
||||
uc == 6687 || (uc >= 6816 && uc <= 6822) || (uc >= 6824 && uc <= 6829) ||
|
||||
(uc >= 7002 && uc <= 7008) || (uc >= 7164 && uc <= 7167) ||
|
||||
(uc >= 7227 && uc <= 7231) || uc == 7294 || uc == 7295 ||
|
||||
(uc >= 7360 && uc <= 7367) || uc == 7379 || (uc >= 8208 && uc <= 8231) ||
|
||||
(uc >= 8240 && uc <= 8259) || (uc >= 8261 && uc <= 8273) ||
|
||||
(uc >= 8275 && uc <= 8286) || uc == 8317 || uc == 8318 || uc == 8333 ||
|
||||
uc == 8334 || (uc >= 8968 && uc <= 8971) || uc == 9001 || uc == 9002 ||
|
||||
(uc >= 10088 && uc <= 10101) || uc == 10181 || uc == 10182 ||
|
||||
(uc >= 10214 && uc <= 10223) || (uc >= 10627 && uc <= 10648) ||
|
||||
(uc >= 10712 && uc <= 10715) || uc == 10748 || uc == 10749 ||
|
||||
(uc >= 11513 && uc <= 11516) || uc == 11518 || uc == 11519 ||
|
||||
uc == 11632 || (uc >= 11776 && uc <= 11822) ||
|
||||
(uc >= 11824 && uc <= 11842) || (uc >= 12289 && uc <= 12291) ||
|
||||
(uc >= 12296 && uc <= 12305) || (uc >= 12308 && uc <= 12319) ||
|
||||
uc == 12336 || uc == 12349 || uc == 12448 || uc == 12539 || uc == 42238 ||
|
||||
uc == 42239 || (uc >= 42509 && uc <= 42511) || uc == 42611 ||
|
||||
uc == 42622 || (uc >= 42738 && uc <= 42743) ||
|
||||
(uc >= 43124 && uc <= 43127) || uc == 43214 || uc == 43215 ||
|
||||
(uc >= 43256 && uc <= 43258) || uc == 43310 || uc == 43311 ||
|
||||
uc == 43359 || (uc >= 43457 && uc <= 43469) || uc == 43486 ||
|
||||
uc == 43487 || (uc >= 43612 && uc <= 43615) || uc == 43742 ||
|
||||
uc == 43743 || uc == 43760 || uc == 43761 || uc == 44011 || uc == 64830 ||
|
||||
uc == 64831 || (uc >= 65040 && uc <= 65049) ||
|
||||
(uc >= 65072 && uc <= 65106) || (uc >= 65108 && uc <= 65121) ||
|
||||
uc == 65123 || uc == 65128 || uc == 65130 || uc == 65131 ||
|
||||
(uc >= 65281 && uc <= 65283) || (uc >= 65285 && uc <= 65290) ||
|
||||
(uc >= 65292 && uc <= 65295) || uc == 65306 || uc == 65307 ||
|
||||
uc == 65311 || uc == 65312 || (uc >= 65339 && uc <= 65341) ||
|
||||
uc == 65343 || uc == 65371 || uc == 65373 ||
|
||||
(uc >= 65375 && uc <= 65381) || (uc >= 65792 && uc <= 65794) ||
|
||||
uc == 66463 || uc == 66512 || uc == 66927 || uc == 67671 || uc == 67871 ||
|
||||
uc == 67903 || (uc >= 68176 && uc <= 68184) || uc == 68223 ||
|
||||
(uc >= 68336 && uc <= 68342) || (uc >= 68409 && uc <= 68415) ||
|
||||
(uc >= 68505 && uc <= 68508) || (uc >= 69703 && uc <= 69709) ||
|
||||
uc == 69819 || uc == 69820 || (uc >= 69822 && uc <= 69825) ||
|
||||
(uc >= 69952 && uc <= 69955) || uc == 70004 || uc == 70005 ||
|
||||
(uc >= 70085 && uc <= 70088) || uc == 70093 ||
|
||||
(uc >= 70200 && uc <= 70205) || uc == 70854 ||
|
||||
(uc >= 71105 && uc <= 71113) || (uc >= 71233 && uc <= 71235) ||
|
||||
(uc >= 74864 && uc <= 74868) || uc == 92782 || uc == 92783 ||
|
||||
uc == 92917 || (uc >= 92983 && uc <= 92987) || uc == 92996 ||
|
||||
uc == 113823);
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
#ifndef CMARK_UTF8_H
|
||||
#define CMARK_UTF8_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
||||
bufsize_t len);
|
||||
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
|
||||
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
|
||||
void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
|
||||
bufsize_t size);
|
||||
int cmark_utf8proc_is_space(int32_t uc);
|
||||
int cmark_utf8proc_is_punctuation(int32_t uc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,170 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "cmark.h"
|
||||
#include "node.h"
|
||||
#include "buffer.h"
|
||||
#include "houdini.h"
|
||||
|
||||
#define BUFFER_SIZE 100
|
||||
|
||||
// Functions to convert cmark_nodes to XML strings.
|
||||
|
||||
static void escape_xml(cmark_strbuf *dest, const unsigned char *source,
|
||||
bufsize_t length) {
|
||||
houdini_escape_html0(dest, source, length, 0);
|
||||
}
|
||||
|
||||
struct render_state {
|
||||
cmark_strbuf *xml;
|
||||
int indent;
|
||||
};
|
||||
|
||||
static CMARK_INLINE void indent(struct render_state *state) {
|
||||
int i;
|
||||
for (i = 0; i < state->indent; i++) {
|
||||
cmark_strbuf_putc(state->xml, ' ');
|
||||
}
|
||||
}
|
||||
|
||||
static int S_render_node(cmark_node *node, cmark_event_type ev_type,
|
||||
struct render_state *state, int options) {
|
||||
cmark_strbuf *xml = state->xml;
|
||||
bool literal = false;
|
||||
cmark_delim_type delim;
|
||||
bool entering = (ev_type == CMARK_EVENT_ENTER);
|
||||
char buffer[BUFFER_SIZE];
|
||||
|
||||
if (entering) {
|
||||
indent(state);
|
||||
cmark_strbuf_putc(xml, '<');
|
||||
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
|
||||
|
||||
if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
|
||||
snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
|
||||
node->start_line, node->start_column, node->end_line,
|
||||
node->end_column);
|
||||
cmark_strbuf_puts(xml, buffer);
|
||||
}
|
||||
|
||||
literal = false;
|
||||
|
||||
switch (node->type) {
|
||||
case CMARK_NODE_DOCUMENT:
|
||||
cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
|
||||
break;
|
||||
case CMARK_NODE_TEXT:
|
||||
case CMARK_NODE_CODE:
|
||||
case CMARK_NODE_HTML_BLOCK:
|
||||
case CMARK_NODE_HTML_INLINE:
|
||||
cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
|
||||
escape_xml(xml, node->as.literal.data, node->as.literal.len);
|
||||
cmark_strbuf_puts(xml, "</");
|
||||
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
|
||||
literal = true;
|
||||
break;
|
||||
case CMARK_NODE_LIST:
|
||||
switch (cmark_node_get_list_type(node)) {
|
||||
case CMARK_ORDERED_LIST:
|
||||
cmark_strbuf_puts(xml, " type=\"ordered\"");
|
||||
snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",
|
||||
cmark_node_get_list_start(node));
|
||||
cmark_strbuf_puts(xml, buffer);
|
||||
delim = cmark_node_get_list_delim(node);
|
||||
if (delim == CMARK_PAREN_DELIM) {
|
||||
cmark_strbuf_puts(xml, " delim=\"paren\"");
|
||||
} else if (delim == CMARK_PERIOD_DELIM) {
|
||||
cmark_strbuf_puts(xml, " delim=\"period\"");
|
||||
}
|
||||
break;
|
||||
case CMARK_BULLET_LIST:
|
||||
cmark_strbuf_puts(xml, " type=\"bullet\"");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",
|
||||
(cmark_node_get_list_tight(node) ? "true" : "false"));
|
||||
cmark_strbuf_puts(xml, buffer);
|
||||
break;
|
||||
case CMARK_NODE_HEADING:
|
||||
snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
|
||||
cmark_strbuf_puts(xml, buffer);
|
||||
break;
|
||||
case CMARK_NODE_CODE_BLOCK:
|
||||
if (node->as.code.info.len > 0) {
|
||||
cmark_strbuf_puts(xml, " info=\"");
|
||||
escape_xml(xml, node->as.code.info.data, node->as.code.info.len);
|
||||
cmark_strbuf_putc(xml, '"');
|
||||
}
|
||||
cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
|
||||
escape_xml(xml, node->as.code.literal.data, node->as.code.literal.len);
|
||||
cmark_strbuf_puts(xml, "</");
|
||||
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
|
||||
literal = true;
|
||||
break;
|
||||
case CMARK_NODE_CUSTOM_BLOCK:
|
||||
case CMARK_NODE_CUSTOM_INLINE:
|
||||
cmark_strbuf_puts(xml, " on_enter=\"");
|
||||
escape_xml(xml, node->as.custom.on_enter.data,
|
||||
node->as.custom.on_enter.len);
|
||||
cmark_strbuf_putc(xml, '"');
|
||||
cmark_strbuf_puts(xml, " on_exit=\"");
|
||||
escape_xml(xml, node->as.custom.on_exit.data,
|
||||
node->as.custom.on_exit.len);
|
||||
cmark_strbuf_putc(xml, '"');
|
||||
break;
|
||||
case CMARK_NODE_LINK:
|
||||
case CMARK_NODE_IMAGE:
|
||||
cmark_strbuf_puts(xml, " destination=\"");
|
||||
escape_xml(xml, node->as.link.url.data, node->as.link.url.len);
|
||||
cmark_strbuf_putc(xml, '"');
|
||||
cmark_strbuf_puts(xml, " title=\"");
|
||||
escape_xml(xml, node->as.link.title.data, node->as.link.title.len);
|
||||
cmark_strbuf_putc(xml, '"');
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (node->first_child) {
|
||||
state->indent += 2;
|
||||
} else if (!literal) {
|
||||
cmark_strbuf_puts(xml, " /");
|
||||
}
|
||||
cmark_strbuf_puts(xml, ">\n");
|
||||
|
||||
} else if (node->first_child) {
|
||||
state->indent -= 2;
|
||||
indent(state);
|
||||
cmark_strbuf_puts(xml, "</");
|
||||
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
|
||||
cmark_strbuf_puts(xml, ">\n");
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *cmark_render_xml(cmark_node *root, int options) {
|
||||
char *result;
|
||||
cmark_strbuf xml = CMARK_BUF_INIT(cmark_node_mem(root));
|
||||
cmark_event_type ev_type;
|
||||
cmark_node *cur;
|
||||
struct render_state state = {&xml, 0};
|
||||
|
||||
cmark_iter *iter = cmark_iter_new(root);
|
||||
|
||||
cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
|
||||
cmark_strbuf_puts(state.xml,
|
||||
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");
|
||||
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
|
||||
cur = cmark_iter_get_node(iter);
|
||||
S_render_node(cur, ev_type, &state, options);
|
||||
}
|
||||
result = (char *)cmark_strbuf_detach(&xml);
|
||||
|
||||
cmark_iter_free(iter);
|
||||
return result;
|
||||
}
|
|
@ -0,0 +1,281 @@
|
|||
#include "autolink.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#ifndef _MSC_VER
|
||||
#include <strings.h>
|
||||
#else
|
||||
#define strncasecmp _strnicmp
|
||||
#endif
|
||||
|
||||
int
|
||||
hoedown_autolink_is_safe(const uint8_t *data, size_t size)
|
||||
{
|
||||
static const size_t valid_uris_count = 6;
|
||||
static const char *valid_uris[] = {
|
||||
"http://", "https://", "/", "#", "ftp://", "mailto:"
|
||||
};
|
||||
static const size_t valid_uris_size[] = { 7, 8, 1, 1, 6, 7 };
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < valid_uris_count; ++i) {
|
||||
size_t len = valid_uris_size[i];
|
||||
|
||||
if (size > len &&
|
||||
strncasecmp((char *)data, valid_uris[i], len) == 0 &&
|
||||
isalnum(data[len]))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
|
||||
{
|
||||
uint8_t cclose, copen = 0;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < link_end; ++i)
|
||||
if (data[i] == '<') {
|
||||
link_end = i;
|
||||
break;
|
||||
}
|
||||
|
||||
while (link_end > 0) {
|
||||
if (strchr("?!.,:", data[link_end - 1]) != NULL)
|
||||
link_end--;
|
||||
|
||||
else if (data[link_end - 1] == ';') {
|
||||
size_t new_end = link_end - 2;
|
||||
|
||||
while (new_end > 0 && isalpha(data[new_end]))
|
||||
new_end--;
|
||||
|
||||
if (new_end < link_end - 2 && data[new_end] == '&')
|
||||
link_end = new_end;
|
||||
else
|
||||
link_end--;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
if (link_end == 0)
|
||||
return 0;
|
||||
|
||||
cclose = data[link_end - 1];
|
||||
|
||||
switch (cclose) {
|
||||
case '"': copen = '"'; break;
|
||||
case '\'': copen = '\''; break;
|
||||
case ')': copen = '('; break;
|
||||
case ']': copen = '['; break;
|
||||
case '}': copen = '{'; break;
|
||||
}
|
||||
|
||||
if (copen != 0) {
|
||||
size_t closing = 0;
|
||||
size_t opening = 0;
|
||||
size_t i = 0;
|
||||
|
||||
/* Try to close the final punctuation sign in this same line;
|
||||
* if we managed to close it outside of the URL, that means that it's
|
||||
* not part of the URL. If it closes inside the URL, that means it
|
||||
* is part of the URL.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* foo http://www.pokemon.com/Pikachu_(Electric) bar
|
||||
* => http://www.pokemon.com/Pikachu_(Electric)
|
||||
*
|
||||
* foo (http://www.pokemon.com/Pikachu_(Electric)) bar
|
||||
* => http://www.pokemon.com/Pikachu_(Electric)
|
||||
*
|
||||
* foo http://www.pokemon.com/Pikachu_(Electric)) bar
|
||||
* => http://www.pokemon.com/Pikachu_(Electric))
|
||||
*
|
||||
* (foo http://www.pokemon.com/Pikachu_(Electric)) bar
|
||||
* => foo http://www.pokemon.com/Pikachu_(Electric)
|
||||
*/
|
||||
|
||||
while (i < link_end) {
|
||||
if (data[i] == copen)
|
||||
opening++;
|
||||
else if (data[i] == cclose)
|
||||
closing++;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if (closing != opening)
|
||||
link_end--;
|
||||
}
|
||||
|
||||
return link_end;
|
||||
}
|
||||
|
||||
static size_t
|
||||
check_domain(uint8_t *data, size_t size, int allow_short)
|
||||
{
|
||||
size_t i, np = 0;
|
||||
|
||||
if (!isalnum(data[0]))
|
||||
return 0;
|
||||
|
||||
for (i = 1; i < size - 1; ++i) {
|
||||
if (strchr(".:", data[i]) != NULL) np++;
|
||||
else if (!isalnum(data[i]) && data[i] != '-') break;
|
||||
}
|
||||
|
||||
if (allow_short) {
|
||||
/* We don't need a valid domain in the strict sense (with
|
||||
* least one dot; so just make sure it's composed of valid
|
||||
* domain characters and return the length of the the valid
|
||||
* sequence. */
|
||||
return i;
|
||||
} else {
|
||||
/* a valid domain needs to have at least a dot.
|
||||
* that's as far as we get */
|
||||
return np ? i : 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t
|
||||
hoedown_autolink__www(
|
||||
size_t *rewind_p,
|
||||
hoedown_buffer *link,
|
||||
uint8_t *data,
|
||||
size_t max_rewind,
|
||||
size_t size,
|
||||
unsigned int flags)
|
||||
{
|
||||
size_t link_end;
|
||||
|
||||
if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
|
||||
return 0;
|
||||
|
||||
if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
|
||||
return 0;
|
||||
|
||||
link_end = check_domain(data, size, 0);
|
||||
|
||||
if (link_end == 0)
|
||||
return 0;
|
||||
|
||||
while (link_end < size && !isspace(data[link_end]))
|
||||
link_end++;
|
||||
|
||||
link_end = autolink_delim(data, link_end, max_rewind, size);
|
||||
|
||||
if (link_end == 0)
|
||||
return 0;
|
||||
|
||||
hoedown_buffer_put(link, data, link_end);
|
||||
*rewind_p = 0;
|
||||
|
||||
return (int)link_end;
|
||||
}
|
||||
|
||||
size_t
|
||||
hoedown_autolink__email(
|
||||
size_t *rewind_p,
|
||||
hoedown_buffer *link,
|
||||
uint8_t *data,
|
||||
size_t max_rewind,
|
||||
size_t size,
|
||||
unsigned int flags)
|
||||
{
|
||||
size_t link_end, rewind;
|
||||
int nb = 0, np = 0;
|
||||
|
||||
for (rewind = 0; rewind < max_rewind; ++rewind) {
|
||||
uint8_t c = data[-1 - rewind];
|
||||
|
||||
if (isalnum(c))
|
||||
continue;
|
||||
|
||||
if (strchr(".+-_", c) != NULL)
|
||||
continue;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (rewind == 0)
|
||||
return 0;
|
||||
|
||||
for (link_end = 0; link_end < size; ++link_end) {
|
||||
uint8_t c = data[link_end];
|
||||
|
||||
if (isalnum(c))
|
||||
continue;
|
||||
|
||||
if (c == '@')
|
||||
nb++;
|
||||
else if (c == '.' && link_end < size - 1)
|
||||
np++;
|
||||
else if (c != '-' && c != '_')
|
||||
break;
|
||||
}
|
||||
|
||||
if (link_end < 2 || nb != 1 || np == 0 ||
|
||||
!isalpha(data[link_end - 1]))
|
||||
return 0;
|
||||
|
||||
link_end = autolink_delim(data, link_end, max_rewind, size);
|
||||
|
||||
if (link_end == 0)
|
||||
return 0;
|
||||
|
||||
hoedown_buffer_put(link, data - rewind, link_end + rewind);
|
||||
*rewind_p = rewind;
|
||||
|
||||
return link_end;
|
||||
}
|
||||
|
||||
size_t
|
||||
hoedown_autolink__url(
|
||||
size_t *rewind_p,
|
||||
hoedown_buffer *link,
|
||||
uint8_t *data,
|
||||
size_t max_rewind,
|
||||
size_t size,
|
||||
unsigned int flags)
|
||||
{
|
||||
size_t link_end, rewind = 0, domain_len;
|
||||
|
||||
if (size < 4 || data[1] != '/' || data[2] != '/')
|
||||
return 0;
|
||||
|
||||
while (rewind < max_rewind && isalpha(data[-1 - rewind]))
|
||||
rewind++;
|
||||
|
||||
if (!hoedown_autolink_is_safe(data - rewind, size + rewind))
|
||||
return 0;
|
||||
|
||||
link_end = strlen("://");
|
||||
|
||||
domain_len = check_domain(
|
||||
data + link_end,
|
||||
size - link_end,
|
||||
flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS);
|
||||
|
||||
if (domain_len == 0)
|
||||
return 0;
|
||||
|
||||
link_end += domain_len;
|
||||
while (link_end < size && !isspace(data[link_end]))
|
||||
link_end++;
|
||||
|
||||
link_end = autolink_delim(data, link_end, max_rewind, size);
|
||||
|
||||
if (link_end == 0)
|
||||
return 0;
|
||||
|
||||
hoedown_buffer_put(link, data - rewind, link_end + rewind);
|
||||
*rewind_p = rewind;
|
||||
|
||||
return link_end;
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
/* autolink.h - versatile autolinker */
|
||||
|
||||
#ifndef HOEDOWN_AUTOLINK_H
|
||||
#define HOEDOWN_AUTOLINK_H
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*************
|
||||
* CONSTANTS *
|
||||
*************/
|
||||
|
||||
typedef enum hoedown_autolink_flags {
|
||||
HOEDOWN_AUTOLINK_SHORT_DOMAINS = (1 << 0)
|
||||
} hoedown_autolink_flags;
|
||||
|
||||
|
||||
/*************
|
||||
* FUNCTIONS *
|
||||
*************/
|
||||
|
||||
/* hoedown_autolink_is_safe: verify that a URL has a safe protocol */
|
||||
int hoedown_autolink_is_safe(const uint8_t *data, size_t size);
|
||||
|
||||
/* hoedown_autolink__www: search for the next www link in data */
|
||||
size_t hoedown_autolink__www(size_t *rewind_p, hoedown_buffer *link,
|
||||
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
|
||||
|
||||
/* hoedown_autolink__email: search for the next email in data */
|
||||
size_t hoedown_autolink__email(size_t *rewind_p, hoedown_buffer *link,
|
||||
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
|
||||
|
||||
/* hoedown_autolink__url: search for the next URL in data */
|
||||
size_t hoedown_autolink__url(size_t *rewind_p, hoedown_buffer *link,
|
||||
uint8_t *data, size_t offset, size_t size, hoedown_autolink_flags flags);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /** HOEDOWN_AUTOLINK_H **/
|
|
@ -0,0 +1,308 @@
|
|||
#include "buffer.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
void *
|
||||
hoedown_malloc(size_t size)
|
||||
{
|
||||
void *ret = malloc(size);
|
||||
|
||||
if (!ret) {
|
||||
fprintf(stderr, "Allocation failed.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *
|
||||
hoedown_calloc(size_t nmemb, size_t size)
|
||||
{
|
||||
void *ret = calloc(nmemb, size);
|
||||
|
||||
if (!ret) {
|
||||
fprintf(stderr, "Allocation failed.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *
|
||||
hoedown_realloc(void *ptr, size_t size)
|
||||
{
|
||||
void *ret = realloc(ptr, size);
|
||||
|
||||
if (!ret) {
|
||||
fprintf(stderr, "Allocation failed.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_init(
|
||||
hoedown_buffer *buf,
|
||||
size_t unit,
|
||||
hoedown_realloc_callback data_realloc,
|
||||
hoedown_free_callback data_free,
|
||||
hoedown_free_callback buffer_free)
|
||||
{
|
||||
assert(buf);
|
||||
|
||||
buf->data = NULL;
|
||||
buf->size = buf->asize = 0;
|
||||
buf->unit = unit;
|
||||
buf->data_realloc = data_realloc;
|
||||
buf->data_free = data_free;
|
||||
buf->buffer_free = buffer_free;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_uninit(hoedown_buffer *buf)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
buf->data_free(buf->data);
|
||||
}
|
||||
|
||||
hoedown_buffer *
|
||||
hoedown_buffer_new(size_t unit)
|
||||
{
|
||||
hoedown_buffer *ret = hoedown_malloc(sizeof (hoedown_buffer));
|
||||
hoedown_buffer_init(ret, unit, hoedown_realloc, free, free);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_free(hoedown_buffer *buf)
|
||||
{
|
||||
if (!buf) return;
|
||||
assert(buf && buf->unit);
|
||||
|
||||
buf->data_free(buf->data);
|
||||
|
||||
if (buf->buffer_free)
|
||||
buf->buffer_free(buf);
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_reset(hoedown_buffer *buf)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
buf->data_free(buf->data);
|
||||
buf->data = NULL;
|
||||
buf->size = buf->asize = 0;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz)
|
||||
{
|
||||
size_t neoasz;
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->asize >= neosz)
|
||||
return;
|
||||
|
||||
neoasz = buf->asize + buf->unit;
|
||||
while (neoasz < neosz)
|
||||
neoasz += buf->unit;
|
||||
|
||||
buf->data = buf->data_realloc(buf->data, neoasz);
|
||||
buf->asize = neoasz;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_put(hoedown_buffer *buf, const uint8_t *data, size_t size)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size + size > buf->asize)
|
||||
hoedown_buffer_grow(buf, buf->size + size);
|
||||
|
||||
memcpy(buf->data + buf->size, data, size);
|
||||
buf->size += size;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_puts(hoedown_buffer *buf, const char *str)
|
||||
{
|
||||
hoedown_buffer_put(buf, (const uint8_t *)str, strlen(str));
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_putc(hoedown_buffer *buf, uint8_t c)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size >= buf->asize)
|
||||
hoedown_buffer_grow(buf, buf->size + 1);
|
||||
|
||||
buf->data[buf->size] = c;
|
||||
buf->size += 1;
|
||||
}
|
||||
|
||||
int
|
||||
hoedown_buffer_putf(hoedown_buffer *buf, FILE *file)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
while (!(feof(file) || ferror(file))) {
|
||||
hoedown_buffer_grow(buf, buf->size + buf->unit);
|
||||
buf->size += fread(buf->data + buf->size, 1, buf->unit, file);
|
||||
}
|
||||
|
||||
return ferror(file);
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_set(hoedown_buffer *buf, const uint8_t *data, size_t size)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (size > buf->asize)
|
||||
hoedown_buffer_grow(buf, size);
|
||||
|
||||
memcpy(buf->data, data, size);
|
||||
buf->size = size;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_sets(hoedown_buffer *buf, const char *str)
|
||||
{
|
||||
hoedown_buffer_set(buf, (const uint8_t *)str, strlen(str));
|
||||
}
|
||||
|
||||
int
|
||||
hoedown_buffer_eq(const hoedown_buffer *buf, const uint8_t *data, size_t size)
|
||||
{
|
||||
if (buf->size != size) return 0;
|
||||
return memcmp(buf->data, data, size) == 0;
|
||||
}
|
||||
|
||||
int
|
||||
hoedown_buffer_eqs(const hoedown_buffer *buf, const char *str)
|
||||
{
|
||||
return hoedown_buffer_eq(buf, (const uint8_t *)str, strlen(str));
|
||||
}
|
||||
|
||||
int
|
||||
hoedown_buffer_prefix(const hoedown_buffer *buf, const char *prefix)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < buf->size; ++i) {
|
||||
if (prefix[i] == 0)
|
||||
return 0;
|
||||
|
||||
if (buf->data[i] != prefix[i])
|
||||
return buf->data[i] - prefix[i];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_slurp(hoedown_buffer *buf, size_t size)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (size >= buf->size) {
|
||||
buf->size = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
buf->size -= size;
|
||||
memmove(buf->data, buf->data + size, buf->size);
|
||||
}
|
||||
|
||||
const char *
|
||||
hoedown_buffer_cstr(hoedown_buffer *buf)
|
||||
{
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size < buf->asize && buf->data[buf->size] == 0)
|
||||
return (char *)buf->data;
|
||||
|
||||
hoedown_buffer_grow(buf, buf->size + 1);
|
||||
buf->data[buf->size] = 0;
|
||||
|
||||
return (char *)buf->data;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
int n;
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (buf->size >= buf->asize)
|
||||
hoedown_buffer_grow(buf, buf->size + 1);
|
||||
|
||||
va_start(ap, fmt);
|
||||
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
|
||||
if (n < 0) {
|
||||
#ifndef _MSC_VER
|
||||
return;
|
||||
#else
|
||||
va_start(ap, fmt);
|
||||
n = _vscprintf(fmt, ap);
|
||||
va_end(ap);
|
||||
#endif
|
||||
}
|
||||
|
||||
if ((size_t)n >= buf->asize - buf->size) {
|
||||
hoedown_buffer_grow(buf, buf->size + n + 1);
|
||||
|
||||
va_start(ap, fmt);
|
||||
n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
if (n < 0)
|
||||
return;
|
||||
|
||||
buf->size += n;
|
||||
}
|
||||
|
||||
void hoedown_buffer_put_utf8(hoedown_buffer *buf, unsigned int c) {
|
||||
unsigned char unichar[4];
|
||||
|
||||
assert(buf && buf->unit);
|
||||
|
||||
if (c < 0x80) {
|
||||
hoedown_buffer_putc(buf, c);
|
||||
}
|
||||
else if (c < 0x800) {
|
||||
unichar[0] = 192 + (c / 64);
|
||||
unichar[1] = 128 + (c % 64);
|
||||
hoedown_buffer_put(buf, unichar, 2);
|
||||
}
|
||||
else if (c - 0xd800u < 0x800) {
|
||||
HOEDOWN_BUFPUTSL(buf, "\xef\xbf\xbd");
|
||||
}
|
||||
else if (c < 0x10000) {
|
||||
unichar[0] = 224 + (c / 4096);
|
||||
unichar[1] = 128 + (c / 64) % 64;
|
||||
unichar[2] = 128 + (c % 64);
|
||||
hoedown_buffer_put(buf, unichar, 3);
|
||||
}
|
||||
else if (c < 0x110000) {
|
||||
unichar[0] = 240 + (c / 262144);
|
||||
unichar[1] = 128 + (c / 4096) % 64;
|
||||
unichar[2] = 128 + (c / 64) % 64;
|
||||
unichar[3] = 128 + (c % 64);
|
||||
hoedown_buffer_put(buf, unichar, 4);
|
||||
}
|
||||
else {
|
||||
HOEDOWN_BUFPUTSL(buf, "\xef\xbf\xbd");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
/* buffer.h - simple, fast buffers */
|
||||
|
||||
#ifndef HOEDOWN_BUFFER_H
|
||||
#define HOEDOWN_BUFFER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define __attribute__(x)
|
||||
#define inline __inline
|
||||
#define __builtin_expect(x,n) x
|
||||
#endif
|
||||
|
||||
|
||||
/*********
|
||||
* TYPES *
|
||||
*********/
|
||||
|
||||
typedef void *(*hoedown_realloc_callback)(void *, size_t);
|
||||
typedef void (*hoedown_free_callback)(void *);
|
||||
|
||||
struct hoedown_buffer {
|
||||
uint8_t *data; /* actual character data */
|
||||
size_t size; /* size of the string */
|
||||
size_t asize; /* allocated size (0 = volatile buffer) */
|
||||
size_t unit; /* reallocation unit size (0 = read-only buffer) */
|
||||
|
||||
hoedown_realloc_callback data_realloc;
|
||||
hoedown_free_callback data_free;
|
||||
hoedown_free_callback buffer_free;
|
||||
};
|
||||
|
||||
typedef struct hoedown_buffer hoedown_buffer;
|
||||
|
||||
|
||||
/*************
|
||||
* FUNCTIONS *
|
||||
*************/
|
||||
|
||||
/* allocation wrappers */
|
||||
void *hoedown_malloc(size_t size) __attribute__ ((malloc));
|
||||
void *hoedown_calloc(size_t nmemb, size_t size) __attribute__ ((malloc));
|
||||
void *hoedown_realloc(void *ptr, size_t size) __attribute__ ((malloc));
|
||||
|
||||
/* hoedown_buffer_init: initialize a buffer with custom allocators */
|
||||
void hoedown_buffer_init(
|
||||
hoedown_buffer *buffer,
|
||||
size_t unit,
|
||||
hoedown_realloc_callback data_realloc,
|
||||
hoedown_free_callback data_free,
|
||||
hoedown_free_callback buffer_free
|
||||
);
|
||||
|
||||
/* hoedown_buffer_uninit: uninitialize an existing buffer */
|
||||
void hoedown_buffer_uninit(hoedown_buffer *buf);
|
||||
|
||||
/* hoedown_buffer_new: allocate a new buffer */
|
||||
hoedown_buffer *hoedown_buffer_new(size_t unit) __attribute__ ((malloc));
|
||||
|
||||
/* hoedown_buffer_reset: free internal data of the buffer */
|
||||
void hoedown_buffer_reset(hoedown_buffer *buf);
|
||||
|
||||
/* hoedown_buffer_grow: increase the allocated size to the given value */
|
||||
void hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz);
|
||||
|
||||
/* hoedown_buffer_put: append raw data to a buffer */
|
||||
void hoedown_buffer_put(hoedown_buffer *buf, const uint8_t *data, size_t size);
|
||||
|
||||
/* hoedown_buffer_puts: append a NUL-terminated string to a buffer */
|
||||
void hoedown_buffer_puts(hoedown_buffer *buf, const char *str);
|
||||
|
||||
/* hoedown_buffer_putc: append a single char to a buffer */
|
||||
void hoedown_buffer_putc(hoedown_buffer *buf, uint8_t c);
|
||||
|
||||
/* hoedown_buffer_putf: read from a file and append to a buffer, until EOF or error */
|
||||
int hoedown_buffer_putf(hoedown_buffer *buf, FILE* file);
|
||||
|
||||
/* hoedown_buffer_set: replace the buffer's contents with raw data */
|
||||
void hoedown_buffer_set(hoedown_buffer *buf, const uint8_t *data, size_t size);
|
||||
|
||||
/* hoedown_buffer_sets: replace the buffer's contents with a NUL-terminated string */
|
||||
void hoedown_buffer_sets(hoedown_buffer *buf, const char *str);
|
||||
|
||||
/* hoedown_buffer_eq: compare a buffer's data with other data for equality */
|
||||
int hoedown_buffer_eq(const hoedown_buffer *buf, const uint8_t *data, size_t size);
|
||||
|
||||
/* hoedown_buffer_eq: compare a buffer's data with NUL-terminated string for equality */
|
||||
int hoedown_buffer_eqs(const hoedown_buffer *buf, const char *str);
|
||||
|
||||
/* hoedown_buffer_prefix: compare the beginning of a buffer with a string */
|
||||
int hoedown_buffer_prefix(const hoedown_buffer *buf, const char *prefix);
|
||||
|
||||
/* hoedown_buffer_slurp: remove a given number of bytes from the head of the buffer */
|
||||
void hoedown_buffer_slurp(hoedown_buffer *buf, size_t size);
|
||||
|
||||
/* hoedown_buffer_cstr: NUL-termination of the string array (making a C-string) */
|
||||
const char *hoedown_buffer_cstr(hoedown_buffer *buf);
|
||||
|
||||
/* hoedown_buffer_printf: formatted printing to a buffer */
|
||||
void hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
|
||||
|
||||
/* hoedown_buffer_put_utf8: put a Unicode character encoded as UTF-8 */
|
||||
void hoedown_buffer_put_utf8(hoedown_buffer *buf, unsigned int codepoint);
|
||||
|
||||
/* hoedown_buffer_free: free the buffer */
|
||||
void hoedown_buffer_free(hoedown_buffer *buf);
|
||||
|
||||
|
||||
/* HOEDOWN_BUFPUTSL: optimized hoedown_buffer_puts of a string literal */
|
||||
#define HOEDOWN_BUFPUTSL(output, literal) \
|
||||
hoedown_buffer_put(output, (const uint8_t *)literal, sizeof(literal) - 1)
|
||||
|
||||
/* HOEDOWN_BUFSETSL: optimized hoedown_buffer_sets of a string literal */
|
||||
#define HOEDOWN_BUFSETSL(output, literal) \
|
||||
hoedown_buffer_set(output, (const uint8_t *)literal, sizeof(literal) - 1)
|
||||
|
||||
/* HOEDOWN_BUFEQSL: optimized hoedown_buffer_eqs of a string literal */
|
||||
#define HOEDOWN_BUFEQSL(output, literal) \
|
||||
hoedown_buffer_eq(output, (const uint8_t *)literal, sizeof(literal) - 1)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /** HOEDOWN_BUFFER_H **/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,172 @@
|
|||
/* document.h - generic markdown parser */
|
||||
|
||||
#ifndef HOEDOWN_DOCUMENT_H
|
||||
#define HOEDOWN_DOCUMENT_H
|
||||
|
||||
#include "buffer.h"
|
||||
#include "autolink.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*************
|
||||
* CONSTANTS *
|
||||
*************/
|
||||
|
||||
typedef enum hoedown_extensions {
|
||||
/* block-level extensions */
|
||||
HOEDOWN_EXT_TABLES = (1 << 0),
|
||||
HOEDOWN_EXT_FENCED_CODE = (1 << 1),
|
||||
HOEDOWN_EXT_FOOTNOTES = (1 << 2),
|
||||
|
||||
/* span-level extensions */
|
||||
HOEDOWN_EXT_AUTOLINK = (1 << 3),
|
||||
HOEDOWN_EXT_STRIKETHROUGH = (1 << 4),
|
||||
HOEDOWN_EXT_UNDERLINE = (1 << 5),
|
||||
HOEDOWN_EXT_HIGHLIGHT = (1 << 6),
|
||||
HOEDOWN_EXT_QUOTE = (1 << 7),
|
||||
HOEDOWN_EXT_SUPERSCRIPT = (1 << 8),
|
||||
HOEDOWN_EXT_MATH = (1 << 9),
|
||||
|
||||
/* other flags */
|
||||
HOEDOWN_EXT_NO_INTRA_EMPHASIS = (1 << 11),
|
||||
HOEDOWN_EXT_SPACE_HEADERS = (1 << 12),
|
||||
HOEDOWN_EXT_MATH_EXPLICIT = (1 << 13),
|
||||
|
||||
/* negative flags */
|
||||
HOEDOWN_EXT_DISABLE_INDENTED_CODE = (1 << 14)
|
||||
} hoedown_extensions;
|
||||
|
||||
#define HOEDOWN_EXT_BLOCK (\
|
||||
HOEDOWN_EXT_TABLES |\
|
||||
HOEDOWN_EXT_FENCED_CODE |\
|
||||
HOEDOWN_EXT_FOOTNOTES )
|
||||
|
||||
#define HOEDOWN_EXT_SPAN (\
|
||||
HOEDOWN_EXT_AUTOLINK |\
|
||||
HOEDOWN_EXT_STRIKETHROUGH |\
|
||||
HOEDOWN_EXT_UNDERLINE |\
|
||||
HOEDOWN_EXT_HIGHLIGHT |\
|
||||
HOEDOWN_EXT_QUOTE |\
|
||||
HOEDOWN_EXT_SUPERSCRIPT |\
|
||||
HOEDOWN_EXT_MATH )
|
||||
|
||||
#define HOEDOWN_EXT_FLAGS (\
|
||||
HOEDOWN_EXT_NO_INTRA_EMPHASIS |\
|
||||
HOEDOWN_EXT_SPACE_HEADERS |\
|
||||
HOEDOWN_EXT_MATH_EXPLICIT )
|
||||
|
||||
#define HOEDOWN_EXT_NEGATIVE (\
|
||||
HOEDOWN_EXT_DISABLE_INDENTED_CODE )
|
||||
|
||||
typedef enum hoedown_list_flags {
|
||||
HOEDOWN_LIST_ORDERED = (1 << 0),
|
||||
HOEDOWN_LI_BLOCK = (1 << 1) /* <li> containing block data */
|
||||
} hoedown_list_flags;
|
||||
|
||||
typedef enum hoedown_table_flags {
|
||||
HOEDOWN_TABLE_ALIGN_LEFT = 1,
|
||||
HOEDOWN_TABLE_ALIGN_RIGHT = 2,
|
||||
HOEDOWN_TABLE_ALIGN_CENTER = 3,
|
||||
HOEDOWN_TABLE_ALIGNMASK = 3,
|
||||
HOEDOWN_TABLE_HEADER = 4
|
||||
} hoedown_table_flags;
|
||||
|
||||
typedef enum hoedown_autolink_type {
|
||||
HOEDOWN_AUTOLINK_NONE, /* used internally when it is not an autolink*/
|
||||
HOEDOWN_AUTOLINK_NORMAL, /* normal http/http/ftp/mailto/etc link */
|
||||
HOEDOWN_AUTOLINK_EMAIL /* e-mail link without explit mailto: */
|
||||
} hoedown_autolink_type;
|
||||
|
||||
|
||||
/*********
|
||||
* TYPES *
|
||||
*********/
|
||||
|
||||
struct hoedown_document;
|
||||
typedef struct hoedown_document hoedown_document;
|
||||
|
||||
struct hoedown_renderer_data {
|
||||
void *opaque;
|
||||
};
|
||||
typedef struct hoedown_renderer_data hoedown_renderer_data;
|
||||
|
||||
/* hoedown_renderer - functions for rendering parsed data */
|
||||
struct hoedown_renderer {
|
||||
/* state object */
|
||||
void *opaque;
|
||||
|
||||
/* block level callbacks - NULL skips the block */
|
||||
void (*blockcode)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_buffer *lang, const hoedown_renderer_data *data);
|
||||
void (*blockquote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
void (*header)(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data);
|
||||
void (*hrule)(hoedown_buffer *ob, const hoedown_renderer_data *data);
|
||||
void (*list)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
|
||||
void (*listitem)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data);
|
||||
void (*paragraph)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
void (*table)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
void (*table_header)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
void (*table_body)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
void (*table_row)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
void (*table_cell)(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_table_flags flags, const hoedown_renderer_data *data);
|
||||
void (*footnotes)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
void (*footnote_def)(hoedown_buffer *ob, const hoedown_buffer *content, unsigned int num, const hoedown_renderer_data *data);
|
||||
void (*blockhtml)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
|
||||
|
||||
/* span level callbacks - NULL or return 0 prints the span verbatim */
|
||||
int (*autolink)(hoedown_buffer *ob, const hoedown_buffer *link, hoedown_autolink_type type, const hoedown_renderer_data *data);
|
||||
int (*codespan)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
|
||||
int (*double_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
int (*emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
int (*underline)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
int (*highlight)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
int (*quote)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
int (*image)(hoedown_buffer *ob, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_buffer *alt, const hoedown_renderer_data *data);
|
||||
int (*linebreak)(hoedown_buffer *ob, const hoedown_renderer_data *data);
|
||||
int (*link)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data);
|
||||
int (*triple_emphasis)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
int (*strikethrough)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
int (*superscript)(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data);
|
||||
int (*footnote_ref)(hoedown_buffer *ob, unsigned int num, const hoedown_renderer_data *data);
|
||||
int (*math)(hoedown_buffer *ob, const hoedown_buffer *text, int displaymode, const hoedown_renderer_data *data);
|
||||
int (*raw_html)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
|
||||
|
||||
/* low level callbacks - NULL copies input directly into the output */
|
||||
void (*entity)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
|
||||
void (*normal_text)(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data);
|
||||
|
||||
/* miscellaneous callbacks */
|
||||
void (*doc_header)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
|
||||
void (*doc_footer)(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data);
|
||||
};
|
||||
typedef struct hoedown_renderer hoedown_renderer;
|
||||
|
||||
|
||||
/*************
|
||||
* FUNCTIONS *
|
||||
*************/
|
||||
|
||||
/* hoedown_document_new: allocate a new document processor instance */
|
||||
hoedown_document *hoedown_document_new(
|
||||
const hoedown_renderer *renderer,
|
||||
hoedown_extensions extensions,
|
||||
size_t max_nesting
|
||||
) __attribute__ ((malloc));
|
||||
|
||||
/* hoedown_document_render: render regular Markdown using the document processor */
|
||||
void hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size);
|
||||
|
||||
/* hoedown_document_render_inline: render inline Markdown using the document processor */
|
||||
void hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size);
|
||||
|
||||
/* hoedown_document_free: deallocate a document processor instance */
|
||||
void hoedown_document_free(hoedown_document *doc);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /** HOEDOWN_DOCUMENT_H **/
|
|
@ -0,0 +1,188 @@
|
|||
#include "escape.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#define likely(x) __builtin_expect((x),1)
|
||||
#define unlikely(x) __builtin_expect((x),0)
|
||||
|
||||
|
||||
/*
|
||||
* The following characters will not be escaped:
|
||||
*
|
||||
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
||||
*
|
||||
* Note that this character set is the addition of:
|
||||
*
|
||||
* - The characters which are safe to be in an URL
|
||||
* - The characters which are *not* safe to be in
|
||||
* an URL because they are RESERVED characters.
|
||||
*
|
||||
* We assume (lazily) that any RESERVED char that
|
||||
* appears inside an URL is actually meant to
|
||||
* have its native function (i.e. as an URL
|
||||
* component/separator) and hence needs no escaping.
|
||||
*
|
||||
* There are two exceptions: the chacters & (amp)
|
||||
* and ' (single quote) do not appear in the table.
|
||||
* They are meant to appear in the URL as components,
|
||||
* yet they require special HTML-entity escaping
|
||||
* to generate valid HTML markup.
|
||||
*
|
||||
* All other characters will be escaped to %XX.
|
||||
*
|
||||
*/
|
||||
static const uint8_t HREF_SAFE[UINT8_MAX+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
void
|
||||
hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size)
|
||||
{
|
||||
static const char hex_chars[] = "0123456789ABCDEF";
|
||||
size_t i = 0, mark;
|
||||
char hex_str[3];
|
||||
|
||||
hex_str[0] = '%';
|
||||
|
||||
while (i < size) {
|
||||
mark = i;
|
||||
while (i < size && HREF_SAFE[data[i]]) i++;
|
||||
|
||||
/* Optimization for cases where there's nothing to escape */
|
||||
if (mark == 0 && i >= size) {
|
||||
hoedown_buffer_put(ob, data, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (likely(i > mark)) {
|
||||
hoedown_buffer_put(ob, data + mark, i - mark);
|
||||
}
|
||||
|
||||
/* escaping */
|
||||
if (i >= size)
|
||||
break;
|
||||
|
||||
switch (data[i]) {
|
||||
/* amp appears all the time in URLs, but needs
|
||||
* HTML-entity escaping to be inside an href */
|
||||
case '&':
|
||||
HOEDOWN_BUFPUTSL(ob, "&");
|
||||
break;
|
||||
|
||||
/* the single quote is a valid URL character
|
||||
* according to the standard; it needs HTML
|
||||
* entity escaping too */
|
||||
case '\'':
|
||||
HOEDOWN_BUFPUTSL(ob, "'");
|
||||
break;
|
||||
|
||||
/* the space can be escaped to %20 or a plus
|
||||
* sign. we're going with the generic escape
|
||||
* for now. the plus thing is more commonly seen
|
||||
* when building GET strings */
|
||||
#if 0
|
||||
case ' ':
|
||||
hoedown_buffer_putc(ob, '+');
|
||||
break;
|
||||
#endif
|
||||
|
||||
/* every other character goes with a %XX escaping */
|
||||
default:
|
||||
hex_str[1] = hex_chars[(data[i] >> 4) & 0xF];
|
||||
hex_str[2] = hex_chars[data[i] & 0xF];
|
||||
hoedown_buffer_put(ob, (uint8_t *)hex_str, 3);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* According to the OWASP rules:
|
||||
*
|
||||
* & --> &
|
||||
* < --> <
|
||||
* > --> >
|
||||
* " --> "
|
||||
* ' --> ' ' is not recommended
|
||||
* / --> / forward slash is included as it helps end an HTML entity
|
||||
*
|
||||
*/
|
||||
static const uint8_t HTML_ESCAPE_TABLE[UINT8_MAX+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
static const char *HTML_ESCAPES[] = {
|
||||
"",
|
||||
""",
|
||||
"&",
|
||||
"'",
|
||||
"/",
|
||||
"<",
|
||||
">"
|
||||
};
|
||||
|
||||
void
|
||||
hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure)
|
||||
{
|
||||
size_t i = 0, mark;
|
||||
|
||||
while (1) {
|
||||
mark = i;
|
||||
while (i < size && HTML_ESCAPE_TABLE[data[i]] == 0) i++;
|
||||
|
||||
/* Optimization for cases where there's nothing to escape */
|
||||
if (mark == 0 && i >= size) {
|
||||
hoedown_buffer_put(ob, data, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (likely(i > mark))
|
||||
hoedown_buffer_put(ob, data + mark, i - mark);
|
||||
|
||||
if (i >= size) break;
|
||||
|
||||
/* The forward slash is only escaped in secure mode */
|
||||
if (!secure && data[i] == '/') {
|
||||
hoedown_buffer_putc(ob, '/');
|
||||
} else {
|
||||
hoedown_buffer_puts(ob, HTML_ESCAPES[HTML_ESCAPE_TABLE[data[i]]]);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
/* escape.h - escape utilities */
|
||||
|
||||
#ifndef HOEDOWN_ESCAPE_H
|
||||
#define HOEDOWN_ESCAPE_H
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*************
|
||||
* FUNCTIONS *
|
||||
*************/
|
||||
|
||||
/* hoedown_escape_href: escape (part of) a URL inside HTML */
|
||||
void hoedown_escape_href(hoedown_buffer *ob, const uint8_t *data, size_t size);
|
||||
|
||||
/* hoedown_escape_html: escape HTML */
|
||||
void hoedown_escape_html(hoedown_buffer *ob, const uint8_t *data, size_t size, int secure);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /** HOEDOWN_ESCAPE_H **/
|
|
@ -0,0 +1,754 @@
|
|||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "escape.h"
|
||||
|
||||
#define USE_XHTML(opt) (opt->flags & HOEDOWN_HTML_USE_XHTML)
|
||||
|
||||
hoedown_html_tag
|
||||
hoedown_html_is_tag(const uint8_t *data, size_t size, const char *tagname)
|
||||
{
|
||||
size_t i;
|
||||
int closed = 0;
|
||||
|
||||
if (size < 3 || data[0] != '<')
|
||||
return HOEDOWN_HTML_TAG_NONE;
|
||||
|
||||
i = 1;
|
||||
|
||||
if (data[i] == '/') {
|
||||
closed = 1;
|
||||
i++;
|
||||
}
|
||||
|
||||
for (; i < size; ++i, ++tagname) {
|
||||
if (*tagname == 0)
|
||||
break;
|
||||
|
||||
if (data[i] != *tagname)
|
||||
return HOEDOWN_HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
if (i == size)
|
||||
return HOEDOWN_HTML_TAG_NONE;
|
||||
|
||||
if (isspace(data[i]) || data[i] == '>')
|
||||
return closed ? HOEDOWN_HTML_TAG_CLOSE : HOEDOWN_HTML_TAG_OPEN;
|
||||
|
||||
return HOEDOWN_HTML_TAG_NONE;
|
||||
}
|
||||
|
||||
static void escape_html(hoedown_buffer *ob, const uint8_t *source, size_t length)
|
||||
{
|
||||
hoedown_escape_html(ob, source, length, 0);
|
||||
}
|
||||
|
||||
static void escape_href(hoedown_buffer *ob, const uint8_t *source, size_t length)
|
||||
{
|
||||
hoedown_escape_href(ob, source, length);
|
||||
}
|
||||
|
||||
/********************
|
||||
* GENERIC RENDERER *
|
||||
********************/
|
||||
static int
|
||||
rndr_autolink(hoedown_buffer *ob, const hoedown_buffer *link, hoedown_autolink_type type, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
|
||||
if (!link || !link->size)
|
||||
return 0;
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "<a href=\"");
|
||||
if (type == HOEDOWN_AUTOLINK_EMAIL)
|
||||
HOEDOWN_BUFPUTSL(ob, "mailto:");
|
||||
escape_href(ob, link->data, link->size);
|
||||
|
||||
if (state->link_attributes) {
|
||||
hoedown_buffer_putc(ob, '\"');
|
||||
state->link_attributes(ob, link, data);
|
||||
hoedown_buffer_putc(ob, '>');
|
||||
} else {
|
||||
HOEDOWN_BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
/*
|
||||
* Pretty printing: if we get an email address as
|
||||
* an actual URI, e.g. `mailto:foo@bar.com`, we don't
|
||||
* want to print the `mailto:` prefix
|
||||
*/
|
||||
if (hoedown_buffer_prefix(link, "mailto:") == 0) {
|
||||
escape_html(ob, link->data + 7, link->size - 7);
|
||||
} else {
|
||||
escape_html(ob, link->data, link->size);
|
||||
}
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "</a>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockcode(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_buffer *lang, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (ob->size) hoedown_buffer_putc(ob, '\n');
|
||||
|
||||
if (lang) {
|
||||
HOEDOWN_BUFPUTSL(ob, "<pre><code class=\"language-");
|
||||
escape_html(ob, lang->data, lang->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "\">");
|
||||
} else {
|
||||
HOEDOWN_BUFPUTSL(ob, "<pre><code>");
|
||||
}
|
||||
|
||||
if (text)
|
||||
escape_html(ob, text->data, text->size);
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "</code></pre>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_blockquote(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (ob->size) hoedown_buffer_putc(ob, '\n');
|
||||
HOEDOWN_BUFPUTSL(ob, "<blockquote>\n");
|
||||
if (content) hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</blockquote>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_codespan(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)
|
||||
{
|
||||
HOEDOWN_BUFPUTSL(ob, "<code>");
|
||||
if (text) escape_html(ob, text->data, text->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</code>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_strikethrough(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (!content || !content->size)
|
||||
return 0;
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "<del>");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</del>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_double_emphasis(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (!content || !content->size)
|
||||
return 0;
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "<strong>");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</strong>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_emphasis(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (!content || !content->size) return 0;
|
||||
HOEDOWN_BUFPUTSL(ob, "<em>");
|
||||
if (content) hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</em>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_underline(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (!content || !content->size)
|
||||
return 0;
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "<u>");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</u>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_highlight(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (!content || !content->size)
|
||||
return 0;
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "<mark>");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</mark>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_quote(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (!content || !content->size)
|
||||
return 0;
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "<q>");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</q>");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_linebreak(hoedown_buffer *ob, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
hoedown_buffer_puts(ob, USE_XHTML(state) ? "<br/>\n" : "<br>\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_header(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
|
||||
if (ob->size)
|
||||
hoedown_buffer_putc(ob, '\n');
|
||||
|
||||
if (level <= state->toc_data.nesting_level)
|
||||
hoedown_buffer_printf(ob, "<h%d id=\"toc_%d\">", level, state->toc_data.header_count++);
|
||||
else
|
||||
hoedown_buffer_printf(ob, "<h%d>", level);
|
||||
|
||||
if (content) hoedown_buffer_put(ob, content->data, content->size);
|
||||
hoedown_buffer_printf(ob, "</h%d>\n", level);
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_link(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "<a href=\"");
|
||||
|
||||
if (link && link->size)
|
||||
escape_href(ob, link->data, link->size);
|
||||
|
||||
if (title && title->size) {
|
||||
HOEDOWN_BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size);
|
||||
}
|
||||
|
||||
if (state->link_attributes) {
|
||||
hoedown_buffer_putc(ob, '\"');
|
||||
state->link_attributes(ob, link, data);
|
||||
hoedown_buffer_putc(ob, '>');
|
||||
} else {
|
||||
HOEDOWN_BUFPUTSL(ob, "\">");
|
||||
}
|
||||
|
||||
if (content && content->size) hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</a>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_list(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (ob->size) hoedown_buffer_putc(ob, '\n');
|
||||
hoedown_buffer_put(ob, (const uint8_t *)(flags & HOEDOWN_LIST_ORDERED ? "<ol>\n" : "<ul>\n"), 5);
|
||||
if (content) hoedown_buffer_put(ob, content->data, content->size);
|
||||
hoedown_buffer_put(ob, (const uint8_t *)(flags & HOEDOWN_LIST_ORDERED ? "</ol>\n" : "</ul>\n"), 6);
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_listitem(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_list_flags flags, const hoedown_renderer_data *data)
|
||||
{
|
||||
HOEDOWN_BUFPUTSL(ob, "<li>");
|
||||
if (content) {
|
||||
size_t size = content->size;
|
||||
while (size && content->data[size - 1] == '\n')
|
||||
size--;
|
||||
|
||||
hoedown_buffer_put(ob, content->data, size);
|
||||
}
|
||||
HOEDOWN_BUFPUTSL(ob, "</li>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_paragraph(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
size_t i = 0;
|
||||
|
||||
if (ob->size) hoedown_buffer_putc(ob, '\n');
|
||||
|
||||
if (!content || !content->size)
|
||||
return;
|
||||
|
||||
while (i < content->size && isspace(content->data[i])) i++;
|
||||
|
||||
if (i == content->size)
|
||||
return;
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "<p>");
|
||||
if (state->flags & HOEDOWN_HTML_HARD_WRAP) {
|
||||
size_t org;
|
||||
while (i < content->size) {
|
||||
org = i;
|
||||
while (i < content->size && content->data[i] != '\n')
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
hoedown_buffer_put(ob, content->data + org, i - org);
|
||||
|
||||
/*
|
||||
* do not insert a line break if this newline
|
||||
* is the last character on the paragraph
|
||||
*/
|
||||
if (i >= content->size - 1)
|
||||
break;
|
||||
|
||||
rndr_linebreak(ob, data);
|
||||
i++;
|
||||
}
|
||||
} else {
|
||||
hoedown_buffer_put(ob, content->data + i, content->size - i);
|
||||
}
|
||||
HOEDOWN_BUFPUTSL(ob, "</p>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_raw_block(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)
|
||||
{
|
||||
size_t org, sz;
|
||||
|
||||
if (!text)
|
||||
return;
|
||||
|
||||
/* FIXME: Do we *really* need to trim the HTML? How does that make a difference? */
|
||||
sz = text->size;
|
||||
while (sz > 0 && text->data[sz - 1] == '\n')
|
||||
sz--;
|
||||
|
||||
org = 0;
|
||||
while (org < sz && text->data[org] == '\n')
|
||||
org++;
|
||||
|
||||
if (org >= sz)
|
||||
return;
|
||||
|
||||
if (ob->size)
|
||||
hoedown_buffer_putc(ob, '\n');
|
||||
|
||||
hoedown_buffer_put(ob, text->data + org, sz - org);
|
||||
hoedown_buffer_putc(ob, '\n');
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_triple_emphasis(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (!content || !content->size) return 0;
|
||||
HOEDOWN_BUFPUTSL(ob, "<strong><em>");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</em></strong>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_hrule(hoedown_buffer *ob, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
if (ob->size) hoedown_buffer_putc(ob, '\n');
|
||||
hoedown_buffer_puts(ob, USE_XHTML(state) ? "<hr/>\n" : "<hr>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_image(hoedown_buffer *ob, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_buffer *alt, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
if (!link || !link->size) return 0;
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "<img src=\"");
|
||||
escape_href(ob, link->data, link->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "\" alt=\"");
|
||||
|
||||
if (alt && alt->size)
|
||||
escape_html(ob, alt->data, alt->size);
|
||||
|
||||
if (title && title->size) {
|
||||
HOEDOWN_BUFPUTSL(ob, "\" title=\"");
|
||||
escape_html(ob, title->data, title->size); }
|
||||
|
||||
hoedown_buffer_puts(ob, USE_XHTML(state) ? "\"/>" : "\">");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_raw_html(hoedown_buffer *ob, const hoedown_buffer *text, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
|
||||
/* ESCAPE overrides SKIP_HTML. It doesn't look to see if
|
||||
* there are any valid tags, just escapes all of them. */
|
||||
if((state->flags & HOEDOWN_HTML_ESCAPE) != 0) {
|
||||
escape_html(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ((state->flags & HOEDOWN_HTML_SKIP_HTML) != 0)
|
||||
return 1;
|
||||
|
||||
hoedown_buffer_put(ob, text->data, text->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_table(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (ob->size) hoedown_buffer_putc(ob, '\n');
|
||||
HOEDOWN_BUFPUTSL(ob, "<table>\n");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</table>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_table_header(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (ob->size) hoedown_buffer_putc(ob, '\n');
|
||||
HOEDOWN_BUFPUTSL(ob, "<thead>\n");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</thead>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_table_body(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (ob->size) hoedown_buffer_putc(ob, '\n');
|
||||
HOEDOWN_BUFPUTSL(ob, "<tbody>\n");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</tbody>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablerow(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
HOEDOWN_BUFPUTSL(ob, "<tr>\n");
|
||||
if (content) hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</tr>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_tablecell(hoedown_buffer *ob, const hoedown_buffer *content, hoedown_table_flags flags, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (flags & HOEDOWN_TABLE_HEADER) {
|
||||
HOEDOWN_BUFPUTSL(ob, "<th");
|
||||
} else {
|
||||
HOEDOWN_BUFPUTSL(ob, "<td");
|
||||
}
|
||||
|
||||
switch (flags & HOEDOWN_TABLE_ALIGNMASK) {
|
||||
case HOEDOWN_TABLE_ALIGN_CENTER:
|
||||
HOEDOWN_BUFPUTSL(ob, " style=\"text-align: center\">");
|
||||
break;
|
||||
|
||||
case HOEDOWN_TABLE_ALIGN_LEFT:
|
||||
HOEDOWN_BUFPUTSL(ob, " style=\"text-align: left\">");
|
||||
break;
|
||||
|
||||
case HOEDOWN_TABLE_ALIGN_RIGHT:
|
||||
HOEDOWN_BUFPUTSL(ob, " style=\"text-align: right\">");
|
||||
break;
|
||||
|
||||
default:
|
||||
HOEDOWN_BUFPUTSL(ob, ">");
|
||||
}
|
||||
|
||||
if (content)
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
|
||||
if (flags & HOEDOWN_TABLE_HEADER) {
|
||||
HOEDOWN_BUFPUTSL(ob, "</th>\n");
|
||||
} else {
|
||||
HOEDOWN_BUFPUTSL(ob, "</td>\n");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_superscript(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (!content || !content->size) return 0;
|
||||
HOEDOWN_BUFPUTSL(ob, "<sup>");
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</sup>");
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_normal_text(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (content)
|
||||
escape_html(ob, content->data, content->size);
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_footnotes(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
|
||||
if (ob->size) hoedown_buffer_putc(ob, '\n');
|
||||
HOEDOWN_BUFPUTSL(ob, "<div class=\"footnotes\">\n");
|
||||
hoedown_buffer_puts(ob, USE_XHTML(state) ? "<hr/>\n" : "<hr>\n");
|
||||
HOEDOWN_BUFPUTSL(ob, "<ol>\n");
|
||||
|
||||
if (content) hoedown_buffer_put(ob, content->data, content->size);
|
||||
|
||||
HOEDOWN_BUFPUTSL(ob, "\n</ol>\n</div>\n");
|
||||
}
|
||||
|
||||
static void
|
||||
rndr_footnote_def(hoedown_buffer *ob, const hoedown_buffer *content, unsigned int num, const hoedown_renderer_data *data)
|
||||
{
|
||||
size_t i = 0;
|
||||
int pfound = 0;
|
||||
|
||||
/* insert anchor at the end of first paragraph block */
|
||||
if (content) {
|
||||
while ((i+3) < content->size) {
|
||||
if (content->data[i++] != '<') continue;
|
||||
if (content->data[i++] != '/') continue;
|
||||
if (content->data[i++] != 'p' && content->data[i] != 'P') continue;
|
||||
if (content->data[i] != '>') continue;
|
||||
i -= 3;
|
||||
pfound = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
hoedown_buffer_printf(ob, "\n<li id=\"fn%d\">\n", num);
|
||||
if (pfound) {
|
||||
hoedown_buffer_put(ob, content->data, i);
|
||||
hoedown_buffer_printf(ob, " <a href=\"#fnref%d\" rev=\"footnote\">↩</a>", num);
|
||||
hoedown_buffer_put(ob, content->data + i, content->size - i);
|
||||
} else if (content) {
|
||||
hoedown_buffer_put(ob, content->data, content->size);
|
||||
}
|
||||
HOEDOWN_BUFPUTSL(ob, "</li>\n");
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_footnote_ref(hoedown_buffer *ob, unsigned int num, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_buffer_printf(ob, "<sup id=\"fnref%d\"><a href=\"#fn%d\" rel=\"footnote\">%d</a></sup>", num, num, num);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
rndr_math(hoedown_buffer *ob, const hoedown_buffer *text, int displaymode, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_buffer_put(ob, (const uint8_t *)(displaymode ? "\\[" : "\\("), 2);
|
||||
escape_html(ob, text->data, text->size);
|
||||
hoedown_buffer_put(ob, (const uint8_t *)(displaymode ? "\\]" : "\\)"), 2);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
toc_header(hoedown_buffer *ob, const hoedown_buffer *content, int level, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state = data->opaque;
|
||||
|
||||
if (level <= state->toc_data.nesting_level) {
|
||||
/* set the level offset if this is the first header
|
||||
* we're parsing for the document */
|
||||
if (state->toc_data.current_level == 0)
|
||||
state->toc_data.level_offset = level - 1;
|
||||
|
||||
level -= state->toc_data.level_offset;
|
||||
|
||||
if (level > state->toc_data.current_level) {
|
||||
while (level > state->toc_data.current_level) {
|
||||
HOEDOWN_BUFPUTSL(ob, "<ul>\n<li>\n");
|
||||
state->toc_data.current_level++;
|
||||
}
|
||||
} else if (level < state->toc_data.current_level) {
|
||||
HOEDOWN_BUFPUTSL(ob, "</li>\n");
|
||||
while (level < state->toc_data.current_level) {
|
||||
HOEDOWN_BUFPUTSL(ob, "</ul>\n</li>\n");
|
||||
state->toc_data.current_level--;
|
||||
}
|
||||
HOEDOWN_BUFPUTSL(ob,"<li>\n");
|
||||
} else {
|
||||
HOEDOWN_BUFPUTSL(ob,"</li>\n<li>\n");
|
||||
}
|
||||
|
||||
hoedown_buffer_printf(ob, "<a href=\"#toc_%d\">", state->toc_data.header_count++);
|
||||
if (content) hoedown_buffer_put(ob, content->data, content->size);
|
||||
HOEDOWN_BUFPUTSL(ob, "</a>\n");
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
toc_link(hoedown_buffer *ob, const hoedown_buffer *content, const hoedown_buffer *link, const hoedown_buffer *title, const hoedown_renderer_data *data)
|
||||
{
|
||||
if (content && content->size) hoedown_buffer_put(ob, content->data, content->size);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
toc_finalize(hoedown_buffer *ob, int inline_render, const hoedown_renderer_data *data)
|
||||
{
|
||||
hoedown_html_renderer_state *state;
|
||||
|
||||
if (inline_render)
|
||||
return;
|
||||
|
||||
state = data->opaque;
|
||||
|
||||
while (state->toc_data.current_level > 0) {
|
||||
HOEDOWN_BUFPUTSL(ob, "</li>\n</ul>\n");
|
||||
state->toc_data.current_level--;
|
||||
}
|
||||
|
||||
state->toc_data.header_count = 0;
|
||||
}
|
||||
|
||||
hoedown_renderer *
|
||||
hoedown_html_toc_renderer_new(int nesting_level)
|
||||
{
|
||||
static const hoedown_renderer cb_default = {
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
toc_header,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
rndr_underline,
|
||||
rndr_highlight,
|
||||
rndr_quote,
|
||||
NULL,
|
||||
NULL,
|
||||
toc_link,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
NULL,
|
||||
rndr_normal_text,
|
||||
|
||||
NULL,
|
||||
toc_finalize
|
||||
};
|
||||
|
||||
hoedown_html_renderer_state *state;
|
||||
hoedown_renderer *renderer;
|
||||
|
||||
/* Prepare the state pointer */
|
||||
state = hoedown_malloc(sizeof(hoedown_html_renderer_state));
|
||||
memset(state, 0x0, sizeof(hoedown_html_renderer_state));
|
||||
|
||||
state->toc_data.nesting_level = nesting_level;
|
||||
|
||||
/* Prepare the renderer */
|
||||
renderer = hoedown_malloc(sizeof(hoedown_renderer));
|
||||
memcpy(renderer, &cb_default, sizeof(hoedown_renderer));
|
||||
|
||||
renderer->opaque = state;
|
||||
return renderer;
|
||||
}
|
||||
|
||||
hoedown_renderer *
|
||||
hoedown_html_renderer_new(hoedown_html_flags render_flags, int nesting_level)
|
||||
{
|
||||
static const hoedown_renderer cb_default = {
|
||||
NULL,
|
||||
|
||||
rndr_blockcode,
|
||||
rndr_blockquote,
|
||||
rndr_header,
|
||||
rndr_hrule,
|
||||
rndr_list,
|
||||
rndr_listitem,
|
||||
rndr_paragraph,
|
||||
rndr_table,
|
||||
rndr_table_header,
|
||||
rndr_table_body,
|
||||
rndr_tablerow,
|
||||
rndr_tablecell,
|
||||
rndr_footnotes,
|
||||
rndr_footnote_def,
|
||||
rndr_raw_block,
|
||||
|
||||
rndr_autolink,
|
||||
rndr_codespan,
|
||||
rndr_double_emphasis,
|
||||
rndr_emphasis,
|
||||
rndr_underline,
|
||||
rndr_highlight,
|
||||
rndr_quote,
|
||||
rndr_image,
|
||||
rndr_linebreak,
|
||||
rndr_link,
|
||||
rndr_triple_emphasis,
|
||||
rndr_strikethrough,
|
||||
rndr_superscript,
|
||||
rndr_footnote_ref,
|
||||
rndr_math,
|
||||
rndr_raw_html,
|
||||
|
||||
NULL,
|
||||
rndr_normal_text,
|
||||
|
||||
NULL,
|
||||
NULL
|
||||
};
|
||||
|
||||
hoedown_html_renderer_state *state;
|
||||
hoedown_renderer *renderer;
|
||||
|
||||
/* Prepare the state pointer */
|
||||
state = hoedown_malloc(sizeof(hoedown_html_renderer_state));
|
||||
memset(state, 0x0, sizeof(hoedown_html_renderer_state));
|
||||
|
||||
state->flags = render_flags;
|
||||
state->toc_data.nesting_level = nesting_level;
|
||||
|
||||
/* Prepare the renderer */
|
||||
renderer = hoedown_malloc(sizeof(hoedown_renderer));
|
||||
memcpy(renderer, &cb_default, sizeof(hoedown_renderer));
|
||||
|
||||
if (render_flags & HOEDOWN_HTML_SKIP_HTML || render_flags & HOEDOWN_HTML_ESCAPE)
|
||||
renderer->blockhtml = NULL;
|
||||
|
||||
renderer->opaque = state;
|
||||
return renderer;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_html_renderer_free(hoedown_renderer *renderer)
|
||||
{
|
||||
free(renderer->opaque);
|
||||
free(renderer);
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
/* html.h - HTML renderer and utilities */
|
||||
|
||||
#ifndef HOEDOWN_HTML_H
|
||||
#define HOEDOWN_HTML_H
|
||||
|
||||
#include "document.h"
|
||||
#include "buffer.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*************
|
||||
* CONSTANTS *
|
||||
*************/
|
||||
|
||||
typedef enum hoedown_html_flags {
|
||||
HOEDOWN_HTML_SKIP_HTML = (1 << 0),
|
||||
HOEDOWN_HTML_ESCAPE = (1 << 1),
|
||||
HOEDOWN_HTML_HARD_WRAP = (1 << 2),
|
||||
HOEDOWN_HTML_USE_XHTML = (1 << 3)
|
||||
} hoedown_html_flags;
|
||||
|
||||
typedef enum hoedown_html_tag {
|
||||
HOEDOWN_HTML_TAG_NONE = 0,
|
||||
HOEDOWN_HTML_TAG_OPEN,
|
||||
HOEDOWN_HTML_TAG_CLOSE
|
||||
} hoedown_html_tag;
|
||||
|
||||
|
||||
/*********
|
||||
* TYPES *
|
||||
*********/
|
||||
|
||||
struct hoedown_html_renderer_state {
|
||||
void *opaque;
|
||||
|
||||
struct {
|
||||
int header_count;
|
||||
int current_level;
|
||||
int level_offset;
|
||||
int nesting_level;
|
||||
} toc_data;
|
||||
|
||||
hoedown_html_flags flags;
|
||||
|
||||
/* extra callbacks */
|
||||
void (*link_attributes)(hoedown_buffer *ob, const hoedown_buffer *url, const hoedown_renderer_data *data);
|
||||
};
|
||||
typedef struct hoedown_html_renderer_state hoedown_html_renderer_state;
|
||||
|
||||
|
||||
/*************
|
||||
* FUNCTIONS *
|
||||
*************/
|
||||
|
||||
/* hoedown_html_smartypants: process an HTML snippet using SmartyPants for smart punctuation */
|
||||
void hoedown_html_smartypants(hoedown_buffer *ob, const uint8_t *data, size_t size);
|
||||
|
||||
/* hoedown_html_is_tag: checks if data starts with a specific tag, returns the tag type or NONE */
|
||||
hoedown_html_tag hoedown_html_is_tag(const uint8_t *data, size_t size, const char *tagname);
|
||||
|
||||
|
||||
/* hoedown_html_renderer_new: allocates a regular HTML renderer */
|
||||
hoedown_renderer *hoedown_html_renderer_new(
|
||||
hoedown_html_flags render_flags,
|
||||
int nesting_level
|
||||
) __attribute__ ((malloc));
|
||||
|
||||
/* hoedown_html_toc_renderer_new: like hoedown_html_renderer_new, but the returned renderer produces the Table of Contents */
|
||||
hoedown_renderer *hoedown_html_toc_renderer_new(
|
||||
int nesting_level
|
||||
) __attribute__ ((malloc));
|
||||
|
||||
/* hoedown_html_renderer_free: deallocate an HTML renderer */
|
||||
void hoedown_html_renderer_free(hoedown_renderer *renderer);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /** HOEDOWN_HTML_H **/
|
|
@ -0,0 +1,240 @@
|
|||
/* ANSI-C code produced by gperf version 3.0.3 */
|
||||
/* Command-line: gperf -L ANSI-C -N hoedown_find_block_tag -c -C -E -S 1 --ignore-case -m100 html_block_names.gperf */
|
||||
/* Computed positions: -k'1-2' */
|
||||
|
||||
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
||||
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
|
||||
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
|
||||
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
|
||||
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
|
||||
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
|
||||
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
|
||||
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
|
||||
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
|
||||
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
|
||||
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
|
||||
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
|
||||
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
|
||||
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
|
||||
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
|
||||
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
|
||||
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
|
||||
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
|
||||
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
|
||||
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
|
||||
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
|
||||
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
|
||||
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
|
||||
/* The character set is not based on ISO-646. */
|
||||
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
|
||||
#endif
|
||||
|
||||
/* maximum key range = 24, duplicates = 0 */
|
||||
|
||||
#ifndef GPERF_DOWNCASE
|
||||
#define GPERF_DOWNCASE 1
|
||||
static unsigned char gperf_downcase[256] =
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
|
||||
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
|
||||
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
|
||||
60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
|
||||
107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
|
||||
122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
|
||||
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
|
||||
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
|
||||
135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
|
||||
150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
|
||||
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
|
||||
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
|
||||
195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
|
||||
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
|
||||
225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
|
||||
240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
|
||||
255
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifndef GPERF_CASE_STRNCMP
|
||||
#define GPERF_CASE_STRNCMP 1
|
||||
static int
|
||||
gperf_case_strncmp (register const char *s1, register const char *s2, register unsigned int n)
|
||||
{
|
||||
for (; n > 0;)
|
||||
{
|
||||
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
|
||||
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
|
||||
if (c1 != 0 && c1 == c2)
|
||||
{
|
||||
n--;
|
||||
continue;
|
||||
}
|
||||
return (int)c1 - (int)c2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#else
|
||||
#ifdef __cplusplus
|
||||
inline
|
||||
#endif
|
||||
#endif
|
||||
static unsigned int
|
||||
hash (register const char *str, register unsigned int len)
|
||||
{
|
||||
static const unsigned char asso_values[] =
|
||||
{
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
22, 21, 19, 18, 16, 0, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 1, 25, 0, 25,
|
||||
1, 0, 0, 13, 0, 25, 25, 11, 2, 1,
|
||||
0, 25, 25, 5, 0, 2, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 1, 25,
|
||||
0, 25, 1, 0, 0, 13, 0, 25, 25, 11,
|
||||
2, 1, 0, 25, 25, 5, 0, 2, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
|
||||
25, 25, 25, 25, 25, 25, 25
|
||||
};
|
||||
register int hval = (int)len;
|
||||
|
||||
switch (hval)
|
||||
{
|
||||
default:
|
||||
hval += asso_values[(unsigned char)str[1]+1];
|
||||
/*FALLTHROUGH*/
|
||||
case 1:
|
||||
hval += asso_values[(unsigned char)str[0]];
|
||||
break;
|
||||
}
|
||||
return hval;
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__inline
|
||||
#ifdef __GNUC_STDC_INLINE__
|
||||
__attribute__ ((__gnu_inline__))
|
||||
#endif
|
||||
#endif
|
||||
const char *
|
||||
hoedown_find_block_tag (register const char *str, register unsigned int len)
|
||||
{
|
||||
enum
|
||||
{
|
||||
TOTAL_KEYWORDS = 24,
|
||||
MIN_WORD_LENGTH = 1,
|
||||
MAX_WORD_LENGTH = 10,
|
||||
MIN_HASH_VALUE = 1,
|
||||
MAX_HASH_VALUE = 24
|
||||
};
|
||||
|
||||
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
||||
{
|
||||
register int key = hash (str, len);
|
||||
|
||||
if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)
|
||||
{
|
||||
register const char *resword;
|
||||
|
||||
switch (key - 1)
|
||||
{
|
||||
case 0:
|
||||
resword = "p";
|
||||
goto compare;
|
||||
case 1:
|
||||
resword = "h6";
|
||||
goto compare;
|
||||
case 2:
|
||||
resword = "div";
|
||||
goto compare;
|
||||
case 3:
|
||||
resword = "del";
|
||||
goto compare;
|
||||
case 4:
|
||||
resword = "form";
|
||||
goto compare;
|
||||
case 5:
|
||||
resword = "table";
|
||||
goto compare;
|
||||
case 6:
|
||||
resword = "figure";
|
||||
goto compare;
|
||||
case 7:
|
||||
resword = "pre";
|
||||
goto compare;
|
||||
case 8:
|
||||
resword = "fieldset";
|
||||
goto compare;
|
||||
case 9:
|
||||
resword = "noscript";
|
||||
goto compare;
|
||||
case 10:
|
||||
resword = "script";
|
||||
goto compare;
|
||||
case 11:
|
||||
resword = "style";
|
||||
goto compare;
|
||||
case 12:
|
||||
resword = "dl";
|
||||
goto compare;
|
||||
case 13:
|
||||
resword = "ol";
|
||||
goto compare;
|
||||
case 14:
|
||||
resword = "ul";
|
||||
goto compare;
|
||||
case 15:
|
||||
resword = "math";
|
||||
goto compare;
|
||||
case 16:
|
||||
resword = "ins";
|
||||
goto compare;
|
||||
case 17:
|
||||
resword = "h5";
|
||||
goto compare;
|
||||
case 18:
|
||||
resword = "iframe";
|
||||
goto compare;
|
||||
case 19:
|
||||
resword = "h4";
|
||||
goto compare;
|
||||
case 20:
|
||||
resword = "h3";
|
||||
goto compare;
|
||||
case 21:
|
||||
resword = "blockquote";
|
||||
goto compare;
|
||||
case 22:
|
||||
resword = "h2";
|
||||
goto compare;
|
||||
case 23:
|
||||
resword = "h1";
|
||||
goto compare;
|
||||
}
|
||||
return 0;
|
||||
compare:
|
||||
if ((((unsigned char)*str ^ (unsigned char)*resword) & ~32) == 0 && !gperf_case_strncmp (str, resword, len) && resword[len] == '\0')
|
||||
return resword;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,435 @@
|
|||
#include "html.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
struct smartypants_data {
|
||||
int in_squote;
|
||||
int in_dquote;
|
||||
};
|
||||
|
||||
static size_t smartypants_cb__ltag(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dquote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__amp(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__period(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__number(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__dash(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__parens(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__backtick(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
static size_t smartypants_cb__escape(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size);
|
||||
|
||||
static size_t (*smartypants_cb_ptrs[])
|
||||
(hoedown_buffer *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
|
||||
{
|
||||
NULL, /* 0 */
|
||||
smartypants_cb__dash, /* 1 */
|
||||
smartypants_cb__parens, /* 2 */
|
||||
smartypants_cb__squote, /* 3 */
|
||||
smartypants_cb__dquote, /* 4 */
|
||||
smartypants_cb__amp, /* 5 */
|
||||
smartypants_cb__period, /* 6 */
|
||||
smartypants_cb__number, /* 7 */
|
||||
smartypants_cb__ltag, /* 8 */
|
||||
smartypants_cb__backtick, /* 9 */
|
||||
smartypants_cb__escape, /* 10 */
|
||||
};
|
||||
|
||||
static const uint8_t smartypants_cb_chars[UINT8_MAX+1] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 4, 0, 0, 0, 5, 3, 2, 0, 0, 0, 0, 1, 6, 0,
|
||||
0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0,
|
||||
9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
static int
|
||||
word_boundary(uint8_t c)
|
||||
{
|
||||
return c == 0 || isspace(c) || ispunct(c);
|
||||
}
|
||||
|
||||
/*
|
||||
If 'text' begins with any kind of single quote (e.g. "'" or "'" etc.),
|
||||
returns the length of the sequence of characters that makes up the single-
|
||||
quote. Otherwise, returns zero.
|
||||
*/
|
||||
static size_t
|
||||
squote_len(const uint8_t *text, size_t size)
|
||||
{
|
||||
static char* single_quote_list[] = { "'", "'", "'", "'", NULL };
|
||||
char** p;
|
||||
|
||||
for (p = single_quote_list; *p; ++p) {
|
||||
size_t len = strlen(*p);
|
||||
if (size >= len && memcmp(text, *p, len) == 0) {
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Converts " or ' at very beginning or end of a word to left or right quote */
|
||||
static int
|
||||
smartypants_quotes(hoedown_buffer *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
|
||||
{
|
||||
char ent[8];
|
||||
|
||||
if (*is_open && !word_boundary(next_char))
|
||||
return 0;
|
||||
|
||||
if (!(*is_open) && !word_boundary(previous_char))
|
||||
return 0;
|
||||
|
||||
snprintf(ent, sizeof(ent), "&%c%cquo;", (*is_open) ? 'r' : 'l', quote);
|
||||
*is_open = !(*is_open);
|
||||
hoedown_buffer_puts(ob, ent);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
Converts ' to left or right single quote; but the initial ' might be in
|
||||
different forms, e.g. ' or ' or '.
|
||||
'squote_text' points to the original single quote, and 'squote_size' is its length.
|
||||
'text' points at the last character of the single-quote, e.g. ' or ;
|
||||
*/
|
||||
static size_t
|
||||
smartypants_squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size,
|
||||
const uint8_t *squote_text, size_t squote_size)
|
||||
{
|
||||
if (size >= 2) {
|
||||
uint8_t t1 = tolower(text[1]);
|
||||
size_t next_squote_len = squote_len(text+1, size-1);
|
||||
|
||||
/* convert '' to “ or ” */
|
||||
if (next_squote_len > 0) {
|
||||
uint8_t next_char = (size > 1+next_squote_len) ? text[1+next_squote_len] : 0;
|
||||
if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
|
||||
return next_squote_len;
|
||||
}
|
||||
|
||||
/* Tom's, isn't, I'm, I'd */
|
||||
if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
|
||||
(size == 3 || word_boundary(text[2]))) {
|
||||
HOEDOWN_BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* you're, you'll, you've */
|
||||
if (size >= 3) {
|
||||
uint8_t t2 = tolower(text[2]);
|
||||
|
||||
if (((t1 == 'r' && t2 == 'e') ||
|
||||
(t1 == 'l' && t2 == 'l') ||
|
||||
(t1 == 'v' && t2 == 'e')) &&
|
||||
(size == 4 || word_boundary(text[3]))) {
|
||||
HOEDOWN_BUFPUTSL(ob, "’");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 's', &smrt->in_squote))
|
||||
return 0;
|
||||
|
||||
hoedown_buffer_put(ob, squote_text, squote_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Converts ' to left or right single quote. */
|
||||
static size_t
|
||||
smartypants_cb__squote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
return smartypants_squote(ob, smrt, previous_char, text, size, text, 1);
|
||||
}
|
||||
|
||||
/* Converts (c), (r), (tm) */
|
||||
static size_t
|
||||
smartypants_cb__parens(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 3) {
|
||||
uint8_t t1 = tolower(text[1]);
|
||||
uint8_t t2 = tolower(text[2]);
|
||||
|
||||
if (t1 == 'c' && t2 == ')') {
|
||||
HOEDOWN_BUFPUTSL(ob, "©");
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (t1 == 'r' && t2 == ')') {
|
||||
HOEDOWN_BUFPUTSL(ob, "®");
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') {
|
||||
HOEDOWN_BUFPUTSL(ob, "™");
|
||||
return 3;
|
||||
}
|
||||
}
|
||||
|
||||
hoedown_buffer_putc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Converts "--" to em-dash, etc. */
|
||||
static size_t
|
||||
smartypants_cb__dash(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 3 && text[1] == '-' && text[2] == '-') {
|
||||
HOEDOWN_BUFPUTSL(ob, "—");
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (size >= 2 && text[1] == '-') {
|
||||
HOEDOWN_BUFPUTSL(ob, "–");
|
||||
return 1;
|
||||
}
|
||||
|
||||
hoedown_buffer_putc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Converts " etc. */
|
||||
static size_t
|
||||
smartypants_cb__amp(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
size_t len;
|
||||
if (size >= 6 && memcmp(text, """, 6) == 0) {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote))
|
||||
return 5;
|
||||
}
|
||||
|
||||
len = squote_len(text, size);
|
||||
if (len > 0) {
|
||||
return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len);
|
||||
}
|
||||
|
||||
if (size >= 4 && memcmp(text, "�", 4) == 0)
|
||||
return 3;
|
||||
|
||||
hoedown_buffer_putc(ob, '&');
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Converts "..." to ellipsis */
|
||||
static size_t
|
||||
smartypants_cb__period(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 3 && text[1] == '.' && text[2] == '.') {
|
||||
HOEDOWN_BUFPUTSL(ob, "…");
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (size >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.') {
|
||||
HOEDOWN_BUFPUTSL(ob, "…");
|
||||
return 4;
|
||||
}
|
||||
|
||||
hoedown_buffer_putc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Converts `` to opening double quote */
|
||||
static size_t
|
||||
smartypants_cb__backtick(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size >= 2 && text[1] == '`') {
|
||||
if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote))
|
||||
return 1;
|
||||
}
|
||||
|
||||
hoedown_buffer_putc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Converts 1/2, 1/4, 3/4 */
|
||||
static size_t
|
||||
smartypants_cb__number(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (word_boundary(previous_char) && size >= 3) {
|
||||
if (text[0] == '1' && text[1] == '/' && text[2] == '2') {
|
||||
if (size == 3 || word_boundary(text[3])) {
|
||||
HOEDOWN_BUFPUTSL(ob, "½");
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (text[0] == '1' && text[1] == '/' && text[2] == '4') {
|
||||
if (size == 3 || word_boundary(text[3]) ||
|
||||
(size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) {
|
||||
HOEDOWN_BUFPUTSL(ob, "¼");
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (text[0] == '3' && text[1] == '/' && text[2] == '4') {
|
||||
if (size == 3 || word_boundary(text[3]) ||
|
||||
(size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) {
|
||||
HOEDOWN_BUFPUTSL(ob, "¾");
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
hoedown_buffer_putc(ob, text[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Converts " to left or right double quote */
|
||||
static size_t
|
||||
smartypants_cb__dquote(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (!smartypants_quotes(ob, previous_char, size > 0 ? text[1] : 0, 'd', &smrt->in_dquote))
|
||||
HOEDOWN_BUFPUTSL(ob, """);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__ltag(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
static const char *skip_tags[] = {
|
||||
"pre", "code", "var", "samp", "kbd", "math", "script", "style"
|
||||
};
|
||||
static const size_t skip_tags_count = 8;
|
||||
|
||||
size_t tag, i = 0;
|
||||
|
||||
/* This is a comment. Copy everything verbatim until --> or EOF is seen. */
|
||||
if (i + 4 < size && memcmp(text + i, "<!--", 4) == 0) {
|
||||
i += 4;
|
||||
while (i + 3 < size && memcmp(text + i, "-->", 3) != 0)
|
||||
i++;
|
||||
i += 3;
|
||||
hoedown_buffer_put(ob, text, i + 1);
|
||||
return i;
|
||||
}
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
|
||||
for (tag = 0; tag < skip_tags_count; ++tag) {
|
||||
if (hoedown_html_is_tag(text, size, skip_tags[tag]) == HOEDOWN_HTML_TAG_OPEN)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tag < skip_tags_count) {
|
||||
for (;;) {
|
||||
while (i < size && text[i] != '<')
|
||||
i++;
|
||||
|
||||
if (i == size)
|
||||
break;
|
||||
|
||||
if (hoedown_html_is_tag(text + i, size - i, skip_tags[tag]) == HOEDOWN_HTML_TAG_CLOSE)
|
||||
break;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
while (i < size && text[i] != '>')
|
||||
i++;
|
||||
}
|
||||
|
||||
hoedown_buffer_put(ob, text, i + 1);
|
||||
return i;
|
||||
}
|
||||
|
||||
static size_t
|
||||
smartypants_cb__escape(hoedown_buffer *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size)
|
||||
{
|
||||
if (size < 2)
|
||||
return 0;
|
||||
|
||||
switch (text[1]) {
|
||||
case '\\':
|
||||
case '"':
|
||||
case '\'':
|
||||
case '.':
|
||||
case '-':
|
||||
case '`':
|
||||
hoedown_buffer_putc(ob, text[1]);
|
||||
return 1;
|
||||
|
||||
default:
|
||||
hoedown_buffer_putc(ob, '\\');
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
static struct {
|
||||
uint8_t c0;
|
||||
const uint8_t *pattern;
|
||||
const uint8_t *entity;
|
||||
int skip;
|
||||
} smartypants_subs[] = {
|
||||
{ '\'', "'s>", "’", 0 },
|
||||
{ '\'', "'t>", "’", 0 },
|
||||
{ '\'', "'re>", "’", 0 },
|
||||
{ '\'', "'ll>", "’", 0 },
|
||||
{ '\'', "'ve>", "’", 0 },
|
||||
{ '\'', "'m>", "’", 0 },
|
||||
{ '\'', "'d>", "’", 0 },
|
||||
{ '-', "--", "—", 1 },
|
||||
{ '-', "<->", "–", 0 },
|
||||
{ '.', "...", "…", 2 },
|
||||
{ '.', ". . .", "…", 4 },
|
||||
{ '(', "(c)", "©", 2 },
|
||||
{ '(', "(r)", "®", 2 },
|
||||
{ '(', "(tm)", "™", 3 },
|
||||
{ '3', "<3/4>", "¾", 2 },
|
||||
{ '3', "<3/4ths>", "¾", 2 },
|
||||
{ '1', "<1/2>", "½", 2 },
|
||||
{ '1', "<1/4>", "¼", 2 },
|
||||
{ '1', "<1/4th>", "¼", 2 },
|
||||
{ '&', "�", 0, 3 },
|
||||
};
|
||||
#endif
|
||||
|
||||
void
|
||||
hoedown_html_smartypants(hoedown_buffer *ob, const uint8_t *text, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
struct smartypants_data smrt = {0, 0};
|
||||
|
||||
if (!text)
|
||||
return;
|
||||
|
||||
hoedown_buffer_grow(ob, size);
|
||||
|
||||
for (i = 0; i < size; ++i) {
|
||||
size_t org;
|
||||
uint8_t action = 0;
|
||||
|
||||
org = i;
|
||||
while (i < size && (action = smartypants_cb_chars[text[i]]) == 0)
|
||||
i++;
|
||||
|
||||
if (i > org)
|
||||
hoedown_buffer_put(ob, text + org, i - org);
|
||||
|
||||
if (i < size) {
|
||||
i += smartypants_cb_ptrs[(int)action]
|
||||
(ob, &smrt, i ? text[i - 1] : 0, text + i, size - i);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
#include "stack.h"
|
||||
|
||||
#include "buffer.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
void
|
||||
hoedown_stack_init(hoedown_stack *st, size_t initial_size)
|
||||
{
|
||||
assert(st);
|
||||
|
||||
st->item = NULL;
|
||||
st->size = st->asize = 0;
|
||||
|
||||
if (!initial_size)
|
||||
initial_size = 8;
|
||||
|
||||
hoedown_stack_grow(st, initial_size);
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_stack_uninit(hoedown_stack *st)
|
||||
{
|
||||
assert(st);
|
||||
|
||||
free(st->item);
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_stack_grow(hoedown_stack *st, size_t neosz)
|
||||
{
|
||||
assert(st);
|
||||
|
||||
if (st->asize >= neosz)
|
||||
return;
|
||||
|
||||
st->item = hoedown_realloc(st->item, neosz * sizeof(void *));
|
||||
memset(st->item + st->asize, 0x0, (neosz - st->asize) * sizeof(void *));
|
||||
|
||||
st->asize = neosz;
|
||||
|
||||
if (st->size > neosz)
|
||||
st->size = neosz;
|
||||
}
|
||||
|
||||
void
|
||||
hoedown_stack_push(hoedown_stack *st, void *item)
|
||||
{
|
||||
assert(st);
|
||||
|
||||
if (st->size >= st->asize)
|
||||
hoedown_stack_grow(st, st->size * 2);
|
||||
|
||||
st->item[st->size++] = item;
|
||||
}
|
||||
|
||||
void *
|
||||
hoedown_stack_pop(hoedown_stack *st)
|
||||
{
|
||||
assert(st);
|
||||
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[--st->size];
|
||||
}
|
||||
|
||||
void *
|
||||
hoedown_stack_top(const hoedown_stack *st)
|
||||
{
|
||||
assert(st);
|
||||
|
||||
if (!st->size)
|
||||
return NULL;
|
||||
|
||||
return st->item[st->size - 1];
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
/* stack.h - simple stacking */
|
||||
|
||||
#ifndef HOEDOWN_STACK_H
|
||||
#define HOEDOWN_STACK_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*********
|
||||
* TYPES *
|
||||
*********/
|
||||
|
||||
struct hoedown_stack {
|
||||
void **item;
|
||||
size_t size;
|
||||
size_t asize;
|
||||
};
|
||||
typedef struct hoedown_stack hoedown_stack;
|
||||
|
||||
|
||||
/*************
|
||||
* FUNCTIONS *
|
||||
*************/
|
||||
|
||||
/* hoedown_stack_init: initialize a stack */
|
||||
void hoedown_stack_init(hoedown_stack *st, size_t initial_size);
|
||||
|
||||
/* hoedown_stack_uninit: free internal data of the stack */
|
||||
void hoedown_stack_uninit(hoedown_stack *st);
|
||||
|
||||
/* hoedown_stack_grow: increase the allocated size to the given value */
|
||||
void hoedown_stack_grow(hoedown_stack *st, size_t neosz);
|
||||
|
||||
/* hoedown_stack_push: push an item to the top of the stack */
|
||||
void hoedown_stack_push(hoedown_stack *st, void *item);
|
||||
|
||||
/* hoedown_stack_pop: retrieve and remove the item at the top of the stack */
|
||||
void *hoedown_stack_pop(hoedown_stack *st);
|
||||
|
||||
/* hoedown_stack_top: retrieve the item at the top of the stack */
|
||||
void *hoedown_stack_top(const hoedown_stack *st);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /** HOEDOWN_STACK_H **/
|
|
@ -0,0 +1,9 @@
|
|||
#include "version.h"
|
||||
|
||||
void
|
||||
hoedown_version(int *major, int *minor, int *revision)
|
||||
{
|
||||
*major = HOEDOWN_VERSION_MAJOR;
|
||||
*minor = HOEDOWN_VERSION_MINOR;
|
||||
*revision = HOEDOWN_VERSION_REVISION;
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
/* version.h - holds Hoedown's version */
|
||||
|
||||
#ifndef HOEDOWN_VERSION_H
|
||||
#define HOEDOWN_VERSION_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*************
|
||||
* CONSTANTS *
|
||||
*************/
|
||||
|
||||
#define HOEDOWN_VERSION "3.0.7"
|
||||
#define HOEDOWN_VERSION_MAJOR 3
|
||||
#define HOEDOWN_VERSION_MINOR 0
|
||||
#define HOEDOWN_VERSION_REVISION 7
|
||||
|
||||
|
||||
/*************
|
||||
* FUNCTIONS *
|
||||
*************/
|
||||
|
||||
/* hoedown_version: retrieve Hoedown's version numbers */
|
||||
void hoedown_version(int *major, int *minor, int *revision);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /** HOEDOWN_VERSION_H **/
|
66
spectral.pro
66
spectral.pro
|
@ -20,9 +20,6 @@ isEmpty(USE_SYSTEM_SORTFILTERPROXYMODEL) {
|
|||
isEmpty(USE_SYSTEM_QMATRIXCLIENT) {
|
||||
USE_SYSTEM_QMATRIXCLIENT = false
|
||||
}
|
||||
isEmpty(USE_SYSTEM_CMARK) {
|
||||
USE_SYSTEM_CMARK = false
|
||||
}
|
||||
isEmpty(BUNDLE_FONT) {
|
||||
BUNDLE_FONT = false
|
||||
}
|
||||
|
@ -39,50 +36,27 @@ $$USE_SYSTEM_SORTFILTERPROXYMODEL {
|
|||
message("Falling back to built-in SortFilterProxyModel.")
|
||||
include(include/SortFilterProxyModel/SortFilterProxyModel.pri)
|
||||
}
|
||||
$$USE_SYSTEM_CMARK {
|
||||
PKGCONFIG += libcmark
|
||||
} else {
|
||||
message("Falling back to built-in CMark.")
|
||||
INCLUDEPATH += include/cmark
|
||||
HEADERS += \
|
||||
include/cmark/buffer.h \
|
||||
include/cmark/chunk.h \
|
||||
include/cmark/cmark.h \
|
||||
include/cmark/cmark_ctype.h \
|
||||
include/cmark/cmark_export.h \
|
||||
include/cmark/config.h \
|
||||
include/cmark/houdini.h \
|
||||
include/cmark/inlines.h \
|
||||
include/cmark/iterator.h \
|
||||
include/cmark/node.h \
|
||||
include/cmark/parser.h \
|
||||
include/cmark/references.h \
|
||||
include/cmark/render.h \
|
||||
include/cmark/scanners.h \
|
||||
include/cmark/utf8.h
|
||||
|
||||
SOURCES += \
|
||||
include/cmark/blocks.c \
|
||||
include/cmark/buffer.c \
|
||||
include/cmark/cmark.c \
|
||||
include/cmark/cmark_ctype.c \
|
||||
include/cmark/commonmark.c \
|
||||
include/cmark/entities.inc \
|
||||
include/cmark/houdini_href_e.c \
|
||||
include/cmark/houdini_html_e.c \
|
||||
include/cmark/houdini_html_u.c \
|
||||
include/cmark/html.c \
|
||||
include/cmark/inlines.c \
|
||||
include/cmark/iterator.c \
|
||||
include/cmark/latex.c \
|
||||
include/cmark/man.c \
|
||||
include/cmark/node.c \
|
||||
include/cmark/references.c \
|
||||
include/cmark/render.c \
|
||||
include/cmark/scanners.c \
|
||||
include/cmark/utf8.c \
|
||||
include/cmark/xml.c
|
||||
}
|
||||
INCLUDEPATH += include/hoedown
|
||||
HEADERS += \
|
||||
include/hoedown/autolink.h \
|
||||
include/hoedown/buffer.h \
|
||||
include/hoedown/document.h \
|
||||
include/hoedown/escape.h \
|
||||
include/hoedown/html.h \
|
||||
include/hoedown/stack.h \
|
||||
include/hoedown/version.h
|
||||
|
||||
SOURCES += \
|
||||
include/hoedown/autolink.c \
|
||||
include/hoedown/buffer.c \
|
||||
include/hoedown/document.c \
|
||||
include/hoedown/escape.c \
|
||||
include/hoedown/html.c \
|
||||
include/hoedown/html_blocks.c \
|
||||
include/hoedown/html_smartypants.c \
|
||||
include/hoedown/stack.c \
|
||||
include/hoedown/version.c
|
||||
|
||||
# The following define makes your compiler emit warnings if you use
|
||||
# any feature of Qt which as been marked deprecated (the exact warnings
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
#include <QMetaObject>
|
||||
#include <QMimeDatabase>
|
||||
|
||||
#include "cmark.h"
|
||||
#include "html.h"
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
|
@ -205,8 +205,20 @@ QVariantList SpectralRoom::getUsers(const QString& prefix) {
|
|||
}
|
||||
|
||||
QString SpectralRoom::postMarkdownText(const QString& markdown) {
|
||||
QByteArray local = markdown.toLocal8Bit();
|
||||
const char* data = local.data();
|
||||
QString html = cmark_markdown_to_html(data, local.length(), 0);
|
||||
return postHtmlText(markdown, html);
|
||||
unsigned char *sequence = (unsigned char *) qstrdup(markdown.toUtf8().constData());
|
||||
qint64 length = strlen((char *) sequence);
|
||||
|
||||
hoedown_renderer* renderer = hoedown_html_renderer_new(HOEDOWN_HTML_USE_XHTML, 32);
|
||||
hoedown_extensions extensions = (hoedown_extensions) ((HOEDOWN_EXT_BLOCK | HOEDOWN_EXT_SPAN | HOEDOWN_EXT_MATH_EXPLICIT) & ~HOEDOWN_EXT_QUOTE);
|
||||
hoedown_document* document = hoedown_document_new(renderer, extensions, 32);
|
||||
hoedown_buffer* html = hoedown_buffer_new(length);
|
||||
hoedown_document_render(document, html, sequence, length);
|
||||
QString result = QString::fromUtf8((char *) html->data, html->size);
|
||||
|
||||
free(sequence);
|
||||
hoedown_buffer_free(html);
|
||||
hoedown_document_free(document);
|
||||
hoedown_html_renderer_free(renderer);
|
||||
|
||||
return postHtmlText(markdown, result);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue