This commit is contained in:
Rasmus Andersson 2020-10-18 09:23:42 -07:00
parent cb877fce88
commit bef16e0aa0
12 changed files with 376 additions and 265 deletions

6
example/example-json.js Normal file
View file

@ -0,0 +1,6 @@
// Example: parse example2.md with the debug build of the module and print
// the result produced by the "json" output format.
const { readFileSync } = require("fs")
const markdown = require("../build/debug/markdown.node.js")

const input = readFileSync(`${__dirname}/example2.md`)
console.log(markdown.parse(input, { format: "json" }))

44
example/example2.md Normal file
View file

@ -0,0 +1,44 @@
# H1
This is a paragraph
## H2
This is a paragraph
## Another
This is a paragraph with style *italic* _italic_ **bold** __bold__
![image](https://rsms.me/raster/examples/image1.jpg)
![](https://rsms.me/image.png?without-alt)
*Hello [link](https://rsms.me/) lol*
Hello [*link*](https://rsms.me/) lol "cat"
Hello from *[link](https://rsms.me/)* to __everyone__ `reading this`
Here's an [**important** anchor link](#example).
line 1
line 2
XML & html "entities"
&
&
&
&
&
&


## Lists
- Unordered
* Lists
+ Of mixed type
1. Ordered
2. Lists
4. Numbers are ignored

View file

@ -1,5 +1,5 @@
/*
* md4c modified for mdjs.
* md4c modified for markdown-wasm.
* Original source code is licensed as follows:
*
* Copyright (c) 2016-2019 Martin Mitas
@ -439,14 +439,14 @@ int fmt_html(
const MD_CHAR* input,
MD_SIZE input_size,
WBuf* outbuf,
u32 parser_flags,
u32 render_flags
u32 parseFlags,
u32 fmtFlags
) {
HtmlRenderer render = { outbuf, 0, 0, render_flags };
HtmlRenderer render = { outbuf, 0, 0, fmtFlags };
MD_PARSER parser = {
0,
parser_flags,
parseFlags,
enter_block_callback,
leave_block_callback,
enter_span_callback,

View file

@ -1,6 +1,6 @@
#pragma once
#include "wbuf.h"
#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g. <br>, generate <br/>
#define MD_HTML_FLAG_XHTML (1 << 0) // instead of e.g. <br>, generate <br/>
int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags, u32 renderFlags);
int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags);

View file

@ -4,20 +4,17 @@
#include "fmt_json.h"
#include "md4c.h"
// #include "md4c_render_html.h"
// #include "entity.h"
// JSON formatter
//
//
// -------------- WORK IN PROGRESS
//
// -- WORK IN PROGRESS --
//
#ifdef _WIN32
#define snprintf _snprintf
#endif
// dlog
#ifndef DEBUG
#define DEBUG 1
#endif
@ -31,6 +28,7 @@
typedef struct JsonFormatter_st {
WBuf* outbuf;
u32 bnest; // block nesting level
} JsonFormatter;
@ -39,61 +37,49 @@ typedef struct JsonFormatter_st {
#define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z')
#define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
#define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0]))
// static inline void render_text(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
// // r->process_output(text, size, r->userdata);
// WBufAppendBytes(r->outbuf, text, size);
// }
#define render_text(f, textptr, textlen) \
WBufAppendBytes((r)->outbuf, (textptr), (textlen))
// #define RENDER_LITERAL(r, literal) \
// WBufAppendBytes((r)->outbuf, (literal), (MD_SIZE)strlen(literal))
static char jsonEscapeMap[256];
#define JSON_SUB_LEN 2
static const char* jsonEscapeMap[256];
static void __attribute__((constructor)) init() {
jsonEscapeMap[(unsigned char)'"'] = 1;
jsonEscapeMap[(unsigned char)'\n'] = 1;
jsonEscapeMap[(unsigned char)'\r'] = 1;
jsonEscapeMap[(unsigned char)'\t'] = 1;
// important: Values must all be exactly JSON_SUB_LEN bytes long
jsonEscapeMap[(unsigned char)'"'] = "\\\"";
jsonEscapeMap[(unsigned char)'\n'] = "\\n";
jsonEscapeMap[(unsigned char)'\r'] = "\\r";
jsonEscapeMap[(unsigned char)'\t'] = "\\t";
}
// #define JSON_BYTE_NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0)
#define JSON_ESCAPE_MAP(ch) jsonEscapeMap[(unsigned char)(ch)]
static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
MD_OFFSET beg = 0;
MD_OFFSET off = 0;
#define NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0)
while(1) {
/* Optimization: Use some loop unrolling. */
while (
off + 3 < size &&
!NEED_ESCAPE(data[off+0]) &&
!NEED_ESCAPE(data[off+1]) &&
!NEED_ESCAPE(data[off+2]) &&
!NEED_ESCAPE(data[off+3])
) {
off += 4;
}
while (off < size && !NEED_ESCAPE(data[off])) {
while (1) {
const char* sub = NULL;
while (off < size) {
sub = JSON_ESCAPE_MAP(data[off]);
if (sub != NULL) {
break;
}
off++;
}
if (off > beg) {
// in-between
WBufAppendBytes(r->outbuf, data + beg, off - beg);
}
if (off < size) {
switch (data[off]) {
case '"': WBufAppendCStr(r->outbuf, "\\\""); break;
case '\n': WBufAppendCStr(r->outbuf, "\\n"); break;
case '\r': WBufAppendCStr(r->outbuf, "\\r"); break;
case '\t': WBufAppendCStr(r->outbuf, "\\t"); break;
}
if (sub) {
WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
off++;
} else {
break;
@ -101,14 +87,10 @@ static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size
beg = off;
}
#undef NEED_ESCAPE
}
static void
render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
{
static void render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
MD_OFFSET beg = 0;
MD_OFFSET off = 0;
@ -144,56 +126,21 @@ render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
}
}
static unsigned
hex_val(char ch)
{
if('0' <= ch && ch <= '9')
static unsigned hex_val(char ch) {
if ('0' <= ch && ch <= '9') {
return ch - '0';
if('A' <= ch && ch <= 'Z')
}
if ('A' <= ch && ch <= 'Z') {
return ch - 'A' + 10;
else
return ch - 'a' + 10;
}
return ch - 'a' + 10;
}
static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) {
if (codepoint <= 0x7f) {
WBufAppendc(b, (char)codepoint);
return;
}
unsigned char utf8[4];
size_t n;
if (codepoint <= 0x7ff) {
n = 2;
utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
} else if (codepoint <= 0xffff) {
n = 3;
utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
} else {
n = 4;
utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
}
if (0 < codepoint && codepoint <= 0x10ffff) {
WBufAppendBytes(b, (const char*)utf8, n);
} else {
static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char));
}
}
/* Translate entity to its UTF-8 equivalent, or output the verbatim one
* if such entity is unknown (or if the translation is disabled). */
// Translate entity to its UTF-8 equivalent, or output the verbatim one
// if such entity is unknown (or if the translation is disabled).
static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
if (size > 3 && text[1] == '#') {
unsigned codepoint = 0;
if(text[2] == 'x' || text[2] == 'X') {
// Hexadecimal entity (e.g. "&#x1234abcd;")).
for (MD_SIZE i = 3; i < size-1; i++) {
@ -205,17 +152,41 @@ static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE
codepoint = 10 * codepoint + (text[i] - '0');
}
}
WBufAppendUTF8Codepoint(r->outbuf, codepoint);
if (codepoint <= 0xFF) {
const char* sub = JSON_ESCAPE_MAP(codepoint);
if (sub) {
// predefined escape code, e.g. "\n"
WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
} else {
// verbatim
WBufAppendUTF8Codepoint(r->outbuf, codepoint);
}
} else {
// e.g. \uD87E
WBufAppendCStr(r->outbuf, "\\u");
if (codepoint <= 0xF) {
WBufAppendCStr(r->outbuf, "000");
} else if (codepoint <= 0xFF) {
WBufAppendCStr(r->outbuf, "00");
} else if (codepoint <= 0xFFF) {
WBufAppendCStr(r->outbuf, "0");
}
WBufAppendU32(r->outbuf, codepoint, 16);
}
} else {
WBufAppendBytes(r->outbuf, text, size);
// named entity
// We could do a lookup here but it would increase the WASM module binary size by
// at least 20kB, so for now, let's keep it simple and just include it verbatim until we
// can do something fancy like a compressed b-tree.
writeJsonEscaped(r, text, size);
}
}
static void
render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE))
{
static void render_attribute(
JsonFormatter* r,
const MD_ATTRIBUTE* attr,
void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE)
) {
int i;
for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
@ -233,68 +204,7 @@ render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
}
static void
render_open_ol_block(JsonFormatter* r, const MD_BLOCK_OL_DETAIL* det)
{
char buf[64];
if(det->start == 1) {
WBufAppendCStr(r->outbuf, "<ol>\n");
return;
}
snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
WBufAppendCStr(r->outbuf, buf);
}
static void
render_open_li_block(JsonFormatter* r, const MD_BLOCK_LI_DETAIL* det)
{
if(det->is_task) {
WBufAppendCStr(r->outbuf,
"<li class=\"task-list-item\">"
"<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
if (det->task_mark == 'x' || det->task_mark == 'X') {
WBufAppendCStr(r->outbuf, " checked");
}
WBufAppendc(r->outbuf, '>');
} else {
WBufAppendCStr(r->outbuf, "<li>");
}
}
static void
render_open_code_block(JsonFormatter* r, const MD_BLOCK_CODE_DETAIL* det)
{
WBufAppendCStr(r->outbuf, "<pre><code");
/* If known, output the HTML 5 attribute class="language-LANGNAME". */
if(det->lang.text != NULL) {
WBufAppendCStr(r->outbuf, " class=\"language-");
render_attribute(r, &det->lang, writeJsonEscaped);
WBufAppendc(r->outbuf, '"');
}
WBufAppendc(r->outbuf, '>');
}
static void
render_open_td_block(JsonFormatter* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
{
WBufAppendc(r->outbuf, '<');
WBufAppendCStr(r->outbuf, cell_type);
switch (det->align) {
case MD_ALIGN_LEFT: WBufAppendCStr(r->outbuf, " align=\"left\">"); break;
case MD_ALIGN_CENTER: WBufAppendCStr(r->outbuf, " align=\"center\">"); break;
case MD_ALIGN_RIGHT: WBufAppendCStr(r->outbuf, " align=\"right\">"); break;
default: WBufAppendCStr(r->outbuf, ">"); break;
}
}
static void
render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
{
static void render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det) {
WBufAppendCStr(r->outbuf, "<a href=\"");
render_attribute(r, &det->href, render_url_escaped);
@ -306,18 +216,14 @@ render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
WBufAppendCStr(r->outbuf, "\">");
}
static void
render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
{
static void render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
WBufAppendCStr(r->outbuf, "<img src=\"");
render_attribute(r, &det->src, render_url_escaped);
WBufAppendCStr(r->outbuf, "\" alt=\"");
}
static void
render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
{
static void render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
if(det->title.text != NULL) {
WBufAppendCStr(r->outbuf, "\" title=\"");
render_attribute(r, &det->title, writeJsonEscaped);
@ -326,9 +232,7 @@ render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
WBufAppendCStr(r->outbuf, "\">");
}
static void
render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
{
static void render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det) {
WBufAppendCStr(r->outbuf, "<x-wikilink data-target=\"");
render_attribute(r, &det->target, writeJsonEscaped);
WBufAppendCStr(r->outbuf, "\">");
@ -338,12 +242,20 @@ render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
// ------------------------------------------------------------------------------------------------
static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
WBufAppendCStr(r->outbuf, "{\"_\":\"");
WBufAppendBytes(r->outbuf, typename, typenamelen);
WBufAppendc(r->outbuf, '"');
}
// Starts a new output line: appends '\n' to r->outbuf followed by indentation
// whose length is 4 bytes per block nesting level (r->bnest).
// The indentation bytes are copied from indent_chunk_str, one whole chunk at a
// time while more than a chunk's worth remains, then the remainder in one call.
static void writeNewline(JsonFormatter* r) {
WBufAppendc(r->outbuf, '\n');
static const char indent_chunk_str[] = " ";
// length of indent_chunk_str excluding its NUL terminator
static const u32 indent_chunk_size = (u32)(SIZEOF_ARRAY(indent_chunk_str) - 1);
u32 indent = r->bnest * 4;
while (indent > indent_chunk_size) {
WBufAppendBytes(r->outbuf, indent_chunk_str, indent_chunk_size);
indent -= indent_chunk_size;
}
// here indent <= indent_chunk_size, so the read stays within indent_chunk_str
if (indent > 0) {
WBufAppendBytes(r->outbuf, indent_chunk_str, indent);
}
}
static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
for (u32 i = 0; attr->substr_offsets[i] < attr->size; i++) {
@ -359,13 +271,29 @@ static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
}
}
// Begins a new object member: emits a ',' separator, a line break with the
// current indentation (via writeNewline) and then the key in the form "rawkey":
// rawkey is copied verbatim (rawkeyLen bytes); no JSON escaping is applied.
static void writeKey(JsonFormatter* r, const char* rawkey, size_t rawkeyLen) {
  WBuf* out = r->outbuf;

  WBufAppendc(out, ',');
  writeNewline(r);

  WBufAppendc(out, '"');
  WBufAppendBytes(out, rawkey, rawkeyLen);
  WBufAppendCStr(out, "\":");
}
// Opens a JSON object and writes its type tag, i.e. the text
//   { "_": "<typename>"
// The object is left open so that the caller can append further members.
// typename is copied verbatim (typenamelen bytes); no JSON escaping is applied.
static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
  WBuf* out = r->outbuf;

  WBufAppendCStr(out, "{ \"_\": \"");
  WBufAppendBytes(out, typename, typenamelen);
  WBufAppendc(out, '"');
}
static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
static const MD_CHAR* head[6] = { "h1", "h2", "h3", "h4", "h5", "h6" };
JsonFormatter* r = (JsonFormatter*) userdata;
const char* typename = "";
size_t typenamelen = 0;
#define WRITE_TYPE_START(name) writeTypeStart(r, (name), strlen((name)))
writeNewline(r);
r->bnest++;
#define WRITE_TYPE_START(name) \
writeTypeStart(r, (name), strlen((name)))
switch (type) {
case MD_BLOCK_DOC: WRITE_TYPE_START("doc"); break;
@ -380,7 +308,9 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
case MD_BLOCK_HR: WRITE_TYPE_START("hr"); break;
case MD_BLOCK_H: {
WRITE_TYPE_START(head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]);
WRITE_TYPE_START("h");
WBufAppendCStr(r->outbuf, ", \"level\": ");
WBufAppendU32(r->outbuf, ((MD_BLOCK_H_DETAIL*)detail)->level, 10);
break;
}
@ -443,28 +373,30 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
break;
}
// static void
// render_open_td_block(MD_RENDER_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
// {
// RENDER_LITERAL(r, "<");
// RENDER_LITERAL(r, cell_type);
// switch(det->align) {
// case MD_ALIGN_LEFT: RENDER_LITERAL(r, " align=\"left\">"); break;
// case MD_ALIGN_CENTER: RENDER_LITERAL(r, " align=\"center\">"); break;
// case MD_ALIGN_RIGHT: RENDER_LITERAL(r, " align=\"right\">"); break;
// default: RENDER_LITERAL(r, ">"); break;
// }
}
WBufAppendCStr(r->outbuf, ", \"children\":[\n ");
WBufAppendCStr(r->outbuf, ", \"children\": [");
return 0;
}
static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
JsonFormatter* r = (JsonFormatter*)userdata;
WBufAppendCStr(r->outbuf, "]},\n");
r->bnest--;
if (*(r->outbuf->ptr-1) == ',') {
// undo trailing comma
// e.g.
//
// "1,2,3,"
// ^
// "1,2,3"
// ^
//
r->outbuf->ptr--;
}
writeNewline(r);
WBufAppendCStr(r->outbuf, "]},");
return 0;
}
@ -475,6 +407,7 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
switch(type) {
case MD_SPAN_EM: WBufAppendCStr(r->outbuf, "<em>"); break;
case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, "<b>"); break;
case MD_SPAN_U: WBufAppendCStr(r->outbuf, "<u>"); break;
case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, "<code>"); break;
@ -493,6 +426,7 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
switch(type) {
case MD_SPAN_EM: WBufAppendCStr(r->outbuf, "</em>"); break;
case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, "</b>"); break;
case MD_SPAN_U: WBufAppendCStr(r->outbuf, "</u>"); break;
case MD_SPAN_A: WBufAppendCStr(r->outbuf, "</a>"); break;
case MD_SPAN_IMG: /*noop, handled above*/ break;
case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, "</code>"); break;
@ -508,27 +442,65 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) {
JsonFormatter* r = (JsonFormatter*)userdata;
WBufAppendCStr(r->outbuf, ", \"");
switch (type) {
case MD_TEXT_NULLCHAR: WBufAppendCStr(r->outbuf, "\\0"); break;
case MD_TEXT_BR: WBufAppendCStr(r->outbuf, "<br>"); break;
case MD_TEXT_SOFTBR: WBufAppendc(r->outbuf, '\n'); break;
case MD_TEXT_HTML: render_text(r, text, size); break;
case MD_TEXT_ENTITY: writeDecodeXmlEntity(r, text, size); break;
default: writeJsonEscaped(r, text, size); break;
if (type == MD_TEXT_SOFTBR) {
// ignore soft break, i.e.
//
// Markdown:
// line1
// line2
//
// md4c emits: (line1, MD_TEXT_SOFTBR, line2)
//
return 0;
}
WBufAppendc(r->outbuf, '"');
writeNewline(r);
if (type == MD_TEXT_HTML) {
WBufAppendCStr(r->outbuf, "{\"_\":\"html\",\"content\":\"");
writeJsonEscaped(r, text, size);
WBufAppendCStr(r->outbuf, "\"}");
} else {
WBufAppendc(r->outbuf, '"');
switch (type) {
case MD_TEXT_NULLCHAR:
WBufAppendCStr(r->outbuf, "\\0");
break;
case MD_TEXT_BR:
WBufAppendCStr(r->outbuf, "\\n");
break;
case MD_TEXT_ENTITY:
writeDecodeXmlEntity(r, text, size);
break;
default:
writeJsonEscaped(r, text, size);
break;
}
WBufAppendc(r->outbuf, '"');
}
WBufAppendc(r->outbuf, ',');
return 0;
}
int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_flags) {
JsonFormatter render = { outbuf };
int fmt_json(
const MD_CHAR* input,
MD_SIZE inputlen,
WBuf* outbuf,
u32 parseFlags,
u32 _fmtFlags
) {
JsonFormatter render = {
.outbuf = outbuf,
.bnest = 0,
};
MD_PARSER parser = {
0,
parser_flags,
parseFlags,
enter_block_callback,
leave_block_callback,
enter_span_callback,
@ -538,5 +510,5 @@ int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_
NULL
};
return md_parse(input, input_size, &parser, (void*) &render);
return md_parse(input, inputlen, &parser, (void*)&render);
}

View file

@ -1,4 +1,6 @@
#pragma once
#include "wbuf.h"
int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags);
#define MD_JSON_FLAG_NONE 0
int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags);

View file

@ -3,18 +3,28 @@
#include "wlib.h"
#include "wbuf.h"
#include "fmt_html.h"
// #include "fmt_json.h"
#if MD_WITH_JSON
#include "fmt_json.h"
#endif
// these should be in sync with "OutputFlags" in md.js
typedef enum OutputFlags {
OutputFlagHTML = 1 << 0,
OutputFlagXHTML = 1 << 1,
} OutputFlags;
// Output formatter selected by the caller of parseUTF8.
// The numeric values must stay in sync with the Formatter* constants in md.js.
typedef enum Formatter {
// no formatter selected; parseUTF8 reports ERR_FORMAT ("invalid formatter")
FormatterNONE,
// HTML output via fmt_html
FormatterHTML,
// JSON output via fmt_json; only wired up when compiled with MD_WITH_JSON
FormatterJSON,
} Formatter;
typedef enum FormatFlags {
FormatFlagHTML = 1 << 0,
FormatFlagXHTML = 1 << 1,
FormatFlagJSON = 1 << 2,
} FormatFlags;
typedef enum ErrorCode {
ERR_NONE,
ERR_MD_PARSE,
ERR_OUTFLAGS,
ERR_FORMAT,
} ErrorCode;
@ -31,38 +41,51 @@ static WBuf outbuf;
export size_t parseUTF8(
const char* inbufptr,
u32 inbuflen,
u32 parser_flags,
OutputFlags outflags,
const char* inbufptr,
u32 inbuflen,
u32 parseFlags,
Formatter formatter,
u32 fmtflags,
const char** outptr
) {
dlog("parseUTF8 called with inbufptr=%p inbuflen=%u\n", inbufptr, inbuflen);
WBufReset(&outbuf);
WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations
if (outflags & OutputFlagHTML) {
WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations
int result = 0x7ffff;
u32 render_flags = 0;
if (outflags & OutputFlagXHTML) {
render_flags |= MD_HTML_FLAG_XHTML;
}
switch (formatter) {
if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags, render_flags) != 0) {
// fmt_html returns status of md_parse which only fails in extreme cases
// like when out of memory. md4c does not provide error codes or error messages.
WErrSet(ERR_MD_PARSE, "md parser error");
*outptr = 0;
return 0;
}
case FormatterHTML:
result = fmt_html(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags);
break;
case FormatterJSON:
#if MD_WITH_JSON
result = fmt_json(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags);
#endif
break;
case FormatterNONE:
break;
} // switch
if (result == 0x7ffff) {
WErrSet(ERR_FORMAT, "invalid formatter");
} else if (result != 0) {
// fmt_html returns status of md_parse which only fails in extreme cases
// like when out of memory. md4c does not provide error codes or error messages.
WErrSet(ERR_MD_PARSE, "parser error");
}
if (result == 0) {
*outptr = outbuf.start;
// dlog("outbuf =>\n%.*s\n", WBufLen(&outbuf), outbuf.start);
return WBufLen(&outbuf);
}
WErrSet(ERR_OUTFLAGS, "no output format set in output flags");
*outptr = 0;
return 0;
}

View file

@ -41,6 +41,15 @@ const OutputFlags = {
XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set)
}
// these should be in sync with "Formatter" in md.c
const FormatterNONE = 0
, FormatterHTML = 1
, FormatterJSON = 2
// html formatter flags (sync with fmt_html.h)
const MD_HTML_FLAG_XHTML = 1 << 0
export function parse(source, options) {
options = options || {}
@ -49,17 +58,17 @@ export function parse(source, options) {
options.parseFlags
)
let outputFlags = 0
switch (options.format) {
case "xhtml":
outputFlags |= OutputFlags.HTML | OutputFlags.XHTML
break
let formatter = FormatterHTML
let fmtFlags = 0
if (options.format) switch (options.format) {
case "html":
case undefined:
case null:
case "":
outputFlags |= OutputFlags.HTML
break
case "xhtml":
fmtFlags |= MD_HTML_FLAG_XHTML
break
case "json":
formatter = FormatterJSON
break
default:
@ -68,7 +77,7 @@ export function parse(source, options) {
let buf = typeof source == "string" ? utf8.encode(source) : source
let outbuf = withOutPtr(outptr => withTmpBytePtr(buf, (inptr, inlen) =>
_parseUTF8(inptr, inlen, parseFlags, outputFlags, outptr)
_parseUTF8(inptr, inlen, parseFlags, formatter, fmtFlags, outptr)
))
// check for error and throw if needed

View file

@ -3678,7 +3678,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
/* If we can be a closer, try to resolve with the preceding opener. */
if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
MD_MARK* opener = NULL;
int opener_index;
int opener_index = 0;
if(mark->ch == _T('*')) {
MD_MARKCHAIN* opener_chains[6];
@ -5654,7 +5654,7 @@ md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
return indent - total_indent;
}
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 };
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 };
/* Analyze type of the line and find some its properties. This serves as a
* main input for determining type and boundaries of a block. */

View file

@ -15,12 +15,8 @@ void WBufReset(WBuf* b) {
b->ptr = b->start;
}
inline size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size)
inline size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start
inline size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available
// grows buffer so that there is at least minspace available space
static void WBufGrow(WBuf* b, size_t minspace) {
void WBufGrow(WBuf* b, size_t minspace) {
// size_t avail = b->end - b->ptr;
size_t len = WBufLen(b); // store len before changing b
size_t cap = WBufCap(b);
@ -42,13 +38,6 @@ void WBufReserve(WBuf* b, size_t minspace) {
}
}
void WBufAppendc(WBuf* b, char c) {
if (WBufAvail(b) < 1) {
WBufGrow(b, 1);
}
*(b->ptr++) = c;
}
void WBufAppendBytes(WBuf* b, const void* bytes, size_t len) {
if (WBufAvail(b) < len) {
WBufGrow(b, len);
@ -162,6 +151,35 @@ void WBufAppendU32(WBuf* b, u32 n, u32 radix) {
}
// Out-of-line implementation behind WBufAppendUTF8Codepoint: appends codepoint
// to b encoded as UTF-8.
//
// Code points that have no well-formed UTF-8 encoding -- zero, values above
// U+10FFFF and the UTF-16 surrogate range U+D800..U+DFFF -- are replaced by
// U+FFFD (REPLACEMENT CHARACTER). Values <= 0x7f are written as a single byte
// so that a direct call never produces an overlong two-byte sequence.
void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint) {
  // unsigned char: these bytes are > 0x7f and do not fit a (possibly signed) char
  static const unsigned char replacement[] = { 0xef, 0xbf, 0xbd };

  // validate before encoding so that no invalid sequence is ever constructed
  if (codepoint == 0 || codepoint > 0x10ffff ||
      (codepoint >= 0xd800 && codepoint <= 0xdfff)) {
    WBufAppendBytes(b, replacement, sizeof(replacement));
    return;
  }

  unsigned char utf8[4];
  size_t n;
  if (codepoint <= 0x7f) {
    n = 1;
    utf8[0] = (unsigned char)codepoint;
  } else if (codepoint <= 0x7ff) {
    n = 2;
    utf8[0] = (unsigned char)(0xc0 | ((codepoint >> 6) & 0x1f));
    utf8[1] = (unsigned char)(0x80 | (codepoint & 0x3f));
  } else if (codepoint <= 0xffff) {
    n = 3;
    utf8[0] = (unsigned char)(0xe0 | ((codepoint >> 12) & 0xf));
    utf8[1] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3f));
    utf8[2] = (unsigned char)(0x80 | (codepoint & 0x3f));
  } else {
    n = 4;
    utf8[0] = (unsigned char)(0xf0 | ((codepoint >> 18) & 0x7));
    utf8[1] = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3f));
    utf8[2] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3f));
    utf8[3] = (unsigned char)(0x80 | (codepoint & 0x3f));
  }
  WBufAppendBytes(b, utf8, n);
}
// static void WBufAppendSlug(WBuf* b, const char* text) {
// size_t len = strlen(text);
// WBufReserve(b, len);

View file

@ -11,13 +11,13 @@ void WBufInit(WBuf*);
void WBufFree(WBuf*);
void WBufReset(WBuf*);
size_t WBufCap(WBuf*); // total capacity (size)
size_t WBufLen(WBuf*); // valid bytes at start
size_t WBufAvail(WBuf*); // bytes available
inline static size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size)
inline static size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start
inline static size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available
void WBufReserve(WBuf*, size_t minspace);
void WBufAppendc(WBuf*, char c);
static void WBufAppendc(WBuf*, char c);
void WBufAppendBytes(WBuf*, const void* bytes, size_t len);
void WBufAppendStr(WBuf*, const char* pch);
#define WBufAppendCStr(b, cstr) WBufAppendBytes((b), (cstr), strlen(cstr))
@ -27,3 +27,26 @@ void _WBufAppendHtml(WBuf*, const char* pch, bool isattr);
// append u32 integer n. radix must be in range [2-36]
void WBufAppendU32(WBuf*, u32 n, u32 radix);
static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint);
// grows buffer so that there is at least minspace available space
void WBufGrow(WBuf* b, size_t minspace);
// implementation of WBufAppendUTF8Codepoint
void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint);
// Appends codepoint to b encoded as UTF-8.
// Values <= 0x7f are appended inline as a single byte (fast path); everything
// else is handed to the out-of-line _WBufAppendUTF8Codepoint2.
inline static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) {
  if (codepoint > 0x7f) {
    // ISO C does not allow `return expr;` in a void function, even when expr
    // has type void, so call first and then return.
    _WBufAppendUTF8Codepoint2(b, codepoint);
    return;
  }
  WBufAppendc(b, (char)codepoint);
}
// Appends the single byte c at the buffer's write position (b->ptr) and
// advances it, asking WBufGrow for one more byte first when none is available.
inline static void WBufAppendc(WBuf* b, char c) {
  if (WBufAvail(b) == 0) {
    WBufGrow(b, 1);
  }
  *b->ptr = c;
  b->ptr++;
}

View file

@ -1,16 +1,29 @@
const package = require("./package.json")
const outdir = debug ? builddir : "dist"
cflags = cflags.concat([
"-std=c11",
"-Wall",
"-Wuninitialized",
"-Wmissing-field-initializers",
"-Wconditional-uninitialized",
"-Wno-nullability-completeness",
"-Wno-unused-function",
"-fcolor-diagnostics",
])
const m = {
jsentry: "src/md.js",
sources: [
"src/wlib.c",
"src/wbuf.c",
"src/md.c",
"src/md4c.c",
"src/fmt_html.c",
// "src/fmt_json.c",
],
debug ? "src/fmt_json.c" : "",
].filter(s => !!s),
cflags: [
"-DMD4C_USE_UTF8",
].concat(debug ? [
@ -20,6 +33,7 @@ const m = {
"-DSAFE_HEAP=1", // emcc
"-DSTACK_OVERFLOW_CHECK=1", // emcc
"-DDEMANGLE_SUPPORT=1", // emcc
"-DMD_WITH_JSON=1", // enable WIP json formatter
] : [
// release flags
]),