This commit is contained in:
Rasmus Andersson 2020-10-18 09:23:42 -07:00
parent cb877fce88
commit bef16e0aa0
12 changed files with 376 additions and 265 deletions

6
example/example-json.js Normal file
View file

@ -0,0 +1,6 @@
// Example: read example2.md from this directory, parse it with the debug
// build of the markdown module using the "json" output format, and print
// the result.
const fs = require("fs")
const md = require("../build/debug/markdown.node.js")

const sourcePath = `${__dirname}/example2.md`
const json = md.parse(fs.readFileSync(sourcePath), { format: "json" })

console.log(json)

44
example/example2.md Normal file
View file

@ -0,0 +1,44 @@
# H1
This is a paragraph
## H2
This is a paragraph
## Another
This is a paragraph with style *italic* _italic_ **bold** __bold__
![image](https://rsms.me/raster/examples/image1.jpg)
![](https://rsms.me/image.png?without-alt)
*Hello [link](https://rsms.me/) lol*
Hello [*link*](https://rsms.me/) lol "cat"
Hello from *[link](https://rsms.me/)* to __everyone__ `reading this`
Here's an [**important** anchor link](#example).
line 1
line 2
XML & html "entities"
&
&
&
&
&
&


## Lists
- Unordered
* Lists
+ Of mixed type
1. Ordered
2. Lists
4. Numbers are ignored

View file

@ -1,5 +1,5 @@
/* /*
* md4c modified for mdjs. * md4c modified for markdown-wasm.
* Original source code is licensed as follows: * Original source code is licensed as follows:
* *
* Copyright (c) 2016-2019 Martin Mitas * Copyright (c) 2016-2019 Martin Mitas
@ -439,14 +439,14 @@ int fmt_html(
const MD_CHAR* input, const MD_CHAR* input,
MD_SIZE input_size, MD_SIZE input_size,
WBuf* outbuf, WBuf* outbuf,
u32 parser_flags, u32 parseFlags,
u32 render_flags u32 fmtFlags
) { ) {
HtmlRenderer render = { outbuf, 0, 0, render_flags }; HtmlRenderer render = { outbuf, 0, 0, fmtFlags };
MD_PARSER parser = { MD_PARSER parser = {
0, 0,
parser_flags, parseFlags,
enter_block_callback, enter_block_callback,
leave_block_callback, leave_block_callback,
enter_span_callback, enter_span_callback,

View file

@ -1,6 +1,6 @@
#pragma once #pragma once
#include "wbuf.h" #include "wbuf.h"
#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g. <br>, generate <br/> #define MD_HTML_FLAG_XHTML (1 << 0) // instead of e.g. <br>, generate <br/>
int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags, u32 renderFlags); int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags);

View file

@ -4,20 +4,17 @@
#include "fmt_json.h" #include "fmt_json.h"
#include "md4c.h" #include "md4c.h"
// #include "md4c_render_html.h"
// #include "entity.h"
// JSON formatter
// //
// // -- WORK IN PROGRESS --
// -------------- WORK IN PROGRESS
//
// //
#ifdef _WIN32 #ifdef _WIN32
#define snprintf _snprintf #define snprintf _snprintf
#endif #endif
// dlog
#ifndef DEBUG #ifndef DEBUG
#define DEBUG 1 #define DEBUG 1
#endif #endif
@ -31,6 +28,7 @@
typedef struct JsonFormatter_st { typedef struct JsonFormatter_st {
WBuf* outbuf; WBuf* outbuf;
u32 bnest; // block nesting level
} JsonFormatter; } JsonFormatter;
@ -39,61 +37,49 @@ typedef struct JsonFormatter_st {
#define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z') #define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z')
#define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch)) #define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
#define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0]))
// static inline void render_text(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
// // r->process_output(text, size, r->userdata);
// WBufAppendBytes(r->outbuf, text, size);
// }
#define render_text(f, textptr, textlen) \ #define render_text(f, textptr, textlen) \
WBufAppendBytes((r)->outbuf, (textptr), (textlen)) WBufAppendBytes((r)->outbuf, (textptr), (textlen))
// #define RENDER_LITERAL(r, literal) \
// WBufAppendBytes((r)->outbuf, (literal), (MD_SIZE)strlen(literal))
#define JSON_SUB_LEN 2
static char jsonEscapeMap[256]; static const char* jsonEscapeMap[256];
static void __attribute__((constructor)) init() { static void __attribute__((constructor)) init() {
jsonEscapeMap[(unsigned char)'"'] = 1; // important: Values must all be exactly JSON_SUB_LEN bytes long
jsonEscapeMap[(unsigned char)'\n'] = 1; jsonEscapeMap[(unsigned char)'"'] = "\\\"";
jsonEscapeMap[(unsigned char)'\r'] = 1; jsonEscapeMap[(unsigned char)'\n'] = "\\n";
jsonEscapeMap[(unsigned char)'\t'] = 1; jsonEscapeMap[(unsigned char)'\r'] = "\\r";
jsonEscapeMap[(unsigned char)'\t'] = "\\t";
} }
// #define JSON_BYTE_NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0)
#define JSON_ESCAPE_MAP(ch) jsonEscapeMap[(unsigned char)(ch)]
static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) { static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
MD_OFFSET beg = 0; MD_OFFSET beg = 0;
MD_OFFSET off = 0; MD_OFFSET off = 0;
#define NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0) while (1) {
const char* sub = NULL;
while(1) { while (off < size) {
/* Optimization: Use some loop unrolling. */ sub = JSON_ESCAPE_MAP(data[off]);
while ( if (sub != NULL) {
off + 3 < size && break;
!NEED_ESCAPE(data[off+0]) && }
!NEED_ESCAPE(data[off+1]) &&
!NEED_ESCAPE(data[off+2]) &&
!NEED_ESCAPE(data[off+3])
) {
off += 4;
}
while (off < size && !NEED_ESCAPE(data[off])) {
off++; off++;
} }
if (off > beg) { if (off > beg) {
// in-between
WBufAppendBytes(r->outbuf, data + beg, off - beg); WBufAppendBytes(r->outbuf, data + beg, off - beg);
} }
if (off < size) { if (sub) {
switch (data[off]) { WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
case '"': WBufAppendCStr(r->outbuf, "\\\""); break;
case '\n': WBufAppendCStr(r->outbuf, "\\n"); break;
case '\r': WBufAppendCStr(r->outbuf, "\\r"); break;
case '\t': WBufAppendCStr(r->outbuf, "\\t"); break;
}
off++; off++;
} else { } else {
break; break;
@ -101,14 +87,10 @@ static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size
beg = off; beg = off;
} }
#undef NEED_ESCAPE
} }
static void static void render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
{
static const MD_CHAR hex_chars[] = "0123456789ABCDEF"; static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
MD_OFFSET beg = 0; MD_OFFSET beg = 0;
MD_OFFSET off = 0; MD_OFFSET off = 0;
@ -144,56 +126,21 @@ render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
} }
} }
static unsigned static unsigned hex_val(char ch) {
hex_val(char ch) if ('0' <= ch && ch <= '9') {
{
if('0' <= ch && ch <= '9')
return ch - '0'; return ch - '0';
if('A' <= ch && ch <= 'Z') }
if ('A' <= ch && ch <= 'Z') {
return ch - 'A' + 10; return ch - 'A' + 10;
else }
return ch - 'a' + 10; return ch - 'a' + 10;
} }
static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) { // Translate entity to its UTF-8 equivalent, or output the verbatim one
if (codepoint <= 0x7f) { // if such entity is unknown (or if the translation is disabled).
WBufAppendc(b, (char)codepoint);
return;
}
unsigned char utf8[4];
size_t n;
if (codepoint <= 0x7ff) {
n = 2;
utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
} else if (codepoint <= 0xffff) {
n = 3;
utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
} else {
n = 4;
utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
}
if (0 < codepoint && codepoint <= 0x10ffff) {
WBufAppendBytes(b, (const char*)utf8, n);
} else {
static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char));
}
}
/* Translate entity to its UTF-8 equivalent, or output the verbatim one
* if such entity is unknown (or if the translation is disabled). */
static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) { static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
if (size > 3 && text[1] == '#') { if (size > 3 && text[1] == '#') {
unsigned codepoint = 0; unsigned codepoint = 0;
if(text[2] == 'x' || text[2] == 'X') { if(text[2] == 'x' || text[2] == 'X') {
// Hexadecimal entity (e.g. "&#x1234abcd;")). // Hexadecimal entity (e.g. "&#x1234abcd;")).
for (MD_SIZE i = 3; i < size-1; i++) { for (MD_SIZE i = 3; i < size-1; i++) {
@ -205,17 +152,41 @@ static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE
codepoint = 10 * codepoint + (text[i] - '0'); codepoint = 10 * codepoint + (text[i] - '0');
} }
} }
if (codepoint <= 0xFF) {
WBufAppendUTF8Codepoint(r->outbuf, codepoint); const char* sub = JSON_ESCAPE_MAP(codepoint);
if (sub) {
// predefined escape code, e.g. "\n"
WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
} else {
// verbatim
WBufAppendUTF8Codepoint(r->outbuf, codepoint);
}
} else {
// e.g. \uD87E
WBufAppendCStr(r->outbuf, "\\u");
if (codepoint <= 0xF) {
WBufAppendCStr(r->outbuf, "000");
} else if (codepoint <= 0xFF) {
WBufAppendCStr(r->outbuf, "00");
} else if (codepoint <= 0xFFF) {
WBufAppendCStr(r->outbuf, "0");
}
WBufAppendU32(r->outbuf, codepoint, 16);
}
} else { } else {
WBufAppendBytes(r->outbuf, text, size); // named entity
// We could do a lookup here but it would increase the WASM module binary size by
// at least 20kB, so for now, let's keep it simple and just include it verbatim until we
// can do something fancy like a compressed b-tree.
writeJsonEscaped(r, text, size);
} }
} }
static void static void render_attribute(
render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr, JsonFormatter* r,
void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE)) const MD_ATTRIBUTE* attr,
{ void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE)
) {
int i; int i;
for(i = 0; attr->substr_offsets[i] < attr->size; i++) { for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
@ -233,68 +204,7 @@ render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
} }
static void static void render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det) {
render_open_ol_block(JsonFormatter* r, const MD_BLOCK_OL_DETAIL* det)
{
char buf[64];
if(det->start == 1) {
WBufAppendCStr(r->outbuf, "<ol>\n");
return;
}
snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
WBufAppendCStr(r->outbuf, buf);
}
static void
render_open_li_block(JsonFormatter* r, const MD_BLOCK_LI_DETAIL* det)
{
if(det->is_task) {
WBufAppendCStr(r->outbuf,
"<li class=\"task-list-item\">"
"<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
if (det->task_mark == 'x' || det->task_mark == 'X') {
WBufAppendCStr(r->outbuf, " checked");
}
WBufAppendc(r->outbuf, '>');
} else {
WBufAppendCStr(r->outbuf, "<li>");
}
}
static void
render_open_code_block(JsonFormatter* r, const MD_BLOCK_CODE_DETAIL* det)
{
WBufAppendCStr(r->outbuf, "<pre><code");
/* If known, output the HTML 5 attribute class="language-LANGNAME". */
if(det->lang.text != NULL) {
WBufAppendCStr(r->outbuf, " class=\"language-");
render_attribute(r, &det->lang, writeJsonEscaped);
WBufAppendc(r->outbuf, '"');
}
WBufAppendc(r->outbuf, '>');
}
static void
render_open_td_block(JsonFormatter* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
{
WBufAppendc(r->outbuf, '<');
WBufAppendCStr(r->outbuf, cell_type);
switch (det->align) {
case MD_ALIGN_LEFT: WBufAppendCStr(r->outbuf, " align=\"left\">"); break;
case MD_ALIGN_CENTER: WBufAppendCStr(r->outbuf, " align=\"center\">"); break;
case MD_ALIGN_RIGHT: WBufAppendCStr(r->outbuf, " align=\"right\">"); break;
default: WBufAppendCStr(r->outbuf, ">"); break;
}
}
static void
render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
{
WBufAppendCStr(r->outbuf, "<a href=\""); WBufAppendCStr(r->outbuf, "<a href=\"");
render_attribute(r, &det->href, render_url_escaped); render_attribute(r, &det->href, render_url_escaped);
@ -306,18 +216,14 @@ render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
WBufAppendCStr(r->outbuf, "\">"); WBufAppendCStr(r->outbuf, "\">");
} }
static void static void render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
{
WBufAppendCStr(r->outbuf, "<img src=\""); WBufAppendCStr(r->outbuf, "<img src=\"");
render_attribute(r, &det->src, render_url_escaped); render_attribute(r, &det->src, render_url_escaped);
WBufAppendCStr(r->outbuf, "\" alt=\""); WBufAppendCStr(r->outbuf, "\" alt=\"");
} }
static void static void render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
{
if(det->title.text != NULL) { if(det->title.text != NULL) {
WBufAppendCStr(r->outbuf, "\" title=\""); WBufAppendCStr(r->outbuf, "\" title=\"");
render_attribute(r, &det->title, writeJsonEscaped); render_attribute(r, &det->title, writeJsonEscaped);
@ -326,9 +232,7 @@ render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
WBufAppendCStr(r->outbuf, "\">"); WBufAppendCStr(r->outbuf, "\">");
} }
static void static void render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det) {
render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
{
WBufAppendCStr(r->outbuf, "<x-wikilink data-target=\""); WBufAppendCStr(r->outbuf, "<x-wikilink data-target=\"");
render_attribute(r, &det->target, writeJsonEscaped); render_attribute(r, &det->target, writeJsonEscaped);
WBufAppendCStr(r->outbuf, "\">"); WBufAppendCStr(r->outbuf, "\">");
@ -338,12 +242,20 @@ render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) { static void writeNewline(JsonFormatter* r) {
WBufAppendCStr(r->outbuf, "{\"_\":\""); WBufAppendc(r->outbuf, '\n');
WBufAppendBytes(r->outbuf, typename, typenamelen);
WBufAppendc(r->outbuf, '"');
}
static const char indent_chunk_str[] = " ";
static const u32 indent_chunk_size = (u32)(SIZEOF_ARRAY(indent_chunk_str) - 1);
u32 indent = r->bnest * 4;
while (indent > indent_chunk_size) {
WBufAppendBytes(r->outbuf, indent_chunk_str, indent_chunk_size);
indent -= indent_chunk_size;
}
if (indent > 0) {
WBufAppendBytes(r->outbuf, indent_chunk_str, indent);
}
}
static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) { static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
for (u32 i = 0; attr->substr_offsets[i] < attr->size; i++) { for (u32 i = 0; attr->substr_offsets[i] < attr->size; i++) {
@ -359,13 +271,29 @@ static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
} }
} }
// Writes the start of an additional object member: a separating comma, a line
// break (via writeNewline), then the key in double quotes followed by a colon.
// rawkey is copied verbatim (rawkeyLen bytes) without any JSON escaping.
// Because a comma is always written first, this is only suitable for members
// that follow an earlier one.
static void writeKey(JsonFormatter* r, const char* rawkey, size_t rawkeyLen) {
  static const char keyEnd[] = "\":";
  WBuf* out = r->outbuf;

  WBufAppendc(out, ',');
  writeNewline(r);

  WBufAppendc(out, '"');
  WBufAppendBytes(out, rawkey, rawkeyLen);
  WBufAppendBytes(out, keyEnd, sizeof(keyEnd) - 1);
}
// Opens a JSON object and writes its type tag as the first member, producing
//   { "_": "<typename>"
// The object is left open (no closing brace is written here). typename is
// copied verbatim (typenamelen bytes) without any JSON escaping.
static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
  static const char prefix[] = "{ \"_\": \"";
  WBuf* out = r->outbuf;

  WBufAppendBytes(out, prefix, sizeof(prefix) - 1);
  WBufAppendBytes(out, typename, typenamelen);
  WBufAppendc(out, '"');
}
static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) { static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
static const MD_CHAR* head[6] = { "h1", "h2", "h3", "h4", "h5", "h6" };
JsonFormatter* r = (JsonFormatter*) userdata; JsonFormatter* r = (JsonFormatter*) userdata;
const char* typename = "";
size_t typenamelen = 0; writeNewline(r);
#define WRITE_TYPE_START(name) writeTypeStart(r, (name), strlen((name))) r->bnest++;
#define WRITE_TYPE_START(name) \
writeTypeStart(r, (name), strlen((name)))
switch (type) { switch (type) {
case MD_BLOCK_DOC: WRITE_TYPE_START("doc"); break; case MD_BLOCK_DOC: WRITE_TYPE_START("doc"); break;
@ -380,7 +308,9 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
case MD_BLOCK_HR: WRITE_TYPE_START("hr"); break; case MD_BLOCK_HR: WRITE_TYPE_START("hr"); break;
case MD_BLOCK_H: { case MD_BLOCK_H: {
WRITE_TYPE_START(head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); WRITE_TYPE_START("h");
WBufAppendCStr(r->outbuf, ", \"level\": ");
WBufAppendU32(r->outbuf, ((MD_BLOCK_H_DETAIL*)detail)->level, 10);
break; break;
} }
@ -443,28 +373,30 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
break; break;
} }
// static void
// render_open_td_block(MD_RENDER_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
// {
// RENDER_LITERAL(r, "<");
// RENDER_LITERAL(r, cell_type);
// switch(det->align) {
// case MD_ALIGN_LEFT: RENDER_LITERAL(r, " align=\"left\">"); break;
// case MD_ALIGN_CENTER: RENDER_LITERAL(r, " align=\"center\">"); break;
// case MD_ALIGN_RIGHT: RENDER_LITERAL(r, " align=\"right\">"); break;
// default: RENDER_LITERAL(r, ">"); break;
// }
} }
WBufAppendCStr(r->outbuf, ", \"children\":[\n "); WBufAppendCStr(r->outbuf, ", \"children\": [");
return 0; return 0;
} }
static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) { static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
JsonFormatter* r = (JsonFormatter*)userdata; JsonFormatter* r = (JsonFormatter*)userdata;
WBufAppendCStr(r->outbuf, "]},\n"); r->bnest--;
if (*(r->outbuf->ptr-1) == ',') {
// undo trailing comma
// e.g.
//
// "1,2,3,"
// ^
// "1,2,3"
// ^
//
r->outbuf->ptr--;
}
writeNewline(r);
WBufAppendCStr(r->outbuf, "]},");
return 0; return 0;
} }
@ -475,6 +407,7 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
switch(type) { switch(type) {
case MD_SPAN_EM: WBufAppendCStr(r->outbuf, "<em>"); break; case MD_SPAN_EM: WBufAppendCStr(r->outbuf, "<em>"); break;
case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, "<b>"); break; case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, "<b>"); break;
case MD_SPAN_U: WBufAppendCStr(r->outbuf, "<u>"); break;
case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break; case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break; case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, "<code>"); break; case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, "<code>"); break;
@ -493,6 +426,7 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
switch(type) { switch(type) {
case MD_SPAN_EM: WBufAppendCStr(r->outbuf, "</em>"); break; case MD_SPAN_EM: WBufAppendCStr(r->outbuf, "</em>"); break;
case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, "</b>"); break; case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, "</b>"); break;
case MD_SPAN_U: WBufAppendCStr(r->outbuf, "</u>"); break;
case MD_SPAN_A: WBufAppendCStr(r->outbuf, "</a>"); break; case MD_SPAN_A: WBufAppendCStr(r->outbuf, "</a>"); break;
case MD_SPAN_IMG: /*noop, handled above*/ break; case MD_SPAN_IMG: /*noop, handled above*/ break;
case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, "</code>"); break; case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, "</code>"); break;
@ -508,27 +442,65 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) { static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) {
JsonFormatter* r = (JsonFormatter*)userdata; JsonFormatter* r = (JsonFormatter*)userdata;
WBufAppendCStr(r->outbuf, ", \""); if (type == MD_TEXT_SOFTBR) {
// ignore soft break, i.e.
switch (type) { //
case MD_TEXT_NULLCHAR: WBufAppendCStr(r->outbuf, "\\0"); break; // Markdown:
case MD_TEXT_BR: WBufAppendCStr(r->outbuf, "<br>"); break; // line1
case MD_TEXT_SOFTBR: WBufAppendc(r->outbuf, '\n'); break; // line2
case MD_TEXT_HTML: render_text(r, text, size); break; //
case MD_TEXT_ENTITY: writeDecodeXmlEntity(r, text, size); break; // md4c emits: (line1, MD_TEXT_SOFTBR, line2)
default: writeJsonEscaped(r, text, size); break; //
return 0;
} }
WBufAppendc(r->outbuf, '"'); writeNewline(r);
if (type == MD_TEXT_HTML) {
WBufAppendCStr(r->outbuf, "{\"_\":\"html\",\"content\":\"");
writeJsonEscaped(r, text, size);
WBufAppendCStr(r->outbuf, "\"}");
} else {
WBufAppendc(r->outbuf, '"');
switch (type) {
case MD_TEXT_NULLCHAR:
WBufAppendCStr(r->outbuf, "\\0");
break;
case MD_TEXT_BR:
WBufAppendCStr(r->outbuf, "\\n");
break;
case MD_TEXT_ENTITY:
writeDecodeXmlEntity(r, text, size);
break;
default:
writeJsonEscaped(r, text, size);
break;
}
WBufAppendc(r->outbuf, '"');
}
WBufAppendc(r->outbuf, ',');
return 0; return 0;
} }
int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_flags) { int fmt_json(
JsonFormatter render = { outbuf }; const MD_CHAR* input,
MD_SIZE inputlen,
WBuf* outbuf,
u32 parseFlags,
u32 _fmtFlags
) {
JsonFormatter render = {
.outbuf = outbuf,
.bnest = 0,
};
MD_PARSER parser = { MD_PARSER parser = {
0, 0,
parser_flags, parseFlags,
enter_block_callback, enter_block_callback,
leave_block_callback, leave_block_callback,
enter_span_callback, enter_span_callback,
@ -538,5 +510,5 @@ int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_
NULL NULL
}; };
return md_parse(input, input_size, &parser, (void*) &render); return md_parse(input, inputlen, &parser, (void*)&render);
} }

View file

@ -1,4 +1,6 @@
#pragma once #pragma once
#include "wbuf.h" #include "wbuf.h"
int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags); #define MD_JSON_FLAG_NONE 0
int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags);

View file

@ -3,18 +3,28 @@
#include "wlib.h" #include "wlib.h"
#include "wbuf.h" #include "wbuf.h"
#include "fmt_html.h" #include "fmt_html.h"
// #include "fmt_json.h"
#if MD_WITH_JSON
#include "fmt_json.h"
#endif
// these should be in sync with "OutputFlags" in md.js // these should be in sync with "OutputFlags" in md.js
typedef enum OutputFlags { typedef enum Formatter {
OutputFlagHTML = 1 << 0, FormatterNONE,
OutputFlagXHTML = 1 << 1, FormatterHTML,
} OutputFlags; FormatterJSON,
} Formatter;
typedef enum FormatFlags {
FormatFlagHTML = 1 << 0,
FormatFlagXHTML = 1 << 1,
FormatFlagJSON = 1 << 2,
} FormatFlags;
typedef enum ErrorCode { typedef enum ErrorCode {
ERR_NONE, ERR_NONE,
ERR_MD_PARSE, ERR_MD_PARSE,
ERR_OUTFLAGS, ERR_FORMAT,
} ErrorCode; } ErrorCode;
@ -31,38 +41,51 @@ static WBuf outbuf;
export size_t parseUTF8( export size_t parseUTF8(
const char* inbufptr, const char* inbufptr,
u32 inbuflen, u32 inbuflen,
u32 parser_flags, u32 parseFlags,
OutputFlags outflags, Formatter formatter,
u32 fmtflags,
const char** outptr const char** outptr
) { ) {
dlog("parseUTF8 called with inbufptr=%p inbuflen=%u\n", inbufptr, inbuflen); dlog("parseUTF8 called with inbufptr=%p inbuflen=%u\n", inbufptr, inbuflen);
WBufReset(&outbuf); WBufReset(&outbuf);
WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations
if (outflags & OutputFlagHTML) { int result = 0x7ffff;
WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations
u32 render_flags = 0; switch (formatter) {
if (outflags & OutputFlagXHTML) {
render_flags |= MD_HTML_FLAG_XHTML;
}
if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags, render_flags) != 0) { case FormatterHTML:
// fmt_html returns status of md_parse which only fails in extreme cases result = fmt_html(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags);
// like when out of memory. md4c does not provide error codes or error messages. break;
WErrSet(ERR_MD_PARSE, "md parser error");
*outptr = 0;
return 0;
}
case FormatterJSON:
#if MD_WITH_JSON
result = fmt_json(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags);
#endif
break;
case FormatterNONE:
break;
} // switch
if (result == 0x7ffff) {
WErrSet(ERR_FORMAT, "invalid formatter");
} else if (result != 0) {
// fmt_html returns status of md_parse which only fails in extreme cases
// like when out of memory. md4c does not provide error codes or error messages.
WErrSet(ERR_MD_PARSE, "parser error");
}
if (result == 0) {
*outptr = outbuf.start; *outptr = outbuf.start;
// dlog("outbuf =>\n%.*s\n", WBufLen(&outbuf), outbuf.start); // dlog("outbuf =>\n%.*s\n", WBufLen(&outbuf), outbuf.start);
return WBufLen(&outbuf); return WBufLen(&outbuf);
} }
WErrSet(ERR_OUTFLAGS, "no output format set in output flags");
*outptr = 0; *outptr = 0;
return 0; return 0;
} }

View file

@ -41,6 +41,15 @@ const OutputFlags = {
XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set) XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set)
} }
// Formatter identifiers. Keep in sync with the "Formatter" enum in md.c
const FormatterNONE = 0
const FormatterHTML = 1
const FormatterJSON = 2

// HTML formatter flags. Keep in sync with fmt_html.h
const MD_HTML_FLAG_XHTML = 1 << 0
export function parse(source, options) { export function parse(source, options) {
options = options || {} options = options || {}
@ -49,17 +58,17 @@ export function parse(source, options) {
options.parseFlags options.parseFlags
) )
let outputFlags = 0 let formatter = FormatterHTML
switch (options.format) { let fmtFlags = 0
case "xhtml":
outputFlags |= OutputFlags.HTML | OutputFlags.XHTML
break
if (options.format) switch (options.format) {
case "html": case "html":
case undefined: break
case null: case "xhtml":
case "": fmtFlags |= MD_HTML_FLAG_XHTML
outputFlags |= OutputFlags.HTML break
case "json":
formatter = FormatterJSON
break break
default: default:
@ -68,7 +77,7 @@ export function parse(source, options) {
let buf = typeof source == "string" ? utf8.encode(source) : source let buf = typeof source == "string" ? utf8.encode(source) : source
let outbuf = withOutPtr(outptr => withTmpBytePtr(buf, (inptr, inlen) => let outbuf = withOutPtr(outptr => withTmpBytePtr(buf, (inptr, inlen) =>
_parseUTF8(inptr, inlen, parseFlags, outputFlags, outptr) _parseUTF8(inptr, inlen, parseFlags, formatter, fmtFlags, outptr)
)) ))
// check for error and throw if needed // check for error and throw if needed

View file

@ -3678,7 +3678,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
/* If we can be a closer, try to resolve with the preceding opener. */ /* If we can be a closer, try to resolve with the preceding opener. */
if(mark->flags & MD_MARK_POTENTIAL_CLOSER) { if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
MD_MARK* opener = NULL; MD_MARK* opener = NULL;
int opener_index; int opener_index = 0;
if(mark->ch == _T('*')) { if(mark->ch == _T('*')) {
MD_MARKCHAIN* opener_chains[6]; MD_MARKCHAIN* opener_chains[6];
@ -5654,7 +5654,7 @@ md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
return indent - total_indent; return indent - total_indent;
} }
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 }; static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 };
/* Analyze type of the line and find some its properties. This serves as a /* Analyze type of the line and find some its properties. This serves as a
* main input for determining type and boundaries of a block. */ * main input for determining type and boundaries of a block. */

View file

@ -15,12 +15,8 @@ void WBufReset(WBuf* b) {
b->ptr = b->start; b->ptr = b->start;
} }
inline size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size)
inline size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start
inline size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available
// grows buffer so that there is at least minspace available space // grows buffer so that there is at least minspace available space
static void WBufGrow(WBuf* b, size_t minspace) { void WBufGrow(WBuf* b, size_t minspace) {
// size_t avail = b->end - b->ptr; // size_t avail = b->end - b->ptr;
size_t len = WBufLen(b); // store len before changing b size_t len = WBufLen(b); // store len before changing b
size_t cap = WBufCap(b); size_t cap = WBufCap(b);
@ -42,13 +38,6 @@ void WBufReserve(WBuf* b, size_t minspace) {
} }
} }
void WBufAppendc(WBuf* b, char c) {
if (WBufAvail(b) < 1) {
WBufGrow(b, 1);
}
*(b->ptr++) = c;
}
void WBufAppendBytes(WBuf* b, const void* bytes, size_t len) { void WBufAppendBytes(WBuf* b, const void* bytes, size_t len) {
if (WBufAvail(b) < len) { if (WBufAvail(b) < len) {
WBufGrow(b, len); WBufGrow(b, len);
@ -162,6 +151,35 @@ void WBufAppendU32(WBuf* b, u32 n, u32 radix) {
} }
// Out-of-line part of WBufAppendUTF8Codepoint: appends the UTF-8 encoding of
// codepoint to b.
//
// Zero, the UTF-16 surrogate range U+D800..U+DFFF and values above U+10FFFF
// are written as U+FFFD REPLACEMENT CHARACTER (bytes EF BF BD) instead.
// Surrogates and out-of-range values have no well-formed UTF-8 encoding.
void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint) {
  // unsigned char: every byte is > 0x7f and would not fit a signed char
  static const unsigned char replacement[] = { 0xef, 0xbf, 0xbd };

  if (codepoint == 0 ||
      codepoint > 0x10ffff ||
      (0xd800 <= codepoint && codepoint <= 0xdfff))
  {
    WBufAppendBytes(b, replacement, sizeof(replacement));
    return;
  }

  unsigned char utf8[4];
  size_t n;

  if (codepoint <= 0x7f) {
    // Normally handled by the inline wrapper; encode as a single byte here so
    // a direct call can never produce an overlong two-byte sequence.
    n = 1;
    utf8[0] = (unsigned char)codepoint;
  } else if (codepoint <= 0x7ff) {
    n = 2;
    utf8[0] = (unsigned char)(0xc0 | ((codepoint >> 6) & 0x1f));
    utf8[1] = (unsigned char)(0x80 | ((codepoint >> 0) & 0x3f));
  } else if (codepoint <= 0xffff) {
    n = 3;
    utf8[0] = (unsigned char)(0xe0 | ((codepoint >> 12) & 0xf));
    utf8[1] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3f));
    utf8[2] = (unsigned char)(0x80 | ((codepoint >> 0) & 0x3f));
  } else {
    n = 4;
    utf8[0] = (unsigned char)(0xf0 | ((codepoint >> 18) & 0x7));
    utf8[1] = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3f));
    utf8[2] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3f));
    utf8[3] = (unsigned char)(0x80 | ((codepoint >> 0) & 0x3f));
  }

  WBufAppendBytes(b, utf8, n);
}
// static void WBufAppendSlug(WBuf* b, const char* text) { // static void WBufAppendSlug(WBuf* b, const char* text) {
// size_t len = strlen(text); // size_t len = strlen(text);
// WBufReserve(b, len); // WBufReserve(b, len);

View file

@ -11,13 +11,13 @@ void WBufInit(WBuf*);
void WBufFree(WBuf*); void WBufFree(WBuf*);
void WBufReset(WBuf*); void WBufReset(WBuf*);
size_t WBufCap(WBuf*); // total capacity (size) inline static size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size)
size_t WBufLen(WBuf*); // valid bytes at start inline static size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start
size_t WBufAvail(WBuf*); // bytes available inline static size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available
void WBufReserve(WBuf*, size_t minspace); void WBufReserve(WBuf*, size_t minspace);
void WBufAppendc(WBuf*, char c); static void WBufAppendc(WBuf*, char c);
void WBufAppendBytes(WBuf*, const void* bytes, size_t len); void WBufAppendBytes(WBuf*, const void* bytes, size_t len);
void WBufAppendStr(WBuf*, const char* pch); void WBufAppendStr(WBuf*, const char* pch);
#define WBufAppendCStr(b, cstr) WBufAppendBytes((b), (cstr), strlen(cstr)) #define WBufAppendCStr(b, cstr) WBufAppendBytes((b), (cstr), strlen(cstr))
@ -27,3 +27,26 @@ void _WBufAppendHtml(WBuf*, const char* pch, bool isattr);
// append u32 integer n. radix must be in range [2-36] // append u32 integer n. radix must be in range [2-36]
void WBufAppendU32(WBuf*, u32 n, u32 radix); void WBufAppendU32(WBuf*, u32 n, u32 radix);
static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint);
// grows buffer so that there is at least minspace available space
void WBufGrow(WBuf* b, size_t minspace);
// implementation of WBufAppendUTF8Codepoint
void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint);
// Appends codepoint to b encoded as UTF-8.
// Values up to 0x7f are appended inline as a single byte; everything else is
// delegated to the out-of-line _WBufAppendUTF8Codepoint2.
inline static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) {
  if (codepoint <= 0x7f) {
    WBufAppendc(b, (char)codepoint);
    return;
  }
  // Plain call instead of `return f();` -- ISO C does not allow a return
  // statement with an expression in a function returning void.
  _WBufAppendUTF8Codepoint2(b, codepoint);
}
// Appends the single byte c to b, growing the buffer first when no space is
// available.
inline static void WBufAppendc(WBuf* b, char c) {
  if (WBufAvail(b) == 0) {
    WBufGrow(b, 1);
  }
  *b->ptr = c;
  b->ptr++;
}

View file

@ -1,16 +1,29 @@
const package = require("./package.json") const package = require("./package.json")
const outdir = debug ? builddir : "dist" const outdir = debug ? builddir : "dist"
// Compiler flags added on top of the incoming cflags: C11 mode, a stricter
// set of warnings, and colored diagnostics.
const commonCflags = [
  "-std=c11",
  "-Wall",
  "-Wuninitialized",
  "-Wmissing-field-initializers",
  "-Wconditional-uninitialized",
  "-Wno-nullability-completeness",
  "-Wno-unused-function",
  "-fcolor-diagnostics",
]
cflags = cflags.concat(commonCflags)
const m = { const m = {
jsentry: "src/md.js", jsentry: "src/md.js",
sources: [ sources: [
"src/wlib.c", "src/wlib.c",
"src/wbuf.c", "src/wbuf.c",
"src/md.c", "src/md.c",
"src/md4c.c", "src/md4c.c",
"src/fmt_html.c", "src/fmt_html.c",
// "src/fmt_json.c", debug ? "src/fmt_json.c" : "",
], ].filter(s => !!s),
cflags: [ cflags: [
"-DMD4C_USE_UTF8", "-DMD4C_USE_UTF8",
].concat(debug ? [ ].concat(debug ? [
@ -20,6 +33,7 @@ const m = {
"-DSAFE_HEAP=1", // emcc "-DSAFE_HEAP=1", // emcc
"-DSTACK_OVERFLOW_CHECK=1", // emcc "-DSTACK_OVERFLOW_CHECK=1", // emcc
"-DDEMANGLE_SUPPORT=1", // emcc "-DDEMANGLE_SUPPORT=1", // emcc
"-DMD_WITH_JSON=1", // enable WIP json formatter
] : [ ] : [
// release flags // release flags
]), ]),