diff --git a/example/example-json.js b/example/example-json.js
new file mode 100644
index 0000000..0f53622
--- /dev/null
+++ b/example/example-json.js
@@ -0,0 +1,6 @@
+// Example: run example2.md through the JSON formatter and print the result.
+const fs = require("fs")
+const markdown = require("../build/debug/markdown.node.js")
+
+// The file is read as raw bytes and handed to parse() unchanged.
+const inputBytes = fs.readFileSync(`${__dirname}/example2.md`)
+const result = markdown.parse(inputBytes, { format: "json" })
+console.log(result)
diff --git a/example/example2.md b/example/example2.md
new file mode 100644
index 0000000..f757406
--- /dev/null
+++ b/example/example2.md
@@ -0,0 +1,44 @@
+# H1
+
+This is a paragraph
+
+## H2
+
+This is a paragraph
+
+## Another
+
+This is a paragraph with style *italic* _italic_ **bold** __bold__
+
+
+
+
+*Hello [link](https://rsms.me/) lol*
+
+Hello [*link*](https://rsms.me/) lol "cat"
+
+Hello from *[link](https://rsms.me/)* to __everyone__ `reading this`
+
+Here's an [**important** anchor link](#example).
+
+line 1
+line 2
+
+XML & html "entities"
+&
+&
+&
+&
+&
+&
+
+
+## Lists
+
+- Unordered
+* Lists
++ Of mixed type
+
+1. Ordered
+2. Lists
+4. Numbers are ignored
diff --git a/src/fmt_html.c b/src/fmt_html.c
index a2788f8..aad2d5f 100644
--- a/src/fmt_html.c
+++ b/src/fmt_html.c
@@ -1,5 +1,5 @@
/*
- * md4c modified for mdjs.
+ * md4c modified for markdown-wasm.
* Original source code is licensed as follows:
*
* Copyright (c) 2016-2019 Martin Mitas
@@ -439,14 +439,14 @@ int fmt_html(
const MD_CHAR* input,
MD_SIZE input_size,
WBuf* outbuf,
- u32 parser_flags,
- u32 render_flags
+ u32 parseFlags,
+ u32 fmtFlags
) {
- HtmlRenderer render = { outbuf, 0, 0, render_flags };
+ HtmlRenderer render = { outbuf, 0, 0, fmtFlags };
MD_PARSER parser = {
0,
- parser_flags,
+ parseFlags,
enter_block_callback,
leave_block_callback,
enter_span_callback,
diff --git a/src/fmt_html.h b/src/fmt_html.h
index ed38124..a6ff29d 100644
--- a/src/fmt_html.h
+++ b/src/fmt_html.h
@@ -1,6 +1,6 @@
#pragma once
#include "wbuf.h"
-#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g. <br>, generate <br/>
+#define MD_HTML_FLAG_XHTML (1 << 0) // instead of e.g. <br>, generate <br/>
-int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags, u32 renderFlags);
+int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags);
diff --git a/src/fmt_json.c b/src/fmt_json.c
index e69817b..f85aad0 100644
--- a/src/fmt_json.c
+++ b/src/fmt_json.c
@@ -4,20 +4,17 @@
#include "fmt_json.h"
#include "md4c.h"
-// #include "md4c_render_html.h"
-// #include "entity.h"
+// JSON formatter
//
-//
-// -------------- WORK IN PROGRESS
-//
+// -- WORK IN PROGRESS --
//
#ifdef _WIN32
#define snprintf _snprintf
#endif
-
+// dlog
#ifndef DEBUG
#define DEBUG 1
#endif
@@ -31,6 +28,7 @@
typedef struct JsonFormatter_st {
WBuf* outbuf;
+ u32 bnest; // block nesting level
} JsonFormatter;
@@ -39,61 +37,49 @@ typedef struct JsonFormatter_st {
#define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z')
#define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
+#define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0]))
-// static inline void render_text(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
-// // r->process_output(text, size, r->userdata);
-// WBufAppendBytes(r->outbuf, text, size);
-// }
#define render_text(f, textptr, textlen) \
WBufAppendBytes((r)->outbuf, (textptr), (textlen))
-// #define RENDER_LITERAL(r, literal) \
-// WBufAppendBytes((r)->outbuf, (literal), (MD_SIZE)strlen(literal))
-
-static char jsonEscapeMap[256];
+#define JSON_SUB_LEN 2
+static const char* jsonEscapeMap[256];
static void __attribute__((constructor)) init() {
- jsonEscapeMap[(unsigned char)'"'] = 1;
- jsonEscapeMap[(unsigned char)'\n'] = 1;
- jsonEscapeMap[(unsigned char)'\r'] = 1;
- jsonEscapeMap[(unsigned char)'\t'] = 1;
+ // Runs before main() (constructor attribute) and fills the byte -> JSON
+ // escape-sequence table. Entries left NULL mean "emit the byte verbatim".
+ //
+ // important: Values must all be exactly JSON_SUB_LEN bytes long
+ jsonEscapeMap[(unsigned char)'"'] = "\\\"";
+ // The backslash itself must be escaped, otherwise input such as "C:\dir"
+ // yields an invalid (or silently different) JSON string.
+ jsonEscapeMap[(unsigned char)'\\'] = "\\\\";
+ jsonEscapeMap[(unsigned char)'\b'] = "\\b";
+ jsonEscapeMap[(unsigned char)'\f'] = "\\f";
+ jsonEscapeMap[(unsigned char)'\n'] = "\\n";
+ jsonEscapeMap[(unsigned char)'\r'] = "\\r";
+ jsonEscapeMap[(unsigned char)'\t'] = "\\t";
+ // NOTE: the remaining control characters below 0x20 need the six-byte
+ // \u00XX form, which does not fit JSON_SUB_LEN, so they are not covered here.
}
+// #define JSON_BYTE_NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0)
+#define JSON_ESCAPE_MAP(ch) jsonEscapeMap[(unsigned char)(ch)]
+
static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
MD_OFFSET beg = 0;
MD_OFFSET off = 0;
- #define NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0)
-
- while(1) {
- /* Optimization: Use some loop unrolling. */
- while (
- off + 3 < size &&
- !NEED_ESCAPE(data[off+0]) &&
- !NEED_ESCAPE(data[off+1]) &&
- !NEED_ESCAPE(data[off+2]) &&
- !NEED_ESCAPE(data[off+3])
- ) {
- off += 4;
- }
- while (off < size && !NEED_ESCAPE(data[off])) {
+ while (1) {
+ const char* sub = NULL;
+ while (off < size) {
+ sub = JSON_ESCAPE_MAP(data[off]);
+ if (sub != NULL) {
+ break;
+ }
off++;
}
if (off > beg) {
+ // in-between
WBufAppendBytes(r->outbuf, data + beg, off - beg);
}
- if (off < size) {
- switch (data[off]) {
- case '"': WBufAppendCStr(r->outbuf, "\\\""); break;
- case '\n': WBufAppendCStr(r->outbuf, "\\n"); break;
- case '\r': WBufAppendCStr(r->outbuf, "\\r"); break;
- case '\t': WBufAppendCStr(r->outbuf, "\\t"); break;
- }
+ if (sub) {
+ WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
off++;
} else {
break;
@@ -101,14 +87,10 @@ static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size
beg = off;
}
-
- #undef NEED_ESCAPE
}
-static void
-render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
-{
+static void render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
MD_OFFSET beg = 0;
MD_OFFSET off = 0;
@@ -144,56 +126,21 @@ render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
}
}
-static unsigned
-hex_val(char ch)
-{
- if('0' <= ch && ch <= '9')
+static unsigned hex_val(char ch) {
+ if ('0' <= ch && ch <= '9') {
return ch - '0';
- if('A' <= ch && ch <= 'Z')
+ }
+ if ('A' <= ch && ch <= 'Z') {
return ch - 'A' + 10;
- else
- return ch - 'a' + 10;
+ }
+ return ch - 'a' + 10;
}
-static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) {
- if (codepoint <= 0x7f) {
- WBufAppendc(b, (char)codepoint);
- return;
- }
-
- unsigned char utf8[4];
- size_t n;
- if (codepoint <= 0x7ff) {
- n = 2;
- utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
- utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
- } else if (codepoint <= 0xffff) {
- n = 3;
- utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
- utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
- utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
- } else {
- n = 4;
- utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
- utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
- utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
- utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
- }
-
- if (0 < codepoint && codepoint <= 0x10ffff) {
- WBufAppendBytes(b, (const char*)utf8, n);
- } else {
- static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
- WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char));
- }
-}
-
-/* Translate entity to its UTF-8 equivalent, or output the verbatim one
- * if such entity is unknown (or if the translation is disabled). */
+// Translate entity to its UTF-8 equivalent, or output the verbatim one
+// if such entity is unknown (or if the translation is disabled).
static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
if (size > 3 && text[1] == '#') {
unsigned codepoint = 0;
-
if(text[2] == 'x' || text[2] == 'X') {
// Hexadecimal entity (e.g. "&#x1234abcd;")).
for (MD_SIZE i = 3; i < size-1; i++) {
@@ -205,17 +152,41 @@ static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE
codepoint = 10 * codepoint + (text[i] - '0');
}
}
-
- WBufAppendUTF8Codepoint(r->outbuf, codepoint);
+ if (codepoint <= 0xFF) {
+ const char* sub = JSON_ESCAPE_MAP(codepoint);
+ if (sub) {
+ // predefined escape code, e.g. "\n"
+ WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
+ } else {
+ // verbatim
+ WBufAppendUTF8Codepoint(r->outbuf, codepoint);
+ }
+ } else {
+ // e.g. \uD87E
+ WBufAppendCStr(r->outbuf, "\\u");
+ if (codepoint <= 0xF) {
+ WBufAppendCStr(r->outbuf, "000");
+ } else if (codepoint <= 0xFF) {
+ WBufAppendCStr(r->outbuf, "00");
+ } else if (codepoint <= 0xFFF) {
+ WBufAppendCStr(r->outbuf, "0");
+ }
+ WBufAppendU32(r->outbuf, codepoint, 16);
+ }
} else {
- WBufAppendBytes(r->outbuf, text, size);
+ // named entity
+ // We could do a lookup here but it would increase the WASM module binary size by
+ // at least 20kB, so for now, let's keep it simple and just include it verbatim until we
+ // can do something fancy like a compressed b-tree.
+ writeJsonEscaped(r, text, size);
}
}
-static void
-render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
- void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE))
-{
+static void render_attribute(
+ JsonFormatter* r,
+ const MD_ATTRIBUTE* attr,
+ void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE)
+) {
int i;
for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
@@ -233,68 +204,7 @@ render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
}
-static void
-render_open_ol_block(JsonFormatter* r, const MD_BLOCK_OL_DETAIL* det)
-{
- char buf[64];
-
- if(det->start == 1) {
- WBufAppendCStr(r->outbuf, "
lang.text != NULL) { - WBufAppendCStr(r->outbuf, " class=\"language-"); - render_attribute(r, &det->lang, writeJsonEscaped); - WBufAppendc(r->outbuf, '"'); - } - - WBufAppendc(r->outbuf, '>'); -} - -static void -render_open_td_block(JsonFormatter* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det) -{ - WBufAppendc(r->outbuf, '<'); - WBufAppendCStr(r->outbuf, cell_type); - - switch (det->align) { - case MD_ALIGN_LEFT: WBufAppendCStr(r->outbuf, " align=\"left\">"); break; - case MD_ALIGN_CENTER: WBufAppendCStr(r->outbuf, " align=\"center\">"); break; - case MD_ALIGN_RIGHT: WBufAppendCStr(r->outbuf, " align=\"right\">"); break; - default: WBufAppendCStr(r->outbuf, ">"); break; - } -} - -static void -render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det) -{ +static void render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det) { WBufAppendCStr(r->outbuf, "href, render_url_escaped); @@ -306,18 +216,14 @@ render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det) WBufAppendCStr(r->outbuf, "\">"); } -static void -render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) -{ +static void render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) { WBufAppendCStr(r->outbuf, ""); break; @@ -508,27 +442,65 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) { static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) { JsonFormatter* r = (JsonFormatter*)userdata; - WBufAppendCStr(r->outbuf, ", \""); - - switch (type) { - case MD_TEXT_NULLCHAR: WBufAppendCStr(r->outbuf, "\\0"); break; - case MD_TEXT_BR: WBufAppendCStr(r->outbuf, "src, render_url_escaped); WBufAppendCStr(r->outbuf, "\" alt=\""); } -static void -render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) -{ +static void render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) { if(det->title.text != NULL) { WBufAppendCStr(r->outbuf, "\" title=\""); render_attribute(r, 
&det->title, writeJsonEscaped); @@ -326,9 +232,7 @@ render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) WBufAppendCStr(r->outbuf, "\">"); } -static void -render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det) -{ +static void render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det) { WBufAppendCStr(r->outbuf, "
target, writeJsonEscaped); WBufAppendCStr(r->outbuf, "\">"); @@ -338,12 +242,20 @@ render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det) // ------------------------------------------------------------------------------------------------ -static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) { - WBufAppendCStr(r->outbuf, "{\"_\":\""); - WBufAppendBytes(r->outbuf, typename, typenamelen); - WBufAppendc(r->outbuf, '"'); -} +static void writeNewline(JsonFormatter* r) { + WBufAppendc(r->outbuf, '\n'); + static const char indent_chunk_str[] = " "; + static const u32 indent_chunk_size = (u32)(SIZEOF_ARRAY(indent_chunk_str) - 1); + u32 indent = r->bnest * 4; + while (indent > indent_chunk_size) { + WBufAppendBytes(r->outbuf, indent_chunk_str, indent_chunk_size); + indent -= indent_chunk_size; + } + if (indent > 0) { + WBufAppendBytes(r->outbuf, indent_chunk_str, indent); + } +} static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) { for (u32 i = 0; attr->substr_offsets[i] < attr->size; i++) { @@ -359,13 +271,29 @@ static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) { } } +static void writeKey(JsonFormatter* r, const char* rawkey, size_t rawkeyLen) { + WBufAppendc(r->outbuf, ','); + writeNewline(r); + WBufAppendc(r->outbuf, '"'); + WBufAppendBytes(r->outbuf, rawkey, rawkeyLen); + WBufAppendCStr(r->outbuf, "\":"); +} + +static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) { + WBufAppendCStr(r->outbuf, "{ \"_\": \""); + WBufAppendBytes(r->outbuf, typename, typenamelen); + WBufAppendc(r->outbuf, '"'); +} + static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) { - static const MD_CHAR* head[6] = { "h1", "h2", "h3", "h4", "h5", "h6" }; JsonFormatter* r = (JsonFormatter*) userdata; - const char* typename = ""; - size_t typenamelen = 0; - #define WRITE_TYPE_START(name) writeTypeStart(r, (name), strlen((name))) + + 
writeNewline(r); + r->bnest++; + + #define WRITE_TYPE_START(name) \ + writeTypeStart(r, (name), strlen((name))) switch (type) { case MD_BLOCK_DOC: WRITE_TYPE_START("doc"); break; @@ -380,7 +308,9 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) case MD_BLOCK_HR: WRITE_TYPE_START("hr"); break; case MD_BLOCK_H: { - WRITE_TYPE_START(head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); + WRITE_TYPE_START("h"); + WBufAppendCStr(r->outbuf, ", \"level\": "); + WBufAppendU32(r->outbuf, ((MD_BLOCK_H_DETAIL*)detail)->level, 10); break; } @@ -443,28 +373,30 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) break; } -// static void -// render_open_td_block(MD_RENDER_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det) -// { -// RENDER_LITERAL(r, "<"); -// RENDER_LITERAL(r, cell_type); - -// switch(det->align) { -// case MD_ALIGN_LEFT: RENDER_LITERAL(r, " align=\"left\">"); break; -// case MD_ALIGN_CENTER: RENDER_LITERAL(r, " align=\"center\">"); break; -// case MD_ALIGN_RIGHT: RENDER_LITERAL(r, " align=\"right\">"); break; -// default: RENDER_LITERAL(r, ">"); break; -// } } - WBufAppendCStr(r->outbuf, ", \"children\":[\n "); + WBufAppendCStr(r->outbuf, ", \"children\": ["); + return 0; } static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) { JsonFormatter* r = (JsonFormatter*)userdata; - WBufAppendCStr(r->outbuf, "]},\n"); + r->bnest--; + if (*(r->outbuf->ptr-1) == ',') { + // undo trailing comma + // e.g. 
+ // + // "1,2,3," + // ^ + // "1,2,3" + // ^ + // + r->outbuf->ptr--; + } + writeNewline(r); + WBufAppendCStr(r->outbuf, "]},"); return 0; } @@ -475,6 +407,7 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) { switch(type) { case MD_SPAN_EM: WBufAppendCStr(r->outbuf, ""); break; case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, ""); break; + case MD_SPAN_U: WBufAppendCStr(r->outbuf, ""); break; case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break; case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break; case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, " "); break; case MD_SPAN_IMG: /*noop, handled above*/ break; case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, ""); break; @@ -493,6 +426,7 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) { switch(type) { case MD_SPAN_EM: WBufAppendCStr(r->outbuf, ""); break; case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, ""); break; + case MD_SPAN_U: WBufAppendCStr(r->outbuf, ""); break; case MD_SPAN_A: WBufAppendCStr(r->outbuf, "
"); break; - case MD_TEXT_SOFTBR: WBufAppendc(r->outbuf, '\n'); break; - case MD_TEXT_HTML: render_text(r, text, size); break; - case MD_TEXT_ENTITY: writeDecodeXmlEntity(r, text, size); break; - default: writeJsonEscaped(r, text, size); break; + if (type == MD_TEXT_SOFTBR) { + // ignore soft break, i.e. + // + // Markdown: + // line1 + // line2 + // + // md4c emits: (line1, MD_TEXT_SOFTBR, line2) + // + return 0; } - WBufAppendc(r->outbuf, '"'); + writeNewline(r); + + if (type == MD_TEXT_HTML) { + WBufAppendCStr(r->outbuf, "{\"_\":\"html\",\"content\":\""); + writeJsonEscaped(r, text, size); + WBufAppendCStr(r->outbuf, "\"}"); + } else { + WBufAppendc(r->outbuf, '"'); + switch (type) { + case MD_TEXT_NULLCHAR: + WBufAppendCStr(r->outbuf, "\\0"); + break; + + case MD_TEXT_BR: + WBufAppendCStr(r->outbuf, "\\n"); + break; + + case MD_TEXT_ENTITY: + writeDecodeXmlEntity(r, text, size); + break; + + default: + writeJsonEscaped(r, text, size); + break; + } + WBufAppendc(r->outbuf, '"'); + } + + WBufAppendc(r->outbuf, ','); return 0; } -int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_flags) { - JsonFormatter render = { outbuf }; +int fmt_json( + const MD_CHAR* input, + MD_SIZE inputlen, + WBuf* outbuf, + u32 parseFlags, + u32 _fmtFlags +) { + JsonFormatter render = { + .outbuf = outbuf, + .bnest = 0, + }; MD_PARSER parser = { 0, - parser_flags, + parseFlags, enter_block_callback, leave_block_callback, enter_span_callback, @@ -538,5 +510,5 @@ int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_ NULL }; - return md_parse(input, input_size, &parser, (void*) &render); + return md_parse(input, inputlen, &parser, (void*)&render); } diff --git a/src/fmt_json.h b/src/fmt_json.h index f111b85..8ef5398 100644 --- a/src/fmt_json.h +++ b/src/fmt_json.h @@ -1,4 +1,6 @@ #pragma once #include "wbuf.h" -int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags); +#define MD_JSON_FLAG_NONE 0 + +int 
fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags); diff --git a/src/md.c b/src/md.c index 1ad2ebf..cfd59b2 100644 --- a/src/md.c +++ b/src/md.c @@ -3,18 +3,28 @@ #include "wlib.h" #include "wbuf.h" #include "fmt_html.h" -// #include "fmt_json.h" + +#if MD_WITH_JSON +#include "fmt_json.h" +#endif // these should be in sync with "OutputFlags" in md.js -typedef enum OutputFlags { - OutputFlagHTML = 1 << 0, - OutputFlagXHTML = 1 << 1, -} OutputFlags; +typedef enum Formatter { + FormatterNONE, + FormatterHTML, + FormatterJSON, +} Formatter; + +typedef enum FormatFlags { + FormatFlagHTML = 1 << 0, + FormatFlagXHTML = 1 << 1, + FormatFlagJSON = 1 << 2, +} FormatFlags; typedef enum ErrorCode { ERR_NONE, ERR_MD_PARSE, - ERR_OUTFLAGS, + ERR_FORMAT, } ErrorCode; @@ -31,38 +41,51 @@ static WBuf outbuf; export size_t parseUTF8( - const char* inbufptr, - u32 inbuflen, - u32 parser_flags, - OutputFlags outflags, + const char* inbufptr, + u32 inbuflen, + u32 parseFlags, + Formatter formatter, + u32 fmtflags, const char** outptr ) { dlog("parseUTF8 called with inbufptr=%p inbuflen=%u\n", inbufptr, inbuflen); WBufReset(&outbuf); + WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations - if (outflags & OutputFlagHTML) { - WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations + int result = 0x7ffff; - u32 render_flags = 0; - if (outflags & OutputFlagXHTML) { - render_flags |= MD_HTML_FLAG_XHTML; - } + switch (formatter) { - if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags, render_flags) != 0) { - // fmt_html returns status of md_parse which only fails in extreme cases - // like when out of memory. md4c does not provide error codes or error messages. 
- WErrSet(ERR_MD_PARSE, "md parser error"); - *outptr = 0; - return 0; - } + case FormatterHTML: + result = fmt_html(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags); + break; + case FormatterJSON: + #if MD_WITH_JSON + result = fmt_json(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags); + #endif + break; + + case FormatterNONE: + break; + + } // switch + + if (result == 0x7ffff) { + WErrSet(ERR_FORMAT, "invalid formatter"); + } else if (result != 0) { + // fmt_html returns status of md_parse which only fails in extreme cases + // like when out of memory. md4c does not provide error codes or error messages. + WErrSet(ERR_MD_PARSE, "parser error"); + } + + if (result == 0) { *outptr = outbuf.start; // dlog("outbuf =>\n%.*s\n", WBufLen(&outbuf), outbuf.start); return WBufLen(&outbuf); } - WErrSet(ERR_OUTFLAGS, "no output format set in output flags"); *outptr = 0; return 0; } diff --git a/src/md.js b/src/md.js index 20d93ce..f9e134d 100644 --- a/src/md.js +++ b/src/md.js @@ -41,6 +41,15 @@ const OutputFlags = { XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set) } +// these should be in sync with "Formatter" in md.c +const FormatterNONE = 0 + , FormatterHTML = 1 + , FormatterJSON = 2 + +// html formatter flags (sync with fmt_html.h) +const MD_HTML_FLAG_XHTML = 1 << 0 + + export function parse(source, options) { options = options || {} @@ -49,17 +58,17 @@ export function parse(source, options) { options.parseFlags ) - let outputFlags = 0 - switch (options.format) { - case "xhtml": - outputFlags |= OutputFlags.HTML | OutputFlags.XHTML - break + let formatter = FormatterHTML + let fmtFlags = 0 + if (options.format) switch (options.format) { case "html": - case undefined: - case null: - case "": - outputFlags |= OutputFlags.HTML + break + case "xhtml": + fmtFlags |= MD_HTML_FLAG_XHTML + break + case "json": + formatter = FormatterJSON break default: @@ -68,7 +77,7 @@ export function parse(source, options) { let buf = typeof source == "string" ? 
utf8.encode(source) : source let outbuf = withOutPtr(outptr => withTmpBytePtr(buf, (inptr, inlen) => - _parseUTF8(inptr, inlen, parseFlags, outputFlags, outptr) + _parseUTF8(inptr, inlen, parseFlags, formatter, fmtFlags, outptr) )) // check for error and throw if needed diff --git a/src/md4c.c b/src/md4c.c index 729cb72..40ba030 100644 --- a/src/md4c.c +++ b/src/md4c.c @@ -3678,7 +3678,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index) /* If we can be a closer, try to resolve with the preceding opener. */ if(mark->flags & MD_MARK_POTENTIAL_CLOSER) { MD_MARK* opener = NULL; - int opener_index; + int opener_index = 0; if(mark->ch == _T('*')) { MD_MARKCHAIN* opener_chains[6]; @@ -5654,7 +5654,7 @@ md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end) return indent - total_indent; } -static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 }; +static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 }; /* Analyze type of the line and find some its properties. This serves as a * main input for determining type and boundaries of a block. 
*/ diff --git a/src/wbuf.c b/src/wbuf.c index 121c594..9af894c 100644 --- a/src/wbuf.c +++ b/src/wbuf.c @@ -15,12 +15,8 @@ void WBufReset(WBuf* b) { b->ptr = b->start; } -inline size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size) -inline size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start -inline size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available - // grows buffer so that there is at least minspace available space -static void WBufGrow(WBuf* b, size_t minspace) { +void WBufGrow(WBuf* b, size_t minspace) { // size_t avail = b->end - b->ptr; size_t len = WBufLen(b); // store len before changing b size_t cap = WBufCap(b); @@ -42,13 +38,6 @@ void WBufReserve(WBuf* b, size_t minspace) { } } -void WBufAppendc(WBuf* b, char c) { - if (WBufAvail(b) < 1) { - WBufGrow(b, 1); - } - *(b->ptr++) = c; -} - void WBufAppendBytes(WBuf* b, const void* bytes, size_t len) { if (WBufAvail(b) < len) { WBufGrow(b, len); @@ -162,6 +151,35 @@ void WBufAppendU32(WBuf* b, u32 n, u32 radix) { } +void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint) { + unsigned char utf8[4]; + size_t n; + if (codepoint <= 0x7ff) { + n = 2; + utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f); + utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f); + } else if (codepoint <= 0xffff) { + n = 3; + utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf); + utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f); + utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f); + } else { + n = 4; + utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7); + utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f); + utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f); + utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f); + } + + if (0 < codepoint && codepoint <= 0x10ffff) { + WBufAppendBytes(b, (const char*)utf8, n); + } else { + static const char utf8_replacement_char[] = { 0xef, 0xbf, 0xbd }; + WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char)); + } +} + + // static void WBufAppendSlug(WBuf* b, const char* text) { // 
size_t len = strlen(text); // WBufReserve(b, len); diff --git a/src/wbuf.h b/src/wbuf.h index 4f0df26..5dcbb75 100644 --- a/src/wbuf.h +++ b/src/wbuf.h @@ -11,13 +11,13 @@ void WBufInit(WBuf*); void WBufFree(WBuf*); void WBufReset(WBuf*); -size_t WBufCap(WBuf*); // total capacity (size) -size_t WBufLen(WBuf*); // valid bytes at start -size_t WBufAvail(WBuf*); // bytes available +inline static size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size) +inline static size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start +inline static size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available void WBufReserve(WBuf*, size_t minspace); -void WBufAppendc(WBuf*, char c); +static void WBufAppendc(WBuf*, char c); void WBufAppendBytes(WBuf*, const void* bytes, size_t len); void WBufAppendStr(WBuf*, const char* pch); #define WBufAppendCStr(b, cstr) WBufAppendBytes((b), (cstr), strlen(cstr)) @@ -27,3 +27,26 @@ void _WBufAppendHtml(WBuf*, const char* pch, bool isattr); // append u32 integer n. radix must be in range [2-36] void WBufAppendU32(WBuf*, u32 n, u32 radix); + +static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint); + +// grows buffer so that there is at least minspace available space +void WBufGrow(WBuf* b, size_t minspace); + + + +// implementation of WBufAppendUTF8Codepoint +void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint); +inline static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) { + if (codepoint > 0x7f) { + return _WBufAppendUTF8Codepoint2(b, codepoint); + } + WBufAppendc(b, (char)codepoint); +} + +inline static void WBufAppendc(WBuf* b, char c) { + if (WBufAvail(b) < 1) { + WBufGrow(b, 1); + } + *(b->ptr++) = c; +} diff --git a/wasmc.js b/wasmc.js index c92392b..1447d4f 100644 --- a/wasmc.js +++ b/wasmc.js @@ -1,16 +1,29 @@ const package = require("./package.json") const outdir = debug ? 
builddir : "dist" +cflags = cflags.concat([ + "-std=c11", + "-Wall", + "-Wuninitialized", + "-Wmissing-field-initializers", + "-Wconditional-uninitialized", + "-Wno-nullability-completeness", + "-Wno-unused-function", + "-fcolor-diagnostics", +]) + const m = { jsentry: "src/md.js", + sources: [ "src/wlib.c", "src/wbuf.c", "src/md.c", "src/md4c.c", "src/fmt_html.c", - // "src/fmt_json.c", - ], + debug ? "src/fmt_json.c" : "", + ].filter(s => !!s), + cflags: [ "-DMD4C_USE_UTF8", ].concat(debug ? [ @@ -20,6 +33,7 @@ const m = { "-DSAFE_HEAP=1", // emcc "-DSTACK_OVERFLOW_CHECK=1", // emcc "-DDEMANGLE_SUPPORT=1", // emcc + "-DMD_WITH_JSON=1", // enable WIP json formatter ] : [ // release flags ]),