json wip

2026-07-03 18:20:42 +00:00 · 2020-10-18 09:23:42 -07:00 · 2020-10-18 09:23:42 -07:00 · bef16e0aa0
commit bef16e0aa0
parent cb877fce88
12 changed files with 376 additions and 265 deletions
--- a/example/example-json.js
+++ b/example/example-json.js
@ -0,0 +1,6 @@
+// Parses example2.md with format "json" and prints the result.
+const { readFileSync } = require("fs")
+const markdown = require("../build/debug/markdown.node.js")
+
+const input = readFileSync(__dirname + "/example2.md")
+console.log(markdown.parse(input, { format: "json" }))
--- a/example/example2.md
+++ b/example/example2.md
@ -0,0 +1,44 @@
+# H1
+
+This is a paragraph
+
+## H2
+
+This is a paragraph
+
+## Another
+
+This is a paragraph with style *italic* _italic_ **bold** __bold__
+
+![image](https://rsms.me/raster/examples/image1.jpg)
+![](https://rsms.me/image.png?without-alt)
+
+*Hello [link](https://rsms.me/) lol*
+
+Hello [*link*](https://rsms.me/) lol "cat"
+
+Hello from *[link](https://rsms.me/)* to __everyone__ `reading this`
+
+Here's an [**important** anchor link](#example).
+
+line 1
+line 2
+
+XML & html "entities"
+&amp;
+&AMP;
+&#x00026;
+&#x0026;
+&#x026;
+&#38;
+&#x0A;
+
+## Lists
+
+- Unordered
+* Lists
+ Of mixed type
+
+1. Ordered
+2. Lists
+4. Numbers are ignored
--- a/src/fmt_html.c
+++ b/src/fmt_html.c
@ -1,5 +1,5 @@
 /*
- * md4c modified for mdjs.
+ * md4c modified for markdown-wasm.
 * Original source code is licensed as follows:
 *
 * Copyright (c) 2016-2019 Martin Mitas
@ -439,14 +439,14 @@ int fmt_html(
  const MD_CHAR* input,
  MD_SIZE input_size,
  WBuf* outbuf,
-  u32 parser_flags,
-  u32 render_flags
+  u32 parseFlags,
+  u32 fmtFlags
 ) {
-  HtmlRenderer render = { outbuf, 0, 0, render_flags };
+  HtmlRenderer render = { outbuf, 0, 0, fmtFlags };

  MD_PARSER parser = {
    0,
-    parser_flags,
+    parseFlags,
    enter_block_callback,
    leave_block_callback,
    enter_span_callback,
--- a/src/fmt_html.h
+++ b/src/fmt_html.h
@ -1,6 +1,6 @@
 #pragma once
 #include "wbuf.h"

-#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g. <br>, generate <br/>
+#define MD_HTML_FLAG_XHTML (1 << 0) // instead of e.g. <br>, generate <br/>

-int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags, u32 renderFlags);
+int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags);
--- a/src/fmt_json.c
+++ b/src/fmt_json.c
@ -4,20 +4,17 @@

 #include "fmt_json.h"
 #include "md4c.h"
-// #include "md4c_render_html.h"
-// #include "entity.h"

+// JSON formatter
 //
-//
-// --------------   WORK IN PROGRESS
-//
+// -- WORK IN PROGRESS --
 //

 #ifdef _WIN32
  #define snprintf _snprintf
 #endif

-
+// dlog
 #ifndef DEBUG
 #define DEBUG 1
 #endif
@ -31,6 +28,7 @@

 typedef struct JsonFormatter_st {
  WBuf* outbuf;
+  u32   bnest; // block nesting level
 } JsonFormatter;


@ -39,61 +37,49 @@ typedef struct JsonFormatter_st {
 #define ISUPPER(ch)  ('A' <= (ch) && (ch) <= 'Z')
 #define ISALNUM(ch)  (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))

+#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof(a[0]))

-// static inline void render_text(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
-//   // r->process_output(text, size, r->userdata);
-//   WBufAppendBytes(r->outbuf, text, size);
-// }

 #define render_text(f, textptr, textlen) \
  WBufAppendBytes((r)->outbuf, (textptr), (textlen))

-// #define RENDER_LITERAL(r, literal) \
-//   WBufAppendBytes((r)->outbuf, (literal), (MD_SIZE)strlen(literal))

-
-static char jsonEscapeMap[256];
+#define JSON_SUB_LEN 2
+static const char* jsonEscapeMap[256];

 static void __attribute__((constructor)) init() {
-  jsonEscapeMap[(unsigned char)'"'] = 1;
-  jsonEscapeMap[(unsigned char)'\n'] = 1;
-  jsonEscapeMap[(unsigned char)'\r'] = 1;
-  jsonEscapeMap[(unsigned char)'\t'] = 1;
+  // Fills jsonEscapeMap: entry [byte] is the replacement text to emit inside a
+  // JSON string for that byte, or NULL when the byte is written unchanged.
+  //
+  // important: Values must all be exactly JSON_SUB_LEN bytes long
+  jsonEscapeMap[(unsigned char)'"']  = "\\\"";
+  // A raw backslash would otherwise start an escape sequence in the output.
+  jsonEscapeMap[(unsigned char)'\\'] = "\\\\";
+  jsonEscapeMap[(unsigned char)'\n'] = "\\n";
+  jsonEscapeMap[(unsigned char)'\r'] = "\\r";
+  jsonEscapeMap[(unsigned char)'\t'] = "\\t";
+  jsonEscapeMap[(unsigned char)'\b'] = "\\b";
+  jsonEscapeMap[(unsigned char)'\f'] = "\\f";
+  // NOTE: the remaining control bytes below 0x20 also need escaping in JSON,
+  // but only as the 6-byte form \u00XX, which does not fit JSON_SUB_LEN.
 }

+// #define JSON_BYTE_NEED_ESCAPE(ch)  (jsonEscapeMap[(unsigned char)(ch)] != 0)
+#define JSON_ESCAPE_MAP(ch)  jsonEscapeMap[(unsigned char)(ch)]
+

 static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
  MD_OFFSET beg = 0;
  MD_OFFSET off = 0;

-  #define NEED_ESCAPE(ch)  (jsonEscapeMap[(unsigned char)(ch)] != 0)
-
-  while(1) {
-    /* Optimization: Use some loop unrolling. */
-    while (
-      off + 3 < size &&
-      !NEED_ESCAPE(data[off+0]) &&
-      !NEED_ESCAPE(data[off+1]) &&
-      !NEED_ESCAPE(data[off+2]) &&
-      !NEED_ESCAPE(data[off+3])
-    ) {
-      off += 4;
-    }
-    while (off < size && !NEED_ESCAPE(data[off])) {
+  while (1) {
+    const char* sub = NULL;
+    while (off < size) {
+      sub = JSON_ESCAPE_MAP(data[off]);
+      if (sub != NULL) {
+        break;
+      }
      off++;
    }

    if (off > beg) {
+      // in-between
      WBufAppendBytes(r->outbuf, data + beg, off - beg);
    }

-    if (off < size) {
-      switch (data[off]) {
-        case '"':   WBufAppendCStr(r->outbuf, "\\\""); break;
-        case '\n':  WBufAppendCStr(r->outbuf, "\\n");  break;
-        case '\r':  WBufAppendCStr(r->outbuf, "\\r");  break;
-        case '\t':  WBufAppendCStr(r->outbuf, "\\t");  break;
-      }
+    if (sub) {
+      WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
      off++;
    } else {
      break;
@ -101,14 +87,10 @@ static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size

    beg = off;
  }
-
-  #undef NEED_ESCAPE
 }


-static void
-render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
-{
+static void render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
  static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
  MD_OFFSET beg = 0;
  MD_OFFSET off = 0;
@ -144,56 +126,21 @@ render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
  }
 }

-static unsigned
-hex_val(char ch)
-{
-  if('0' <= ch && ch <= '9')
+static unsigned hex_val(char ch) {
+  if ('0' <= ch && ch <= '9') {
    return ch - '0';
-  if('A' <= ch && ch <= 'Z')
+  }
+  if ('A' <= ch && ch <= 'Z') {
    return ch - 'A' + 10;
-  else
-    return ch - 'a' + 10;
+  }
+  return ch - 'a' + 10;
 }

-static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) {
-  if (codepoint <= 0x7f) {
-    WBufAppendc(b, (char)codepoint);
-    return;
-  }
-
-  unsigned char utf8[4];
-  size_t n;
-  if (codepoint <= 0x7ff) {
-    n = 2;
-    utf8[0] = 0xc0 | ((codepoint >>  6) & 0x1f);
-    utf8[1] = 0x80 + ((codepoint >>  0) & 0x3f);
-  } else if (codepoint <= 0xffff) {
-    n = 3;
-    utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
-    utf8[1] = 0x80 + ((codepoint >>  6) & 0x3f);
-    utf8[2] = 0x80 + ((codepoint >>  0) & 0x3f);
-  } else {
-    n = 4;
-    utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
-    utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
-    utf8[2] = 0x80 + ((codepoint >>  6) & 0x3f);
-    utf8[3] = 0x80 + ((codepoint >>  0) & 0x3f);
-  }
-
-  if (0 < codepoint && codepoint <= 0x10ffff) {
-    WBufAppendBytes(b, (const char*)utf8, n);
-  } else {
-    static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
-    WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char));
-  }
-}
-
-/* Translate entity to its UTF-8 equivalent, or output the verbatim one
- * if such entity is unknown (or if the translation is disabled). */
+// Translate entity to its UTF-8 equivalent, or output the verbatim one
+// if such entity is unknown (or if the translation is disabled).
 static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
  if (size > 3 && text[1] == '#') {
    unsigned codepoint = 0;
-
    if(text[2] == 'x' || text[2] == 'X') {
      // Hexadecimal entity (e.g. "&#x1234abcd;")).
      for (MD_SIZE i = 3; i < size-1; i++) {
@ -205,17 +152,41 @@ static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE
        codepoint = 10 * codepoint + (text[i] - '0');
      }
    }
-
-    WBufAppendUTF8Codepoint(r->outbuf, codepoint);
+    if (codepoint <= 0xFF) {
+      const char* sub = JSON_ESCAPE_MAP(codepoint);
+      if (sub) {
+        // predefined escape code, e.g. "\n"
+        WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
+      } else {
+        // verbatim
+        WBufAppendUTF8Codepoint(r->outbuf, codepoint);
+      }
+    } else {
+      // e.g. \uD87E
+      WBufAppendCStr(r->outbuf, "\\u");
+      if (codepoint <= 0xF) {
+        WBufAppendCStr(r->outbuf, "000");
+      } else if (codepoint <= 0xFF) {
+        WBufAppendCStr(r->outbuf, "00");
+      } else if (codepoint <= 0xFFF) {
+        WBufAppendCStr(r->outbuf, "0");
+      }
+      WBufAppendU32(r->outbuf, codepoint, 16);
+    }
  } else {
-    WBufAppendBytes(r->outbuf, text, size);
+    // named entity
+    // We could do a lookup here but it would increase the WASM module binary size by
+    // at least 20kB, so for now, let's keep it simple and just include it verbatim until we
+    // can do something fancy like a compressed b-tree.
+    writeJsonEscaped(r, text, size);
  }
 }

-static void
-render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
-         void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE))
-{
+static void render_attribute(
+  JsonFormatter* r,
+  const MD_ATTRIBUTE* attr,
+  void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE)
+) {
  int i;

  for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
@ -233,68 +204,7 @@ render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
 }


-static void
-render_open_ol_block(JsonFormatter* r, const MD_BLOCK_OL_DETAIL* det)
-{
-  char buf[64];
-
-  if(det->start == 1) {
-    WBufAppendCStr(r->outbuf, "<ol>\n");
-    return;
-  }
-
-  snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
-  WBufAppendCStr(r->outbuf, buf);
-}
-
-static void
-render_open_li_block(JsonFormatter* r, const MD_BLOCK_LI_DETAIL* det)
-{
-  if(det->is_task) {
-    WBufAppendCStr(r->outbuf,
-      "<li class=\"task-list-item\">"
-      "<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
-    if (det->task_mark == 'x' || det->task_mark == 'X') {
-      WBufAppendCStr(r->outbuf, " checked");
-    }
-    WBufAppendc(r->outbuf, '>');
-  } else {
-    WBufAppendCStr(r->outbuf, "<li>");
-  }
-}
-
-static void
-render_open_code_block(JsonFormatter* r, const MD_BLOCK_CODE_DETAIL* det)
-{
-  WBufAppendCStr(r->outbuf, "<pre><code");
-
-  /* If known, output the HTML 5 attribute class="language-LANGNAME". */
-  if(det->lang.text != NULL) {
-    WBufAppendCStr(r->outbuf, " class=\"language-");
-    render_attribute(r, &det->lang, writeJsonEscaped);
-    WBufAppendc(r->outbuf, '"');
-  }
-
-  WBufAppendc(r->outbuf, '>');
-}
-
-static void
-render_open_td_block(JsonFormatter* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
-{
-  WBufAppendc(r->outbuf, '<');
-  WBufAppendCStr(r->outbuf, cell_type);
-
-  switch (det->align) {
-    case MD_ALIGN_LEFT:     WBufAppendCStr(r->outbuf, " align=\"left\">"); break;
-    case MD_ALIGN_CENTER:   WBufAppendCStr(r->outbuf, " align=\"center\">"); break;
-    case MD_ALIGN_RIGHT:    WBufAppendCStr(r->outbuf, " align=\"right\">"); break;
-    default:                WBufAppendCStr(r->outbuf, ">"); break;
-  }
-}
-
-static void
-render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
-{
+static void render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det) {
  WBufAppendCStr(r->outbuf, "<a href=\"");
  render_attribute(r, &det->href, render_url_escaped);

@ -306,18 +216,14 @@ render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
  WBufAppendCStr(r->outbuf, "\">");
 }

-static void
-render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
-{
+static void render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
  WBufAppendCStr(r->outbuf, "<img src=\"");
  render_attribute(r, &det->src, render_url_escaped);

  WBufAppendCStr(r->outbuf, "\" alt=\"");
 }

-static void
-render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
-{
+static void render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
  if(det->title.text != NULL) {
    WBufAppendCStr(r->outbuf, "\" title=\"");
    render_attribute(r, &det->title, writeJsonEscaped);
@ -326,9 +232,7 @@ render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
  WBufAppendCStr(r->outbuf, "\">");
 }

-static void
-render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
-{
+static void render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det) {
  WBufAppendCStr(r->outbuf, "<x-wikilink data-target=\"");
  render_attribute(r, &det->target, writeJsonEscaped);
  WBufAppendCStr(r->outbuf, "\">");
@ -338,12 +242,20 @@ render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
 // ------------------------------------------------------------------------------------------------


-static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
-  WBufAppendCStr(r->outbuf, "{\"_\":\"");
-  WBufAppendBytes(r->outbuf, typename, typenamelen);
-  WBufAppendc(r->outbuf, '"');
-}
+// Ends the current output line and indents the next one by four spaces per
+// block nesting level (r->bnest).
+static void writeNewline(JsonFormatter* r) {
+  WBufAppendc(r->outbuf, '\n');

+  // Indentation is copied from a fixed run of spaces, at most one run at a time.
+  static const char pad[] = "                ";
+  const u32 padLen = (u32)(SIZEOF_ARRAY(pad) - 1);
+  for (u32 left = r->bnest * 4; left > 0; ) {
+    u32 n = left < padLen ? left : padLen;
+    WBufAppendBytes(r->outbuf, pad, n);
+    left -= n;
+  }
+}

 static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
  for (u32 i = 0; attr->substr_offsets[i] < attr->size; i++) {
@ -359,13 +271,29 @@ static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
  }
 }

+// Appends a separator comma, a newline with indentation, and then the quoted
+// key followed by a colon. rawkey is copied as-is (rawkeyLen bytes, no escaping).
+static void writeKey(JsonFormatter* r, const char* rawkey, size_t rawkeyLen) {
+  WBuf* out = r->outbuf;
+  WBufAppendc(out, ',');
+  writeNewline(r);
+  WBufAppendc(out, '"');
+  WBufAppendBytes(out, rawkey, rawkeyLen);
+  WBufAppendCStr(out, "\":");
+}
+
+// Opens a JSON object and writes its type tag under the "_" key. The object
+// is left open; typename is copied as-is (typenamelen bytes, no escaping).
+static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
+  WBuf* out = r->outbuf;
+  WBufAppendCStr(out, "{ \"_\": \"");
+  WBufAppendBytes(out, typename, typenamelen);
+  WBufAppendc(out, '"');
+}
+

 static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
-  static const MD_CHAR* head[6] = { "h1", "h2", "h3", "h4", "h5", "h6" };
  JsonFormatter* r = (JsonFormatter*) userdata;
-  const char* typename = "";
-  size_t typenamelen = 0;
-  #define WRITE_TYPE_START(name) writeTypeStart(r, (name), strlen((name)))
+
+  writeNewline(r);
+  r->bnest++;
+
+  #define WRITE_TYPE_START(name) \
+    writeTypeStart(r, (name), strlen((name)))

  switch (type) {
    case MD_BLOCK_DOC:   WRITE_TYPE_START("doc"); break;
@ -380,7 +308,9 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
    case MD_BLOCK_HR:    WRITE_TYPE_START("hr"); break;

    case MD_BLOCK_H: {
-      WRITE_TYPE_START(head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]);
+      WRITE_TYPE_START("h");
+      WBufAppendCStr(r->outbuf, ", \"level\": ");
+      WBufAppendU32(r->outbuf, ((MD_BLOCK_H_DETAIL*)detail)->level, 10);
      break;
    }

@ -443,28 +373,30 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
      break;
    }

-// static void
-// render_open_td_block(MD_RENDER_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
-// {
-//     RENDER_LITERAL(r, "<");
-//     RENDER_LITERAL(r, cell_type);
-
-//     switch(det->align) {
-//         case MD_ALIGN_LEFT:     RENDER_LITERAL(r, " align=\"left\">"); break;
-//         case MD_ALIGN_CENTER:   RENDER_LITERAL(r, " align=\"center\">"); break;
-//         case MD_ALIGN_RIGHT:    RENDER_LITERAL(r, " align=\"right\">"); break;
-//         default:                RENDER_LITERAL(r, ">"); break;
-//     }
  }

-  WBufAppendCStr(r->outbuf, ", \"children\":[\n  ");
+  WBufAppendCStr(r->outbuf, ", \"children\": [");
+
  return 0;
 }


 static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
  JsonFormatter* r = (JsonFormatter*)userdata;
-  WBufAppendCStr(r->outbuf, "]},\n");
+  // Closes the "children" array and the object opened by enter_block_callback,
+  // then appends a ',' so a following sibling can be written directly.
+  // Always returns 0.
+  if (r->bnest > 0) {
+    // bnest is unsigned; never let an unbalanced leave wrap it around
+    r->bnest--;
+  }
+  if (WBufLen(r->outbuf) > 0 && *(r->outbuf->ptr-1) == ',') {
+    // undo trailing comma (only look at the last byte if there is one)
+    // e.g.
+    //
+    //  "1,2,3,"
+    //        ^
+    //  "1,2,3"
+    //       ^
+    //
+    r->outbuf->ptr--;
+  }
+  writeNewline(r);
+  WBufAppendCStr(r->outbuf, "]},");
  return 0;
 }

@ -475,6 +407,7 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
  switch(type) {
    case MD_SPAN_EM:                WBufAppendCStr(r->outbuf, "<em>"); break;
    case MD_SPAN_STRONG:            WBufAppendCStr(r->outbuf, "<b>"); break;
+    case MD_SPAN_U:                 WBufAppendCStr(r->outbuf, "<u>"); break;
    case MD_SPAN_A:                 render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
    case MD_SPAN_IMG:               render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
    case MD_SPAN_CODE:              WBufAppendCStr(r->outbuf, "<code>"); break;
@ -493,6 +426,7 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
  switch(type) {
    case MD_SPAN_EM:                WBufAppendCStr(r->outbuf, "</em>"); break;
    case MD_SPAN_STRONG:            WBufAppendCStr(r->outbuf, "</b>"); break;
+    case MD_SPAN_U:                 WBufAppendCStr(r->outbuf, "</u>"); break;
    case MD_SPAN_A:                 WBufAppendCStr(r->outbuf, "</a>"); break;
    case MD_SPAN_IMG:               /*noop, handled above*/ break;
    case MD_SPAN_CODE:              WBufAppendCStr(r->outbuf, "</code>"); break;
@ -508,27 +442,65 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
 static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) {
  JsonFormatter* r = (JsonFormatter*)userdata;

-  WBufAppendCStr(r->outbuf, ", \"");
-
-  switch (type) {
-    case MD_TEXT_NULLCHAR:  WBufAppendCStr(r->outbuf, "\\0"); break;
-    case MD_TEXT_BR:        WBufAppendCStr(r->outbuf, "<br>"); break;
-    case MD_TEXT_SOFTBR:    WBufAppendc(r->outbuf, '\n'); break;
-    case MD_TEXT_HTML:      render_text(r, text, size); break;
-    case MD_TEXT_ENTITY:    writeDecodeXmlEntity(r, text, size); break;
-    default:                writeJsonEscaped(r, text, size); break;
+  // Writes one child element for a run of text on its own indented line,
+  // followed by ','. Raw HTML becomes an object with "_":"html"; every other
+  // text type becomes a JSON string. Always returns 0.
+  if (type == MD_TEXT_SOFTBR) {
+    // ignore soft break, i.e.
+    //
+    // Markdown:
+    //    line1
+    //    line2
+    //
+    // md4c emits: (line1, MD_TEXT_SOFTBR, line2)
+    //
+    return 0;
  }

-  WBufAppendc(r->outbuf, '"');
+  writeNewline(r);
+
+  if (type == MD_TEXT_HTML) {
+    WBufAppendCStr(r->outbuf, "{\"_\":\"html\",\"content\":\"");
+    writeJsonEscaped(r, text, size);
+    WBufAppendCStr(r->outbuf, "\"}");
+  } else {
+    WBufAppendc(r->outbuf, '"');
+    switch (type) {
+    case MD_TEXT_NULLCHAR:
+      // JSON has no "\0" escape; a NUL must be spelled \u0000
+      WBufAppendCStr(r->outbuf, "\\u0000");
+      break;
+
+    case MD_TEXT_BR:
+      // hard line break is represented as an escaped newline
+      WBufAppendCStr(r->outbuf, "\\n");
+      break;
+
+    case MD_TEXT_ENTITY:
+      writeDecodeXmlEntity(r, text, size);
+      break;
+
+    default:
+      writeJsonEscaped(r, text, size);
+      break;
+    }
+    WBufAppendc(r->outbuf, '"');
+  }
+
+  // the trailing comma is removed again by leave_block_callback if this was
+  // the last child
+  WBufAppendc(r->outbuf, ',');

  return 0;
 }

-int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_flags) {
-  JsonFormatter render = { outbuf };
+int fmt_json(
+  const MD_CHAR* input,
+  MD_SIZE inputlen,
+  WBuf* outbuf,
+  u32 parseFlags,
+  u32 _fmtFlags
+) {
+  JsonFormatter render = {
+    .outbuf = outbuf,
+    .bnest = 0,
+  };
  MD_PARSER parser = {
    0,
-    parser_flags,
+    parseFlags,
    enter_block_callback,
    leave_block_callback,
    enter_span_callback,
@ -538,5 +510,5 @@ int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_
    NULL
  };

-  return md_parse(input, input_size, &parser, (void*) &render);
+  return md_parse(input, inputlen, &parser, (void*)&render);
 }
--- a/src/fmt_json.h
+++ b/src/fmt_json.h
@ -1,4 +1,6 @@
 #pragma once
 #include "wbuf.h"

-int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags);
+#define MD_JSON_FLAG_NONE 0
+
+int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags);
--- a/src/md.c
+++ b/src/md.c
@ -3,18 +3,28 @@
 #include "wlib.h"
 #include "wbuf.h"
 #include "fmt_html.h"
-// #include "fmt_json.h"
+
+#if MD_WITH_JSON
+#include "fmt_json.h"
+#endif

 // these should be in sync with "OutputFlags" in md.js
-typedef enum OutputFlags {
-  OutputFlagHTML  = 1 << 0,
-  OutputFlagXHTML = 1 << 1,
-} OutputFlags;
+// Selects which formatter parseUTF8 runs on the input.
+// The numeric values are mirrored by the Formatter* constants in md.js.
+typedef enum Formatter {
+  FormatterNONE, // 0: runs no formatter; parseUTF8 reports ERR_FORMAT
+  FormatterHTML, // 1: fmt_html
+  FormatterJSON, // 2: fmt_json, only when compiled with MD_WITH_JSON
+} Formatter;
+
+// Bit flags naming an output format.
+// NOTE(review): no use of this enum is visible in this diff -- parseUTF8 takes
+// a Formatter plus a plain u32 of formatter flags; confirm it is still needed.
+typedef enum FormatFlags {
+  FormatFlagHTML  = 1 << 0,
+  FormatFlagXHTML = 1 << 1,
+  FormatFlagJSON  = 1 << 2,
+} FormatFlags;

 typedef enum ErrorCode {
  ERR_NONE,
  ERR_MD_PARSE,
-  ERR_OUTFLAGS,
+  ERR_FORMAT,
 } ErrorCode;


@ -31,38 +41,51 @@ static WBuf outbuf;


 export size_t parseUTF8(
-  const char* inbufptr,
-  u32 inbuflen,
-  u32 parser_flags,
-  OutputFlags outflags,
+  const char*  inbufptr,
+  u32          inbuflen,
+  u32          parseFlags,
+  Formatter    formatter,
+  u32          fmtflags,
  const char** outptr
 ) {
  dlog("parseUTF8 called with inbufptr=%p  inbuflen=%u\n", inbufptr, inbuflen);

  WBufReset(&outbuf);
+  WBufReserve(&outbuf, inbuflen * 2);  // approximate output size to minimize reallocations

-  if (outflags & OutputFlagHTML) {
-    WBufReserve(&outbuf, inbuflen * 2);  // approximate output size to minimize reallocations
+  int result = 0x7ffff;

-    u32 render_flags = 0;
-    if (outflags & OutputFlagXHTML) {
-      render_flags |= MD_HTML_FLAG_XHTML;
-    }
+  switch (formatter) {

-    if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags, render_flags) != 0) {
-      // fmt_html returns status of md_parse which only fails in extreme cases
-      // like when out of memory. md4c does not provide error codes or error messages.
-      WErrSet(ERR_MD_PARSE, "md parser error");
-      *outptr = 0;
-      return 0;
-    }
+  case FormatterHTML:
+    result = fmt_html(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags);
+    break;

+  case FormatterJSON:
+    #if MD_WITH_JSON
+    result = fmt_json(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags);
+    #endif
+    break;
+
+  case FormatterNONE:
+    break;
+
+  } // switch
+
+  if (result == 0x7ffff) {
+    WErrSet(ERR_FORMAT, "invalid formatter");
+  } else if (result != 0) {
+    // fmt_html returns status of md_parse which only fails in extreme cases
+    // like when out of memory. md4c does not provide error codes or error messages.
+    WErrSet(ERR_MD_PARSE, "parser error");
+  }
+
+  if (result == 0) {
    *outptr = outbuf.start;
    // dlog("outbuf =>\n%.*s\n", WBufLen(&outbuf), outbuf.start);
    return WBufLen(&outbuf);
  }

-  WErrSet(ERR_OUTFLAGS, "no output format set in output flags");
  *outptr = 0;
  return 0;
 }
--- a/src/md.js
+++ b/src/md.js
@ -41,6 +41,15 @@ const OutputFlags = {
  XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set)
 }

+// these should be in sync with "Formatter" in md.c
+const FormatterNONE = 0
+    , FormatterHTML = 1
+    , FormatterJSON = 2
+
+// html formatter flags (sync with fmt_html.h)
+const MD_HTML_FLAG_XHTML = 1 << 0
+
+
 export function parse(source, options) {
  options = options || {}

@ -49,17 +58,17 @@ export function parse(source, options) {
    options.parseFlags
  )

-  let outputFlags = 0
-  switch (options.format) {
-    case "xhtml":
-      outputFlags |= OutputFlags.HTML | OutputFlags.XHTML
-      break
+  let formatter = FormatterHTML
+  let fmtFlags = 0

+  if (options.format) switch (options.format) {
    case "html":
-    case undefined:
-    case null:
-    case "":
-      outputFlags |= OutputFlags.HTML
+      break
+    case "xhtml":
+      fmtFlags |= MD_HTML_FLAG_XHTML
+      break
+    case "json":
+      formatter = FormatterJSON
      break

    default:
@ -68,7 +77,7 @@ export function parse(source, options) {

  let buf = typeof source == "string" ? utf8.encode(source) : source
  let outbuf = withOutPtr(outptr => withTmpBytePtr(buf, (inptr, inlen) =>
-    _parseUTF8(inptr, inlen, parseFlags, outputFlags, outptr)
+    _parseUTF8(inptr, inlen, parseFlags, formatter, fmtFlags, outptr)
  ))

  // check for error and throw if needed
--- a/src/md4c.c
+++ b/src/md4c.c
@ -3678,7 +3678,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
    /* If we can be a closer, try to resolve with the preceding opener. */
    if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
        MD_MARK* opener = NULL;
-        int opener_index;
+        int opener_index = 0;

        if(mark->ch == _T('*')) {
            MD_MARKCHAIN* opener_chains[6];
@ -5654,7 +5654,7 @@ md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
    return indent - total_indent;
 }

-static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 };
+static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 };

 /* Analyze type of the line and find some its properties. This serves as a
 * main input for determining type and boundaries of a block. */
--- a/src/wbuf.c
+++ b/src/wbuf.c
@ -15,12 +15,8 @@ void WBufReset(WBuf* b) {
  b->ptr = b->start;
 }

-inline size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size)
-inline size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start
-inline size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available
-
 // grows buffer so that there is at least minspace available space
-static void WBufGrow(WBuf* b, size_t minspace) {
+void WBufGrow(WBuf* b, size_t minspace) {
  // size_t avail = b->end - b->ptr;
  size_t len = WBufLen(b); // store len before changing b
  size_t cap = WBufCap(b);
@ -42,13 +38,6 @@ void WBufReserve(WBuf* b, size_t minspace) {
  }
 }

-void WBufAppendc(WBuf* b, char c) {
-  if (WBufAvail(b) < 1) {
-    WBufGrow(b, 1);
-  }
-  *(b->ptr++) = c;
-}
-
 void WBufAppendBytes(WBuf* b, const void* bytes, size_t len) {
  if (WBufAvail(b) < len) {
    WBufGrow(b, len);
@ -162,6 +151,35 @@ void WBufAppendU32(WBuf* b, u32 n, u32 radix) {
 }


+// Appends codepoint to b encoded as UTF-8 (1 to 4 bytes).
+//
+// Values with no valid UTF-8 encoding -- 0, the UTF-16 surrogates
+// U+D800..U+DFFF, and anything above U+10FFFF -- are written as U+FFFD
+// (the replacement character) instead.
+//
+// This is the out-of-line half of WBufAppendUTF8Codepoint, which normally
+// only calls it for codepoint > 0x7f; a direct call with an ASCII value is
+// still encoded correctly as a single byte.
+void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint) {
+  // unsigned char: these bytes are >= 0x80 and do not fit in a signed char
+  static const unsigned char utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
+  unsigned char utf8[4];
+  size_t n;
+
+  // validate before encoding so no invalid sequence is ever produced
+  if (codepoint == 0 ||
+      (codepoint >= 0xd800 && codepoint <= 0xdfff) ||
+      codepoint > 0x10ffff)
+  {
+    WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char));
+    return;
+  }
+
+  if (codepoint <= 0x7f) {
+    n = 1;
+    utf8[0] = (unsigned char)codepoint;
+  } else if (codepoint <= 0x7ff) {
+    n = 2;
+    utf8[0] = 0xc0 | ((codepoint >>  6) & 0x1f);
+    utf8[1] = 0x80 | ((codepoint >>  0) & 0x3f);
+  } else if (codepoint <= 0xffff) {
+    n = 3;
+    utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
+    utf8[1] = 0x80 | ((codepoint >>  6) & 0x3f);
+    utf8[2] = 0x80 | ((codepoint >>  0) & 0x3f);
+  } else {
+    n = 4;
+    utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
+    utf8[1] = 0x80 | ((codepoint >> 12) & 0x3f);
+    utf8[2] = 0x80 | ((codepoint >>  6) & 0x3f);
+    utf8[3] = 0x80 | ((codepoint >>  0) & 0x3f);
+  }
+
+  WBufAppendBytes(b, utf8, n);
+}
+
+
 // static void WBufAppendSlug(WBuf* b, const char* text) {
 //   size_t len = strlen(text);
 //   WBufReserve(b, len);
--- a/src/wbuf.h
+++ b/src/wbuf.h
@ -11,13 +11,13 @@ void WBufInit(WBuf*);
 void WBufFree(WBuf*);
 void WBufReset(WBuf*);

-size_t WBufCap(WBuf*);   // total capacity (size)
-size_t WBufLen(WBuf*);   // valid bytes at start
-size_t WBufAvail(WBuf*); // bytes available
+inline static size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size)
+inline static size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start
+inline static size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available

 void WBufReserve(WBuf*, size_t minspace);

-void WBufAppendc(WBuf*, char c);
+static void WBufAppendc(WBuf*, char c);
 void WBufAppendBytes(WBuf*, const void* bytes, size_t len);
 void WBufAppendStr(WBuf*, const char* pch);
 #define WBufAppendCStr(b, cstr) WBufAppendBytes((b), (cstr), strlen(cstr))
@ -27,3 +27,26 @@ void _WBufAppendHtml(WBuf*, const char* pch, bool isattr);

 // append u32 integer n. radix must be in range [2-36]
 void WBufAppendU32(WBuf*, u32 n, u32 radix);
+
+static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint);
+
+// grows buffer so that there is at least minspace available space
+void WBufGrow(WBuf* b, size_t minspace);
+
+
+
+// implementation of WBufAppendUTF8Codepoint
+void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint);
+// Appends codepoint to b as UTF-8. Values up to 0x7f (including 0) are
+// written inline as a single byte; larger values are handed to
+// _WBufAppendUTF8Codepoint2.
+inline static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) {
+  if (codepoint > 0x7f) {
+    // plain call: a void function must not "return" an expression
+    _WBufAppendUTF8Codepoint2(b, codepoint);
+  } else {
+    WBufAppendc(b, (char)codepoint);
+  }
+}
+
+// Appends the single byte c to b, growing the buffer first when it is full.
+inline static void WBufAppendc(WBuf* b, char c) {
+  if (WBufAvail(b) == 0) {
+    WBufGrow(b, 1);
+  }
+  *b->ptr = c;
+  b->ptr++;
+}
--- a/wasmc.js
+++ b/wasmc.js
@ -1,16 +1,29 @@
 const package = require("./package.json")
 const outdir = debug ? builddir : "dist"

+cflags = cflags.concat([
+  "-std=c11",
+  "-Wall",
+  "-Wuninitialized",
+  "-Wmissing-field-initializers",
+  "-Wconditional-uninitialized",
+  "-Wno-nullability-completeness",
+  "-Wno-unused-function",
+  "-fcolor-diagnostics",
+])
+
 const m = {
  jsentry: "src/md.js",
+
  sources: [
    "src/wlib.c",
    "src/wbuf.c",
    "src/md.c",
    "src/md4c.c",
    "src/fmt_html.c",
-    // "src/fmt_json.c",
-  ],
+    debug ? "src/fmt_json.c" : "",
+  ].filter(s => !!s),
+
  cflags: [
    "-DMD4C_USE_UTF8",
  ].concat(debug ? [
@ -20,6 +33,7 @@ const m = {
    "-DSAFE_HEAP=1", // emcc
    "-DSTACK_OVERFLOW_CHECK=1", // emcc
    "-DDEMANGLE_SUPPORT=1", // emcc
+    "-DMD_WITH_JSON=1", // enable WIP json formatter
  ] : [
    // release flags
  ]),