diff --git a/example/example-json.js b/example/example-json.js new file mode 100644 index 0000000..0f53622 --- /dev/null +++ b/example/example-json.js @@ -0,0 +1,6 @@ +const fs = require("fs") +const md = require("../build/debug/markdown.node.js") + +const source = fs.readFileSync(__dirname + "/example2.md") +const json = md.parse(source, { format: "json" }) +console.log(json) diff --git a/example/example2.md b/example/example2.md new file mode 100644 index 0000000..f757406 --- /dev/null +++ b/example/example2.md @@ -0,0 +1,44 @@ +# H1 + +This is a paragraph + +## H2 + +This is a paragraph + +## Another + +This is a paragraph with style *italic* _italic_ **bold** __bold__ + +![image](https://rsms.me/raster/examples/image1.jpg) +![](https://rsms.me/image.png?without-alt) + +*Hello [link](https://rsms.me/) lol* + +Hello [*link*](https://rsms.me/) lol "cat" + +Hello from *[link](https://rsms.me/)* to __everyone__ `reading this` + +Here's an [**important** anchor link](#example). + +line 1 +line 2 + +XML & html "entities" +& +& +& +& +& +& + + +## Lists + +- Unordered +* Lists ++ Of mixed type + +1. Ordered +2. Lists +4. Numbers are ignored diff --git a/src/fmt_html.c b/src/fmt_html.c index a2788f8..aad2d5f 100644 --- a/src/fmt_html.c +++ b/src/fmt_html.c @@ -1,5 +1,5 @@ /* - * md4c modified for mdjs. + * md4c modified for markdown-wasm. * Original source code is licensed as follows: * * Copyright (c) 2016-2019 Martin Mitas @@ -439,14 +439,14 @@ int fmt_html( const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, - u32 parser_flags, - u32 render_flags + u32 parseFlags, + u32 fmtFlags ) { - HtmlRenderer render = { outbuf, 0, 0, render_flags }; + HtmlRenderer render = { outbuf, 0, 0, fmtFlags }; MD_PARSER parser = { 0, - parser_flags, + parseFlags, enter_block_callback, leave_block_callback, enter_span_callback, diff --git a/src/fmt_html.h b/src/fmt_html.h index ed38124..a6ff29d 100644 --- a/src/fmt_html.h +++ b/src/fmt_html.h @@ -1,6 +1,6 @@ #pragma once #include "wbuf.h" -#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g.
, generate
+#define MD_HTML_FLAG_XHTML (1 << 0) // instead of e.g.
, generate
-int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags, u32 renderFlags); +int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags); diff --git a/src/fmt_json.c b/src/fmt_json.c index e69817b..f85aad0 100644 --- a/src/fmt_json.c +++ b/src/fmt_json.c @@ -4,20 +4,17 @@ #include "fmt_json.h" #include "md4c.h" -// #include "md4c_render_html.h" -// #include "entity.h" +// JSON formatter // -// -// -------------- WORK IN PROGRESS -// +// -- WORK IN PROGRESS -- // #ifdef _WIN32 #define snprintf _snprintf #endif - +// dlog #ifndef DEBUG #define DEBUG 1 #endif @@ -31,6 +28,7 @@ typedef struct JsonFormatter_st { WBuf* outbuf; + u32 bnest; // block nesting level } JsonFormatter; @@ -39,61 +37,49 @@ typedef struct JsonFormatter_st { #define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z') #define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch)) +#define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0])) -// static inline void render_text(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) { -// // r->process_output(text, size, r->userdata); -// WBufAppendBytes(r->outbuf, text, size); -// } #define render_text(f, textptr, textlen) \ WBufAppendBytes((r)->outbuf, (textptr), (textlen)) -// #define RENDER_LITERAL(r, literal) \ -// WBufAppendBytes((r)->outbuf, (literal), (MD_SIZE)strlen(literal)) - -static char jsonEscapeMap[256]; +#define JSON_SUB_LEN 2 +static const char* jsonEscapeMap[256]; static void __attribute__((constructor)) init() { - jsonEscapeMap[(unsigned char)'"'] = 1; - jsonEscapeMap[(unsigned char)'\n'] = 1; - jsonEscapeMap[(unsigned char)'\r'] = 1; - jsonEscapeMap[(unsigned char)'\t'] = 1; + // important: Values must all be exactly JSON_SUB_LEN bytes long + jsonEscapeMap[(unsigned char)'"'] = "\\\""; + jsonEscapeMap[(unsigned char)'\n'] = "\\n"; + jsonEscapeMap[(unsigned char)'\r'] = "\\r"; + jsonEscapeMap[(unsigned char)'\t'] = "\\t"; } +// #define JSON_BYTE_NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0) +#define JSON_ESCAPE_MAP(ch) jsonEscapeMap[(unsigned char)(ch)] + static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) { MD_OFFSET beg = 0; MD_OFFSET off = 0; - #define NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0) - - while(1) { - /* Optimization: Use some loop unrolling. */ - while ( - off + 3 < size && - !NEED_ESCAPE(data[off+0]) && - !NEED_ESCAPE(data[off+1]) && - !NEED_ESCAPE(data[off+2]) && - !NEED_ESCAPE(data[off+3]) - ) { - off += 4; - } - while (off < size && !NEED_ESCAPE(data[off])) { + while (1) { + const char* sub = NULL; + while (off < size) { + sub = JSON_ESCAPE_MAP(data[off]); + if (sub != NULL) { + break; + } off++; } if (off > beg) { + // in-between WBufAppendBytes(r->outbuf, data + beg, off - beg); } - if (off < size) { - switch (data[off]) { - case '"': WBufAppendCStr(r->outbuf, "\\\""); break; - case '\n': WBufAppendCStr(r->outbuf, "\\n"); break; - case '\r': WBufAppendCStr(r->outbuf, "\\r"); break; - case '\t': WBufAppendCStr(r->outbuf, "\\t"); break; - } + if (sub) { + WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN); off++; } else { break; @@ -101,14 +87,10 @@ static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size beg = off; } - - #undef NEED_ESCAPE } -static void -render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) -{ +static void render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) { static const MD_CHAR hex_chars[] = "0123456789ABCDEF"; MD_OFFSET beg = 0; MD_OFFSET off = 0; @@ -144,56 +126,21 @@ render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) } } -static unsigned -hex_val(char ch) -{ - if('0' <= ch && ch <= '9') +static unsigned hex_val(char ch) { + if ('0' <= ch && ch <= '9') { return ch - '0'; - if('A' <= ch && ch <= 'Z') + } + if ('A' <= ch && ch <= 'Z') { return ch - 'A' + 10; - else - return ch - 'a' + 10; + } + return ch - 'a' + 10; } -static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) { - if (codepoint <= 0x7f) { - WBufAppendc(b, (char)codepoint); - return; - } - - unsigned char utf8[4]; - size_t n; - if (codepoint <= 0x7ff) { - n = 2; - utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f); - utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f); - } else if (codepoint <= 0xffff) { - n = 3; - utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf); - utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f); - utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f); - } else { - n = 4; - utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7); - utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f); - utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f); - utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f); - } - - if (0 < codepoint && codepoint <= 0x10ffff) { - WBufAppendBytes(b, (const char*)utf8, n); - } else { - static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd }; - WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char)); - } -} - -/* Translate entity to its UTF-8 equivalent, or output the verbatim one - * if such entity is unknown (or if the translation is disabled). */ +// Translate entity to its UTF-8 equivalent, or output the verbatim one +// if such entity is unknown (or if the translation is disabled). static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) { if (size > 3 && text[1] == '#') { unsigned codepoint = 0; - if(text[2] == 'x' || text[2] == 'X') { // Hexadecimal entity (e.g. "�")). for (MD_SIZE i = 3; i < size-1; i++) { @@ -205,17 +152,41 @@ static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE codepoint = 10 * codepoint + (text[i] - '0'); } } - - WBufAppendUTF8Codepoint(r->outbuf, codepoint); + if (codepoint <= 0xFF) { + const char* sub = JSON_ESCAPE_MAP(codepoint); + if (sub) { + // predefined escape code, e.g. "\n" + WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN); + } else { + // verbatim + WBufAppendUTF8Codepoint(r->outbuf, codepoint); + } + } else { + // e.g. \uD87E + WBufAppendCStr(r->outbuf, "\\u"); + if (codepoint <= 0xF) { + WBufAppendCStr(r->outbuf, "000"); + } else if (codepoint <= 0xFF) { + WBufAppendCStr(r->outbuf, "00"); + } else if (codepoint <= 0xFFF) { + WBufAppendCStr(r->outbuf, "0"); + } + WBufAppendU32(r->outbuf, codepoint, 16); + } } else { - WBufAppendBytes(r->outbuf, text, size); + // named entity + // We could do a lookup here but it would increase the WASM module binary size by + // at least 20kB, so for now, let's keep it simple and just include it verbatim until we + // can do something fancy like a compressed b-tree. + writeJsonEscaped(r, text, size); } } -static void -render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr, - void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE)) -{ +static void render_attribute( + JsonFormatter* r, + const MD_ATTRIBUTE* attr, + void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE) +) { int i; for(i = 0; attr->substr_offsets[i] < attr->size; i++) { @@ -233,68 +204,7 @@ render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr, } -static void -render_open_ol_block(JsonFormatter* r, const MD_BLOCK_OL_DETAIL* det) -{ - char buf[64]; - - if(det->start == 1) { - WBufAppendCStr(r->outbuf, "
    \n"); - return; - } - - snprintf(buf, sizeof(buf), "
      \n", det->start); - WBufAppendCStr(r->outbuf, buf); -} - -static void -render_open_li_block(JsonFormatter* r, const MD_BLOCK_LI_DETAIL* det) -{ - if(det->is_task) { - WBufAppendCStr(r->outbuf, - "
    1. " - "task_mark == 'x' || det->task_mark == 'X') { - WBufAppendCStr(r->outbuf, " checked"); - } - WBufAppendc(r->outbuf, '>'); - } else { - WBufAppendCStr(r->outbuf, "
    2. "); - } -} - -static void -render_open_code_block(JsonFormatter* r, const MD_BLOCK_CODE_DETAIL* det) -{ - WBufAppendCStr(r->outbuf, "
      lang.text != NULL) {
      -    WBufAppendCStr(r->outbuf, " class=\"language-");
      -    render_attribute(r, &det->lang, writeJsonEscaped);
      -    WBufAppendc(r->outbuf, '"');
      -  }
      -
      -  WBufAppendc(r->outbuf, '>');
      -}
      -
      -static void
      -render_open_td_block(JsonFormatter* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
      -{
      -  WBufAppendc(r->outbuf, '<');
      -  WBufAppendCStr(r->outbuf, cell_type);
      -
      -  switch (det->align) {
      -    case MD_ALIGN_LEFT:     WBufAppendCStr(r->outbuf, " align=\"left\">"); break;
      -    case MD_ALIGN_CENTER:   WBufAppendCStr(r->outbuf, " align=\"center\">"); break;
      -    case MD_ALIGN_RIGHT:    WBufAppendCStr(r->outbuf, " align=\"right\">"); break;
      -    default:                WBufAppendCStr(r->outbuf, ">"); break;
      -  }
      -}
      -
      -static void
      -render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
      -{
      +static void render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det) {
         WBufAppendCStr(r->outbuf, "href, render_url_escaped);
       
      @@ -306,18 +216,14 @@ render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
         WBufAppendCStr(r->outbuf, "\">");
       }
       
      -static void
      -render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
      -{
      +static void render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
         WBufAppendCStr(r->outbuf, "src, render_url_escaped);
       
         WBufAppendCStr(r->outbuf, "\" alt=\"");
       }
       
      -static void
      -render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
      -{
      +static void render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
         if(det->title.text != NULL) {
           WBufAppendCStr(r->outbuf, "\" title=\"");
           render_attribute(r, &det->title, writeJsonEscaped);
      @@ -326,9 +232,7 @@ render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
         WBufAppendCStr(r->outbuf, "\">");
       }
       
      -static void
      -render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
      -{
      +static void render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det) {
         WBufAppendCStr(r->outbuf, "target, writeJsonEscaped);
         WBufAppendCStr(r->outbuf, "\">");
      @@ -338,12 +242,20 @@ render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
       // ------------------------------------------------------------------------------------------------
       
       
      -static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
      -  WBufAppendCStr(r->outbuf, "{\"_\":\"");
      -  WBufAppendBytes(r->outbuf, typename, typenamelen);
      -  WBufAppendc(r->outbuf, '"');
      -}
      +static void writeNewline(JsonFormatter* r) {
      +  WBufAppendc(r->outbuf, '\n');
       
      +  static const char indent_chunk_str[] = "                ";
      +  static const u32  indent_chunk_size = (u32)(SIZEOF_ARRAY(indent_chunk_str) - 1);
      +  u32 indent = r->bnest * 4;
      +  while (indent > indent_chunk_size) {
      +    WBufAppendBytes(r->outbuf, indent_chunk_str, indent_chunk_size);
      +    indent -= indent_chunk_size;
      +  }
      +  if (indent > 0) {
      +    WBufAppendBytes(r->outbuf, indent_chunk_str, indent);
      +  }
      +}
       
       static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
         for (u32 i = 0; attr->substr_offsets[i] < attr->size; i++) {
      @@ -359,13 +271,29 @@ static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
         }
       }
       
      +static void writeKey(JsonFormatter* r, const char* rawkey, size_t rawkeyLen) {
      +  WBufAppendc(r->outbuf, ',');
      +  writeNewline(r);
      +  WBufAppendc(r->outbuf, '"');
      +  WBufAppendBytes(r->outbuf, rawkey, rawkeyLen);
      +  WBufAppendCStr(r->outbuf, "\":");
      +}
      +
      +static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
      +  WBufAppendCStr(r->outbuf, "{ \"_\": \"");
      +  WBufAppendBytes(r->outbuf, typename, typenamelen);
      +  WBufAppendc(r->outbuf, '"');
      +}
      +
       
       static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
      -  static const MD_CHAR* head[6] = { "h1", "h2", "h3", "h4", "h5", "h6" };
         JsonFormatter* r = (JsonFormatter*) userdata;
      -  const char* typename = "";
      -  size_t typenamelen = 0;
      -  #define WRITE_TYPE_START(name) writeTypeStart(r, (name), strlen((name)))
      +
      +  writeNewline(r);
      +  r->bnest++;
      +
      +  #define WRITE_TYPE_START(name) \
      +    writeTypeStart(r, (name), strlen((name)))
       
         switch (type) {
           case MD_BLOCK_DOC:   WRITE_TYPE_START("doc"); break;
      @@ -380,7 +308,9 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
           case MD_BLOCK_HR:    WRITE_TYPE_START("hr"); break;
       
           case MD_BLOCK_H: {
      -      WRITE_TYPE_START(head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]);
      +      WRITE_TYPE_START("h");
      +      WBufAppendCStr(r->outbuf, ", \"level\": ");
      +      WBufAppendU32(r->outbuf, ((MD_BLOCK_H_DETAIL*)detail)->level, 10);
             break;
           }
       
      @@ -443,28 +373,30 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
             break;
           }
       
      -// static void
      -// render_open_td_block(MD_RENDER_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
      -// {
      -//     RENDER_LITERAL(r, "<");
      -//     RENDER_LITERAL(r, cell_type);
      -
      -//     switch(det->align) {
      -//         case MD_ALIGN_LEFT:     RENDER_LITERAL(r, " align=\"left\">"); break;
      -//         case MD_ALIGN_CENTER:   RENDER_LITERAL(r, " align=\"center\">"); break;
      -//         case MD_ALIGN_RIGHT:    RENDER_LITERAL(r, " align=\"right\">"); break;
      -//         default:                RENDER_LITERAL(r, ">"); break;
      -//     }
         }
       
      -  WBufAppendCStr(r->outbuf, ", \"children\":[\n  ");
      +  WBufAppendCStr(r->outbuf, ", \"children\": [");
      +
         return 0;
       }
       
       
       static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
         JsonFormatter* r = (JsonFormatter*)userdata;
      -  WBufAppendCStr(r->outbuf, "]},\n");
      +  r->bnest--;
      +  if (*(r->outbuf->ptr-1) == ',') {
      +    // undo trailing comma
      +    // e.g.
      +    //
      +    //  "1,2,3,"
      +    //        ^
      +    //  "1,2,3"
      +    //       ^
      +    //
      +    r->outbuf->ptr--;
      +  }
      +  writeNewline(r);
      +  WBufAppendCStr(r->outbuf, "]},");
         return 0;
       }
       
      @@ -475,6 +407,7 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
         switch(type) {
           case MD_SPAN_EM:                WBufAppendCStr(r->outbuf, ""); break;
           case MD_SPAN_STRONG:            WBufAppendCStr(r->outbuf, ""); break;
      +    case MD_SPAN_U:                 WBufAppendCStr(r->outbuf, ""); break;
           case MD_SPAN_A:                 render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
           case MD_SPAN_IMG:               render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
           case MD_SPAN_CODE:              WBufAppendCStr(r->outbuf, ""); break;
      @@ -493,6 +426,7 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
         switch(type) {
           case MD_SPAN_EM:                WBufAppendCStr(r->outbuf, ""); break;
           case MD_SPAN_STRONG:            WBufAppendCStr(r->outbuf, ""); break;
      +    case MD_SPAN_U:                 WBufAppendCStr(r->outbuf, ""); break;
           case MD_SPAN_A:                 WBufAppendCStr(r->outbuf, ""); break;
           case MD_SPAN_IMG:               /*noop, handled above*/ break;
           case MD_SPAN_CODE:              WBufAppendCStr(r->outbuf, ""); break;
      @@ -508,27 +442,65 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
       static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) {
         JsonFormatter* r = (JsonFormatter*)userdata;
       
      -  WBufAppendCStr(r->outbuf, ", \"");
      -
      -  switch (type) {
      -    case MD_TEXT_NULLCHAR:  WBufAppendCStr(r->outbuf, "\\0"); break;
      -    case MD_TEXT_BR:        WBufAppendCStr(r->outbuf, "
      "); break; - case MD_TEXT_SOFTBR: WBufAppendc(r->outbuf, '\n'); break; - case MD_TEXT_HTML: render_text(r, text, size); break; - case MD_TEXT_ENTITY: writeDecodeXmlEntity(r, text, size); break; - default: writeJsonEscaped(r, text, size); break; + if (type == MD_TEXT_SOFTBR) { + // ignore soft break, i.e. + // + // Markdown: + // line1 + // line2 + // + // md4c emits: (line1, MD_TEXT_SOFTBR, line2) + // + return 0; } - WBufAppendc(r->outbuf, '"'); + writeNewline(r); + + if (type == MD_TEXT_HTML) { + WBufAppendCStr(r->outbuf, "{\"_\":\"html\",\"content\":\""); + writeJsonEscaped(r, text, size); + WBufAppendCStr(r->outbuf, "\"}"); + } else { + WBufAppendc(r->outbuf, '"'); + switch (type) { + case MD_TEXT_NULLCHAR: + WBufAppendCStr(r->outbuf, "\\0"); + break; + + case MD_TEXT_BR: + WBufAppendCStr(r->outbuf, "\\n"); + break; + + case MD_TEXT_ENTITY: + writeDecodeXmlEntity(r, text, size); + break; + + default: + writeJsonEscaped(r, text, size); + break; + } + WBufAppendc(r->outbuf, '"'); + } + + WBufAppendc(r->outbuf, ','); return 0; } -int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_flags) { - JsonFormatter render = { outbuf }; +int fmt_json( + const MD_CHAR* input, + MD_SIZE inputlen, + WBuf* outbuf, + u32 parseFlags, + u32 _fmtFlags +) { + JsonFormatter render = { + .outbuf = outbuf, + .bnest = 0, + }; MD_PARSER parser = { 0, - parser_flags, + parseFlags, enter_block_callback, leave_block_callback, enter_span_callback, @@ -538,5 +510,5 @@ int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_ NULL }; - return md_parse(input, input_size, &parser, (void*) &render); + return md_parse(input, inputlen, &parser, (void*)&render); } diff --git a/src/fmt_json.h b/src/fmt_json.h index f111b85..8ef5398 100644 --- a/src/fmt_json.h +++ b/src/fmt_json.h @@ -1,4 +1,6 @@ #pragma once #include "wbuf.h" -int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags); +#define MD_JSON_FLAG_NONE 0 + +int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags); diff --git a/src/md.c b/src/md.c index 1ad2ebf..cfd59b2 100644 --- a/src/md.c +++ b/src/md.c @@ -3,18 +3,28 @@ #include "wlib.h" #include "wbuf.h" #include "fmt_html.h" -// #include "fmt_json.h" + +#if MD_WITH_JSON +#include "fmt_json.h" +#endif // these should be in sync with "OutputFlags" in md.js -typedef enum OutputFlags { - OutputFlagHTML = 1 << 0, - OutputFlagXHTML = 1 << 1, -} OutputFlags; +typedef enum Formatter { + FormatterNONE, + FormatterHTML, + FormatterJSON, +} Formatter; + +typedef enum FormatFlags { + FormatFlagHTML = 1 << 0, + FormatFlagXHTML = 1 << 1, + FormatFlagJSON = 1 << 2, +} FormatFlags; typedef enum ErrorCode { ERR_NONE, ERR_MD_PARSE, - ERR_OUTFLAGS, + ERR_FORMAT, } ErrorCode; @@ -31,38 +41,51 @@ static WBuf outbuf; export size_t parseUTF8( - const char* inbufptr, - u32 inbuflen, - u32 parser_flags, - OutputFlags outflags, + const char* inbufptr, + u32 inbuflen, + u32 parseFlags, + Formatter formatter, + u32 fmtflags, const char** outptr ) { dlog("parseUTF8 called with inbufptr=%p inbuflen=%u\n", inbufptr, inbuflen); WBufReset(&outbuf); + WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations - if (outflags & OutputFlagHTML) { - WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations + int result = 0x7ffff; - u32 render_flags = 0; - if (outflags & OutputFlagXHTML) { - render_flags |= MD_HTML_FLAG_XHTML; - } + switch (formatter) { - if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags, render_flags) != 0) { - // fmt_html returns status of md_parse which only fails in extreme cases - // like when out of memory. md4c does not provide error codes or error messages. - WErrSet(ERR_MD_PARSE, "md parser error"); - *outptr = 0; - return 0; - } + case FormatterHTML: + result = fmt_html(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags); + break; + case FormatterJSON: + #if MD_WITH_JSON + result = fmt_json(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags); + #endif + break; + + case FormatterNONE: + break; + + } // switch + + if (result == 0x7ffff) { + WErrSet(ERR_FORMAT, "invalid formatter"); + } else if (result != 0) { + // fmt_html returns status of md_parse which only fails in extreme cases + // like when out of memory. md4c does not provide error codes or error messages. + WErrSet(ERR_MD_PARSE, "parser error"); + } + + if (result == 0) { *outptr = outbuf.start; // dlog("outbuf =>\n%.*s\n", WBufLen(&outbuf), outbuf.start); return WBufLen(&outbuf); } - WErrSet(ERR_OUTFLAGS, "no output format set in output flags"); *outptr = 0; return 0; } diff --git a/src/md.js b/src/md.js index 20d93ce..f9e134d 100644 --- a/src/md.js +++ b/src/md.js @@ -41,6 +41,15 @@ const OutputFlags = { XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set) } +// these should be in sync with "Formatter" in md.c +const FormatterNONE = 0 + , FormatterHTML = 1 + , FormatterJSON = 2 + +// html formatter flags (sync with fmt_html.h) +const MD_HTML_FLAG_XHTML = 1 << 0 + + export function parse(source, options) { options = options || {} @@ -49,17 +58,17 @@ export function parse(source, options) { options.parseFlags ) - let outputFlags = 0 - switch (options.format) { - case "xhtml": - outputFlags |= OutputFlags.HTML | OutputFlags.XHTML - break + let formatter = FormatterHTML + let fmtFlags = 0 + if (options.format) switch (options.format) { case "html": - case undefined: - case null: - case "": - outputFlags |= OutputFlags.HTML + break + case "xhtml": + fmtFlags |= MD_HTML_FLAG_XHTML + break + case "json": + formatter = FormatterJSON break default: @@ -68,7 +77,7 @@ export function parse(source, options) { let buf = typeof source == "string" ? utf8.encode(source) : source let outbuf = withOutPtr(outptr => withTmpBytePtr(buf, (inptr, inlen) => - _parseUTF8(inptr, inlen, parseFlags, outputFlags, outptr) + _parseUTF8(inptr, inlen, parseFlags, formatter, fmtFlags, outptr) )) // check for error and throw if needed diff --git a/src/md4c.c b/src/md4c.c index 729cb72..40ba030 100644 --- a/src/md4c.c +++ b/src/md4c.c @@ -3678,7 +3678,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index) /* If we can be a closer, try to resolve with the preceding opener. */ if(mark->flags & MD_MARK_POTENTIAL_CLOSER) { MD_MARK* opener = NULL; - int opener_index; + int opener_index = 0; if(mark->ch == _T('*')) { MD_MARKCHAIN* opener_chains[6]; @@ -5654,7 +5654,7 @@ md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end) return indent - total_indent; } -static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 }; +static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 }; /* Analyze type of the line and find some its properties. This serves as a * main input for determining type and boundaries of a block. */ diff --git a/src/wbuf.c b/src/wbuf.c index 121c594..9af894c 100644 --- a/src/wbuf.c +++ b/src/wbuf.c @@ -15,12 +15,8 @@ void WBufReset(WBuf* b) { b->ptr = b->start; } -inline size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size) -inline size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start -inline size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available - // grows buffer so that there is at least minspace available space -static void WBufGrow(WBuf* b, size_t minspace) { +void WBufGrow(WBuf* b, size_t minspace) { // size_t avail = b->end - b->ptr; size_t len = WBufLen(b); // store len before changing b size_t cap = WBufCap(b); @@ -42,13 +38,6 @@ void WBufReserve(WBuf* b, size_t minspace) { } } -void WBufAppendc(WBuf* b, char c) { - if (WBufAvail(b) < 1) { - WBufGrow(b, 1); - } - *(b->ptr++) = c; -} - void WBufAppendBytes(WBuf* b, const void* bytes, size_t len) { if (WBufAvail(b) < len) { WBufGrow(b, len); @@ -162,6 +151,35 @@ void WBufAppendU32(WBuf* b, u32 n, u32 radix) { } +void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint) { + unsigned char utf8[4]; + size_t n; + if (codepoint <= 0x7ff) { + n = 2; + utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f); + utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f); + } else if (codepoint <= 0xffff) { + n = 3; + utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf); + utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f); + utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f); + } else { + n = 4; + utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7); + utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f); + utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f); + utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f); + } + + if (0 < codepoint && codepoint <= 0x10ffff) { + WBufAppendBytes(b, (const char*)utf8, n); + } else { + static const char utf8_replacement_char[] = { 0xef, 0xbf, 0xbd }; + WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char)); + } +} + + // static void WBufAppendSlug(WBuf* b, const char* text) { // size_t len = strlen(text); // WBufReserve(b, len); diff --git a/src/wbuf.h b/src/wbuf.h index 4f0df26..5dcbb75 100644 --- a/src/wbuf.h +++ b/src/wbuf.h @@ -11,13 +11,13 @@ void WBufInit(WBuf*); void WBufFree(WBuf*); void WBufReset(WBuf*); -size_t WBufCap(WBuf*); // total capacity (size) -size_t WBufLen(WBuf*); // valid bytes at start -size_t WBufAvail(WBuf*); // bytes available +inline static size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size) +inline static size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start +inline static size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available void WBufReserve(WBuf*, size_t minspace); -void WBufAppendc(WBuf*, char c); +static void WBufAppendc(WBuf*, char c); void WBufAppendBytes(WBuf*, const void* bytes, size_t len); void WBufAppendStr(WBuf*, const char* pch); #define WBufAppendCStr(b, cstr) WBufAppendBytes((b), (cstr), strlen(cstr)) @@ -27,3 +27,26 @@ void _WBufAppendHtml(WBuf*, const char* pch, bool isattr); // append u32 integer n. radix must be in range [2-36] void WBufAppendU32(WBuf*, u32 n, u32 radix); + +static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint); + +// grows buffer so that there is at least minspace available space +void WBufGrow(WBuf* b, size_t minspace); + + + +// implementation of WBufAppendUTF8Codepoint +void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint); +inline static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) { + if (codepoint > 0x7f) { + return _WBufAppendUTF8Codepoint2(b, codepoint); + } + WBufAppendc(b, (char)codepoint); +} + +inline static void WBufAppendc(WBuf* b, char c) { + if (WBufAvail(b) < 1) { + WBufGrow(b, 1); + } + *(b->ptr++) = c; +} diff --git a/wasmc.js b/wasmc.js index c92392b..1447d4f 100644 --- a/wasmc.js +++ b/wasmc.js @@ -1,16 +1,29 @@ const package = require("./package.json") const outdir = debug ? builddir : "dist" +cflags = cflags.concat([ + "-std=c11", + "-Wall", + "-Wuninitialized", + "-Wmissing-field-initializers", + "-Wconditional-uninitialized", + "-Wno-nullability-completeness", + "-Wno-unused-function", + "-fcolor-diagnostics", +]) + const m = { jsentry: "src/md.js", + sources: [ "src/wlib.c", "src/wbuf.c", "src/md.c", "src/md4c.c", "src/fmt_html.c", - // "src/fmt_json.c", - ], + debug ? "src/fmt_json.c" : "", + ].filter(s => !!s), + cflags: [ "-DMD4C_USE_UTF8", ].concat(debug ? [ @@ -20,6 +33,7 @@ const m = { "-DSAFE_HEAP=1", // emcc "-DSTACK_OVERFLOW_CHECK=1", // emcc "-DDEMANGLE_SUPPORT=1", // emcc + "-DMD_WITH_JSON=1", // enable WIP json formatter ] : [ // release flags ]),