mirror of
https://github.com/danbulant/markdown-wasm
synced 2026-05-19 04:18:38 +00:00
json wip
This commit is contained in:
parent
cb877fce88
commit
bef16e0aa0
12 changed files with 376 additions and 265 deletions
6
example/example-json.js
Normal file
6
example/example-json.js
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
// Example: run example2.md through the work-in-progress JSON formatter of the
// debug build and print whatever parse() returns.
const { readFileSync } = require("fs")
const markdown = require("../build/debug/markdown.node.js")

// no encoding is given, so the file is read as raw bytes and passed on as-is
const input = readFileSync(__dirname + "/example2.md")

console.log(markdown.parse(input, { format: "json" }))
|
||||
44
example/example2.md
Normal file
44
example/example2.md
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
# H1
|
||||
|
||||
This is a paragraph
|
||||
|
||||
## H2
|
||||
|
||||
This is a paragraph
|
||||
|
||||
## Another
|
||||
|
||||
This is a paragraph with style *italic* _italic_ **bold** __bold__
|
||||
|
||||

|
||||

|
||||
|
||||
*Hello [link](https://rsms.me/) lol*
|
||||
|
||||
Hello [*link*](https://rsms.me/) lol "cat"
|
||||
|
||||
Hello from *[link](https://rsms.me/)* to __everyone__ `reading this`
|
||||
|
||||
Here's an [**important** anchor link](#example).
|
||||
|
||||
line 1
|
||||
line 2
|
||||
|
||||
XML & html "entities"
|
||||
&
|
||||
&
|
||||
&
|
||||
&
|
||||
&
|
||||
&
|
||||


|
||||
|
||||
## Lists
|
||||
|
||||
- Unordered
|
||||
* Lists
|
||||
+ Of mixed type
|
||||
|
||||
1. Ordered
|
||||
2. Lists
|
||||
4. Numbers are ignored
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* md4c modified for mdjs.
|
||||
* md4c modified for markdown-wasm.
|
||||
* Original source code is licensed as follows:
|
||||
*
|
||||
* Copyright (c) 2016-2019 Martin Mitas
|
||||
|
|
@ -439,14 +439,14 @@ int fmt_html(
|
|||
const MD_CHAR* input,
|
||||
MD_SIZE input_size,
|
||||
WBuf* outbuf,
|
||||
u32 parser_flags,
|
||||
u32 render_flags
|
||||
u32 parseFlags,
|
||||
u32 fmtFlags
|
||||
) {
|
||||
HtmlRenderer render = { outbuf, 0, 0, render_flags };
|
||||
HtmlRenderer render = { outbuf, 0, 0, fmtFlags };
|
||||
|
||||
MD_PARSER parser = {
|
||||
0,
|
||||
parser_flags,
|
||||
parseFlags,
|
||||
enter_block_callback,
|
||||
leave_block_callback,
|
||||
enter_span_callback,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#pragma once
|
||||
#include "wbuf.h"
|
||||
|
||||
#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g. <br>, generate <br/>
|
||||
#define MD_HTML_FLAG_XHTML (1 << 0) // instead of e.g. <br>, generate <br/>
|
||||
|
||||
int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags, u32 renderFlags);
|
||||
int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags);
|
||||
|
|
|
|||
378
src/fmt_json.c
378
src/fmt_json.c
|
|
@ -4,20 +4,17 @@
|
|||
|
||||
#include "fmt_json.h"
|
||||
#include "md4c.h"
|
||||
// #include "md4c_render_html.h"
|
||||
// #include "entity.h"
|
||||
|
||||
// JSON formatter
|
||||
//
|
||||
//
|
||||
// -------------- WORK IN PROGRESS
|
||||
//
|
||||
// -- WORK IN PROGRESS --
|
||||
//
|
||||
|
||||
#ifdef _WIN32
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
|
||||
// dlog
|
||||
#ifndef DEBUG
|
||||
#define DEBUG 1
|
||||
#endif
|
||||
|
|
@ -31,6 +28,7 @@
|
|||
|
||||
// State handed to the md4c callbacks (as userdata) while formatting one document as JSON.
typedef struct JsonFormatter_st {
|
||||
WBuf* outbuf; // buffer that receives the generated JSON
|
||||
u32 bnest; // block nesting level
|
||||
} JsonFormatter;
|
||||
|
||||
|
||||
|
|
@ -39,61 +37,49 @@ typedef struct JsonFormatter_st {
|
|||
#define ISUPPER(ch) ('A' <= (ch) && (ch) <= 'Z')
|
||||
#define ISALNUM(ch) (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
|
||||
|
||||
#define SIZEOF_ARRAY(a) (sizeof(a) / sizeof(a[0]))
|
||||
|
||||
// static inline void render_text(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
|
||||
// // r->process_output(text, size, r->userdata);
|
||||
// WBufAppendBytes(r->outbuf, text, size);
|
||||
// }
|
||||
|
||||
#define render_text(f, textptr, textlen) \
|
||||
WBufAppendBytes((r)->outbuf, (textptr), (textlen))
|
||||
|
||||
// #define RENDER_LITERAL(r, literal) \
|
||||
// WBufAppendBytes((r)->outbuf, (literal), (MD_SIZE)strlen(literal))
|
||||
|
||||
|
||||
static char jsonEscapeMap[256];
|
||||
#define JSON_SUB_LEN 2
|
||||
static const char* jsonEscapeMap[256];
|
||||
|
||||
static void __attribute__((constructor)) init() {
|
||||
jsonEscapeMap[(unsigned char)'"'] = 1;
|
||||
jsonEscapeMap[(unsigned char)'\n'] = 1;
|
||||
jsonEscapeMap[(unsigned char)'\r'] = 1;
|
||||
jsonEscapeMap[(unsigned char)'\t'] = 1;
|
||||
// important: Values must all be exactly JSON_SUB_LEN bytes long
|
||||
jsonEscapeMap[(unsigned char)'"'] = "\\\"";
|
||||
jsonEscapeMap[(unsigned char)'\n'] = "\\n";
|
||||
jsonEscapeMap[(unsigned char)'\r'] = "\\r";
|
||||
jsonEscapeMap[(unsigned char)'\t'] = "\\t";
|
||||
}
|
||||
|
||||
// #define JSON_BYTE_NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0)
|
||||
#define JSON_ESCAPE_MAP(ch) jsonEscapeMap[(unsigned char)(ch)]
|
||||
|
||||
|
||||
static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
|
||||
MD_OFFSET beg = 0;
|
||||
MD_OFFSET off = 0;
|
||||
|
||||
#define NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0)
|
||||
|
||||
while(1) {
|
||||
/* Optimization: Use some loop unrolling. */
|
||||
while (
|
||||
off + 3 < size &&
|
||||
!NEED_ESCAPE(data[off+0]) &&
|
||||
!NEED_ESCAPE(data[off+1]) &&
|
||||
!NEED_ESCAPE(data[off+2]) &&
|
||||
!NEED_ESCAPE(data[off+3])
|
||||
) {
|
||||
off += 4;
|
||||
}
|
||||
while (off < size && !NEED_ESCAPE(data[off])) {
|
||||
while (1) {
|
||||
const char* sub = NULL;
|
||||
while (off < size) {
|
||||
sub = JSON_ESCAPE_MAP(data[off]);
|
||||
if (sub != NULL) {
|
||||
break;
|
||||
}
|
||||
off++;
|
||||
}
|
||||
|
||||
if (off > beg) {
|
||||
// in-between
|
||||
WBufAppendBytes(r->outbuf, data + beg, off - beg);
|
||||
}
|
||||
|
||||
if (off < size) {
|
||||
switch (data[off]) {
|
||||
case '"': WBufAppendCStr(r->outbuf, "\\\""); break;
|
||||
case '\n': WBufAppendCStr(r->outbuf, "\\n"); break;
|
||||
case '\r': WBufAppendCStr(r->outbuf, "\\r"); break;
|
||||
case '\t': WBufAppendCStr(r->outbuf, "\\t"); break;
|
||||
}
|
||||
if (sub) {
|
||||
WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
|
||||
off++;
|
||||
} else {
|
||||
break;
|
||||
|
|
@ -101,14 +87,10 @@ static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size
|
|||
|
||||
beg = off;
|
||||
}
|
||||
|
||||
#undef NEED_ESCAPE
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
|
||||
{
|
||||
static void render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
|
||||
static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
|
||||
MD_OFFSET beg = 0;
|
||||
MD_OFFSET off = 0;
|
||||
|
|
@ -144,56 +126,21 @@ render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
|
|||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
hex_val(char ch)
|
||||
{
|
||||
if('0' <= ch && ch <= '9')
|
||||
static unsigned hex_val(char ch) {
|
||||
if ('0' <= ch && ch <= '9') {
|
||||
return ch - '0';
|
||||
if('A' <= ch && ch <= 'Z')
|
||||
}
|
||||
if ('A' <= ch && ch <= 'Z') {
|
||||
return ch - 'A' + 10;
|
||||
else
|
||||
return ch - 'a' + 10;
|
||||
}
|
||||
return ch - 'a' + 10;
|
||||
}
|
||||
|
||||
static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) {
|
||||
if (codepoint <= 0x7f) {
|
||||
WBufAppendc(b, (char)codepoint);
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned char utf8[4];
|
||||
size_t n;
|
||||
if (codepoint <= 0x7ff) {
|
||||
n = 2;
|
||||
utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
|
||||
utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
|
||||
} else if (codepoint <= 0xffff) {
|
||||
n = 3;
|
||||
utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
|
||||
utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
|
||||
utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
|
||||
} else {
|
||||
n = 4;
|
||||
utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
|
||||
utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
|
||||
utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
|
||||
utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
|
||||
}
|
||||
|
||||
if (0 < codepoint && codepoint <= 0x10ffff) {
|
||||
WBufAppendBytes(b, (const char*)utf8, n);
|
||||
} else {
|
||||
static const MD_CHAR utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };
|
||||
WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char));
|
||||
}
|
||||
}
|
||||
|
||||
/* Translate entity to its UTF-8 equivalent, or output the verbatim one
|
||||
* if such entity is unknown (or if the translation is disabled). */
|
||||
// Translate entity to its UTF-8 equivalent, or output the verbatim one
|
||||
// if such entity is unknown (or if the translation is disabled).
|
||||
static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
|
||||
if (size > 3 && text[1] == '#') {
|
||||
unsigned codepoint = 0;
|
||||
|
||||
if(text[2] == 'x' || text[2] == 'X') {
|
||||
// Hexadecimal entity (e.g. "&#x2764;").
|
||||
for (MD_SIZE i = 3; i < size-1; i++) {
|
||||
|
|
@ -205,17 +152,41 @@ static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE
|
|||
codepoint = 10 * codepoint + (text[i] - '0');
|
||||
}
|
||||
}
|
||||
|
||||
WBufAppendUTF8Codepoint(r->outbuf, codepoint);
|
||||
if (codepoint <= 0xFF) {
|
||||
const char* sub = JSON_ESCAPE_MAP(codepoint);
|
||||
if (sub) {
|
||||
// predefined escape code, e.g. "\n"
|
||||
WBufAppendBytes(r->outbuf, sub, JSON_SUB_LEN);
|
||||
} else {
|
||||
// verbatim
|
||||
WBufAppendUTF8Codepoint(r->outbuf, codepoint);
|
||||
}
|
||||
} else {
|
||||
// e.g. \uD87E
|
||||
WBufAppendCStr(r->outbuf, "\\u");
|
||||
if (codepoint <= 0xF) {
|
||||
WBufAppendCStr(r->outbuf, "000");
|
||||
} else if (codepoint <= 0xFF) {
|
||||
WBufAppendCStr(r->outbuf, "00");
|
||||
} else if (codepoint <= 0xFFF) {
|
||||
WBufAppendCStr(r->outbuf, "0");
|
||||
}
|
||||
WBufAppendU32(r->outbuf, codepoint, 16);
|
||||
}
|
||||
} else {
|
||||
WBufAppendBytes(r->outbuf, text, size);
|
||||
// named entity
|
||||
// We could do a lookup here but it would increase the WASM module binary size by
|
||||
// at least 20kB, so for now, let's keep it simple and just include it verbatim until we
|
||||
// can do something fancy like a compressed b-tree.
|
||||
writeJsonEscaped(r, text, size);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
|
||||
void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE))
|
||||
{
|
||||
static void render_attribute(
|
||||
JsonFormatter* r,
|
||||
const MD_ATTRIBUTE* attr,
|
||||
void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE)
|
||||
) {
|
||||
int i;
|
||||
|
||||
for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
|
||||
|
|
@ -233,68 +204,7 @@ render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
render_open_ol_block(JsonFormatter* r, const MD_BLOCK_OL_DETAIL* det)
|
||||
{
|
||||
char buf[64];
|
||||
|
||||
if(det->start == 1) {
|
||||
WBufAppendCStr(r->outbuf, "<ol>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
|
||||
WBufAppendCStr(r->outbuf, buf);
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_li_block(JsonFormatter* r, const MD_BLOCK_LI_DETAIL* det)
|
||||
{
|
||||
if(det->is_task) {
|
||||
WBufAppendCStr(r->outbuf,
|
||||
"<li class=\"task-list-item\">"
|
||||
"<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
|
||||
if (det->task_mark == 'x' || det->task_mark == 'X') {
|
||||
WBufAppendCStr(r->outbuf, " checked");
|
||||
}
|
||||
WBufAppendc(r->outbuf, '>');
|
||||
} else {
|
||||
WBufAppendCStr(r->outbuf, "<li>");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_code_block(JsonFormatter* r, const MD_BLOCK_CODE_DETAIL* det)
|
||||
{
|
||||
WBufAppendCStr(r->outbuf, "<pre><code");
|
||||
|
||||
/* If known, output the HTML 5 attribute class="language-LANGNAME". */
|
||||
if(det->lang.text != NULL) {
|
||||
WBufAppendCStr(r->outbuf, " class=\"language-");
|
||||
render_attribute(r, &det->lang, writeJsonEscaped);
|
||||
WBufAppendc(r->outbuf, '"');
|
||||
}
|
||||
|
||||
WBufAppendc(r->outbuf, '>');
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_td_block(JsonFormatter* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
|
||||
{
|
||||
WBufAppendc(r->outbuf, '<');
|
||||
WBufAppendCStr(r->outbuf, cell_type);
|
||||
|
||||
switch (det->align) {
|
||||
case MD_ALIGN_LEFT: WBufAppendCStr(r->outbuf, " align=\"left\">"); break;
|
||||
case MD_ALIGN_CENTER: WBufAppendCStr(r->outbuf, " align=\"center\">"); break;
|
||||
case MD_ALIGN_RIGHT: WBufAppendCStr(r->outbuf, " align=\"right\">"); break;
|
||||
default: WBufAppendCStr(r->outbuf, ">"); break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
|
||||
{
|
||||
static void render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det) {
|
||||
WBufAppendCStr(r->outbuf, "<a href=\"");
|
||||
render_attribute(r, &det->href, render_url_escaped);
|
||||
|
||||
|
|
@ -306,18 +216,14 @@ render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
|
|||
WBufAppendCStr(r->outbuf, "\">");
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
|
||||
{
|
||||
static void render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
|
||||
WBufAppendCStr(r->outbuf, "<img src=\"");
|
||||
render_attribute(r, &det->src, render_url_escaped);
|
||||
|
||||
WBufAppendCStr(r->outbuf, "\" alt=\"");
|
||||
}
|
||||
|
||||
static void
|
||||
render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
|
||||
{
|
||||
static void render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det) {
|
||||
if(det->title.text != NULL) {
|
||||
WBufAppendCStr(r->outbuf, "\" title=\"");
|
||||
render_attribute(r, &det->title, writeJsonEscaped);
|
||||
|
|
@ -326,9 +232,7 @@ render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
|
|||
WBufAppendCStr(r->outbuf, "\">");
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
|
||||
{
|
||||
static void render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det) {
|
||||
WBufAppendCStr(r->outbuf, "<x-wikilink data-target=\"");
|
||||
render_attribute(r, &det->target, writeJsonEscaped);
|
||||
WBufAppendCStr(r->outbuf, "\">");
|
||||
|
|
@ -338,12 +242,20 @@ render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
|
|||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
|
||||
WBufAppendCStr(r->outbuf, "{\"_\":\"");
|
||||
WBufAppendBytes(r->outbuf, typename, typenamelen);
|
||||
WBufAppendc(r->outbuf, '"');
|
||||
}
|
||||
// Starts a new line in the output and indents it according to the current
// block nesting level (four indent bytes per level).
static void writeNewline(JsonFormatter* r) {
  static const char indent_chunk_str[] = " ";
  static const u32 indent_chunk_size = (u32)(SIZEOF_ARRAY(indent_chunk_str) - 1);

  WBufAppendc(r->outbuf, '\n');

  // write whole chunks while more than one chunk is outstanding ...
  u32 remaining = r->bnest * 4;
  for (; remaining > indent_chunk_size; remaining -= indent_chunk_size) {
    WBufAppendBytes(r->outbuf, indent_chunk_str, indent_chunk_size);
  }

  // ... then the rest, which is at most one chunk long
  if (remaining != 0) {
    WBufAppendBytes(r->outbuf, indent_chunk_str, remaining);
  }
}
|
||||
|
||||
static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
|
||||
for (u32 i = 0; attr->substr_offsets[i] < attr->size; i++) {
|
||||
|
|
@ -359,13 +271,29 @@ static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
|
|||
}
|
||||
}
|
||||
|
||||
// Writes the start of an additional object member: a separating comma, a fresh
// indented line, then the key in double quotes followed by a colon.
// rawkey is copied verbatim (rawkeyLen bytes); it is not JSON-escaped here.
static void writeKey(JsonFormatter* r, const char* rawkey, size_t rawkeyLen) {
  WBuf* out = r->outbuf;

  WBufAppendc(out, ',');
  writeNewline(r);

  WBufAppendc(out, '"');
  WBufAppendBytes(out, rawkey, rawkeyLen);
  WBufAppendCStr(out, "\":");
}
|
||||
|
||||
// Opens a JSON object for a node and writes its type tag under the "_" key.
// typename is copied verbatim (typenamelen bytes); the object is left open so
// the caller can append further members.
static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
  WBuf* out = r->outbuf;

  WBufAppendCStr(out, "{ \"_\": \"");
  WBufAppendBytes(out, typename, typenamelen);
  WBufAppendc(out, '"');
}
|
||||
|
||||
|
||||
static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
|
||||
static const MD_CHAR* head[6] = { "h1", "h2", "h3", "h4", "h5", "h6" };
|
||||
JsonFormatter* r = (JsonFormatter*) userdata;
|
||||
const char* typename = "";
|
||||
size_t typenamelen = 0;
|
||||
#define WRITE_TYPE_START(name) writeTypeStart(r, (name), strlen((name)))
|
||||
|
||||
writeNewline(r);
|
||||
r->bnest++;
|
||||
|
||||
#define WRITE_TYPE_START(name) \
|
||||
writeTypeStart(r, (name), strlen((name)))
|
||||
|
||||
switch (type) {
|
||||
case MD_BLOCK_DOC: WRITE_TYPE_START("doc"); break;
|
||||
|
|
@ -380,7 +308,9 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
|
|||
case MD_BLOCK_HR: WRITE_TYPE_START("hr"); break;
|
||||
|
||||
case MD_BLOCK_H: {
|
||||
WRITE_TYPE_START(head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]);
|
||||
WRITE_TYPE_START("h");
|
||||
WBufAppendCStr(r->outbuf, ", \"level\": ");
|
||||
WBufAppendU32(r->outbuf, ((MD_BLOCK_H_DETAIL*)detail)->level, 10);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -443,28 +373,30 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
|
|||
break;
|
||||
}
|
||||
|
||||
// static void
|
||||
// render_open_td_block(MD_RENDER_HTML* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
|
||||
// {
|
||||
// RENDER_LITERAL(r, "<");
|
||||
// RENDER_LITERAL(r, cell_type);
|
||||
|
||||
// switch(det->align) {
|
||||
// case MD_ALIGN_LEFT: RENDER_LITERAL(r, " align=\"left\">"); break;
|
||||
// case MD_ALIGN_CENTER: RENDER_LITERAL(r, " align=\"center\">"); break;
|
||||
// case MD_ALIGN_RIGHT: RENDER_LITERAL(r, " align=\"right\">"); break;
|
||||
// default: RENDER_LITERAL(r, ">"); break;
|
||||
// }
|
||||
}
|
||||
|
||||
WBufAppendCStr(r->outbuf, ", \"children\":[\n ");
|
||||
WBufAppendCStr(r->outbuf, ", \"children\": [");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
|
||||
JsonFormatter* r = (JsonFormatter*)userdata;
|
||||
WBufAppendCStr(r->outbuf, "]},\n");
|
||||
r->bnest--;
|
||||
if (*(r->outbuf->ptr-1) == ',') {
|
||||
// undo trailing comma
|
||||
// e.g.
|
||||
//
|
||||
// "1,2,3,"
|
||||
// ^
|
||||
// "1,2,3"
|
||||
// ^
|
||||
//
|
||||
r->outbuf->ptr--;
|
||||
}
|
||||
writeNewline(r);
|
||||
WBufAppendCStr(r->outbuf, "]},");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -475,6 +407,7 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
|
|||
switch(type) {
|
||||
case MD_SPAN_EM: WBufAppendCStr(r->outbuf, "<em>"); break;
|
||||
case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, "<b>"); break;
|
||||
case MD_SPAN_U: WBufAppendCStr(r->outbuf, "<u>"); break;
|
||||
case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
|
||||
case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
|
||||
case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, "<code>"); break;
|
||||
|
|
@ -493,6 +426,7 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
|
|||
switch(type) {
|
||||
case MD_SPAN_EM: WBufAppendCStr(r->outbuf, "</em>"); break;
|
||||
case MD_SPAN_STRONG: WBufAppendCStr(r->outbuf, "</b>"); break;
|
||||
case MD_SPAN_U: WBufAppendCStr(r->outbuf, "</u>"); break;
|
||||
case MD_SPAN_A: WBufAppendCStr(r->outbuf, "</a>"); break;
|
||||
case MD_SPAN_IMG: /*noop, handled above*/ break;
|
||||
case MD_SPAN_CODE: WBufAppendCStr(r->outbuf, "</code>"); break;
|
||||
|
|
@ -508,27 +442,65 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
|
|||
static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) {
|
||||
JsonFormatter* r = (JsonFormatter*)userdata;
|
||||
|
||||
WBufAppendCStr(r->outbuf, ", \"");
|
||||
|
||||
switch (type) {
|
||||
case MD_TEXT_NULLCHAR: WBufAppendCStr(r->outbuf, "\\0"); break;
|
||||
case MD_TEXT_BR: WBufAppendCStr(r->outbuf, "<br>"); break;
|
||||
case MD_TEXT_SOFTBR: WBufAppendc(r->outbuf, '\n'); break;
|
||||
case MD_TEXT_HTML: render_text(r, text, size); break;
|
||||
case MD_TEXT_ENTITY: writeDecodeXmlEntity(r, text, size); break;
|
||||
default: writeJsonEscaped(r, text, size); break;
|
||||
if (type == MD_TEXT_SOFTBR) {
|
||||
// ignore soft break, i.e.
|
||||
//
|
||||
// Markdown:
|
||||
// line1
|
||||
// line2
|
||||
//
|
||||
// md4c emits: (line1, MD_TEXT_SOFTBR, line2)
|
||||
//
|
||||
return 0;
|
||||
}
|
||||
|
||||
WBufAppendc(r->outbuf, '"');
|
||||
writeNewline(r);
|
||||
|
||||
if (type == MD_TEXT_HTML) {
|
||||
WBufAppendCStr(r->outbuf, "{\"_\":\"html\",\"content\":\"");
|
||||
writeJsonEscaped(r, text, size);
|
||||
WBufAppendCStr(r->outbuf, "\"}");
|
||||
} else {
|
||||
WBufAppendc(r->outbuf, '"');
|
||||
switch (type) {
|
||||
case MD_TEXT_NULLCHAR:
|
||||
WBufAppendCStr(r->outbuf, "\\0");
|
||||
break;
|
||||
|
||||
case MD_TEXT_BR:
|
||||
WBufAppendCStr(r->outbuf, "\\n");
|
||||
break;
|
||||
|
||||
case MD_TEXT_ENTITY:
|
||||
writeDecodeXmlEntity(r, text, size);
|
||||
break;
|
||||
|
||||
default:
|
||||
writeJsonEscaped(r, text, size);
|
||||
break;
|
||||
}
|
||||
WBufAppendc(r->outbuf, '"');
|
||||
}
|
||||
|
||||
WBufAppendc(r->outbuf, ',');
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_flags) {
|
||||
JsonFormatter render = { outbuf };
|
||||
int fmt_json(
|
||||
const MD_CHAR* input,
|
||||
MD_SIZE inputlen,
|
||||
WBuf* outbuf,
|
||||
u32 parseFlags,
|
||||
u32 _fmtFlags
|
||||
) {
|
||||
JsonFormatter render = {
|
||||
.outbuf = outbuf,
|
||||
.bnest = 0,
|
||||
};
|
||||
MD_PARSER parser = {
|
||||
0,
|
||||
parser_flags,
|
||||
parseFlags,
|
||||
enter_block_callback,
|
||||
leave_block_callback,
|
||||
enter_span_callback,
|
||||
|
|
@ -538,5 +510,5 @@ int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_
|
|||
NULL
|
||||
};
|
||||
|
||||
return md_parse(input, input_size, &parser, (void*) &render);
|
||||
return md_parse(input, inputlen, &parser, (void*)&render);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#pragma once
|
||||
#include "wbuf.h"
|
||||
|
||||
int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags);
|
||||
#define MD_JSON_FLAG_NONE 0
|
||||
|
||||
int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parseFlags, u32 fmtFlags);
|
||||
|
|
|
|||
71
src/md.c
71
src/md.c
|
|
@ -3,18 +3,28 @@
|
|||
#include "wlib.h"
|
||||
#include "wbuf.h"
|
||||
#include "fmt_html.h"
|
||||
// #include "fmt_json.h"
|
||||
|
||||
#if MD_WITH_JSON
|
||||
#include "fmt_json.h"
|
||||
#endif
|
||||
|
||||
// these should be in sync with "OutputFlags" in md.js
|
||||
typedef enum OutputFlags {
|
||||
OutputFlagHTML = 1 << 0,
|
||||
OutputFlagXHTML = 1 << 1,
|
||||
} OutputFlags;
|
||||
// Selects which output formatter parseUTF8 runs.
// The numeric values must stay in sync with the Formatter* constants in md.js.
typedef enum Formatter {
|
||||
FormatterNONE, // no formatter; parseUTF8 reports ERR_FORMAT ("invalid formatter")
|
||||
FormatterHTML, // format with fmt_html
|
||||
FormatterJSON, // format with fmt_json; only wired up when MD_WITH_JSON is set
|
||||
} Formatter;
|
||||
|
||||
// Bit flags naming output formats.
// NOTE(review): nothing in the visible part of this file references this enum;
// parseUTF8 takes a Formatter plus a plain u32 fmtflags — confirm it is still needed.
typedef enum FormatFlags {
|
||||
FormatFlagHTML = 1 << 0,
|
||||
FormatFlagXHTML = 1 << 1,
|
||||
FormatFlagJSON = 1 << 2,
|
||||
} FormatFlags;
|
||||
|
||||
typedef enum ErrorCode {
|
||||
ERR_NONE,
|
||||
ERR_MD_PARSE,
|
||||
ERR_OUTFLAGS,
|
||||
ERR_FORMAT,
|
||||
} ErrorCode;
|
||||
|
||||
|
||||
|
|
@ -31,38 +41,51 @@ static WBuf outbuf;
|
|||
|
||||
|
||||
export size_t parseUTF8(
|
||||
const char* inbufptr,
|
||||
u32 inbuflen,
|
||||
u32 parser_flags,
|
||||
OutputFlags outflags,
|
||||
const char* inbufptr,
|
||||
u32 inbuflen,
|
||||
u32 parseFlags,
|
||||
Formatter formatter,
|
||||
u32 fmtflags,
|
||||
const char** outptr
|
||||
) {
|
||||
dlog("parseUTF8 called with inbufptr=%p inbuflen=%u\n", inbufptr, inbuflen);
|
||||
|
||||
WBufReset(&outbuf);
|
||||
WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations
|
||||
|
||||
if (outflags & OutputFlagHTML) {
|
||||
WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations
|
||||
int result = 0x7ffff;
|
||||
|
||||
u32 render_flags = 0;
|
||||
if (outflags & OutputFlagXHTML) {
|
||||
render_flags |= MD_HTML_FLAG_XHTML;
|
||||
}
|
||||
switch (formatter) {
|
||||
|
||||
if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags, render_flags) != 0) {
|
||||
// fmt_html returns status of md_parse which only fails in extreme cases
|
||||
// like when out of memory. md4c does not provide error codes or error messages.
|
||||
WErrSet(ERR_MD_PARSE, "md parser error");
|
||||
*outptr = 0;
|
||||
return 0;
|
||||
}
|
||||
case FormatterHTML:
|
||||
result = fmt_html(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags);
|
||||
break;
|
||||
|
||||
case FormatterJSON:
|
||||
#if MD_WITH_JSON
|
||||
result = fmt_json(inbufptr, inbuflen, &outbuf, parseFlags, fmtflags);
|
||||
#endif
|
||||
break;
|
||||
|
||||
case FormatterNONE:
|
||||
break;
|
||||
|
||||
} // switch
|
||||
|
||||
if (result == 0x7ffff) {
|
||||
WErrSet(ERR_FORMAT, "invalid formatter");
|
||||
} else if (result != 0) {
|
||||
// fmt_html returns status of md_parse which only fails in extreme cases
|
||||
// like when out of memory. md4c does not provide error codes or error messages.
|
||||
WErrSet(ERR_MD_PARSE, "parser error");
|
||||
}
|
||||
|
||||
if (result == 0) {
|
||||
*outptr = outbuf.start;
|
||||
// dlog("outbuf =>\n%.*s\n", WBufLen(&outbuf), outbuf.start);
|
||||
return WBufLen(&outbuf);
|
||||
}
|
||||
|
||||
WErrSet(ERR_OUTFLAGS, "no output format set in output flags");
|
||||
*outptr = 0;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
29
src/md.js
29
src/md.js
|
|
@ -41,6 +41,15 @@ const OutputFlags = {
|
|||
XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set)
|
||||
}
|
||||
|
||||
// these should be in sync with "Formatter" in md.c
|
||||
const FormatterNONE = 0
|
||||
, FormatterHTML = 1
|
||||
, FormatterJSON = 2
|
||||
|
||||
// html formatter flags (sync with fmt_html.h)
|
||||
const MD_HTML_FLAG_XHTML = 1 << 0
|
||||
|
||||
|
||||
export function parse(source, options) {
|
||||
options = options || {}
|
||||
|
||||
|
|
@ -49,17 +58,17 @@ export function parse(source, options) {
|
|||
options.parseFlags
|
||||
)
|
||||
|
||||
let outputFlags = 0
|
||||
switch (options.format) {
|
||||
case "xhtml":
|
||||
outputFlags |= OutputFlags.HTML | OutputFlags.XHTML
|
||||
break
|
||||
let formatter = FormatterHTML
|
||||
let fmtFlags = 0
|
||||
|
||||
if (options.format) switch (options.format) {
|
||||
case "html":
|
||||
case undefined:
|
||||
case null:
|
||||
case "":
|
||||
outputFlags |= OutputFlags.HTML
|
||||
break
|
||||
case "xhtml":
|
||||
fmtFlags |= MD_HTML_FLAG_XHTML
|
||||
break
|
||||
case "json":
|
||||
formatter = FormatterJSON
|
||||
break
|
||||
|
||||
default:
|
||||
|
|
@ -68,7 +77,7 @@ export function parse(source, options) {
|
|||
|
||||
let buf = typeof source == "string" ? utf8.encode(source) : source
|
||||
let outbuf = withOutPtr(outptr => withTmpBytePtr(buf, (inptr, inlen) =>
|
||||
_parseUTF8(inptr, inlen, parseFlags, outputFlags, outptr)
|
||||
_parseUTF8(inptr, inlen, parseFlags, formatter, fmtFlags, outptr)
|
||||
))
|
||||
|
||||
// check for error and throw if needed
|
||||
|
|
|
|||
|
|
@ -3678,7 +3678,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
|
|||
/* If we can be a closer, try to resolve with the preceding opener. */
|
||||
if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
|
||||
MD_MARK* opener = NULL;
|
||||
int opener_index;
|
||||
int opener_index = 0;
|
||||
|
||||
if(mark->ch == _T('*')) {
|
||||
MD_MARKCHAIN* opener_chains[6];
|
||||
|
|
@ -5654,7 +5654,7 @@ md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
|
|||
return indent - total_indent;
|
||||
}
|
||||
|
||||
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 };
|
||||
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 };
|
||||
|
||||
/* Analyze the type of the line and find some of its properties. This serves as a
|
||||
* main input for determining type and boundaries of a block. */
|
||||
|
|
|
|||
42
src/wbuf.c
42
src/wbuf.c
|
|
@ -15,12 +15,8 @@ void WBufReset(WBuf* b) {
|
|||
b->ptr = b->start;
|
||||
}
|
||||
|
||||
inline size_t WBufCap(WBuf* b) { return b->end - b->start; } // total capacity (size)
|
||||
inline size_t WBufLen(WBuf* b) { return b->ptr - b->start; } // valid bytes at start
|
||||
inline size_t WBufAvail(WBuf* b) { return b->end - b->ptr; } // bytes available
|
||||
|
||||
// grows buffer so that there is at least minspace available space
|
||||
static void WBufGrow(WBuf* b, size_t minspace) {
|
||||
void WBufGrow(WBuf* b, size_t minspace) {
|
||||
// size_t avail = b->end - b->ptr;
|
||||
size_t len = WBufLen(b); // store len before changing b
|
||||
size_t cap = WBufCap(b);
|
||||
|
|
@ -42,13 +38,6 @@ void WBufReserve(WBuf* b, size_t minspace) {
|
|||
}
|
||||
}
|
||||
|
||||
void WBufAppendc(WBuf* b, char c) {
|
||||
if (WBufAvail(b) < 1) {
|
||||
WBufGrow(b, 1);
|
||||
}
|
||||
*(b->ptr++) = c;
|
||||
}
|
||||
|
||||
void WBufAppendBytes(WBuf* b, const void* bytes, size_t len) {
|
||||
if (WBufAvail(b) < len) {
|
||||
WBufGrow(b, len);
|
||||
|
|
@ -162,6 +151,35 @@ void WBufAppendU32(WBuf* b, u32 n, u32 radix) {
|
|||
}
|
||||
|
||||
|
||||
// Appends codepoint to b, encoded as UTF-8 (1 to 4 bytes).
//
// Out-of-line implementation of WBufAppendUTF8Codepoint.
//
// Code points that cannot be encoded are written as U+FFFD REPLACEMENT
// CHARACTER instead: zero, anything above U+10FFFF, and the UTF-16 surrogate
// range U+D800..U+DFFF (encoding a surrogate would yield invalid UTF-8).
void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint) {
  // U+FFFD pre-encoded as UTF-8. unsigned char so that the values fit the
  // element type regardless of whether plain char is signed.
  static const unsigned char utf8_replacement_char[] = { 0xef, 0xbf, 0xbd };

  // validate first so that no encoding work is done for rejected input
  if (
    codepoint == 0 ||
    codepoint > 0x10ffff ||
    (0xd800 <= codepoint && codepoint <= 0xdfff)
  ) {
    WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char));
    return;
  }

  unsigned char utf8[4];
  size_t n;
  if (codepoint <= 0x7f) {
    // ASCII: single byte. Handled here too so that a direct call never
    // produces an overlong two-byte form.
    n = 1;
    utf8[0] = (unsigned char)codepoint;
  } else if (codepoint <= 0x7ff) {
    n = 2;
    utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
    utf8[1] = 0x80 | ((codepoint >> 0) & 0x3f);
  } else if (codepoint <= 0xffff) {
    n = 3;
    utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
    utf8[1] = 0x80 | ((codepoint >> 6) & 0x3f);
    utf8[2] = 0x80 | ((codepoint >> 0) & 0x3f);
  } else {
    n = 4;
    utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
    utf8[1] = 0x80 | ((codepoint >> 12) & 0x3f);
    utf8[2] = 0x80 | ((codepoint >> 6) & 0x3f);
    utf8[3] = 0x80 | ((codepoint >> 0) & 0x3f);
  }

  WBufAppendBytes(b, utf8, n);
}
|
||||
|
||||
|
||||
// static void WBufAppendSlug(WBuf* b, const char* text) {
|
||||
// size_t len = strlen(text);
|
||||
// WBufReserve(b, len);
|
||||
|
|
|
|||
31
src/wbuf.h
31
src/wbuf.h
|
|
@ -11,13 +11,13 @@ void WBufInit(WBuf*);
|
|||
void WBufFree(WBuf*);
|
||||
void WBufReset(WBuf*);
|
||||
|
||||
size_t WBufCap(WBuf*); // total capacity (size)
|
||||
size_t WBufLen(WBuf*); // valid bytes at start
|
||||
size_t WBufAvail(WBuf*); // bytes available
|
||||
// Size accessors for a WBuf, all derived from its start / ptr / end pointers.

// total capacity (size): distance from start to end
inline static size_t WBufCap(WBuf* b) {
  return (size_t)(b->end - b->start);
}

// valid bytes at start: distance from start to the write position
inline static size_t WBufLen(WBuf* b) {
  return (size_t)(b->ptr - b->start);
}

// bytes available: distance from the write position to end
inline static size_t WBufAvail(WBuf* b) {
  return (size_t)(b->end - b->ptr);
}
|
||||
|
||||
void WBufReserve(WBuf*, size_t minspace);
|
||||
|
||||
void WBufAppendc(WBuf*, char c);
|
||||
static void WBufAppendc(WBuf*, char c);
|
||||
void WBufAppendBytes(WBuf*, const void* bytes, size_t len);
|
||||
void WBufAppendStr(WBuf*, const char* pch);
|
||||
#define WBufAppendCStr(b, cstr) WBufAppendBytes((b), (cstr), strlen(cstr))
|
||||
|
|
@ -27,3 +27,26 @@ void _WBufAppendHtml(WBuf*, const char* pch, bool isattr);
|
|||
|
||||
// append u32 integer n. radix must be in range [2-36]
|
||||
void WBufAppendU32(WBuf*, u32 n, u32 radix);
|
||||
|
||||
static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint);
|
||||
|
||||
// grows buffer so that there is at least minspace available space
|
||||
void WBufGrow(WBuf* b, size_t minspace);
|
||||
|
||||
|
||||
|
||||
// implementation of WBufAppendUTF8Codepoint
|
||||
void _WBufAppendUTF8Codepoint2(WBuf* b, u32 codepoint);
|
||||
// Appends codepoint to b encoded as UTF-8.
// Values up to 0x7f are written inline as a single byte; anything larger is
// handed to the out-of-line _WBufAppendUTF8Codepoint2.
inline static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) {
  if (codepoint > 0x7f) {
    // call, then return separately: a void function must not return an expression
    _WBufAppendUTF8Codepoint2(b, codepoint);
    return;
  }
  WBufAppendc(b, (char)codepoint);
}
|
||||
|
||||
// Appends the single byte c to b, growing the buffer first when it is full.
inline static void WBufAppendc(WBuf* b, char c) {
  if (WBufAvail(b) == 0) {
    WBufGrow(b, 1);
  }
  // store at the write position, then advance it
  *b->ptr = c;
  b->ptr++;
}
|
||||
|
|
|
|||
18
wasmc.js
18
wasmc.js
|
|
@ -1,16 +1,29 @@
|
|||
const package = require("./package.json")
|
||||
const outdir = debug ? builddir : "dist"
|
||||
|
||||
cflags = cflags.concat([
|
||||
"-std=c11",
|
||||
"-Wall",
|
||||
"-Wuninitialized",
|
||||
"-Wmissing-field-initializers",
|
||||
"-Wconditional-uninitialized",
|
||||
"-Wno-nullability-completeness",
|
||||
"-Wno-unused-function",
|
||||
"-fcolor-diagnostics",
|
||||
])
|
||||
|
||||
const m = {
|
||||
jsentry: "src/md.js",
|
||||
|
||||
sources: [
|
||||
"src/wlib.c",
|
||||
"src/wbuf.c",
|
||||
"src/md.c",
|
||||
"src/md4c.c",
|
||||
"src/fmt_html.c",
|
||||
// "src/fmt_json.c",
|
||||
],
|
||||
debug ? "src/fmt_json.c" : "",
|
||||
].filter(s => !!s),
|
||||
|
||||
cflags: [
|
||||
"-DMD4C_USE_UTF8",
|
||||
].concat(debug ? [
|
||||
|
|
@ -20,6 +33,7 @@ const m = {
|
|||
"-DSAFE_HEAP=1", // emcc
|
||||
"-DSTACK_OVERFLOW_CHECK=1", // emcc
|
||||
"-DDEMANGLE_SUPPORT=1", // emcc
|
||||
"-DMD_WITH_JSON=1", // enable WIP json formatter
|
||||
] : [
|
||||
// release flags
|
||||
]),
|
||||
|
|
|
|||
Loading…
Reference in a new issue