From 9e251082eea84361ec160ca098d5081ca9d87fca Mon Sep 17 00:00:00 2001 From: Rasmus Andersson Date: Fri, 16 Oct 2020 15:07:19 -0700 Subject: [PATCH] upgrade md4c to https://github.com/mity/md4c/tree/601885f73862918e441375d7cb38ca18f9ee9d7f --- markdown.d.ts | 31 ++- src/fmt_html.c | 51 +++- src/fmt_html.h | 4 +- src/md.c | 18 +- src/md.js | 49 ++-- src/md4c.c | 674 +++++++++++++++++++++++++------------------------ src/md4c.h | 54 ++-- 7 files changed, 482 insertions(+), 399 deletions(-) diff --git a/markdown.d.ts b/markdown.d.ts index 3756265..c0dcd57 100644 --- a/markdown.d.ts +++ b/markdown.d.ts @@ -6,22 +6,16 @@ export function parse(s :Source, o? :ParseOptions & { asMemoryView? :never|false export function parse(s :Source, o? :ParseOptions & { asMemoryView :true }) :Uint8Array /** Markdown source code can be provided as a JavaScript string or UTF8 encoded data */ -type Source = string|ArrayLike +type Source = string | ArrayLike /** Options for the parse function */ export interface ParseOptions { - /** - * Customize parsing. - * If not provided, the following flags are used, equating to github-style parsing: - * COLLAPSE_WHITESPACE - * PERMISSIVE_ATX_HEADERS - * PERMISSIVE_URL_AUTO_LINKS - * STRIKETHROUGH - * TABLES - * TASK_LISTS - */ + /** Customize parsing. Defaults to ParseFlags.DEFAULT */ parseFlags? :ParseFlags + /** Select output format. Defaults to "html" */ + format? : "html" | "xhtml" + /** * asMemoryView=true causes parse() to return a view of heap memory as a Uint8Array, * instead of a string. @@ -51,7 +45,18 @@ export enum ParseFlags { /** Enable tables extension. */ TABLES, /** Enable task list extension. */ TASK_LISTS, /** Enable wiki links extension. */ WIKI_LINKS, + /** Enable underline extension (disables '_' for emphasis) */ UNDERLINE, - /** Default flags */ DEFAULT, - /** Shorthand for NO_HTML_BLOCKS | NO_HTML_SPANS */ NO_HTML, + /** Default flags are: + * COLLAPSE_WHITESPACE | + * PERMISSIVE_ATX_HEADERS | + * PERMISSIVE_URL_AUTO_LINKS | + * STRIKETHROUGH | + * TABLES | + * TASK_LISTS + */ + DEFAULT, + + /** Shorthand for NO_HTML_BLOCKS | NO_HTML_SPANS */ + NO_HTML, } diff --git a/src/fmt_html.c b/src/fmt_html.c index e24dd1b..a2788f8 100644 --- a/src/fmt_html.c +++ b/src/fmt_html.c @@ -34,6 +34,7 @@ typedef struct HtmlRenderer_st { WBuf* outbuf; int imgnest; int addanchor; + u32 flags; } HtmlRenderer; @@ -246,7 +247,7 @@ static void render_close_img_span(HtmlRenderer* r, const MD_SPAN_IMG_DETAIL* det render_literal(r, "\" title=\""); render_attribute(r, &det->title); } - render_literal(r, "\">"); + render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\"/>" : "\">"); r->imgnest--; } @@ -273,7 +274,7 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) case MD_BLOCK_UL: render_literal(r, "
    \n"); break; case MD_BLOCK_OL: render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break; case MD_BLOCK_LI: render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail); break; - case MD_BLOCK_HR: render_literal(r, "
    \n"); break; + case MD_BLOCK_HR: render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "
    \n" : "
    \n"); break; case MD_BLOCK_H: { render_literal(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]); @@ -324,14 +325,28 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) { HtmlRenderer* r = (HtmlRenderer*) userdata; if(r->imgnest > 0) { - /* We are inside an image, i.e. rendering the ALT attribute of - * tag. */ + /* We are inside a Markdown image label. Markdown allows to use any + * emphasis and other rich contents in that context similarly as in + * any link label. + * + * However, unlike in the case of links (where that contents becomes + * contents of the ... tag), in the case of images the contents + * is supposed to fall into the attribute alt: .... + * + * In that context we naturally cannot output nested HTML tags. So lets + * suppress them and only output the plain text (i.e. what falls into + * text() callback). + * + * This make-it-a-plain-text approach is the recommended practice by + * CommonMark specification (for HTML output). + */ return 0; } switch(type) { case MD_SPAN_EM: render_literal(r, ""); break; case MD_SPAN_STRONG: render_literal(r, ""); break; + case MD_SPAN_U: render_literal(r, ""); break; case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break; case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break; case MD_SPAN_CODE: render_literal(r, ""); break; @@ -348,8 +363,8 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) { HtmlRenderer* r = (HtmlRenderer*) userdata; if(r->imgnest > 0) { - /* We are inside an image, i.e. rendering the ALT attribute of - * tag. */ + /* Ditto as in enter_span_callback(), except we have to allow the + * end of the tag. */ if(r->imgnest == 1 && type == MD_SPAN_IMG) render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); return 0; @@ -358,6 +373,7 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) { switch(type) { case MD_SPAN_EM: render_literal(r, ""); break; case MD_SPAN_STRONG: render_literal(r, ""); break; + case MD_SPAN_U: render_literal(r, ""); break; case MD_SPAN_A: render_literal(r, ""); break; case MD_SPAN_IMG: /*noop, handled above*/ break; case MD_SPAN_CODE: render_literal(r, ""); break; @@ -395,7 +411,17 @@ static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, vo switch(type) { case MD_TEXT_NULLCHAR: render_text(r, ucReplacementUTF8, sizeof(ucReplacementUTF8)); break; - case MD_TEXT_BR: render_literal(r, (r->imgnest == 0 ? "
    \n" : " ")); break; + case MD_TEXT_BR: + render_literal( + r, + r->imgnest == 0 ? + ((r->flags & MD_HTML_FLAG_XHTML) ? "
    \n" : "
    \n") : + " " + ); + break; + + render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "
    \n" : "
    \n"); break; + case MD_TEXT_SOFTBR: render_literal(r, (r->imgnest == 0 ? "\n" : " ")); break; case MD_TEXT_HTML: render_text(r, text, size); break; case MD_TEXT_ENTITY: render_text(r, text, size); break; @@ -409,9 +435,14 @@ static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, vo // dlog("MD4C: %s\n", msg); // } -int -fmt_html(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, unsigned parser_flags) { - HtmlRenderer render = { outbuf, 0, 0 }; +int fmt_html( + const MD_CHAR* input, + MD_SIZE input_size, + WBuf* outbuf, + u32 parser_flags, + u32 render_flags +) { + HtmlRenderer render = { outbuf, 0, 0, render_flags }; MD_PARSER parser = { 0, diff --git a/src/fmt_html.h b/src/fmt_html.h index a66dd1e..ed38124 100644 --- a/src/fmt_html.h +++ b/src/fmt_html.h @@ -1,4 +1,6 @@ #pragma once #include "wbuf.h" -int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags); +#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g.
    , generate
    + +int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags, u32 renderFlags); diff --git a/src/md.c b/src/md.c index 1398f9f..1ad2ebf 100644 --- a/src/md.c +++ b/src/md.c @@ -5,13 +5,10 @@ #include "fmt_html.h" // #include "fmt_json.h" -// #include "md4c.h" -/* If set, debug output from md_parse() is sent to stderr. */ -#define MD_RENDER_FLAG_DEBUG 0x0001 -#define MD_RENDER_FLAG_VERBATIM_ENTITIES 0x0002 - +// these should be in sync with "OutputFlags" in md.js typedef enum OutputFlags { - OutputFlagsHTML = 1 << 0, + OutputFlagHTML = 1 << 0, + OutputFlagXHTML = 1 << 1, } OutputFlags; typedef enum ErrorCode { @@ -44,10 +41,15 @@ export size_t parseUTF8( WBufReset(&outbuf); - if (outflags & OutputFlagsHTML) { + if (outflags & OutputFlagHTML) { WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations - if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags) != 0) { + u32 render_flags = 0; + if (outflags & OutputFlagXHTML) { + render_flags |= MD_HTML_FLAG_XHTML; + } + + if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags, render_flags) != 0) { // fmt_html returns status of md_parse which only fails in extreme cases // like when out of memory. md4c does not provide error codes or error messages. WErrSet(ERR_MD_PARSE, "md parser error"); diff --git a/src/md.js b/src/md.js index 710eab9..a53352a 100644 --- a/src/md.js +++ b/src/md.js @@ -12,7 +12,7 @@ export const ParseFlags = { PERMISSIVE_ATX_HEADERS: 0x0002, // Do not require space in ATX headers ( ###header ) PERMISSIVE_URL_AUTO_LINKS: 0x0004, // Recognize URLs as links even without <...> PERMISSIVE_EMAIL_AUTO_LINKS: 0x0008, // Recognize e-mails as links even without <...> - NO_INDENTED_CODE_BLOCKS: 0x0010, // Disable indented code blocks. (Only fenced code works.) + NO_INDENTED_CODE_BLOCKS: 0x0010, // Disable indented code blocks. (Only fenced code works) NO_HTML_BLOCKS: 0x0020, // Disable raw HTML blocks. NO_HTML_SPANS: 0x0040, // Disable raw HTML (inline). TABLES: 0x0100, // Enable tables extension. @@ -21,6 +21,7 @@ export const ParseFlags = { TASK_LISTS: 0x0800, // Enable task list extension. LATEX_MATH_SPANS: 0x1000, // Enable $ and $$ containing LaTeX equations. WIKI_LINKS: 0x2000, // Enable wiki links extension. + UNDERLINE: 0x4000, // Enable underline extension (disables '_' for emphasis) // Github style default flags DEFAULT: 0x0001 | 0x0002 | 0x0004 | 0x0200 | 0x0100 | 0x0800, @@ -34,38 +35,46 @@ export const ParseFlags = { NO_HTML: 0x0020 | 0x0040, // NO_HTML_BLOCKS | NO_HTML_SPANS } +// these should be in sync with "OutputFlags" in md.c const OutputFlags = { - HTML: 1 << 0, // Output HTML -} - -const defaultOptions = { - parseFlags: ParseFlags.DEFAULT, - - // how to format the output - format: "html", - - // Return a view of heap memory as a Uint8Array, instead of a string. - // - // The returned Uint8Array is only valid until the next call to parse(). - // If you need to keep the returned Uint8Array around, call Uint8Array.slice() - // to make a copy, as each call to parse() reuses the same underlying memory. - asMemoryView: false, + HTML: 1 << 0, // Output HTML + XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set) } export function parse(source, options) { - options = options ? {__proto__:defaultOptions, ...options} : defaultOptions - let outflags = (0 - | (options.format == "html" ? OutputFlags.HTML : 0) + options = options || {} + + let parseFlags = ( + options.parseFlags === undefined ? ParseFlags.DEFAULT : + options.parseFlags ) + let outputFlags = 0 + switch (options.format) { + case "xhtml": + outputFlags |= OutputFlags.HTML | OutputFlags.XHTML + break + + case "html": + case undefined: + case null: + case "": + outputFlags |= OutputFlags.HTML + break + + default: + throw new Error(`invalid format "${options.format}"`) + } + let buf = typeof source == "string" ? utf8.encode(source) : source let outbuf = withOutPtr(outptr => withTmpBytePtr(buf, (inptr, inlen) => - _parseUTF8(inptr, inlen, options.parseFlags, outflags, outptr) + _parseUTF8(inptr, inlen, parseFlags, outputFlags, outptr) )) // check for error and throw if needed werrCheck() + // DEBUG // if (outbuf) { // console.log(utf8.decode(outbuf)) // } diff --git a/src/md4c.c b/src/md4c.c index 0407da0..729cb72 100644 --- a/src/md4c.c +++ b/src/md4c.c @@ -2,7 +2,7 @@ * MD4C: Markdown parser for C * (http://github.com/mity/md4c) * - * Copyright (c) 2016-2019 Martin Mitas + * Copyright (c) 2016-2020 Martin Mitas * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -34,14 +34,23 @@ *** Miscellaneous Stuff *** *****************************/ -#ifdef _MSC_VER - /* MSVC does not understand "inline" when building as pure C (not C++). - * However it understands "__inline" */ - #ifndef __cplusplus +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L + /* C89/90 or old compilers in general may not understand "inline". */ + #if defined __GNUC__ + #define inline __inline__ + #elif defined _MSC_VER #define inline __inline + #else + #define inline #endif #endif +/* Make the UTF-8 support the default. */ +#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16 + #define MD4C_USE_UTF8 +#endif + +/* Magic for making wide literals with MD4C_USE_UTF16. */ #ifdef _T #undef _T #endif @@ -126,21 +135,22 @@ struct MD_CTX_tag { #endif /* For resolving of inline spans. */ - MD_MARKCHAIN mark_chains[12]; -#define PTR_CHAIN ctx->mark_chains[0] -#define TABLECELLBOUNDARIES ctx->mark_chains[1] -#define ASTERISK_OPENERS_extraword_mod3_0 ctx->mark_chains[2] -#define ASTERISK_OPENERS_extraword_mod3_1 ctx->mark_chains[3] -#define ASTERISK_OPENERS_extraword_mod3_2 ctx->mark_chains[4] -#define ASTERISK_OPENERS_intraword_mod3_0 ctx->mark_chains[5] -#define ASTERISK_OPENERS_intraword_mod3_1 ctx->mark_chains[6] -#define ASTERISK_OPENERS_intraword_mod3_2 ctx->mark_chains[7] -#define UNDERSCORE_OPENERS ctx->mark_chains[8] -#define TILDE_OPENERS ctx->mark_chains[9] -#define BRACKET_OPENERS ctx->mark_chains[10] -#define DOLLAR_OPENERS ctx->mark_chains[11] + MD_MARKCHAIN mark_chains[13]; +#define PTR_CHAIN (ctx->mark_chains[0]) +#define TABLECELLBOUNDARIES (ctx->mark_chains[1]) +#define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2]) +#define ASTERISK_OPENERS_extraword_mod3_1 (ctx->mark_chains[3]) +#define ASTERISK_OPENERS_extraword_mod3_2 (ctx->mark_chains[4]) +#define ASTERISK_OPENERS_intraword_mod3_0 (ctx->mark_chains[5]) +#define ASTERISK_OPENERS_intraword_mod3_1 (ctx->mark_chains[6]) +#define ASTERISK_OPENERS_intraword_mod3_2 (ctx->mark_chains[7]) +#define UNDERSCORE_OPENERS (ctx->mark_chains[8]) +#define TILDE_OPENERS_1 (ctx->mark_chains[9]) +#define TILDE_OPENERS_2 (ctx->mark_chains[10]) +#define BRACKET_OPENERS (ctx->mark_chains[11]) +#define DOLLAR_OPENERS (ctx->mark_chains[12]) #define OPENERS_CHAIN_FIRST 2 -#define OPENERS_CHAIN_LAST 11 +#define OPENERS_CHAIN_LAST 12 int n_table_cell_boundaries; @@ -203,14 +213,12 @@ struct MD_LINE_ANALYSIS_tag { OFF beg; OFF end; unsigned indent; /* Indentation level. */ - unsigned total_indent; /* Total indent in characters. */ }; typedef struct MD_LINE_tag MD_LINE; struct MD_LINE_tag { OFF beg; OFF end; - unsigned total_indent; /* Total indent in characters. */ }; typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE; @@ -225,19 +233,11 @@ struct MD_VERBATIMLINE_tag { *** Debugging *** *******************/ -// change to 0 to disable MD_PARSER.debug_log -#define WITH_PARSER_DEBUG_LOG 0 - -#if WITH_PARSER_DEBUG_LOG #define MD_LOG(msg) \ do { \ if(ctx->parser.debug_log != NULL) \ ctx->parser.debug_log((msg), ctx->userdata); \ } while(0) -#else -#define MD_LOG(msg) do{}while(0) -#endif - #ifdef DEBUG #define MD_ASSERT(cond) \ @@ -275,7 +275,7 @@ struct MD_VERBATIMLINE_tag { /* Character classification. * Note we assume ASCII compatibility of code points < 128 here. */ #define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max)) -#define ISANYOF_(ch, palette) (md_strchr((palette), (ch)) != NULL) +#define ISANYOF_(ch, palette) ((ch) != _T('\0') && md_strchr((palette), (ch)) != NULL) #define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2)) #define ISANYOF3_(ch, ch1, ch2, ch3) ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3)) #define ISASCII_(ch) ((unsigned)(ch) <= 127) @@ -306,16 +306,14 @@ struct MD_VERBATIMLINE_tag { #define ISDIGIT(off) ISDIGIT_(CH(off)) #define ISXDIGIT(off) ISXDIGIT_(CH(off)) #define ISALNUM(off) ISALNUM_(CH(off)) -static inline const CHAR* -md_strchr(const CHAR* str, CHAR ch) -{ - OFF i; - for(i = 0; str[i] != _T('\0'); i++) { - if(ch == str[i]) - return (str + i); - } - return NULL; -} + + +#if defined MD4C_USE_UTF16 + #define md_strchr wcschr +#else + #define md_strchr strchr +#endif + /* Case insensitive check of string equality. */ static inline int @@ -373,89 +371,89 @@ md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ } -#define MD_CHECK(func) \ - do { \ - ret = (func); \ - if(ret < 0) \ - goto abort; \ +#define MD_CHECK(func) \ + do { \ + ret = (func); \ + if(ret < 0) \ + goto abort; \ } while(0) -#define MD_TEMP_BUFFER(sz) \ - do { \ - if(sz > ctx->alloc_buffer) { \ - CHAR* new_buffer; \ - SZ new_size = ((sz) + (sz) / 2 + 128) & ~127; \ - \ - new_buffer = realloc(ctx->buffer, new_size); \ - if(new_buffer == NULL) { \ - MD_LOG("realloc() failed."); \ - ret = -1; \ - goto abort; \ - } \ - \ - ctx->buffer = new_buffer; \ - ctx->alloc_buffer = new_size; \ - } \ +#define MD_TEMP_BUFFER(sz) \ + do { \ + if(sz > ctx->alloc_buffer) { \ + CHAR* new_buffer; \ + SZ new_size = ((sz) + (sz) / 2 + 128) & ~127; \ + \ + new_buffer = realloc(ctx->buffer, new_size); \ + if(new_buffer == NULL) { \ + MD_LOG("realloc() failed."); \ + ret = -1; \ + goto abort; \ + } \ + \ + ctx->buffer = new_buffer; \ + ctx->alloc_buffer = new_size; \ + } \ } while(0) -#define MD_ENTER_BLOCK(type, arg) \ - do { \ - ret = ctx->parser.enter_block((type), (arg), ctx->userdata); \ - if(ret != 0) { \ - MD_LOG("Aborted from enter_block() callback."); \ - goto abort; \ - } \ +#define MD_ENTER_BLOCK(type, arg) \ + do { \ + ret = ctx->parser.enter_block((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from enter_block() callback."); \ + goto abort; \ + } \ } while(0) -#define MD_LEAVE_BLOCK(type, arg) \ - do { \ - ret = ctx->parser.leave_block((type), (arg), ctx->userdata); \ - if(ret != 0) { \ - MD_LOG("Aborted from leave_block() callback."); \ - goto abort; \ - } \ +#define MD_LEAVE_BLOCK(type, arg) \ + do { \ + ret = ctx->parser.leave_block((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from leave_block() callback."); \ + goto abort; \ + } \ } while(0) -#define MD_ENTER_SPAN(type, arg) \ - do { \ - ret = ctx->parser.enter_span((type), (arg), ctx->userdata); \ - if(ret != 0) { \ - MD_LOG("Aborted from enter_span() callback."); \ - goto abort; \ - } \ +#define MD_ENTER_SPAN(type, arg) \ + do { \ + ret = ctx->parser.enter_span((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from enter_span() callback."); \ + goto abort; \ + } \ } while(0) -#define MD_LEAVE_SPAN(type, arg) \ - do { \ - ret = ctx->parser.leave_span((type), (arg), ctx->userdata); \ - if(ret != 0) { \ - MD_LOG("Aborted from leave_span() callback."); \ - goto abort; \ - } \ +#define MD_LEAVE_SPAN(type, arg) \ + do { \ + ret = ctx->parser.leave_span((type), (arg), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from leave_span() callback."); \ + goto abort; \ + } \ } while(0) -#define MD_TEXT(type, str, size) \ - do { \ - if(size > 0) { \ - ret = ctx->parser.text((type), (str), (size), ctx->userdata); \ - if(ret != 0) { \ - MD_LOG("Aborted from text() callback."); \ - goto abort; \ - } \ - } \ +#define MD_TEXT(type, str, size) \ + do { \ + if(size > 0) { \ + ret = ctx->parser.text((type), (str), (size), ctx->userdata); \ + if(ret != 0) { \ + MD_LOG("Aborted from text() callback."); \ + goto abort; \ + } \ + } \ } while(0) -#define MD_TEXT_INSECURE(type, str, size) \ - do { \ - if(size > 0) { \ - ret = md_text_with_null_replacement(ctx, type, str, size); \ - if(ret != 0) { \ - MD_LOG("Aborted from text() callback."); \ - goto abort; \ - } \ - } \ +#define MD_TEXT_INSECURE(type, str, size) \ + do { \ + if(size > 0) { \ + ret = md_text_with_null_replacement(ctx, type, str, size); \ + if(ret != 0) { \ + MD_LOG("Aborted from text() callback."); \ + goto abort; \ + } \ + } \ } while(0) @@ -548,22 +546,22 @@ struct MD_UNICODE_FOLD_INFO_tag { R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e), R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef), R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70), - R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f), S(0x3030), - S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e), + R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f), + S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e), R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f), S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1), S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68), R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b), R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102), S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f), - R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), R(0x10f55,0x10f59), R(0x11047,0x1104d), - R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175), R(0x111c5,0x111c8), - S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9), R(0x1144b,0x1144f), - S(0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643), R(0x11660,0x1166c), - R(0x1173c,0x1173e), S(0x1183b), S(0x119e2), R(0x11a3f,0x11a46), R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), - R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), S(0x11fff), R(0x12470,0x12474), - R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44), R(0x16e97,0x16e9a), S(0x16fe2), - S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f) + R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59), + R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175), + R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9), + R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643), + R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46), + R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), + S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44), + R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f) }; #undef R #undef S @@ -586,52 +584,56 @@ struct MD_UNICODE_FOLD_INFO_tag { static const unsigned FOLD_MAP_1[] = { R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136), R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182), - S(0x0186), S(0x0187), S(0x0189), S(0x018b), S(0x018e), S(0x018f), S(0x0190), S(0x0191), S(0x0193), - S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f), R(0x01a0,0x01a4), S(0x01a6), - S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b3), S(0x01b7), S(0x01b8), - S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8), S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), - S(0x01f1), S(0x01f2), S(0x01f6), S(0x01f7), R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), - S(0x023b), S(0x023d), S(0x023e), S(0x0241), S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), - S(0x0370), S(0x0376), S(0x037f), S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), R(0x0391,0x03a1), - R(0x03a3,0x03ab), S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), - S(0x03f0), S(0x03f1), S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), - R(0x0400,0x040f), R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), - R(0x04d0,0x052e), R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), - S(0x1c81), S(0x1c82), S(0x1c83), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba), + S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190), + S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f), + R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2), + S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8), + S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7), + R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241), + S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f), + S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab), + S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1), + S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f), + R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e), + R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81), + S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba), R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d), R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f), - R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fba), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8), S(0x1fda), S(0x1fe8), - S(0x1fea), S(0x1fec), S(0x1ff8), S(0x1ffa), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), - S(0x2183), R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), - R(0x2c67,0x2c6b), S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), - R(0x2c80,0x2ce2), S(0x2ceb), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), - R(0xa732,0xa76e), S(0xa779), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), + R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8), + S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8), + S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183), + R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b), + S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2), + S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e), + S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792), R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2), - S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), R(0xab70,0xabbf), - R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2), R(0x118a0,0x118bf), - R(0x16e40,0x16e5f), R(0x1e900,0x1e921) + S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5), + R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2), + R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921) }; static const unsigned FOLD_MAP_1_DATA[] = { 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148, - 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0254, 0x0188, 0x0256, 0x018c, 0x01dd, - 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275, 0x01a1, 0x01a5, - 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x01b4, 0x0292, 0x01b9, 0x01bd, 0x01c6, 0x01c6, - 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3, 0x0195, 0x01bf, 0x01f9, 0x021f, - 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242, 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, - 0x03b9, 0x0371, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af, 0x03cc, 0x03cd, 0x03b1, 0x03c1, 0x03c3, 0x03cb, - 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0, 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, - 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, - 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, - 0x0434, 0x043e, 0x0441, 0x0442, 0x044a, 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, - 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, - 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f67, 0x1fb0, 0x1f70, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1f76, - 0x1fe0, 0x1f7a, 0x1fe5, 0x1f78, 0x1f7c, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, 0x217f, 0x2184, 0x24d0, - 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, 0x0271, 0x0250, 0x0252, - 0x2c73, 0x2c76, 0x023f, 0x2c81, 0x2ce3, 0x2cec, 0x2cf3, 0xa641, 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, - 0xa733, 0xa76f, 0xa77a, 0x1d79, 0xa77f, 0xa787, 0xa78c, 0x0265, 0xa791, 0xa797, 0xa7a9, 0x0266, 0x025c, - 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d, 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, - 0x13a0, 0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, - 0x16e60, 0x16e7f, 0x1e922, 0x1e943 + 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257, + 0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275, + 0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292, + 0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3, + 0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242, + 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af, + 0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0, + 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, + 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, + 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a, + 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, + 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, + 0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0, + 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, + 0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, + 0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641, + 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c, + 0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d, + 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41, + 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922, + 0x1e943 }; static const unsigned FOLD_MAP_2[] = { S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99), @@ -1338,8 +1340,9 @@ md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build, MD_TEXTTYPE* new_substr_types; OFF* new_substr_offsets; - build->substr_alloc = (build->substr_alloc == 0 ? 8 : build->substr_alloc * 2); - + build->substr_alloc = (build->substr_alloc > 0 + ? build->substr_alloc + build->substr_alloc / 2 + : 8); new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types, build->substr_alloc * sizeof(MD_TEXTTYPE)); if(new_substr_types == NULL) { @@ -1465,8 +1468,8 @@ abort: *** Dictionary of Reference Definitions *** *********************************************/ -#define MD_FNV1A_BASE 2166136261 -#define MD_FNV1A_PRIME 16777619 +#define MD_FNV1A_BASE 2166136261U +#define MD_FNV1A_PRIME 16777619U static inline unsigned md_fnv1a(unsigned base, const void* data, size_t n) @@ -1488,12 +1491,12 @@ struct MD_REF_DEF_tag { CHAR* label; CHAR* title; unsigned hash; - SZ label_size : 24; - unsigned label_needs_free : 1; - unsigned title_needs_free : 1; + SZ label_size; SZ title_size; OFF dest_beg; OFF dest_end; + unsigned char label_needs_free : 1; + unsigned char title_needs_free : 1; }; /* Label equivalence is quite complicated with regards to whitespace and case @@ -1573,8 +1576,8 @@ md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size OFF b_off; int a_reached_end = FALSE; int b_reached_end = FALSE; - MD_UNICODE_FOLD_INFO a_fi = { 0 }; - MD_UNICODE_FOLD_INFO b_fi = { 0 }; + MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 }; + MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 }; OFF a_fi_off = 0; OFF b_fi_off = 0; int cmp; @@ -1627,7 +1630,7 @@ md_ref_def_cmp(const void* a, const void* b) } static int -md_ref_def_cmp_stable(const void* a, const void* b) +md_ref_def_cmp_for_sort(const void* a, const void* b) { int cmp; @@ -1680,6 +1683,7 @@ md_build_ref_def_hashtable(MD_CTX* ctx) bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size]; if(bucket == NULL) { + /* The bucket is empty. Make it just point to the def. */ ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def; continue; } @@ -1691,12 +1695,12 @@ md_build_ref_def_hashtable(MD_CTX* ctx) MD_REF_DEF* old_def = (MD_REF_DEF*) bucket; if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) { - /* Ignore this ref. def. */ + /* Duplicate label: Ignore this ref. def. */ continue; } - /* Make the bucket capable of holding more ref. defs. */ - list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 4 * sizeof(MD_REF_DEF*)); + /* Make the bucket complex, i.e. able to hold more ref. defs. */ + list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*)); if(list == NULL) { MD_LOG("malloc() failed."); goto abort; @@ -1704,22 +1708,28 @@ md_build_ref_def_hashtable(MD_CTX* ctx) list->ref_defs[0] = old_def; list->ref_defs[1] = def; list->n_ref_defs = 2; - list->alloc_ref_defs = 4; + list->alloc_ref_defs = 2; ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list; continue; } - /* Append the def to the bucket list. */ + /* Append the def to the complex bucket list. + * + * Note in this case we ignore potential duplicates to avoid expensive + * iterating over the complex bucket. Below, we revisit all the complex + * buckets and handle it more cheaply after the complex bucket contents + * is sorted. */ list = (MD_REF_DEF_LIST*) bucket; if(list->n_ref_defs >= list->alloc_ref_defs) { + int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2; MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list, - sizeof(MD_REF_DEF_LIST) + 2 * list->alloc_ref_defs * sizeof(MD_REF_DEF*)); + sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*)); if(list_tmp == NULL) { MD_LOG("realloc() failed."); goto abort; } list = list_tmp; - list->alloc_ref_defs *= 2; + list->alloc_ref_defs = alloc_ref_defs; ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list; } @@ -1738,9 +1748,12 @@ md_build_ref_def_hashtable(MD_CTX* ctx) continue; list = (MD_REF_DEF_LIST*) bucket; - qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_stable); + qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort); - /* Disable duplicates. */ + /* Disable all duplicates in the complex bucket by forcing all such + * records to point to the 1st such ref. def. I.e. no matter which + * record is found during the lookup, it will always point to the right + * ref. def. in ctx->ref_defs[]. */ for(j = 1; j < list->n_ref_defs; j++) { if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0) list->ref_defs[j] = list->ref_defs[j-1]; @@ -2063,21 +2076,18 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines) OFF label_contents_beg; OFF label_contents_end; int label_contents_line_index = -1; - int label_is_multiline; - CHAR* label; - SZ label_size; - int label_needs_free = FALSE; + int label_is_multiline = FALSE; OFF dest_contents_beg; OFF dest_contents_end; OFF title_contents_beg; OFF title_contents_end; int title_contents_line_index; - int title_is_multiline; + int title_is_multiline = FALSE; OFF off; int line_index = 0; int tmp_line_index; - MD_REF_DEF* def; - int ret; + MD_REF_DEF* def = NULL; + int ret = 0; /* Link label. */ if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg, @@ -2128,65 +2138,58 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines) if(off < lines[line_index].end) return FALSE; - /* Construct label. */ - if(!label_is_multiline) { - label = (CHAR*) STR(label_contents_beg); - label_size = label_contents_end - label_contents_beg; - label_needs_free = FALSE; - } else { - MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end, - lines + label_contents_line_index, n_lines - label_contents_line_index, - _T(' '), &label, &label_size)); - label_needs_free = TRUE; - } - - /* Store the reference definition. */ + /* So, it _is_ a reference definition. Remember it. */ if(ctx->n_ref_defs >= ctx->alloc_ref_defs) { MD_REF_DEF* new_defs; - ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0 ? ctx->alloc_ref_defs * 2 : 16); + ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0 + ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2 + : 16); new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF)); if(new_defs == NULL) { MD_LOG("realloc() failed."); - ret = -1; goto abort; } ctx->ref_defs = new_defs; } - def = &ctx->ref_defs[ctx->n_ref_defs]; memset(def, 0, sizeof(MD_REF_DEF)); - def->label = label; - def->label_size = label_size; - def->label_needs_free = label_needs_free; - - def->dest_beg = dest_contents_beg; - def->dest_end = dest_contents_end; - - if(title_contents_beg >= title_contents_end) { - def->title = NULL; - def->title_size = 0; - } else if(!title_is_multiline) { - def->title = (CHAR*) STR(title_contents_beg); - def->title_size = title_contents_end - title_contents_beg; + if(label_is_multiline) { + MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end, + lines + label_contents_line_index, n_lines - label_contents_line_index, + _T(' '), &def->label, &def->label_size)); + def->label_needs_free = TRUE; } else { + def->label = (CHAR*) STR(label_contents_beg); + def->label_size = label_contents_end - label_contents_beg; + } + + if(title_is_multiline) { MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, lines + title_contents_line_index, n_lines - title_contents_line_index, _T('\n'), &def->title, &def->title_size)); def->title_needs_free = TRUE; + } else { + def->title = (CHAR*) STR(title_contents_beg); + def->title_size = title_contents_end - title_contents_beg; } + def->dest_beg = dest_contents_beg; + def->dest_end = dest_contents_end; + /* Success. */ ctx->n_ref_defs++; return line_index + 1; abort: /* Failure. */ - if(label_needs_free) - free(label); - return -1; + if(def != NULL && def->label_needs_free) + free(def->label); + if(def != NULL && def->title_needs_free) + free(def->title); + return ret; } static int @@ -2479,7 +2482,7 @@ md_mark_chain(MD_CTX* ctx, int mark_index) switch(mark->ch) { case _T('*'): return md_asterisk_chain(ctx, mark->flags); case _T('_'): return &UNDERSCORE_OPENERS; - case _T('~'): return &TILDE_OPENERS; + case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2; case _T('['): return &BRACKET_OPENERS; case _T('|'): return &TABLECELLBOUNDARIES; default: return NULL; @@ -2492,7 +2495,9 @@ md_push_mark(MD_CTX* ctx) if(ctx->n_marks >= ctx->alloc_marks) { MD_MARK* new_marks; - ctx->alloc_marks = (ctx->alloc_marks > 0 ? ctx->alloc_marks * 2 : 64); + ctx->alloc_marks = (ctx->alloc_marks > 0 + ? ctx->alloc_marks + ctx->alloc_marks / 2 + : 64); new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK)); if(new_marks == NULL) { MD_LOG("realloc() failed."); @@ -2535,6 +2540,7 @@ md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index) chain->head = mark_index; ctx->marks[mark_index].prev = chain->tail; + ctx->marks[mark_index].next = -1; chain->tail = mark_index; } @@ -2626,7 +2632,7 @@ md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how) chain->head = -1; } - /* Go backwards so that un-resolved openers are re-added into their + /* Go backwards so that unresolved openers are re-added into their * respective chains, in the right order. */ mark_index = closer_index - 1; while(mark_index > opener_index) { @@ -2718,7 +2724,7 @@ md_build_mark_char_map(MD_CTX* ctx) } } -/* We limit code span marks to lower then 32 backticks. This solves the +/* We limit code span marks to lower than 32 backticks. This solves the * pathologic case of too many openers, each of different length: Their * resolving would be then O(n^2). */ #define CODESPAN_MARK_MAXLEN 32 @@ -2905,7 +2911,7 @@ md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) return FALSE; off++; - /* Labels delimited with '.'; each label is sequence of 1 - 62 alnum + /* Labels delimited with '.'; each label is sequence of 1 - 63 alnum * characters or '-', but '-' is not allowed as first or last char. */ label_len = 0; while(off < max_end) { @@ -2918,7 +2924,7 @@ md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end) else break; - if(label_len > 62) + if(label_len > 63) return FALSE; off++; @@ -3259,7 +3265,17 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode) while(tmp < line_end && CH(tmp) == _T('~')) tmp++; - PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER); + if(tmp - off < 3) { + unsigned flags = 0; + + if(tmp < line_end && !ISUNICODEWHITESPACE(tmp)) + flags |= MD_MARK_POTENTIAL_OPENER; + if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off)) + flags |= MD_MARK_POTENTIAL_CLOSER; + if(flags != 0) + PUSH_MARK(ch, off, tmp, flags); + } + off = tmp; continue; } @@ -3407,92 +3423,91 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines) continue; } - /* Detect and resolve wiki links. */ + /* Recognize and resolve wiki links. + * Wiki-links maybe '[[destination]]' or '[[destination|label]]'. + */ if ((ctx->parser.flags & MD_FLAG_WIKILINKS) && - next_opener != NULL && next_closer != NULL && - (opener->end - opener->beg == 1) && + (opener->end - opener->beg == 1) && /* not image */ + next_opener != NULL && /* double '[' opener */ + next_opener->ch == '[' && (next_opener->beg == opener->beg - 1) && - (next_closer->beg == closer->beg + 1) && (next_opener->end - next_opener->beg == 1) && - (next_closer->end - next_closer->beg == 1) && - (next_opener->ch == '[' && next_closer->ch == ']')) + next_closer != NULL && /* double ']' closer */ + next_closer->ch == ']' && + (next_closer->beg == closer->beg + 1) && + (next_closer->end - next_closer->beg == 1)) { + MD_MARK* delim = NULL; + int delim_index; + OFF dest_beg, dest_end; + is_link = TRUE; - if (opener->end == closer->beg) - is_link = FALSE; - - int delim_index = opener_index; - MD_MARK* delim = &ctx->marks[delim_index]; - - /* To prevent runaway O(n^2) performance, don't look too far for the delimiter (.. < 100). */ - while(is_link && delim_index < closer_index && (delim_index - opener_index) < 100 ) { - if(delim->ch == '|' && delim->beg == opener->end) { - is_link = FALSE; - } else if(delim->ch == '|' && delim->end == closer->beg) { - break; - } else if(delim->ch == '|') { - opener->end = delim->beg; + /* We don't allow destination to be longer than 100 characters. + * Lets scan to see whether there is '|'. (If not then the whole + * wiki-link has to be below the 100 characters.) */ + delim_index = opener_index + 1; + while(delim_index < closer_index) { + MD_MARK* m = &ctx->marks[delim_index]; + if(m->ch == '|') { + delim = m; break; } + if(m->ch != 'D' && m->beg - opener->end > 100) + break; delim_index++; - delim = &ctx->marks[delim_index]; } - - OFF off = closer->beg-1; - int count = 0; - int has_label = (opener->end - opener->beg > 2); - const MD_LINE* line; - int line_index = n_lines-1; - - /* An image inside the link target disables the wiki link. */ - if( (has_label && last_img_beg >= opener->beg && last_img_end <= opener->end) || - (!has_label && last_img_beg >= opener->beg && last_img_end <= closer->end)) + dest_beg = opener->end; + dest_end = (delim != NULL) ? delim->beg : closer->beg; + if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100) is_link = FALSE; - while(is_link && off > opener->beg && count++ < 100) { - - /* Newline not allowed in link target. */ - if(has_label && (off <= opener->end) && ISNEWLINE(off)) - is_link = FALSE; - else if(!has_label && off > opener->end && ISNEWLINE(off)) - is_link = FALSE; - else if(ISNEWLINE(off)) { - line = &lines[line_index--]; - count = count - line->total_indent - 1; /* Count newline too. */ + /* There may not be any new line in the destination. */ + if(is_link) { + OFF off; + for(off = dest_beg; off < dest_end; off++) { + if(ISNEWLINE(off)) { + is_link = FALSE; + break; + } } - - off--; } - if(off > opener->beg) - is_link = FALSE; - if(is_link) { - if(delim->ch == '|') - delim->flags |= MD_MARK_RESOLVED; + if(delim != NULL) { + if(delim->end < closer->beg) { + opener->end = delim->beg; + } else { + /* The pipe is just before the closer: [[foo|]] */ + closer->beg = delim->beg; + delim = NULL; + } + } opener->beg = next_opener->beg; - closer->end = next_closer->end; - opener->next = closer_index; opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED; + + closer->end = next_closer->end; closer->prev = opener_index; closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED; last_link_beg = opener->beg; last_link_end = closer->end; - if ((opener->end - opener->beg > 2)) + if(delim != NULL) { + delim->flags |= MD_MARK_RESOLVED; + md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL); md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index); + } else { + md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL); + } - opener_index = next_index; + opener_index = next_opener->prev; continue; } - } - if(next_opener != NULL && next_opener->beg == closer->end) { if(next_closer->beg > closer->end + 1) { /* Might be full reference link. */ @@ -3571,6 +3586,7 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines) MD_ASSERT(ctx->marks[opener_index+2].ch == 'D'); md_mark_store_ptr(ctx, opener_index+2, attr.title); + /* The title might or might not have been allocated for us. */ if(attr.title_needs_free) md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+2); ctx->marks[opener_index+2].prev = attr.title_size; @@ -3669,8 +3685,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index) int i, n_opener_chains; unsigned flags = mark->flags; - /* Apply "rule of three". (This is why we break asterisk opener - * marks into multiple chains.) */ + /* Apply the "rule of three". */ n_opener_chains = 0; opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0; if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2) @@ -3706,7 +3721,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index) if(opener != NULL) { SZ opener_size = opener->end - opener->beg; SZ closer_size = mark->end - mark->beg; - MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, mark_index); + MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index); if(opener_size > closer_size) { opener_index = md_split_emph_mark(ctx, opener_index, closer_size); @@ -3729,20 +3744,23 @@ md_analyze_emph(MD_CTX* ctx, int mark_index) static void md_analyze_tilde(MD_CTX* ctx, int mark_index) { - /* We attempt to be Github Flavored Markdown compatible here. GFM says - * that length of the tilde sequence is not important at all. Note that - * implies the TILDE_OPENERS chain can have at most one item. */ + MD_MARK* mark = &ctx->marks[mark_index]; + MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index); - if(TILDE_OPENERS.head >= 0) { - /* The chain already contains an opener, so we may resolve the span. */ - int opener_index = TILDE_OPENERS.head; + /* We attempt to be Github Flavored Markdown compatible here. GFM accepts + * only tildes sequences of length 1 and 2, and the length of the opener + * and closer has to match. */ + + if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && chain->head >= 0) { + int opener_index = chain->head; md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING); - md_resolve_range(ctx, &TILDE_OPENERS, opener_index, mark_index); - } else { - /* We can only be opener. */ - md_mark_chain_append(ctx, &TILDE_OPENERS, mark_index); + md_resolve_range(ctx, chain, opener_index, mark_index); + return; } + + if(mark->flags & MD_MARK_POTENTIAL_OPENER) + md_mark_chain_append(ctx, chain, mark_index); } static void @@ -3987,25 +4005,14 @@ static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int mark_beg, int mark_end) { + int i; + md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:.")); - ASTERISK_OPENERS_extraword_mod3_0.head = -1; - ASTERISK_OPENERS_extraword_mod3_0.tail = -1; - ASTERISK_OPENERS_extraword_mod3_1.head = -1; - ASTERISK_OPENERS_extraword_mod3_1.tail = -1; - ASTERISK_OPENERS_extraword_mod3_2.head = -1; - ASTERISK_OPENERS_extraword_mod3_2.tail = -1; - ASTERISK_OPENERS_intraword_mod3_0.head = -1; - ASTERISK_OPENERS_intraword_mod3_0.tail = -1; - ASTERISK_OPENERS_intraword_mod3_1.head = -1; - ASTERISK_OPENERS_intraword_mod3_1.tail = -1; - ASTERISK_OPENERS_intraword_mod3_2.head = -1; - ASTERISK_OPENERS_intraword_mod3_2.tail = -1; - UNDERSCORE_OPENERS.head = -1; - UNDERSCORE_OPENERS.tail = -1; - TILDE_OPENERS.head = -1; - TILDE_OPENERS.tail = -1; - DOLLAR_OPENERS.head = -1; - DOLLAR_OPENERS.tail = -1; + + for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) { + ctx->mark_chains[i].head = -1; + ctx->mark_chains[i].tail = -1; + } } static int @@ -4113,7 +4120,23 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) } break; - case '_': + case '_': /* Underline (or emphasis if we fall through). */ + if(ctx->parser.flags & MD_FLAG_UNDERLINE) { + if(mark->flags & MD_MARK_OPENER) { + while(off < mark->end) { + MD_ENTER_SPAN(MD_SPAN_U, NULL); + off++; + } + } else { + while(off < mark->end) { + MD_LEAVE_SPAN(MD_SPAN_U, NULL); + off++; + } + } + break; + } + /* Fall though. */ + case '*': /* Emphasis, strong emphasis. */ if(mark->flags & MD_MARK_OPENER) { if((mark->end - off) % 2) { @@ -4159,19 +4182,18 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) { const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]); const MD_MARK* closer = &ctx->marks[opener->next]; + const MD_MARK* dest_mark; + const MD_MARK* title_mark; if ((opener->ch == '[' && closer->ch == ']') && opener->end - opener->beg >= 2 && - closer->end - closer->beg == 2) + closer->end - closer->beg >= 2) { - const MD_MARK* delim = opener+3; /* Scan past the two dummy marks. */ int has_label = (opener->end - opener->beg > 2); - int target_sz; + SZ target_sz; if(has_label) target_sz = opener->end - (opener->beg+2); - else if(delim->ch == '|') - target_sz = (closer->beg-1) - opener->end; else target_sz = closer->beg - opener->end; @@ -4182,10 +4204,9 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) break; } - const MD_MARK* dest_mark = opener+1; - const MD_MARK* title_mark = opener+2; - + dest_mark = opener+1; MD_ASSERT(dest_mark->ch == 'D'); + title_mark = opener+2; MD_ASSERT(title_mark->ch == 'D'); MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'), @@ -4557,7 +4578,7 @@ md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_ MD_ASSERT(indent >= 0); /* Output code indentation. */ - while(indent >= indent_chunk_size) { + while(indent > (int) indent_chunk_size) { MD_TEXT(text_type, indent_chunk_str, indent_chunk_size); indent -= indent_chunk_size; } @@ -4813,7 +4834,9 @@ md_push_block_bytes(MD_CTX* ctx, int n_bytes) if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) { void* new_block_bytes; - ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0 ? ctx->alloc_block_bytes * 2 : 512); + ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0 + ? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2 + : 512); new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes); if(new_block_bytes == NULL) { MD_LOG("realloc() failed."); @@ -5003,7 +5026,6 @@ md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis) line->beg = analysis->beg; line->end = analysis->end; - line->total_indent = analysis->total_indent; } ctx->current_block->n_lines++; @@ -5246,7 +5268,7 @@ md_is_html_block_start_condition(MD_CTX* ctx, OFF beg) #ifdef X #undef X #endif -#define X(name) { _T(name), sizeof(name)-1 } +#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) } #define Xend { NULL, 0 } static const TAG t1[] = { X("script"), X("pre"), X("style"), Xend }; @@ -5302,7 +5324,7 @@ md_is_html_block_start_condition(MD_CTX* ctx, OFF beg) /* Check for type 5: size) { - if(md_ascii_eq(STR(off), _T("![CDATA["), 8 * sizeof(CHAR))) + if(md_ascii_eq(STR(off), _T("![CDATA["), 8)) return 5; } } @@ -5451,7 +5473,9 @@ md_push_container(MD_CTX* ctx, const MD_CONTAINER* container) if(ctx->n_containers >= ctx->alloc_containers) { MD_CONTAINER* new_containers; - ctx->alloc_containers = (ctx->alloc_containers > 0 ? ctx->alloc_containers * 2 : 16); + ctx->alloc_containers = (ctx->alloc_containers > 0 + ? ctx->alloc_containers + ctx->alloc_containers / 2 + : 16); new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER)); if(new_containers == NULL) { MD_LOG("realloc() failed."); @@ -5783,7 +5807,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, #if 1 /* This is 2nd half of the hack. If the flag is set (that is there * were 2nd blank line at the start of the list item) and we would also - * belonging to such list item, then interrupt the list. */ + * belonging to such list item, than interrupt the list. */ ctx->last_line_has_list_loosening_effect = FALSE; if(ctx->last_list_item_starts_with_two_blank_lines) { if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') && @@ -6008,8 +6032,6 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, break; } - line->total_indent = total_indent; - /* Scan for end of the line. * * Note this is quite a bottleneck of the parsing as we here iterate almost @@ -6245,10 +6267,8 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd int ret; if(parser->abi_version != 0) { - #if WITH_PARSER_DEBUG_LOG if(parser->debug_log != NULL) parser->debug_log("Unsupported abi_version.", userdata); - #endif return -1; } diff --git a/src/md4c.h b/src/md4c.h index b2b6628..8bba712 100644 --- a/src/md4c.h +++ b/src/md4c.h @@ -2,7 +2,7 @@ * MD4C: Markdown parser for C * (http://github.com/mity/md4c) * - * Copyright (c) 2016-2019 Martin Mitas + * Copyright (c) 2016-2020 Martin Mitas * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,15 +23,15 @@ * IN THE SOFTWARE. */ -#ifndef MD4C_MARKDOWN_H -#define MD4C_MARKDOWN_H +#ifndef MD4C_H +#define MD4C_H #ifdef __cplusplus extern "C" { #endif #if defined MD4C_USE_UTF16 - /* Magic to support UTF-16. Not that in order to use it, you have to define + /* Magic to support UTF-16. Note that in order to use it, you have to define * the macro MD4C_USE_UTF16 both when building MD4C as well as when * including this header in your code. */ #ifdef _WIN32 @@ -119,7 +119,7 @@ typedef enum MD_SPANTYPE { * Detail: Structure MD_SPAN_IMG_DETAIL. * Note: Image text can contain nested spans and even nested images. * If rendered into ALT attribute of HTML tag, it's responsibility - * of the renderer to deal with it. + * of the parser to deal with it. */ MD_SPAN_IMG, @@ -140,7 +140,11 @@ typedef enum MD_SPANTYPE { /* Wiki links * Note: Recognized only when MD_FLAG_WIKILINKS is enabled. */ - MD_SPAN_WIKILINK + MD_SPAN_WIKILINK, + + /* ... + * Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */ + MD_SPAN_U } MD_SPANTYPE; /* Text is the actual textual contents of span. */ @@ -159,7 +163,7 @@ typedef enum MD_TEXTTYPE { MD_TEXT_SOFTBR, /* '\n' in source text where it is not semantically meaningful (soft break) */ /* Entity. - * (a) Named entity, e.g.   + * (a) Named entity, e.g.   * (Note MD4C does not have a list of known entities. * Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is * treated as a named entity.) @@ -167,7 +171,7 @@ typedef enum MD_TEXTTYPE { * (c) Hexadecimal entity, e.g. ካ * * As MD4C is mostly encoding agnostic, application gets the verbatim - * entity text into the MD_RENDERER::text_callback(). */ + * entity text into the MD_PARSER::text_callback(). */ MD_TEXT_ENTITY, /* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`). @@ -202,8 +206,13 @@ typedef enum MD_ALIGN { * propagated within various detailed structures, but which still may contain * string portions of different types like e.g. entities. * - * So, for example, lets consider an image has a title attribute string - * set to "foo " bar". (Note the string size is 14.) + * So, for example, lets consider this image: + * + * ![image alt text](http://example.org/image.png 'foo " bar') + * + * The image alt text is propagated as a normal text via the MD_PARSER::text() + * callback. However, the image title ('foo " bar') is propagated as + * MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title. * * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following: * -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0) @@ -211,10 +220,12 @@ typedef enum MD_ALIGN { * -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10) * -- [3]: (n/a) (n/a ; substr_offsets[3] == 14) * - * Note that these conditions are guaranteed: + * Note that these invariants are always guaranteed: * -- substr_offsets[0] == 0 * -- substr_offsets[LAST+1] == size - * -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR substrings can appear. + * -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR + * substrings can appear. This could change only of the specification + * changes. */ typedef struct MD_ATTRIBUTE { const MD_CHAR* text; @@ -280,7 +291,7 @@ typedef struct MD_SPAN_WIKILINK { /* Flags specifying extensions/deviations from CommonMark specification. * - * By default (when MD_RENDERER::flags == 0), we follow CommonMark specification. + * By default (when MD_PARSER::flags == 0), we follow CommonMark specification. * The following flags may allow some extensions or deviations from it. */ #define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */ @@ -296,6 +307,7 @@ typedef struct MD_SPAN_WIKILINK { #define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */ #define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */ #define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */ +#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */ #define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS) #define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS) @@ -312,7 +324,7 @@ typedef struct MD_SPAN_WIKILINK { #define MD_DIALECT_COMMONMARK 0 #define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS) -/* Renderer structure. +/* Parser structure. */ typedef struct MD_PARSER { /* Reserved. Set to zero. @@ -333,9 +345,10 @@ typedef struct MD_PARSER { * * Note any strings provided to the callbacks as their arguments or as * members of any detail structure are generally not zero-terminated. - * Application has take the respective size information into account. + * Application has to take the respective size information into account. * - * Callbacks may abort further parsing of the document by returning non-zero. + * Any rendering callback may abort further parsing of the document by + * returning non-zero. */ int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/); int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/); @@ -360,18 +373,19 @@ typedef struct MD_PARSER { } MD_PARSER; -/* For backward compatibility. Do not use in new code. */ +/* For backward compatibility. Do not use in new code. + */ typedef MD_PARSER MD_RENDERER; /* Parse the Markdown document stored in the string 'text' of size 'size'. - * The renderer provides callbacks to be called during the parsing so the + * The parser provides callbacks to be called during the parsing so the * caller can render the document on the screen or convert the Markdown * to another format. * * Zero is returned on success. If a runtime error occurs (e.g. a memory * fails), -1 is returned. If the processing is aborted due any callback - * returning non-zero, md_parse() the return value of the callback is returned. + * returning non-zero, the return value of the callback is returned. */ int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata); @@ -380,4 +394,4 @@ int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* u } /* extern "C" { */ #endif -#endif /* MD4C_MARKDOWN_H */ +#endif /* MD4C_H */