mirror of
https://github.com/danbulant/markdown-wasm
synced 2026-05-24 12:35:34 +00:00
upgrade md4c to 601885f738
This commit is contained in:
parent
9941275ff7
commit
9e251082ee
7 changed files with 482 additions and 399 deletions
29
markdown.d.ts
vendored
29
markdown.d.ts
vendored
|
|
@ -10,18 +10,12 @@ type Source = string|ArrayLike<number>
|
|||
|
||||
/** Options for the parse function */
|
||||
export interface ParseOptions {
|
||||
/**
|
||||
* Customize parsing.
|
||||
* If not provided, the following flags are used, equating to github-style parsing:
|
||||
* COLLAPSE_WHITESPACE
|
||||
* PERMISSIVE_ATX_HEADERS
|
||||
* PERMISSIVE_URL_AUTO_LINKS
|
||||
* STRIKETHROUGH
|
||||
* TABLES
|
||||
* TASK_LISTS
|
||||
*/
|
||||
/** Customize parsing. Defaults to ParseFlags.DEFAULT */
|
||||
parseFlags? :ParseFlags
|
||||
|
||||
/** Select output format. Defaults to "html" */
|
||||
format? : "html" | "xhtml"
|
||||
|
||||
/**
|
||||
* asMemoryView=true causes parse() to return a view of heap memory as a Uint8Array,
|
||||
* instead of a string.
|
||||
|
|
@ -51,7 +45,18 @@ export enum ParseFlags {
|
|||
/** Enable tables extension. */ TABLES,
|
||||
/** Enable task list extension. */ TASK_LISTS,
|
||||
/** Enable wiki links extension. */ WIKI_LINKS,
|
||||
/** Enable underline extension (disables '_' for emphasis) */ UNDERLINE,
|
||||
|
||||
/** Default flags */ DEFAULT,
|
||||
/** Shorthand for NO_HTML_BLOCKS | NO_HTML_SPANS */ NO_HTML,
|
||||
/** Default flags are:
|
||||
* COLLAPSE_WHITESPACE |
|
||||
* PERMISSIVE_ATX_HEADERS |
|
||||
* PERMISSIVE_URL_AUTO_LINKS |
|
||||
* STRIKETHROUGH |
|
||||
* TABLES |
|
||||
* TASK_LISTS
|
||||
*/
|
||||
DEFAULT,
|
||||
|
||||
/** Shorthand for NO_HTML_BLOCKS | NO_HTML_SPANS */
|
||||
NO_HTML,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ typedef struct HtmlRenderer_st {
|
|||
WBuf* outbuf;
|
||||
int imgnest;
|
||||
int addanchor;
|
||||
u32 flags;
|
||||
} HtmlRenderer;
|
||||
|
||||
|
||||
|
|
@ -246,7 +247,7 @@ static void render_close_img_span(HtmlRenderer* r, const MD_SPAN_IMG_DETAIL* det
|
|||
render_literal(r, "\" title=\"");
|
||||
render_attribute(r, &det->title);
|
||||
}
|
||||
render_literal(r, "\">");
|
||||
render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\"/>" : "\">");
|
||||
r->imgnest--;
|
||||
}
|
||||
|
||||
|
|
@ -273,7 +274,7 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
|
|||
case MD_BLOCK_UL: render_literal(r, "<ul>\n"); break;
|
||||
case MD_BLOCK_OL: render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break;
|
||||
case MD_BLOCK_LI: render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail); break;
|
||||
case MD_BLOCK_HR: render_literal(r, "<hr>\n"); break;
|
||||
case MD_BLOCK_HR: render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "<hr/>\n" : "<hr>\n"); break;
|
||||
case MD_BLOCK_H:
|
||||
{
|
||||
render_literal(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]);
|
||||
|
|
@ -324,14 +325,28 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
|
|||
HtmlRenderer* r = (HtmlRenderer*) userdata;
|
||||
|
||||
if(r->imgnest > 0) {
|
||||
/* We are inside an image, i.e. rendering the ALT attribute of
|
||||
* <IMG> tag. */
|
||||
/* We are inside a Markdown image label. Markdown allows to use any
|
||||
* emphasis and other rich contents in that context similarly as in
|
||||
* any link label.
|
||||
*
|
||||
* However, unlike in the case of links (where that contents becomes
|
||||
* contents of the <a>...</a> tag), in the case of images the contents
|
||||
* is supposed to fall into the attribute alt: <img alt="...">.
|
||||
*
|
||||
* In that context we naturally cannot output nested HTML tags. So lets
|
||||
* suppress them and only output the plain text (i.e. what falls into
|
||||
* text() callback).
|
||||
*
|
||||
* This make-it-a-plain-text approach is the recommended practice by
|
||||
* CommonMark specification (for HTML output).
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
switch(type) {
|
||||
case MD_SPAN_EM: render_literal(r, "<em>"); break;
|
||||
case MD_SPAN_STRONG: render_literal(r, "<b>"); break;
|
||||
case MD_SPAN_U: render_literal(r, "<u>"); break;
|
||||
case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
|
||||
case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
|
||||
case MD_SPAN_CODE: render_literal(r, "<code>"); break;
|
||||
|
|
@ -348,8 +363,8 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
|
|||
HtmlRenderer* r = (HtmlRenderer*) userdata;
|
||||
|
||||
if(r->imgnest > 0) {
|
||||
/* We are inside an image, i.e. rendering the ALT attribute of
|
||||
* <IMG> tag. */
|
||||
/* Ditto as in enter_span_callback(), except we have to allow the
|
||||
* end of the <img> tag. */
|
||||
if(r->imgnest == 1 && type == MD_SPAN_IMG)
|
||||
render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
|
||||
return 0;
|
||||
|
|
@ -358,6 +373,7 @@ static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
|
|||
switch(type) {
|
||||
case MD_SPAN_EM: render_literal(r, "</em>"); break;
|
||||
case MD_SPAN_STRONG: render_literal(r, "</b>"); break;
|
||||
case MD_SPAN_U: render_literal(r, "</u>"); break;
|
||||
case MD_SPAN_A: render_literal(r, "</a>"); break;
|
||||
case MD_SPAN_IMG: /*noop, handled above*/ break;
|
||||
case MD_SPAN_CODE: render_literal(r, "</code>"); break;
|
||||
|
|
@ -395,7 +411,17 @@ static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, vo
|
|||
|
||||
switch(type) {
|
||||
case MD_TEXT_NULLCHAR: render_text(r, ucReplacementUTF8, sizeof(ucReplacementUTF8)); break;
|
||||
case MD_TEXT_BR: render_literal(r, (r->imgnest == 0 ? "<br>\n" : " ")); break;
|
||||
case MD_TEXT_BR:
|
||||
render_literal(
|
||||
r,
|
||||
r->imgnest == 0 ?
|
||||
((r->flags & MD_HTML_FLAG_XHTML) ? "<br/>\n" : "<br>\n") :
|
||||
" "
|
||||
);
|
||||
break;
|
||||
|
||||
render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "<hr/>\n" : "<hr>\n"); break;
|
||||
|
||||
case MD_TEXT_SOFTBR: render_literal(r, (r->imgnest == 0 ? "\n" : " ")); break;
|
||||
case MD_TEXT_HTML: render_text(r, text, size); break;
|
||||
case MD_TEXT_ENTITY: render_text(r, text, size); break;
|
||||
|
|
@ -409,9 +435,14 @@ static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, vo
|
|||
// dlog("MD4C: %s\n", msg);
|
||||
// }
|
||||
|
||||
int
|
||||
fmt_html(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, unsigned parser_flags) {
|
||||
HtmlRenderer render = { outbuf, 0, 0 };
|
||||
int fmt_html(
|
||||
const MD_CHAR* input,
|
||||
MD_SIZE input_size,
|
||||
WBuf* outbuf,
|
||||
u32 parser_flags,
|
||||
u32 render_flags
|
||||
) {
|
||||
HtmlRenderer render = { outbuf, 0, 0, render_flags };
|
||||
|
||||
MD_PARSER parser = {
|
||||
0,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#pragma once
|
||||
#include "wbuf.h"
|
||||
|
||||
int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags);
|
||||
#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g. <br>, generate <br/>
|
||||
|
||||
int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags, u32 renderFlags);
|
||||
|
|
|
|||
18
src/md.c
18
src/md.c
|
|
@ -5,13 +5,10 @@
|
|||
#include "fmt_html.h"
|
||||
// #include "fmt_json.h"
|
||||
|
||||
// #include "md4c.h"
|
||||
/* If set, debug output from md_parse() is sent to stderr. */
|
||||
#define MD_RENDER_FLAG_DEBUG 0x0001
|
||||
#define MD_RENDER_FLAG_VERBATIM_ENTITIES 0x0002
|
||||
|
||||
// these should be in sync with "OutputFlags" in md.js
|
||||
typedef enum OutputFlags {
|
||||
OutputFlagsHTML = 1 << 0,
|
||||
OutputFlagHTML = 1 << 0,
|
||||
OutputFlagXHTML = 1 << 1,
|
||||
} OutputFlags;
|
||||
|
||||
typedef enum ErrorCode {
|
||||
|
|
@ -44,10 +41,15 @@ export size_t parseUTF8(
|
|||
|
||||
WBufReset(&outbuf);
|
||||
|
||||
if (outflags & OutputFlagsHTML) {
|
||||
if (outflags & OutputFlagHTML) {
|
||||
WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations
|
||||
|
||||
if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags) != 0) {
|
||||
u32 render_flags = 0;
|
||||
if (outflags & OutputFlagXHTML) {
|
||||
render_flags |= MD_HTML_FLAG_XHTML;
|
||||
}
|
||||
|
||||
if (fmt_html(inbufptr, inbuflen, &outbuf, parser_flags, render_flags) != 0) {
|
||||
// fmt_html returns status of md_parse which only fails in extreme cases
|
||||
// like when out of memory. md4c does not provide error codes or error messages.
|
||||
WErrSet(ERR_MD_PARSE, "md parser error");
|
||||
|
|
|
|||
47
src/md.js
47
src/md.js
|
|
@ -12,7 +12,7 @@ export const ParseFlags = {
|
|||
PERMISSIVE_ATX_HEADERS: 0x0002, // Do not require space in ATX headers ( ###header )
|
||||
PERMISSIVE_URL_AUTO_LINKS: 0x0004, // Recognize URLs as links even without <...>
|
||||
PERMISSIVE_EMAIL_AUTO_LINKS: 0x0008, // Recognize e-mails as links even without <...>
|
||||
NO_INDENTED_CODE_BLOCKS: 0x0010, // Disable indented code blocks. (Only fenced code works.)
|
||||
NO_INDENTED_CODE_BLOCKS: 0x0010, // Disable indented code blocks. (Only fenced code works)
|
||||
NO_HTML_BLOCKS: 0x0020, // Disable raw HTML blocks.
|
||||
NO_HTML_SPANS: 0x0040, // Disable raw HTML (inline).
|
||||
TABLES: 0x0100, // Enable tables extension.
|
||||
|
|
@ -21,6 +21,7 @@ export const ParseFlags = {
|
|||
TASK_LISTS: 0x0800, // Enable task list extension.
|
||||
LATEX_MATH_SPANS: 0x1000, // Enable $ and $$ containing LaTeX equations.
|
||||
WIKI_LINKS: 0x2000, // Enable wiki links extension.
|
||||
UNDERLINE: 0x4000, // Enable underline extension (disables '_' for emphasis)
|
||||
|
||||
// Github style default flags
|
||||
DEFAULT: 0x0001 | 0x0002 | 0x0004 | 0x0200 | 0x0100 | 0x0800,
|
||||
|
|
@ -34,38 +35,46 @@ export const ParseFlags = {
|
|||
NO_HTML: 0x0020 | 0x0040, // NO_HTML_BLOCKS | NO_HTML_SPANS
|
||||
}
|
||||
|
||||
// these should be in sync with "OutputFlags" in md.c
|
||||
const OutputFlags = {
|
||||
HTML: 1 << 0, // Output HTML
|
||||
}
|
||||
|
||||
const defaultOptions = {
|
||||
parseFlags: ParseFlags.DEFAULT,
|
||||
|
||||
// how to format the output
|
||||
format: "html",
|
||||
|
||||
// Return a view of heap memory as a Uint8Array, instead of a string.
|
||||
//
|
||||
// The returned Uint8Array is only valid until the next call to parse().
|
||||
// If you need to keep the returned Uint8Array around, call Uint8Array.slice()
|
||||
// to make a copy, as each call to parse() reuses the same underlying memory.
|
||||
asMemoryView: false,
|
||||
XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set)
|
||||
}
|
||||
|
||||
export function parse(source, options) {
|
||||
options = options ? {__proto__:defaultOptions, ...options} : defaultOptions
|
||||
let outflags = (0
|
||||
| (options.format == "html" ? OutputFlags.HTML : 0)
|
||||
options = options || {}
|
||||
|
||||
let parseFlags = (
|
||||
options.parseFlags === undefined ? ParseFlags.DEFAULT :
|
||||
options.parseFlags
|
||||
)
|
||||
|
||||
let outputFlags = 0
|
||||
switch (options.format) {
|
||||
case "xhtml":
|
||||
outputFlags |= OutputFlags.HTML | OutputFlags.XHTML
|
||||
break
|
||||
|
||||
case "html":
|
||||
case undefined:
|
||||
case null:
|
||||
case "":
|
||||
outputFlags |= OutputFlags.HTML
|
||||
break
|
||||
|
||||
default:
|
||||
throw new Error(`invalid format "${options.format}"`)
|
||||
}
|
||||
|
||||
let buf = typeof source == "string" ? utf8.encode(source) : source
|
||||
let outbuf = withOutPtr(outptr => withTmpBytePtr(buf, (inptr, inlen) =>
|
||||
_parseUTF8(inptr, inlen, options.parseFlags, outflags, outptr)
|
||||
_parseUTF8(inptr, inlen, parseFlags, outputFlags, outptr)
|
||||
))
|
||||
|
||||
// check for error and throw if needed
|
||||
werrCheck()
|
||||
|
||||
// DEBUG
|
||||
// if (outbuf) {
|
||||
// console.log(utf8.decode(outbuf))
|
||||
// }
|
||||
|
|
|
|||
538
src/md4c.c
538
src/md4c.c
|
|
@ -2,7 +2,7 @@
|
|||
* MD4C: Markdown parser for C
|
||||
* (http://github.com/mity/md4c)
|
||||
*
|
||||
* Copyright (c) 2016-2019 Martin Mitas
|
||||
* Copyright (c) 2016-2020 Martin Mitas
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
|
@ -34,14 +34,23 @@
|
|||
*** Miscellaneous Stuff ***
|
||||
*****************************/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* MSVC does not understand "inline" when building as pure C (not C++).
|
||||
* However it understands "__inline" */
|
||||
#ifndef __cplusplus
|
||||
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199409L
|
||||
/* C89/90 or old compilers in general may not understand "inline". */
|
||||
#if defined __GNUC__
|
||||
#define inline __inline__
|
||||
#elif defined _MSC_VER
|
||||
#define inline __inline
|
||||
#else
|
||||
#define inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Make the UTF-8 support the default. */
|
||||
#if !defined MD4C_USE_ASCII && !defined MD4C_USE_UTF8 && !defined MD4C_USE_UTF16
|
||||
#define MD4C_USE_UTF8
|
||||
#endif
|
||||
|
||||
/* Magic for making wide literals with MD4C_USE_UTF16. */
|
||||
#ifdef _T
|
||||
#undef _T
|
||||
#endif
|
||||
|
|
@ -126,21 +135,22 @@ struct MD_CTX_tag {
|
|||
#endif
|
||||
|
||||
/* For resolving of inline spans. */
|
||||
MD_MARKCHAIN mark_chains[12];
|
||||
#define PTR_CHAIN ctx->mark_chains[0]
|
||||
#define TABLECELLBOUNDARIES ctx->mark_chains[1]
|
||||
#define ASTERISK_OPENERS_extraword_mod3_0 ctx->mark_chains[2]
|
||||
#define ASTERISK_OPENERS_extraword_mod3_1 ctx->mark_chains[3]
|
||||
#define ASTERISK_OPENERS_extraword_mod3_2 ctx->mark_chains[4]
|
||||
#define ASTERISK_OPENERS_intraword_mod3_0 ctx->mark_chains[5]
|
||||
#define ASTERISK_OPENERS_intraword_mod3_1 ctx->mark_chains[6]
|
||||
#define ASTERISK_OPENERS_intraword_mod3_2 ctx->mark_chains[7]
|
||||
#define UNDERSCORE_OPENERS ctx->mark_chains[8]
|
||||
#define TILDE_OPENERS ctx->mark_chains[9]
|
||||
#define BRACKET_OPENERS ctx->mark_chains[10]
|
||||
#define DOLLAR_OPENERS ctx->mark_chains[11]
|
||||
MD_MARKCHAIN mark_chains[13];
|
||||
#define PTR_CHAIN (ctx->mark_chains[0])
|
||||
#define TABLECELLBOUNDARIES (ctx->mark_chains[1])
|
||||
#define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2])
|
||||
#define ASTERISK_OPENERS_extraword_mod3_1 (ctx->mark_chains[3])
|
||||
#define ASTERISK_OPENERS_extraword_mod3_2 (ctx->mark_chains[4])
|
||||
#define ASTERISK_OPENERS_intraword_mod3_0 (ctx->mark_chains[5])
|
||||
#define ASTERISK_OPENERS_intraword_mod3_1 (ctx->mark_chains[6])
|
||||
#define ASTERISK_OPENERS_intraword_mod3_2 (ctx->mark_chains[7])
|
||||
#define UNDERSCORE_OPENERS (ctx->mark_chains[8])
|
||||
#define TILDE_OPENERS_1 (ctx->mark_chains[9])
|
||||
#define TILDE_OPENERS_2 (ctx->mark_chains[10])
|
||||
#define BRACKET_OPENERS (ctx->mark_chains[11])
|
||||
#define DOLLAR_OPENERS (ctx->mark_chains[12])
|
||||
#define OPENERS_CHAIN_FIRST 2
|
||||
#define OPENERS_CHAIN_LAST 11
|
||||
#define OPENERS_CHAIN_LAST 12
|
||||
|
||||
int n_table_cell_boundaries;
|
||||
|
||||
|
|
@ -203,14 +213,12 @@ struct MD_LINE_ANALYSIS_tag {
|
|||
OFF beg;
|
||||
OFF end;
|
||||
unsigned indent; /* Indentation level. */
|
||||
unsigned total_indent; /* Total indent in characters. */
|
||||
};
|
||||
|
||||
typedef struct MD_LINE_tag MD_LINE;
|
||||
struct MD_LINE_tag {
|
||||
OFF beg;
|
||||
OFF end;
|
||||
unsigned total_indent; /* Total indent in characters. */
|
||||
};
|
||||
|
||||
typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
|
||||
|
|
@ -225,19 +233,11 @@ struct MD_VERBATIMLINE_tag {
|
|||
*** Debugging ***
|
||||
*******************/
|
||||
|
||||
// change to 0 to disable MD_PARSER.debug_log
|
||||
#define WITH_PARSER_DEBUG_LOG 0
|
||||
|
||||
#if WITH_PARSER_DEBUG_LOG
|
||||
#define MD_LOG(msg) \
|
||||
do { \
|
||||
if(ctx->parser.debug_log != NULL) \
|
||||
ctx->parser.debug_log((msg), ctx->userdata); \
|
||||
} while(0)
|
||||
#else
|
||||
#define MD_LOG(msg) do{}while(0)
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DEBUG
|
||||
#define MD_ASSERT(cond) \
|
||||
|
|
@ -275,7 +275,7 @@ struct MD_VERBATIMLINE_tag {
|
|||
/* Character classification.
|
||||
* Note we assume ASCII compatibility of code points < 128 here. */
|
||||
#define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
|
||||
#define ISANYOF_(ch, palette) (md_strchr((palette), (ch)) != NULL)
|
||||
#define ISANYOF_(ch, palette) ((ch) != _T('\0') && md_strchr((palette), (ch)) != NULL)
|
||||
#define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2))
|
||||
#define ISANYOF3_(ch, ch1, ch2, ch3) ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
|
||||
#define ISASCII_(ch) ((unsigned)(ch) <= 127)
|
||||
|
|
@ -306,16 +306,14 @@ struct MD_VERBATIMLINE_tag {
|
|||
#define ISDIGIT(off) ISDIGIT_(CH(off))
|
||||
#define ISXDIGIT(off) ISXDIGIT_(CH(off))
|
||||
#define ISALNUM(off) ISALNUM_(CH(off))
|
||||
static inline const CHAR*
|
||||
md_strchr(const CHAR* str, CHAR ch)
|
||||
{
|
||||
OFF i;
|
||||
for(i = 0; str[i] != _T('\0'); i++) {
|
||||
if(ch == str[i])
|
||||
return (str + i);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
#if defined MD4C_USE_UTF16
|
||||
#define md_strchr wcschr
|
||||
#else
|
||||
#define md_strchr strchr
|
||||
#endif
|
||||
|
||||
|
||||
/* Case insensitive check of string equality. */
|
||||
static inline int
|
||||
|
|
@ -548,22 +546,22 @@ struct MD_UNICODE_FOLD_INFO_tag {
|
|||
R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e),
|
||||
R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef),
|
||||
R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
|
||||
R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f), S(0x3030),
|
||||
S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
|
||||
R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f),
|
||||
S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
|
||||
R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
|
||||
S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1),
|
||||
S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68),
|
||||
R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b),
|
||||
R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102),
|
||||
S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
|
||||
R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), R(0x10f55,0x10f59), R(0x11047,0x1104d),
|
||||
R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175), R(0x111c5,0x111c8),
|
||||
S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9), R(0x1144b,0x1144f),
|
||||
S(0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643), R(0x11660,0x1166c),
|
||||
R(0x1173c,0x1173e), S(0x1183b), S(0x119e2), R(0x11a3f,0x11a46), R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2),
|
||||
R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8), S(0x11fff), R(0x12470,0x12474),
|
||||
R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44), R(0x16e97,0x16e9a), S(0x16fe2),
|
||||
S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
|
||||
R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
|
||||
R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175),
|
||||
R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9),
|
||||
R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643),
|
||||
R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46),
|
||||
R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8),
|
||||
S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44),
|
||||
R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
|
||||
};
|
||||
#undef R
|
||||
#undef S
|
||||
|
|
@ -586,52 +584,56 @@ struct MD_UNICODE_FOLD_INFO_tag {
|
|||
static const unsigned FOLD_MAP_1[] = {
|
||||
R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136),
|
||||
R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182),
|
||||
S(0x0186), S(0x0187), S(0x0189), S(0x018b), S(0x018e), S(0x018f), S(0x0190), S(0x0191), S(0x0193),
|
||||
S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f), R(0x01a0,0x01a4), S(0x01a6),
|
||||
S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b3), S(0x01b7), S(0x01b8),
|
||||
S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8), S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee),
|
||||
S(0x01f1), S(0x01f2), S(0x01f6), S(0x01f7), R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a),
|
||||
S(0x023b), S(0x023d), S(0x023e), S(0x0241), S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345),
|
||||
S(0x0370), S(0x0376), S(0x037f), S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), R(0x0391,0x03a1),
|
||||
R(0x03a3,0x03ab), S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee),
|
||||
S(0x03f0), S(0x03f1), S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff),
|
||||
R(0x0400,0x040f), R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd),
|
||||
R(0x04d0,0x052e), R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80),
|
||||
S(0x1c81), S(0x1c82), S(0x1c83), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
|
||||
S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190),
|
||||
S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f),
|
||||
R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2),
|
||||
S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8),
|
||||
S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7),
|
||||
R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241),
|
||||
S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f),
|
||||
S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab),
|
||||
S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1),
|
||||
S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f),
|
||||
R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e),
|
||||
R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81),
|
||||
S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
|
||||
R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d),
|
||||
R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f),
|
||||
R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fba), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8), S(0x1fda), S(0x1fe8),
|
||||
S(0x1fea), S(0x1fec), S(0x1ff8), S(0x1ffa), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f),
|
||||
S(0x2183), R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64),
|
||||
R(0x2c67,0x2c6b), S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e),
|
||||
R(0x2c80,0x2ce2), S(0x2ceb), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e),
|
||||
R(0xa732,0xa76e), S(0xa779), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790),
|
||||
R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8),
|
||||
S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8),
|
||||
S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183),
|
||||
R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b),
|
||||
S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2),
|
||||
S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e),
|
||||
S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792),
|
||||
R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2),
|
||||
S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), R(0xab70,0xabbf),
|
||||
R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2), R(0x118a0,0x118bf),
|
||||
R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
|
||||
S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5),
|
||||
R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2),
|
||||
R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
|
||||
};
|
||||
static const unsigned FOLD_MAP_1_DATA[] = {
|
||||
0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
|
||||
0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0254, 0x0188, 0x0256, 0x018c, 0x01dd,
|
||||
0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275, 0x01a1, 0x01a5,
|
||||
0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x01b4, 0x0292, 0x01b9, 0x01bd, 0x01c6, 0x01c6,
|
||||
0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3, 0x0195, 0x01bf, 0x01f9, 0x021f,
|
||||
0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242, 0x0180, 0x0289, 0x028c, 0x0247, 0x024f,
|
||||
0x03b9, 0x0371, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af, 0x03cc, 0x03cd, 0x03b1, 0x03c1, 0x03c3, 0x03cb,
|
||||
0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0, 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8,
|
||||
0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf,
|
||||
0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432,
|
||||
0x0434, 0x043e, 0x0441, 0x0442, 0x044a, 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95,
|
||||
0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45,
|
||||
0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f67, 0x1fb0, 0x1f70, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1f76,
|
||||
0x1fe0, 0x1f7a, 0x1fe5, 0x1f78, 0x1f7c, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, 0x217f, 0x2184, 0x24d0,
|
||||
0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, 0x0271, 0x0250, 0x0252,
|
||||
0x2c73, 0x2c76, 0x023f, 0x2c81, 0x2ce3, 0x2cec, 0x2cf3, 0xa641, 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f,
|
||||
0xa733, 0xa76f, 0xa77a, 0x1d79, 0xa77f, 0xa787, 0xa78c, 0x0265, 0xa791, 0xa797, 0xa7a9, 0x0266, 0x025c,
|
||||
0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d, 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e,
|
||||
0x13a0, 0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df,
|
||||
0x16e60, 0x16e7f, 0x1e922, 0x1e943
|
||||
0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257,
|
||||
0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275,
|
||||
0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292,
|
||||
0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3,
|
||||
0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242,
|
||||
0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af,
|
||||
0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0,
|
||||
0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f,
|
||||
0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586,
|
||||
0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a,
|
||||
0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07,
|
||||
0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60,
|
||||
0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0,
|
||||
0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170,
|
||||
0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251,
|
||||
0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641,
|
||||
0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c,
|
||||
0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d,
|
||||
0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41,
|
||||
0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922,
|
||||
0x1e943
|
||||
};
|
||||
static const unsigned FOLD_MAP_2[] = {
|
||||
S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),
|
||||
|
|
@ -1338,8 +1340,9 @@ md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
|
|||
MD_TEXTTYPE* new_substr_types;
|
||||
OFF* new_substr_offsets;
|
||||
|
||||
build->substr_alloc = (build->substr_alloc == 0 ? 8 : build->substr_alloc * 2);
|
||||
|
||||
build->substr_alloc = (build->substr_alloc > 0
|
||||
? build->substr_alloc + build->substr_alloc / 2
|
||||
: 8);
|
||||
new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
|
||||
build->substr_alloc * sizeof(MD_TEXTTYPE));
|
||||
if(new_substr_types == NULL) {
|
||||
|
|
@ -1465,8 +1468,8 @@ abort:
|
|||
*** Dictionary of Reference Definitions ***
|
||||
*********************************************/
|
||||
|
||||
#define MD_FNV1A_BASE 2166136261
|
||||
#define MD_FNV1A_PRIME 16777619
|
||||
#define MD_FNV1A_BASE 2166136261U
|
||||
#define MD_FNV1A_PRIME 16777619U
|
||||
|
||||
static inline unsigned
|
||||
md_fnv1a(unsigned base, const void* data, size_t n)
|
||||
|
|
@ -1488,12 +1491,12 @@ struct MD_REF_DEF_tag {
|
|||
CHAR* label;
|
||||
CHAR* title;
|
||||
unsigned hash;
|
||||
SZ label_size : 24;
|
||||
unsigned label_needs_free : 1;
|
||||
unsigned title_needs_free : 1;
|
||||
SZ label_size;
|
||||
SZ title_size;
|
||||
OFF dest_beg;
|
||||
OFF dest_end;
|
||||
unsigned char label_needs_free : 1;
|
||||
unsigned char title_needs_free : 1;
|
||||
};
|
||||
|
||||
/* Label equivalence is quite complicated with regards to whitespace and case
|
||||
|
|
@ -1573,8 +1576,8 @@ md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size
|
|||
OFF b_off;
|
||||
int a_reached_end = FALSE;
|
||||
int b_reached_end = FALSE;
|
||||
MD_UNICODE_FOLD_INFO a_fi = { 0 };
|
||||
MD_UNICODE_FOLD_INFO b_fi = { 0 };
|
||||
MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 };
|
||||
MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 };
|
||||
OFF a_fi_off = 0;
|
||||
OFF b_fi_off = 0;
|
||||
int cmp;
|
||||
|
|
@ -1627,7 +1630,7 @@ md_ref_def_cmp(const void* a, const void* b)
|
|||
}
|
||||
|
||||
static int
|
||||
md_ref_def_cmp_stable(const void* a, const void* b)
|
||||
md_ref_def_cmp_for_sort(const void* a, const void* b)
|
||||
{
|
||||
int cmp;
|
||||
|
||||
|
|
@ -1680,6 +1683,7 @@ md_build_ref_def_hashtable(MD_CTX* ctx)
|
|||
bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
|
||||
|
||||
if(bucket == NULL) {
|
||||
/* The bucket is empty. Make it just point to the def. */
|
||||
ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -1691,12 +1695,12 @@ md_build_ref_def_hashtable(MD_CTX* ctx)
|
|||
MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
|
||||
|
||||
if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) {
|
||||
/* Ignore this ref. def. */
|
||||
/* Duplicate label: Ignore this ref. def. */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Make the bucket capable of holding more ref. defs. */
|
||||
list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 4 * sizeof(MD_REF_DEF*));
|
||||
/* Make the bucket complex, i.e. able to hold more ref. defs. */
|
||||
list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*));
|
||||
if(list == NULL) {
|
||||
MD_LOG("malloc() failed.");
|
||||
goto abort;
|
||||
|
|
@ -1704,22 +1708,28 @@ md_build_ref_def_hashtable(MD_CTX* ctx)
|
|||
list->ref_defs[0] = old_def;
|
||||
list->ref_defs[1] = def;
|
||||
list->n_ref_defs = 2;
|
||||
list->alloc_ref_defs = 4;
|
||||
list->alloc_ref_defs = 2;
|
||||
ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Append the def to the bucket list. */
|
||||
/* Append the def to the complex bucket list.
|
||||
*
|
||||
* Note in this case we ignore potential duplicates to avoid expensive
|
||||
* iterating over the complex bucket. Below, we revisit all the complex
|
||||
* buckets and handle it more cheaply after the complex bucket contents
|
||||
* is sorted. */
|
||||
list = (MD_REF_DEF_LIST*) bucket;
|
||||
if(list->n_ref_defs >= list->alloc_ref_defs) {
|
||||
int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2;
|
||||
MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
|
||||
sizeof(MD_REF_DEF_LIST) + 2 * list->alloc_ref_defs * sizeof(MD_REF_DEF*));
|
||||
sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
|
||||
if(list_tmp == NULL) {
|
||||
MD_LOG("realloc() failed.");
|
||||
goto abort;
|
||||
}
|
||||
list = list_tmp;
|
||||
list->alloc_ref_defs *= 2;
|
||||
list->alloc_ref_defs = alloc_ref_defs;
|
||||
ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
|
||||
}
|
||||
|
||||
|
|
@ -1738,9 +1748,12 @@ md_build_ref_def_hashtable(MD_CTX* ctx)
|
|||
continue;
|
||||
|
||||
list = (MD_REF_DEF_LIST*) bucket;
|
||||
qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_stable);
|
||||
qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);
|
||||
|
||||
/* Disable duplicates. */
|
||||
/* Disable all duplicates in the complex bucket by forcing all such
|
||||
* records to point to the 1st such ref. def. I.e. no matter which
|
||||
* record is found during the lookup, it will always point to the right
|
||||
* ref. def. in ctx->ref_defs[]. */
|
||||
for(j = 1; j < list->n_ref_defs; j++) {
|
||||
if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0)
|
||||
list->ref_defs[j] = list->ref_defs[j-1];
|
||||
|
|
@ -2063,21 +2076,18 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
|||
OFF label_contents_beg;
|
||||
OFF label_contents_end;
|
||||
int label_contents_line_index = -1;
|
||||
int label_is_multiline;
|
||||
CHAR* label;
|
||||
SZ label_size;
|
||||
int label_needs_free = FALSE;
|
||||
int label_is_multiline = FALSE;
|
||||
OFF dest_contents_beg;
|
||||
OFF dest_contents_end;
|
||||
OFF title_contents_beg;
|
||||
OFF title_contents_end;
|
||||
int title_contents_line_index;
|
||||
int title_is_multiline;
|
||||
int title_is_multiline = FALSE;
|
||||
OFF off;
|
||||
int line_index = 0;
|
||||
int tmp_line_index;
|
||||
MD_REF_DEF* def;
|
||||
int ret;
|
||||
MD_REF_DEF* def = NULL;
|
||||
int ret = 0;
|
||||
|
||||
/* Link label. */
|
||||
if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg,
|
||||
|
|
@ -2128,65 +2138,58 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
|||
if(off < lines[line_index].end)
|
||||
return FALSE;
|
||||
|
||||
/* Construct label. */
|
||||
if(!label_is_multiline) {
|
||||
label = (CHAR*) STR(label_contents_beg);
|
||||
label_size = label_contents_end - label_contents_beg;
|
||||
label_needs_free = FALSE;
|
||||
} else {
|
||||
MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
|
||||
lines + label_contents_line_index, n_lines - label_contents_line_index,
|
||||
_T(' '), &label, &label_size));
|
||||
label_needs_free = TRUE;
|
||||
}
|
||||
|
||||
/* Store the reference definition. */
|
||||
/* So, it _is_ a reference definition. Remember it. */
|
||||
if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
|
||||
MD_REF_DEF* new_defs;
|
||||
|
||||
ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0 ? ctx->alloc_ref_defs * 2 : 16);
|
||||
ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0
|
||||
? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
|
||||
: 16);
|
||||
new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF));
|
||||
if(new_defs == NULL) {
|
||||
MD_LOG("realloc() failed.");
|
||||
ret = -1;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
ctx->ref_defs = new_defs;
|
||||
}
|
||||
|
||||
def = &ctx->ref_defs[ctx->n_ref_defs];
|
||||
memset(def, 0, sizeof(MD_REF_DEF));
|
||||
|
||||
def->label = label;
|
||||
def->label_size = label_size;
|
||||
def->label_needs_free = label_needs_free;
|
||||
|
||||
def->dest_beg = dest_contents_beg;
|
||||
def->dest_end = dest_contents_end;
|
||||
|
||||
if(title_contents_beg >= title_contents_end) {
|
||||
def->title = NULL;
|
||||
def->title_size = 0;
|
||||
} else if(!title_is_multiline) {
|
||||
def->title = (CHAR*) STR(title_contents_beg);
|
||||
def->title_size = title_contents_end - title_contents_beg;
|
||||
if(label_is_multiline) {
|
||||
MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
|
||||
lines + label_contents_line_index, n_lines - label_contents_line_index,
|
||||
_T(' '), &def->label, &def->label_size));
|
||||
def->label_needs_free = TRUE;
|
||||
} else {
|
||||
def->label = (CHAR*) STR(label_contents_beg);
|
||||
def->label_size = label_contents_end - label_contents_beg;
|
||||
}
|
||||
|
||||
if(title_is_multiline) {
|
||||
MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
|
||||
lines + title_contents_line_index, n_lines - title_contents_line_index,
|
||||
_T('\n'), &def->title, &def->title_size));
|
||||
def->title_needs_free = TRUE;
|
||||
} else {
|
||||
def->title = (CHAR*) STR(title_contents_beg);
|
||||
def->title_size = title_contents_end - title_contents_beg;
|
||||
}
|
||||
|
||||
def->dest_beg = dest_contents_beg;
|
||||
def->dest_end = dest_contents_end;
|
||||
|
||||
/* Success. */
|
||||
ctx->n_ref_defs++;
|
||||
return line_index + 1;
|
||||
|
||||
abort:
|
||||
/* Failure. */
|
||||
if(label_needs_free)
|
||||
free(label);
|
||||
return -1;
|
||||
if(def != NULL && def->label_needs_free)
|
||||
free(def->label);
|
||||
if(def != NULL && def->title_needs_free)
|
||||
free(def->title);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -2479,7 +2482,7 @@ md_mark_chain(MD_CTX* ctx, int mark_index)
|
|||
switch(mark->ch) {
|
||||
case _T('*'): return md_asterisk_chain(ctx, mark->flags);
|
||||
case _T('_'): return &UNDERSCORE_OPENERS;
|
||||
case _T('~'): return &TILDE_OPENERS;
|
||||
case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
|
||||
case _T('['): return &BRACKET_OPENERS;
|
||||
case _T('|'): return &TABLECELLBOUNDARIES;
|
||||
default: return NULL;
|
||||
|
|
@ -2492,7 +2495,9 @@ md_push_mark(MD_CTX* ctx)
|
|||
if(ctx->n_marks >= ctx->alloc_marks) {
|
||||
MD_MARK* new_marks;
|
||||
|
||||
ctx->alloc_marks = (ctx->alloc_marks > 0 ? ctx->alloc_marks * 2 : 64);
|
||||
ctx->alloc_marks = (ctx->alloc_marks > 0
|
||||
? ctx->alloc_marks + ctx->alloc_marks / 2
|
||||
: 64);
|
||||
new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK));
|
||||
if(new_marks == NULL) {
|
||||
MD_LOG("realloc() failed.");
|
||||
|
|
@ -2535,6 +2540,7 @@ md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
|
|||
chain->head = mark_index;
|
||||
|
||||
ctx->marks[mark_index].prev = chain->tail;
|
||||
ctx->marks[mark_index].next = -1;
|
||||
chain->tail = mark_index;
|
||||
}
|
||||
|
||||
|
|
@ -2626,7 +2632,7 @@ md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
|
|||
chain->head = -1;
|
||||
}
|
||||
|
||||
/* Go backwards so that un-resolved openers are re-added into their
|
||||
/* Go backwards so that unresolved openers are re-added into their
|
||||
* respective chains, in the right order. */
|
||||
mark_index = closer_index - 1;
|
||||
while(mark_index > opener_index) {
|
||||
|
|
@ -2718,7 +2724,7 @@ md_build_mark_char_map(MD_CTX* ctx)
|
|||
}
|
||||
}
|
||||
|
||||
/* We limit code span marks to lower then 32 backticks. This solves the
|
||||
/* We limit code span marks to lower than 32 backticks. This solves the
|
||||
* pathologic case of too many openers, each of different length: Their
|
||||
* resolving would be then O(n^2). */
|
||||
#define CODESPAN_MARK_MAXLEN 32
|
||||
|
|
@ -2905,7 +2911,7 @@ md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
|
|||
return FALSE;
|
||||
off++;
|
||||
|
||||
/* Labels delimited with '.'; each label is sequence of 1 - 62 alnum
|
||||
/* Labels delimited with '.'; each label is sequence of 1 - 63 alnum
|
||||
* characters or '-', but '-' is not allowed as first or last char. */
|
||||
label_len = 0;
|
||||
while(off < max_end) {
|
||||
|
|
@ -2918,7 +2924,7 @@ md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
|
|||
else
|
||||
break;
|
||||
|
||||
if(label_len > 62)
|
||||
if(label_len > 63)
|
||||
return FALSE;
|
||||
|
||||
off++;
|
||||
|
|
@ -3259,7 +3265,17 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
|
|||
while(tmp < line_end && CH(tmp) == _T('~'))
|
||||
tmp++;
|
||||
|
||||
PUSH_MARK(ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
|
||||
if(tmp - off < 3) {
|
||||
unsigned flags = 0;
|
||||
|
||||
if(tmp < line_end && !ISUNICODEWHITESPACE(tmp))
|
||||
flags |= MD_MARK_POTENTIAL_OPENER;
|
||||
if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off))
|
||||
flags |= MD_MARK_POTENTIAL_CLOSER;
|
||||
if(flags != 0)
|
||||
PUSH_MARK(ch, off, tmp, flags);
|
||||
}
|
||||
|
||||
off = tmp;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -3407,92 +3423,91 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
|||
continue;
|
||||
}
|
||||
|
||||
/* Detect and resolve wiki links. */
|
||||
/* Recognize and resolve wiki links.
|
||||
* Wiki-links maybe '[[destination]]' or '[[destination|label]]'.
|
||||
*/
|
||||
if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
|
||||
next_opener != NULL && next_closer != NULL &&
|
||||
(opener->end - opener->beg == 1) &&
|
||||
(opener->end - opener->beg == 1) && /* not image */
|
||||
next_opener != NULL && /* double '[' opener */
|
||||
next_opener->ch == '[' &&
|
||||
(next_opener->beg == opener->beg - 1) &&
|
||||
(next_closer->beg == closer->beg + 1) &&
|
||||
(next_opener->end - next_opener->beg == 1) &&
|
||||
(next_closer->end - next_closer->beg == 1) &&
|
||||
(next_opener->ch == '[' && next_closer->ch == ']'))
|
||||
next_closer != NULL && /* double ']' closer */
|
||||
next_closer->ch == ']' &&
|
||||
(next_closer->beg == closer->beg + 1) &&
|
||||
(next_closer->end - next_closer->beg == 1))
|
||||
{
|
||||
MD_MARK* delim = NULL;
|
||||
int delim_index;
|
||||
OFF dest_beg, dest_end;
|
||||
|
||||
is_link = TRUE;
|
||||
|
||||
if (opener->end == closer->beg)
|
||||
is_link = FALSE;
|
||||
|
||||
int delim_index = opener_index;
|
||||
MD_MARK* delim = &ctx->marks[delim_index];
|
||||
|
||||
/* To prevent runaway O(n^2) performance, don't look too far for the delimiter (.. < 100). */
|
||||
while(is_link && delim_index < closer_index && (delim_index - opener_index) < 100 ) {
|
||||
if(delim->ch == '|' && delim->beg == opener->end) {
|
||||
is_link = FALSE;
|
||||
} else if(delim->ch == '|' && delim->end == closer->beg) {
|
||||
break;
|
||||
} else if(delim->ch == '|') {
|
||||
opener->end = delim->beg;
|
||||
/* We don't allow destination to be longer than 100 characters.
|
||||
* Lets scan to see whether there is '|'. (If not then the whole
|
||||
* wiki-link has to be below the 100 characters.) */
|
||||
delim_index = opener_index + 1;
|
||||
while(delim_index < closer_index) {
|
||||
MD_MARK* m = &ctx->marks[delim_index];
|
||||
if(m->ch == '|') {
|
||||
delim = m;
|
||||
break;
|
||||
}
|
||||
if(m->ch != 'D' && m->beg - opener->end > 100)
|
||||
break;
|
||||
delim_index++;
|
||||
delim = &ctx->marks[delim_index];
|
||||
}
|
||||
|
||||
OFF off = closer->beg-1;
|
||||
int count = 0;
|
||||
int has_label = (opener->end - opener->beg > 2);
|
||||
const MD_LINE* line;
|
||||
int line_index = n_lines-1;
|
||||
|
||||
/* An image inside the link target disables the wiki link. */
|
||||
if( (has_label && last_img_beg >= opener->beg && last_img_end <= opener->end) ||
|
||||
(!has_label && last_img_beg >= opener->beg && last_img_end <= closer->end))
|
||||
dest_beg = opener->end;
|
||||
dest_end = (delim != NULL) ? delim->beg : closer->beg;
|
||||
if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100)
|
||||
is_link = FALSE;
|
||||
|
||||
while(is_link && off > opener->beg && count++ < 100) {
|
||||
|
||||
/* Newline not allowed in link target. */
|
||||
if(has_label && (off <= opener->end) && ISNEWLINE(off))
|
||||
/* There may not be any new line in the destination. */
|
||||
if(is_link) {
|
||||
OFF off;
|
||||
for(off = dest_beg; off < dest_end; off++) {
|
||||
if(ISNEWLINE(off)) {
|
||||
is_link = FALSE;
|
||||
else if(!has_label && off > opener->end && ISNEWLINE(off))
|
||||
is_link = FALSE;
|
||||
else if(ISNEWLINE(off)) {
|
||||
line = &lines[line_index--];
|
||||
count = count - line->total_indent - 1; /* Count newline too. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
off--;
|
||||
}
|
||||
|
||||
if(off > opener->beg)
|
||||
is_link = FALSE;
|
||||
|
||||
if(is_link) {
|
||||
if(delim->ch == '|')
|
||||
delim->flags |= MD_MARK_RESOLVED;
|
||||
if(delim != NULL) {
|
||||
if(delim->end < closer->beg) {
|
||||
opener->end = delim->beg;
|
||||
} else {
|
||||
/* The pipe is just before the closer: [[foo|]] */
|
||||
closer->beg = delim->beg;
|
||||
delim = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
opener->beg = next_opener->beg;
|
||||
closer->end = next_closer->end;
|
||||
|
||||
opener->next = closer_index;
|
||||
opener->flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
|
||||
|
||||
closer->end = next_closer->end;
|
||||
closer->prev = opener_index;
|
||||
closer->flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
|
||||
|
||||
last_link_beg = opener->beg;
|
||||
last_link_end = closer->end;
|
||||
|
||||
if ((opener->end - opener->beg > 2))
|
||||
if(delim != NULL) {
|
||||
delim->flags |= MD_MARK_RESOLVED;
|
||||
md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
|
||||
md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
|
||||
} else {
|
||||
md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
|
||||
}
|
||||
|
||||
opener_index = next_index;
|
||||
opener_index = next_opener->prev;
|
||||
continue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
if(next_opener != NULL && next_opener->beg == closer->end) {
|
||||
if(next_closer->beg > closer->end + 1) {
|
||||
/* Might be full reference link. */
|
||||
|
|
@ -3571,6 +3586,7 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
|||
|
||||
MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
|
||||
md_mark_store_ptr(ctx, opener_index+2, attr.title);
|
||||
/* The title might or might not have been allocated for us. */
|
||||
if(attr.title_needs_free)
|
||||
md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+2);
|
||||
ctx->marks[opener_index+2].prev = attr.title_size;
|
||||
|
|
@ -3669,8 +3685,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
|
|||
int i, n_opener_chains;
|
||||
unsigned flags = mark->flags;
|
||||
|
||||
/* Apply "rule of three". (This is why we break asterisk opener
|
||||
* marks into multiple chains.) */
|
||||
/* Apply the "rule of three". */
|
||||
n_opener_chains = 0;
|
||||
opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
|
||||
if((flags & MD_MARK_EMPH_MOD3_MASK) != MD_MARK_EMPH_MOD3_2)
|
||||
|
|
@ -3706,7 +3721,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
|
|||
if(opener != NULL) {
|
||||
SZ opener_size = opener->end - opener->beg;
|
||||
SZ closer_size = mark->end - mark->beg;
|
||||
MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, mark_index);
|
||||
MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index);
|
||||
|
||||
if(opener_size > closer_size) {
|
||||
opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
|
||||
|
|
@ -3729,20 +3744,23 @@ md_analyze_emph(MD_CTX* ctx, int mark_index)
|
|||
static void
|
||||
md_analyze_tilde(MD_CTX* ctx, int mark_index)
|
||||
{
|
||||
/* We attempt to be Github Flavored Markdown compatible here. GFM says
|
||||
* that length of the tilde sequence is not important at all. Note that
|
||||
* implies the TILDE_OPENERS chain can have at most one item. */
|
||||
MD_MARK* mark = &ctx->marks[mark_index];
|
||||
MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
|
||||
|
||||
if(TILDE_OPENERS.head >= 0) {
|
||||
/* The chain already contains an opener, so we may resolve the span. */
|
||||
int opener_index = TILDE_OPENERS.head;
|
||||
/* We attempt to be Github Flavored Markdown compatible here. GFM accepts
|
||||
* only tildes sequences of length 1 and 2, and the length of the opener
|
||||
* and closer has to match. */
|
||||
|
||||
if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && chain->head >= 0) {
|
||||
int opener_index = chain->head;
|
||||
|
||||
md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
|
||||
md_resolve_range(ctx, &TILDE_OPENERS, opener_index, mark_index);
|
||||
} else {
|
||||
/* We can only be opener. */
|
||||
md_mark_chain_append(ctx, &TILDE_OPENERS, mark_index);
|
||||
md_resolve_range(ctx, chain, opener_index, mark_index);
|
||||
return;
|
||||
}
|
||||
|
||||
if(mark->flags & MD_MARK_POTENTIAL_OPENER)
|
||||
md_mark_chain_append(ctx, chain, mark_index);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -3987,25 +4005,14 @@ static void
|
|||
md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
|
||||
int mark_beg, int mark_end)
|
||||
{
|
||||
int i;
|
||||
|
||||
md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:."));
|
||||
ASTERISK_OPENERS_extraword_mod3_0.head = -1;
|
||||
ASTERISK_OPENERS_extraword_mod3_0.tail = -1;
|
||||
ASTERISK_OPENERS_extraword_mod3_1.head = -1;
|
||||
ASTERISK_OPENERS_extraword_mod3_1.tail = -1;
|
||||
ASTERISK_OPENERS_extraword_mod3_2.head = -1;
|
||||
ASTERISK_OPENERS_extraword_mod3_2.tail = -1;
|
||||
ASTERISK_OPENERS_intraword_mod3_0.head = -1;
|
||||
ASTERISK_OPENERS_intraword_mod3_0.tail = -1;
|
||||
ASTERISK_OPENERS_intraword_mod3_1.head = -1;
|
||||
ASTERISK_OPENERS_intraword_mod3_1.tail = -1;
|
||||
ASTERISK_OPENERS_intraword_mod3_2.head = -1;
|
||||
ASTERISK_OPENERS_intraword_mod3_2.tail = -1;
|
||||
UNDERSCORE_OPENERS.head = -1;
|
||||
UNDERSCORE_OPENERS.tail = -1;
|
||||
TILDE_OPENERS.head = -1;
|
||||
TILDE_OPENERS.tail = -1;
|
||||
DOLLAR_OPENERS.head = -1;
|
||||
DOLLAR_OPENERS.tail = -1;
|
||||
|
||||
for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) {
|
||||
ctx->mark_chains[i].head = -1;
|
||||
ctx->mark_chains[i].tail = -1;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -4113,7 +4120,23 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
|||
}
|
||||
break;
|
||||
|
||||
case '_':
|
||||
case '_': /* Underline (or emphasis if we fall through). */
|
||||
if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
|
||||
if(mark->flags & MD_MARK_OPENER) {
|
||||
while(off < mark->end) {
|
||||
MD_ENTER_SPAN(MD_SPAN_U, NULL);
|
||||
off++;
|
||||
}
|
||||
} else {
|
||||
while(off < mark->end) {
|
||||
MD_LEAVE_SPAN(MD_SPAN_U, NULL);
|
||||
off++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
/* Fall though. */
|
||||
|
||||
case '*': /* Emphasis, strong emphasis. */
|
||||
if(mark->flags & MD_MARK_OPENER) {
|
||||
if((mark->end - off) % 2) {
|
||||
|
|
@ -4159,19 +4182,18 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
|||
{
|
||||
const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
|
||||
const MD_MARK* closer = &ctx->marks[opener->next];
|
||||
const MD_MARK* dest_mark;
|
||||
const MD_MARK* title_mark;
|
||||
|
||||
if ((opener->ch == '[' && closer->ch == ']') &&
|
||||
opener->end - opener->beg >= 2 &&
|
||||
closer->end - closer->beg == 2)
|
||||
closer->end - closer->beg >= 2)
|
||||
{
|
||||
const MD_MARK* delim = opener+3; /* Scan past the two dummy marks. */
|
||||
int has_label = (opener->end - opener->beg > 2);
|
||||
int target_sz;
|
||||
SZ target_sz;
|
||||
|
||||
if(has_label)
|
||||
target_sz = opener->end - (opener->beg+2);
|
||||
else if(delim->ch == '|')
|
||||
target_sz = (closer->beg-1) - opener->end;
|
||||
else
|
||||
target_sz = closer->beg - opener->end;
|
||||
|
||||
|
|
@ -4182,10 +4204,9 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
|||
break;
|
||||
}
|
||||
|
||||
const MD_MARK* dest_mark = opener+1;
|
||||
const MD_MARK* title_mark = opener+2;
|
||||
|
||||
dest_mark = opener+1;
|
||||
MD_ASSERT(dest_mark->ch == 'D');
|
||||
title_mark = opener+2;
|
||||
MD_ASSERT(title_mark->ch == 'D');
|
||||
|
||||
MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
|
||||
|
|
@ -4557,7 +4578,7 @@ md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_
|
|||
MD_ASSERT(indent >= 0);
|
||||
|
||||
/* Output code indentation. */
|
||||
while(indent >= indent_chunk_size) {
|
||||
while(indent > (int) indent_chunk_size) {
|
||||
MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
|
||||
indent -= indent_chunk_size;
|
||||
}
|
||||
|
|
@ -4813,7 +4834,9 @@ md_push_block_bytes(MD_CTX* ctx, int n_bytes)
|
|||
if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
|
||||
void* new_block_bytes;
|
||||
|
||||
ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0 ? ctx->alloc_block_bytes * 2 : 512);
|
||||
ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0
|
||||
? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2
|
||||
: 512);
|
||||
new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes);
|
||||
if(new_block_bytes == NULL) {
|
||||
MD_LOG("realloc() failed.");
|
||||
|
|
@ -5003,7 +5026,6 @@ md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
|
|||
|
||||
line->beg = analysis->beg;
|
||||
line->end = analysis->end;
|
||||
line->total_indent = analysis->total_indent;
|
||||
}
|
||||
ctx->current_block->n_lines++;
|
||||
|
||||
|
|
@ -5246,7 +5268,7 @@ md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
|
|||
#ifdef X
|
||||
#undef X
|
||||
#endif
|
||||
#define X(name) { _T(name), sizeof(name)-1 }
|
||||
#define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
|
||||
#define Xend { NULL, 0 }
|
||||
static const TAG t1[] = { X("script"), X("pre"), X("style"), Xend };
|
||||
|
||||
|
|
@ -5302,7 +5324,7 @@ md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
|
|||
|
||||
/* Check for type 5: <![CDATA[ */
|
||||
if(off + 8 < ctx->size) {
|
||||
if(md_ascii_eq(STR(off), _T("![CDATA["), 8 * sizeof(CHAR)))
|
||||
if(md_ascii_eq(STR(off), _T("![CDATA["), 8))
|
||||
return 5;
|
||||
}
|
||||
}
|
||||
|
|
@ -5451,7 +5473,9 @@ md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
|
|||
if(ctx->n_containers >= ctx->alloc_containers) {
|
||||
MD_CONTAINER* new_containers;
|
||||
|
||||
ctx->alloc_containers = (ctx->alloc_containers > 0 ? ctx->alloc_containers * 2 : 16);
|
||||
ctx->alloc_containers = (ctx->alloc_containers > 0
|
||||
? ctx->alloc_containers + ctx->alloc_containers / 2
|
||||
: 16);
|
||||
new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER));
|
||||
if(new_containers == NULL) {
|
||||
MD_LOG("realloc() failed.");
|
||||
|
|
@ -5783,7 +5807,7 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
|
|||
#if 1
|
||||
/* This is 2nd half of the hack. If the flag is set (that is there
|
||||
* were 2nd blank line at the start of the list item) and we would also
|
||||
* belonging to such list item, then interrupt the list. */
|
||||
* belonging to such list item, than interrupt the list. */
|
||||
ctx->last_line_has_list_loosening_effect = FALSE;
|
||||
if(ctx->last_list_item_starts_with_two_blank_lines) {
|
||||
if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') &&
|
||||
|
|
@ -6008,8 +6032,6 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
|
|||
break;
|
||||
}
|
||||
|
||||
line->total_indent = total_indent;
|
||||
|
||||
/* Scan for end of the line.
|
||||
*
|
||||
* Note this is quite a bottleneck of the parsing as we here iterate almost
|
||||
|
|
@ -6245,10 +6267,8 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd
|
|||
int ret;
|
||||
|
||||
if(parser->abi_version != 0) {
|
||||
#if WITH_PARSER_DEBUG_LOG
|
||||
if(parser->debug_log != NULL)
|
||||
parser->debug_log("Unsupported abi_version.", userdata);
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
|
|||
52
src/md4c.h
52
src/md4c.h
|
|
@ -2,7 +2,7 @@
|
|||
* MD4C: Markdown parser for C
|
||||
* (http://github.com/mity/md4c)
|
||||
*
|
||||
* Copyright (c) 2016-2019 Martin Mitas
|
||||
* Copyright (c) 2016-2020 Martin Mitas
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
|
|
@ -23,15 +23,15 @@
|
|||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef MD4C_MARKDOWN_H
|
||||
#define MD4C_MARKDOWN_H
|
||||
#ifndef MD4C_H
|
||||
#define MD4C_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined MD4C_USE_UTF16
|
||||
/* Magic to support UTF-16. Not that in order to use it, you have to define
|
||||
/* Magic to support UTF-16. Note that in order to use it, you have to define
|
||||
* the macro MD4C_USE_UTF16 both when building MD4C as well as when
|
||||
* including this header in your code. */
|
||||
#ifdef _WIN32
|
||||
|
|
@ -119,7 +119,7 @@ typedef enum MD_SPANTYPE {
|
|||
* Detail: Structure MD_SPAN_IMG_DETAIL.
|
||||
* Note: Image text can contain nested spans and even nested images.
|
||||
* If rendered into ALT attribute of HTML <IMG> tag, it's responsibility
|
||||
* of the renderer to deal with it.
|
||||
* of the parser to deal with it.
|
||||
*/
|
||||
MD_SPAN_IMG,
|
||||
|
||||
|
|
@ -140,7 +140,11 @@ typedef enum MD_SPANTYPE {
|
|||
/* Wiki links
|
||||
* Note: Recognized only when MD_FLAG_WIKILINKS is enabled.
|
||||
*/
|
||||
MD_SPAN_WIKILINK
|
||||
MD_SPAN_WIKILINK,
|
||||
|
||||
/* <u>...</u>
|
||||
* Note: Recognized only when MD_FLAG_UNDERLINE is enabled. */
|
||||
MD_SPAN_U
|
||||
} MD_SPANTYPE;
|
||||
|
||||
/* Text is the actual textual contents of span. */
|
||||
|
|
@ -167,7 +171,7 @@ typedef enum MD_TEXTTYPE {
|
|||
* (c) Hexadecimal entity, e.g. ካ
|
||||
*
|
||||
* As MD4C is mostly encoding agnostic, application gets the verbatim
|
||||
* entity text into the MD_RENDERER::text_callback(). */
|
||||
* entity text into the MD_PARSER::text_callback(). */
|
||||
MD_TEXT_ENTITY,
|
||||
|
||||
/* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
|
||||
|
|
@ -202,8 +206,13 @@ typedef enum MD_ALIGN {
|
|||
* propagated within various detailed structures, but which still may contain
|
||||
* string portions of different types like e.g. entities.
|
||||
*
|
||||
* So, for example, lets consider an image has a title attribute string
|
||||
* set to "foo " bar". (Note the string size is 14.)
|
||||
* So, for example, lets consider this image:
|
||||
*
|
||||
* 
|
||||
*
|
||||
* The image alt text is propagated as a normal text via the MD_PARSER::text()
|
||||
* callback. However, the image title ('foo " bar') is propagated as
|
||||
* MD_ATTRIBUTE in MD_SPAN_IMG_DETAIL::title.
|
||||
*
|
||||
* Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
|
||||
* -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
|
||||
|
|
@ -211,10 +220,12 @@ typedef enum MD_ALIGN {
|
|||
* -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
|
||||
* -- [3]: (n/a) (n/a ; substr_offsets[3] == 14)
|
||||
*
|
||||
* Note that these conditions are guaranteed:
|
||||
* Note that these invariants are always guaranteed:
|
||||
* -- substr_offsets[0] == 0
|
||||
* -- substr_offsets[LAST+1] == size
|
||||
* -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR substrings can appear.
|
||||
* -- Currently, only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR
|
||||
* substrings can appear. This could change only of the specification
|
||||
* changes.
|
||||
*/
|
||||
typedef struct MD_ATTRIBUTE {
|
||||
const MD_CHAR* text;
|
||||
|
|
@ -280,7 +291,7 @@ typedef struct MD_SPAN_WIKILINK {
|
|||
|
||||
/* Flags specifying extensions/deviations from CommonMark specification.
|
||||
*
|
||||
* By default (when MD_RENDERER::flags == 0), we follow CommonMark specification.
|
||||
* By default (when MD_PARSER::flags == 0), we follow CommonMark specification.
|
||||
* The following flags may allow some extensions or deviations from it.
|
||||
*/
|
||||
#define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
|
||||
|
|
@ -296,6 +307,7 @@ typedef struct MD_SPAN_WIKILINK {
|
|||
#define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */
|
||||
#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */
|
||||
#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */
|
||||
#define MD_FLAG_UNDERLINE 0x4000 /* Enable underline extension (and disables '_' for normal emphasis). */
|
||||
|
||||
#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
|
||||
#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
|
||||
|
|
@ -312,7 +324,7 @@ typedef struct MD_SPAN_WIKILINK {
|
|||
#define MD_DIALECT_COMMONMARK 0
|
||||
#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS)
|
||||
|
||||
/* Renderer structure.
|
||||
/* Parser structure.
|
||||
*/
|
||||
typedef struct MD_PARSER {
|
||||
/* Reserved. Set to zero.
|
||||
|
|
@ -333,9 +345,10 @@ typedef struct MD_PARSER {
|
|||
*
|
||||
* Note any strings provided to the callbacks as their arguments or as
|
||||
* members of any detail structure are generally not zero-terminated.
|
||||
* Application has take the respective size information into account.
|
||||
* Application has to take the respective size information into account.
|
||||
*
|
||||
* Callbacks may abort further parsing of the document by returning non-zero.
|
||||
* Any rendering callback may abort further parsing of the document by
|
||||
* returning non-zero.
|
||||
*/
|
||||
int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
|
|
@ -360,18 +373,19 @@ typedef struct MD_PARSER {
|
|||
} MD_PARSER;
|
||||
|
||||
|
||||
/* For backward compatibility. Do not use in new code. */
|
||||
/* For backward compatibility. Do not use in new code.
|
||||
*/
|
||||
typedef MD_PARSER MD_RENDERER;
|
||||
|
||||
|
||||
/* Parse the Markdown document stored in the string 'text' of size 'size'.
|
||||
* The renderer provides callbacks to be called during the parsing so the
|
||||
* The parser provides callbacks to be called during the parsing so the
|
||||
* caller can render the document on the screen or convert the Markdown
|
||||
* to another format.
|
||||
*
|
||||
* Zero is returned on success. If a runtime error occurs (e.g. a memory
|
||||
* fails), -1 is returned. If the processing is aborted due any callback
|
||||
* returning non-zero, md_parse() the return value of the callback is returned.
|
||||
* returning non-zero, the return value of the callback is returned.
|
||||
*/
|
||||
int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata);
|
||||
|
||||
|
|
@ -380,4 +394,4 @@ int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* u
|
|||
} /* extern "C" { */
|
||||
#endif
|
||||
|
||||
#endif /* MD4C_MARKDOWN_H */
|
||||
#endif /* MD4C_H */
|
||||
|
|
|
|||
Loading…
Reference in a new issue