mirror of
https://github.com/danbulant/markdown-wasm
synced 2026-06-16 21:21:06 +00:00
initial commit
This commit is contained in:
commit
46892d1110
35 changed files with 9258 additions and 0 deletions
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/_*
|
||||
**/.DS_Store
|
||||
/build
|
||||
/node_modules
|
||||
*.sublime*
|
||||
19
LICENSE
Normal file
19
LICENSE
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
Copyright (c) 2019 Rasmus Andersson <https://rsms.me/>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
114
README.md
Normal file
114
README.md
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
# markdown-wasm
|
||||
|
||||
Very fast Markdown parser & HTML renderer implemented in WebAssembly
|
||||
|
||||
- Zero dependencies
|
||||
- Portable
|
||||
- Simple API
|
||||
- Fast and efficient
|
||||
|
||||
Based on [md4c](http://github.com/mity/md4c)
|
||||
|
||||
|
||||
## Examples
|
||||
|
||||
In Nodejs
|
||||
|
||||
```js
|
||||
const markdown = require("./dist/markdown.node.js")
|
||||
console.log(markdown.parse("# hello\n*world*"))
|
||||
```
|
||||
|
||||
ES module
|
||||
|
||||
```js
|
||||
import * as markdown from "./dist/markdown.es"
|
||||
console.log(markdown.parse("# hello\n*world*"))
|
||||
```
|
||||
|
||||
Separately loaded wasm module (useful in web browsers)
|
||||
|
||||
```js
|
||||
require("./dist/markdown").ready.then(markdown => {
|
||||
console.log(markdown.parse("# hello\n*world*"))
|
||||
})
|
||||
```
|
||||
|
||||
|
||||
## API
|
||||
|
||||
```ts
|
||||
/**
|
||||
* parse reads markdown source at s and converts it to HTML.
|
||||
* When asMemoryView is true, the returned Uint8Array is a view of heap memory, not a copy.
|
||||
*/
|
||||
export function parse(s :Source, o? :ParseOptions & { asMemoryView? :never|false }) :string
|
||||
export function parse(s :Source, o? :ParseOptions & { asMemoryView :true }) :Uint8Array
|
||||
|
||||
/** Markdown source code can be provided as a JavaScript string or UTF8 encoded data */
|
||||
type Source = string|ArrayLike<number>
|
||||
|
||||
/** Options for the parse function */
|
||||
export interface ParseOptions {
|
||||
/**
|
||||
* Customize parsing.
|
||||
* If not provided, the following flags are used, equating to github-style parsing:
|
||||
* COLLAPSE_WHITESPACE
|
||||
* PERMISSIVE_ATX_HEADERS
|
||||
* PERMISSIVE_URL_AUTO_LINKS
|
||||
* STRIKETHROUGH
|
||||
* TABLES
|
||||
* TASK_LISTS
|
||||
*/
|
||||
parseFlags? :ParseFlags
|
||||
|
||||
/**
|
||||
* asMemoryView=true causes parse() to return a view of heap memory as a Uint8Array,
|
||||
* instead of a string.
|
||||
*
|
||||
* The returned Uint8Array is only valid until the next call to parse().
|
||||
* If you need to keep the returned data around, call Uint8Array.slice() to make a copy,
|
||||
* as each call to parse() uses the same underlying memory.
|
||||
*
|
||||
* This only provides a performance benefit when you never need to convert the output
|
||||
* to a string. In most cases you're better off leaving this unset or false.
|
||||
*/
|
||||
asMemoryView? :boolean
|
||||
}
|
||||
|
||||
/** Flags that customize Markdown parsing */
|
||||
export enum ParseFlags {
|
||||
/** In TEXT, collapse non-trivial whitespace into single ' ' */ COLLAPSE_WHITESPACE,
|
||||
/** Enable $ and $$ containing LaTeX equations. */ LATEX_MATH_SPANS,
|
||||
/** Disable raw HTML blocks. */ NO_HTML_BLOCKS,
|
||||
/** Disable raw HTML (inline). */ NO_HTML_SPANS,
|
||||
/** Disable indented code blocks. (Only fenced code works.) */ NO_INDENTED_CODE_BLOCKS,
|
||||
/** Do not require space in ATX headers ( ###header ) */ PERMISSIVE_ATX_HEADERS,
|
||||
/** Recognize e-mails as links even without <...> */ PERMISSIVE_EMAIL_AUTO_LINKS,
|
||||
/** Recognize URLs as links even without <...> */ PERMISSIVE_URL_AUTO_LINKS,
|
||||
/** Enable WWW autolinks (without proto; just 'www.') */ PERMISSIVE_WWW_AUTOLINKS,
|
||||
/** Enable strikethrough extension. */ STRIKETHROUGH,
|
||||
/** Enable tables extension. */ TABLES,
|
||||
/** Enable task list extension. */ TASK_LISTS,
|
||||
/** Enable wiki links extension. */ WIKI_LINKS,
|
||||
|
||||
/** Default flags */ DEFAULT,
|
||||
/** Shorthand for NO_HTML_BLOCKS | NO_HTML_SPANS */ NO_HTML,
|
||||
}
|
||||
```
|
||||
|
||||
See `markdown.d.ts`
|
||||
|
||||
|
||||
## Building from source
|
||||
|
||||
```
|
||||
npm install
|
||||
npx wasmc
|
||||
```
|
||||
|
||||
Build debug version of markdown-es into ./build/debug and watch source files:
|
||||
|
||||
```
|
||||
npx wasmc -g -w
|
||||
```
|
||||
3
dist/markdown.es.js
vendored
Normal file
3
dist/markdown.es.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
dist/markdown.es.js.map
vendored
Normal file
1
dist/markdown.es.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
4
dist/markdown.js
vendored
Normal file
4
dist/markdown.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
dist/markdown.js.map
vendored
Normal file
1
dist/markdown.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
4
dist/markdown.node.js
vendored
Normal file
4
dist/markdown.node.js
vendored
Normal file
File diff suppressed because one or more lines are too long
1
dist/markdown.node.js.map
vendored
Normal file
1
dist/markdown.node.js.map
vendored
Normal file
File diff suppressed because one or more lines are too long
BIN
dist/markdown.wasm
vendored
Normal file
BIN
dist/markdown.wasm
vendored
Normal file
Binary file not shown.
252
docs/index.html
Normal file
252
docs/index.html
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>markdown-wasm demo</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<link rel="stylesheet" href="https://rsms.me/raster/raster.css?v=8">
|
||||
<script type="text/javascript" src="markdown.js"></script>
|
||||
<style type="text/css">
|
||||
|
||||
:root {
|
||||
--fontSize: 14px;
|
||||
}
|
||||
|
||||
c.label { font-weight:500; letter-spacing:0; color:#999; }
|
||||
|
||||
c.input, c.output {
|
||||
max-width:50vw;
|
||||
}
|
||||
|
||||
c.textarea {
|
||||
display: flex;
|
||||
justify-content: stretch;
|
||||
}
|
||||
c.textarea textarea {
|
||||
flex: 1 1 auto;
|
||||
min-height: 50vh;
|
||||
border: 2px solid black;
|
||||
border-radius: 3px;
|
||||
padding: 1rem;
|
||||
font-size: 14px;
|
||||
}
|
||||
c.textarea textarea:focus {
|
||||
outline: none;
|
||||
border-color: var(--blue);
|
||||
}
|
||||
|
||||
blockquote {
|
||||
padding-left: var(--lineHeight);
|
||||
border-left: 2px solid #ccc;
|
||||
}
|
||||
|
||||
h1 > a.anchor,
|
||||
h2 > a.anchor,
|
||||
h3 > a.anchor,
|
||||
h4 > a.anchor,
|
||||
h5 > a.anchor,
|
||||
h6 > a.anchor {
|
||||
display: inline-block;
|
||||
float: left;
|
||||
height: 1.2em;
|
||||
width: 1em;
|
||||
margin-left: -1em;
|
||||
position: relative;
|
||||
outline: none;
|
||||
}
|
||||
/*.anchor:target { background: yellow; }*/
|
||||
h1 > a.anchor:before,
|
||||
h2 > a.anchor:before,
|
||||
h3 > a.anchor:before,
|
||||
h4 > a.anchor:before,
|
||||
h5 > a.anchor:before,
|
||||
h6 > a.anchor:before {
|
||||
visibility: hidden;
|
||||
position: absolute;
|
||||
opacity: 0.2;
|
||||
right:0;
|
||||
top:0;
|
||||
width: 1em;
|
||||
font-weight:300;
|
||||
line-height: inherit;
|
||||
content: "\E08F"; /* U+E08F */
|
||||
text-align: center;
|
||||
}
|
||||
h1 > a.anchor:hover:before,
|
||||
h2 > a.anchor:hover:before,
|
||||
h3 > a.anchor:hover:before,
|
||||
h4 > a.anchor:hover:before,
|
||||
h5 > a.anchor:hover:before,
|
||||
h6 > a.anchor:hover:before {
|
||||
visibility: visible;
|
||||
opacity:0.8;
|
||||
}
|
||||
h1 > a.anchor:focus:before,
|
||||
h2 > a.anchor:focus:before,
|
||||
h3 > a.anchor:focus:before,
|
||||
h4 > a.anchor:focus:before,
|
||||
h5 > a.anchor:focus:before,
|
||||
h6 > a.anchor:focus:before,
|
||||
h1:hover .anchor:before,
|
||||
h2:hover .anchor:before,
|
||||
h3:hover .anchor:before,
|
||||
h4:hover .anchor:before,
|
||||
h5:hover .anchor:before,
|
||||
h6:hover .anchor:before {
|
||||
visibility: visible;
|
||||
}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<grid columns=2 class=_debug>
|
||||
<c><h1>Markdown-wasm demo</h1></c>
|
||||
<c>
|
||||
<p>
|
||||
Fast Markdown parser and HTML renderer implemented in WebAssembly.
|
||||
<a href="https://github.com/rsms/markdown-wasm">Learn more & download on GitHub</a>
|
||||
</p>
|
||||
</c>
|
||||
|
||||
<c class=label>Type some Markdown here</c>
|
||||
<c class=label>HTML output from markdown-wasm</c>
|
||||
|
||||
<c class="input textarea"><textarea class="code" id="markdown-input"># Example
|
||||
|
||||
A third of the distance across the Beach, the meadow ends and sand begins. This slopes gradually up for another third of the distance, to the foot of the sand hills, which seem tumbled into their places by some mighty power, sometimes three tiers of them deep, sometimes two, _and sometimes only one._ A third of the distance across the Beach, the meadow ends and sand begins.
|
||||
|
||||
The outline of this inner shore is most irregular, curving and bending in and out and back upon itself, making coves and points and creeks and channels, and often pushing out in flats with not water enough on them at low tide to wet your ankles.
|
||||
|
||||
## Subtitle
|
||||
|
||||
This is another fine paragraph
|
||||
|
||||
### Smaller subtitle
|
||||
|
||||
This is a paragraph `with` ~style~ *italic* _italic_ **bold** __bold__
|
||||
|
||||

|
||||
|
||||
*Hello [link](https://rsms.me/) lol*
|
||||
|
||||
Hello [*link*](https://rsms.me/) lol "cat"
|
||||
|
||||
Hello from *[link](https://rsms.me/)* to __everyone__ `reading this`
|
||||
|
||||
Here's an [**important** anchor link](#example).
|
||||
|
||||
line 1
|
||||
line 2
|
||||
|
||||
Code & Poetry
|
||||
-------------
|
||||
|
||||
You can also indent
|
||||
blocks to display
|
||||
code or poetry.
|
||||
|
||||
Indented code/poetry blocks
|
||||
can be hard-wrapped.
|
||||
|
||||
**Or, wrap your code in three backticks:**
|
||||
|
||||
```js
|
||||
function codeBlocks() {
|
||||
return "Can be inserted"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### Embedded HTML
|
||||
|
||||
With default settings, markdown-wasm allows embedded HTML.
|
||||
|
||||
> It has been disabled in this demo for safety reasons, by means of setting `ParseFlags.NO_HTML`.
|
||||
> Not setting the `NO_HTML` flag allows embedding HTML like this:
|
||||
|
||||
<input type=“text” value=“type”></input>
|
||||
|
||||
|
||||
### Block Quotes
|
||||
|
||||
> You can insert quotes by
|
||||
> preceding each line with `>`.
|
||||
>
|
||||
> Blockquotes can also contain line
|
||||
> breaks.
|
||||
|
||||
|
||||
## Lists
|
||||
|
||||
### Unordered lists
|
||||
|
||||
- Unordered
|
||||
- Lists
|
||||
- Hello
|
||||
|
||||
### Ordered lists
|
||||
|
||||
1. Ordered
|
||||
2. Lists
|
||||
4. Numbers are ignored
|
||||
1. Ordered
|
||||
|
||||
121) Ordered lists can start
|
||||
122) with any number and
|
||||
123) use . as well as ) as a separator.
|
||||
|
||||
### Task lists
|
||||
|
||||
- [ ] Task 1
|
||||
- [x] Task 2
|
||||
- [ ] Task 3
|
||||
- Regular list item
|
||||
|
||||
## Tables
|
||||
|
||||
| Column 1 | Column 2 | Column 3 | Column 4
|
||||
|----------|:---------|:--------:|---------:
|
||||
| default | left | center | right
|
||||
|
||||
### Table of image file types
|
||||
|
||||
| Header | Mime type | Extensions | Description
|
||||
|---------------------------|--------------|------------|-------------
|
||||
| `89 50 4E 47 0D 0A 1A 0A` | image/png | png | PNG image
|
||||
| `47 49 46 38 39 61` | image/gif | gif | GIF image
|
||||
| `FF D8 FF` | image/jpeg | jpg jpeg | JPEG image
|
||||
| `4D 4D 00 2B` | image/tiff | tif tiff | TIFF image
|
||||
| `42 4D` | image/bmp | bmp | Bitmap image
|
||||
| `00 00 01 00` | image/x-icon | ico | Icon image
|
||||
|
||||
|
||||
</textarea></c>
|
||||
<c class="output" id="html-output">
|
||||
<div title="Loading markdown module...">•••</div>
|
||||
</c>
|
||||
</grid>
|
||||
|
||||
<script type="text/javascript">
|
||||
// await the loading of the web assembly module
|
||||
window["markdown"].ready.then(function(markdown){
|
||||
|
||||
const inputEl = document.getElementById("markdown-input")
|
||||
const outputEl = document.getElementById("html-output")
|
||||
|
||||
function update() {
|
||||
let source = inputEl.value
|
||||
let html = markdown.parse(source, {
|
||||
parseFlags: markdown.ParseFlags.DEFAULT | markdown.ParseFlags.NO_HTML,
|
||||
})
|
||||
outputEl.innerHTML = html
|
||||
}
|
||||
|
||||
inputEl.addEventListener("input", update)
|
||||
update()
|
||||
|
||||
console.log({markdown})
|
||||
|
||||
})</script>
|
||||
</body>
|
||||
</html>
|
||||
4
docs/markdown.js
Normal file
4
docs/markdown.js
Normal file
File diff suppressed because one or more lines are too long
1
docs/markdown.js.map
Normal file
1
docs/markdown.js.map
Normal file
File diff suppressed because one or more lines are too long
BIN
docs/markdown.wasm
Normal file
BIN
docs/markdown.wasm
Normal file
Binary file not shown.
81
example/example.html
Normal file
81
example/example.html
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
<h1><a id="h1" class="anchor" aria-hidden="true" href="#h1"></a>H1</h1>
|
||||
<p>This is a paragraph</p>
|
||||
<h2><a id="h2" class="anchor" aria-hidden="true" href="#h2"></a>H2</h2>
|
||||
<p>This is a paragraph</p>
|
||||
<h2><a id="another" class="anchor" aria-hidden="true" href="#another"></a>Another</h2>
|
||||
<p>This is a paragraph with style <em>italic</em> <em>italic</em> <b>bold</b> <b>bold</b></p>
|
||||
<p><img src="https://rsms.me/image.png" alt="image">
|
||||
<img src="https://rsms.me/image.png?without-alt" alt=""></p>
|
||||
<p><em>Hello <a href="https://rsms.me/">link</a> lol</em></p>
|
||||
<p>Hello <a href="https://rsms.me/"><em>link</em></a> lol "cat"</p>
|
||||
<p>Hello from <em><a href="https://rsms.me/">link</a></em> to <b>everyone</b> <code>reading this</code></p>
|
||||
<p>Here's an <a href="#example"><b>important</b> anchor link</a>.</p>
|
||||
<p>line 1
|
||||
line 2</p>
|
||||
<h2><a id="code-poetry" class="anchor" aria-hidden="true" href="#code-poetry"></a>Code & Poetry</h2>
|
||||
<pre><code>You can also indent
|
||||
blocks to display
|
||||
code or poetry.
|
||||
|
||||
Indented code/poetry blocks
|
||||
can be hard-wrapped.
|
||||
</code></pre>
|
||||
<p><b>Or, wrap your code in three backticks:</b></p>
|
||||
<pre><code class="language-js">function codeBlocks() {
|
||||
return "Can be inserted"
|
||||
}
|
||||
</code></pre>
|
||||
<h3><a id="block-quotes" class="anchor" aria-hidden="true" href="#block-quotes"></a>Block Quotes</h3>
|
||||
<blockquote>
|
||||
<p>You can insert quotes by
|
||||
preceeding each line with <code>></code>.</p>
|
||||
<p>Blockquotes can also contain line
|
||||
breaks.</p>
|
||||
</blockquote>
|
||||
<h2><a id="lists" class="anchor" aria-hidden="true" href="#lists"></a>Lists</h2>
|
||||
<ul>
|
||||
<li>Unordered</li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li>Lists</li>
|
||||
</ul>
|
||||
<ul>
|
||||
<li>Of mixed type</li>
|
||||
</ul>
|
||||
<ol>
|
||||
<li>Ordered</li>
|
||||
<li>Lists</li>
|
||||
<li>Numbers are ignored</li>
|
||||
</ol>
|
||||
<ol start="121">
|
||||
<li>Ordered lists can start</li>
|
||||
<li>with any number and</li>
|
||||
<li>use . as well as ) as a separator.</li>
|
||||
</ol>
|
||||
<ul>
|
||||
<li class="task-list-item"><input type="checkbox" disabled>Task 1</li>
|
||||
<li class="task-list-item"><input type="checkbox" disabled checked>Task 2</li>
|
||||
<li class="task-list-item"><input type="checkbox" disabled>Task 3</li>
|
||||
</ul>
|
||||
<h2><a id="tables" class="anchor" aria-hidden="true" href="#tables"></a>Tables</h2>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Column 1</th>
|
||||
<th align="left">Column 2</th>
|
||||
<th align="center">Column 3</th>
|
||||
<th align="right">Column 4</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>default</td>
|
||||
<td align="left">left</td>
|
||||
<td align="center">center</td>
|
||||
<td align="right">right</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h2><a id="anot-her" class="anchor" aria-hidden="true" href="#anot-her"></a>Anöt######her!</h2>
|
||||
<h2><a id="anot-her" class="anchor" aria-hidden="true" href="#anot-her"></a>?!Anöt//her!!</h2>
|
||||
<h2><a id="" class="anchor" aria-hidden="true" href="#"></a>?!!</h2>
|
||||
24
example/example.js
Normal file
24
example/example.js
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
// Renders example.md to example.html with the locally built module and echoes
// the result. Passing "-release" on the command line selects the release build
// and additionally runs a small timing loop.
const fs = require("fs")

const RELEASE = process.argv.includes("-release")
const md = require(RELEASE ? "../build/release/md.js" : "../build/debug/md.js")

const outfile = `${__dirname}/example.html`

// The source is read as raw bytes and the output is taken as a memory view, so
// no string conversion happens between reading and writing.
const source = fs.readFileSync(`${__dirname}/example.md`)
const outbuf = md.parse(source, { asMemoryView: true })
console.log("write", outfile)
fs.writeFileSync(outfile, outbuf)

console.log(fs.readFileSync(outfile, "utf8"))

// mini benchmark
if (RELEASE) {
  console.log("benchmark start")
  const iterations = 10000
  const timeStart = Date.now()
  let remaining = iterations
  while (remaining-- > 0) {
    // stored on the global object so the call has an observable effect
    global["dont-optimize-away"] = md.parse(source, { asMemoryView: true })
  }
  const avgMs = (Date.now() - timeStart) / iterations
  console.log(`benchmark end -- avg parse time: ${avgMs.toFixed(2)}ms`)
}
|
||||
84
example/example.md
Normal file
84
example/example.md
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
# H1
|
||||
|
||||
This is a paragraph
|
||||
|
||||
## H2
|
||||
|
||||
This is a paragraph
|
||||
|
||||
## Another
|
||||
|
||||
This is a paragraph with style *italic* _italic_ **bold** __bold__
|
||||
|
||||

|
||||

|
||||
|
||||
*Hello [link](https://rsms.me/) lol*
|
||||
|
||||
Hello [*link*](https://rsms.me/) lol "cat"
|
||||
|
||||
Hello from *[link](https://rsms.me/)* to __everyone__ `reading this`
|
||||
|
||||
Here's an [**important** anchor link](#example).
|
||||
|
||||
line 1
|
||||
line 2
|
||||
|
||||
Code & Poetry
|
||||
-------------
|
||||
|
||||
You can also indent
|
||||
blocks to display
|
||||
code or poetry.
|
||||
|
||||
Indented code/poetry blocks
|
||||
can be hard-wrapped.
|
||||
|
||||
**Or, wrap your code in three backticks:**
|
||||
|
||||
```js
|
||||
function codeBlocks() {
|
||||
return "Can be inserted"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### Block Quotes
|
||||
|
||||
> You can insert quotes by
|
||||
> preceeding each line with `>`.
|
||||
>
|
||||
> Blockquotes can also contain line
|
||||
> breaks.
|
||||
|
||||
|
||||
## Lists
|
||||
|
||||
- Unordered
|
||||
* Lists
|
||||
+ Of mixed type
|
||||
|
||||
1. Ordered
|
||||
2. Lists
|
||||
4. Numbers are ignored
|
||||
|
||||
121) Ordered lists can start
|
||||
122) with any number and
|
||||
123) use . as well as ) as a separator.
|
||||
|
||||
- [ ] Task 1
|
||||
- [x] Task 2
|
||||
- [ ] Task 3
|
||||
|
||||
## Tables
|
||||
|
||||
| Column 1 | Column 2 | Column 3 | Column 4 |
|
||||
|----------|:---------|:--------:|---------:|
|
||||
| default | left | center | right |
|
||||
|
||||
|
||||
## Anöt######her!
|
||||
|
||||
## ?!Anöt//her!!
|
||||
|
||||
## ?!!
|
||||
57
markdown.d.ts
vendored
Normal file
57
markdown.d.ts
vendored
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
/**
|
||||
* parse reads markdown source at s and converts it to HTML.
|
||||
* When asMemoryView is true, the returned Uint8Array is a view of heap memory, not a copy.
|
||||
*/
|
||||
export function parse(s :Source, o? :ParseOptions & { asMemoryView? :never|false }) :string
|
||||
export function parse(s :Source, o? :ParseOptions & { asMemoryView :true }) :Uint8Array
|
||||
|
||||
/** Markdown source code can be provided as a JavaScript string or UTF8 encoded data */
|
||||
type Source = string|ArrayLike<number>
|
||||
|
||||
/** Options for the parse function */
|
||||
export interface ParseOptions {
|
||||
/**
|
||||
* Customize parsing.
|
||||
* If not provided, the following flags are used, equating to github-style parsing:
|
||||
* COLLAPSE_WHITESPACE
|
||||
* PERMISSIVE_ATX_HEADERS
|
||||
* PERMISSIVE_URL_AUTO_LINKS
|
||||
* STRIKETHROUGH
|
||||
* TABLES
|
||||
* TASK_LISTS
|
||||
*/
|
||||
parseFlags? :ParseFlags
|
||||
|
||||
/**
|
||||
* asMemoryView=true causes parse() to return a view of heap memory as a Uint8Array,
|
||||
* instead of a string.
|
||||
*
|
||||
* The returned Uint8Array is only valid until the next call to parse().
|
||||
* If you need to keep the returned data around, call Uint8Array.slice() to make a copy,
|
||||
* as each call to parse() uses the same underlying memory.
|
||||
*
|
||||
* This only provides a performance benefit when you never need to convert the output
|
||||
* to a string. In most cases you're better off leaving this unset or false.
|
||||
*/
|
||||
asMemoryView? :boolean
|
||||
}
|
||||
|
||||
/** Flags that customize Markdown parsing */
|
||||
export enum ParseFlags {
|
||||
/** In TEXT, collapse non-trivial whitespace into single ' ' */ COLLAPSE_WHITESPACE,
|
||||
/** Enable $ and $$ containing LaTeX equations. */ LATEX_MATH_SPANS,
|
||||
/** Disable raw HTML blocks. */ NO_HTML_BLOCKS,
|
||||
/** Disable raw HTML (inline). */ NO_HTML_SPANS,
|
||||
/** Disable indented code blocks. (Only fenced code works.) */ NO_INDENTED_CODE_BLOCKS,
|
||||
/** Do not require space in ATX headers ( ###header ) */ PERMISSIVE_ATX_HEADERS,
|
||||
/** Recognize e-mails as links even without <...> */ PERMISSIVE_EMAIL_AUTO_LINKS,
|
||||
/** Recognize URLs as links even without <...> */ PERMISSIVE_URL_AUTO_LINKS,
|
||||
/** Enable WWW autolinks (without proto; just 'www.') */ PERMISSIVE_WWW_AUTOLINKS,
|
||||
/** Enable strikethrough extension. */ STRIKETHROUGH,
|
||||
/** Enable tables extension. */ TABLES,
|
||||
/** Enable task list extension. */ TASK_LISTS,
|
||||
/** Enable wiki links extension. */ WIKI_LINKS,
|
||||
|
||||
/** Default flags */ DEFAULT,
|
||||
/** Shorthand for NO_HTML_BLOCKS | NO_HTML_SPANS */ NO_HTML,
|
||||
}
|
||||
14
package-lock.json
generated
Normal file
14
package-lock.json
generated
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"name": "markdown-wasm",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
"wasmc": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/wasmc/-/wasmc-2.0.1.tgz",
|
||||
"integrity": "sha512-EVa1ep5aKKHZpd3+L4s3hah4JO2AId+V13kASKgHvZhAlji0SvyY7WOu/uM8Of0ZTQUdgm6K3zmmPo7G3Pt00w==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
}
|
||||
34
package.json
Normal file
34
package.json
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
{
|
||||
"name": "markdown-wasm",
|
||||
"version": "1.0.0",
|
||||
"description": "Markdown parser and html generator implemented in WebAssembly",
|
||||
"main": "dist/markdown.js",
|
||||
"jsnext:main": "dist/markdown.es.js",
|
||||
"module": "dist/markdown.es.js",
|
||||
"typings": "markdown.d.ts",
|
||||
"homepage": "https://github.com/rsms/markdown-wasm",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/rsms/markdown-wasm.git"
|
||||
},
|
||||
"files": [
|
||||
"dist/markdown.wasm",
|
||||
"dist/markdown.js",
|
||||
"dist/markdown.js.map",
|
||||
"dist/markdown.node.js",
|
||||
"dist/markdown.node.js.map",
|
||||
"dist/markdown.es.js",
|
||||
"dist/markdown.es.js.map",
|
||||
"markdown.d.ts",
|
||||
"README.md",
|
||||
"LICENSE"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "wasmc"
|
||||
},
|
||||
"author": "Rasmus Andersson <https://rsms.me/>",
|
||||
"license": "MIT",
|
||||
"devDependencies": {
|
||||
"wasmc": "^2.0.1"
|
||||
}
|
||||
}
|
||||
60
src/common.h
Normal file
60
src/common.h
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
#pragma once
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
// #include <emscripten/emscripten.h>
|
||||
|
||||
/* MSVC's C compiler spells the keyword `__inline`. */
#ifdef _MSC_VER
  #ifndef __cplusplus
    #define inline __inline
  #endif
#endif

/* Short fixed-width integer aliases. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef int16_t  i16;
typedef uint32_t u32;
typedef int32_t  i32;

#ifndef TRUE
  #define TRUE 1
#endif
#ifndef FALSE
  #define FALSE 0
#endif

/*
 * bool / true / false.
 * Only provided when the language or <stdbool.h> has not already done so:
 * in C++ and C23 these are keywords, and after <stdbool.h> `bool` is a macro,
 * so an unconditional typedef would not compile there.
 */
#if !defined(__cplusplus) && !defined(__bool_true_false_are_defined) && \
    !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
  typedef uint8_t bool;
  #define true  TRUE
  #define false FALSE
#endif

/*
 * static_assert(cond, msg): compile-time assertion.
 * Left alone when already available (the <assert.h> macro in C11, or the
 * keyword in C++ and C23). Otherwise mapped to _Static_assert where supported.
 * `__has_feature` is only evaluated inside a `defined(__has_feature)` guard,
 * because using it in #if on a compiler that lacks it is a preprocessing error.
 * The last-resort fallback declares an array whose size is negative when
 * `cond` is false, which is valid at both file and block scope.
 */
#if !defined(static_assert) && !defined(__cplusplus) && \
    !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
  #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
    #define static_assert _Static_assert
  #elif defined(__has_feature)
    #if __has_feature(c_static_assert)
      #define static_assert _Static_assert
    #endif
  #endif
  #ifndef static_assert
    #define static_assert(cond, msg) \
      extern char static_assert_fallback_[(cond) ? 1 : -1]
  #endif
#endif

/*
 * max(a,b) / min(a,b): each argument is evaluated exactly once.
 * Relies on the GNU statement-expression and __typeof__ extensions.
 */
#define max(a,b) \
  ({__typeof__ (a) _a = (a); \
    __typeof__ (b) _b = (b); \
    _a > _b ? _a : _b; })

#define min(a,b) \
  ({__typeof__ (a) _a = (a); \
    __typeof__ (b) _b = (b); \
    _a < _b ? _a : _b; })

/*
 * countof(x): number of elements in array x.
 * The second divisor is 0 when sizeof(x) is not a multiple of the element
 * size, turning that misuse into a division by zero in a constant expression.
 */
#define countof(x) \
  ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))

/* dlog(...): printf-style logging, compiled out unless DEBUG is non-zero. */
#ifndef DEBUG
  #define DEBUG 0
#endif
#if DEBUG
  #include <stdio.h>
  #define dlog(...) printf(__VA_ARGS__)
#else
  #define dlog(...)
#endif /* DEBUG > 0 */
|
||||
429
src/fmt_html.c
Normal file
429
src/fmt_html.c
Normal file
|
|
@ -0,0 +1,429 @@
|
|||
/*
|
||||
* md4c modified for mdjs.
|
||||
* Original source code is licensed as follows:
|
||||
*
|
||||
* Copyright (c) 2016-2019 Martin Mitas
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "fmt_html.h"
|
||||
#include "md4c.h"
|
||||
|
||||
/* State passed to the HTML render helpers in this file. */
typedef struct HtmlRenderer_st {
|
||||
/* Destination buffer; render_text, render_literal and render_char append to it. */
WBuf* outbuf;
|
||||
/* NOTE(review): not referenced in the visible part of this file; presumably an image nesting depth -- confirm. */
int imgnest;
|
||||
/* NOTE(review): not referenced in the visible part of this file; presumably controls heading anchor output -- confirm. */
int addanchor;
|
||||
} HtmlRenderer;
|
||||
|
||||
|
||||
/*
 * Lookup table indexed by byte value (0-255). A non-zero entry marks a byte
 * that render_html_escaped must not copy verbatim. The only marked bytes are
 * 0x22 ("), 0x26 (&), 0x3C (<) and 0x3E (>). The trailing comment on each row
 * shows the characters covered by that row.
 */
static char htmlEscapeMap[256] = {
|
||||
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
|
||||
/* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // <CTRL> ...
|
||||
/* 0x10 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // <CTRL> ...
|
||||
/* 0x20 */ 0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0, // ! " # $ % & ' ( ) * + , - . /
|
||||
/* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0, // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
|
||||
/* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // @ A B C D E F G H I J K L M N O
|
||||
/* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // P Q R S T U V W X Y Z [ \ ] ^ _
|
||||
/* 0x60 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // ` a b c d e f g h i j k l m n o
|
||||
/* 0x70 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // p q r s t u v w x y z { | } ~ <DEL>
|
||||
/* 0x80 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // <CTRL> ...
|
||||
/* 0x90 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // <CTRL> ...
|
||||
/* 0xA0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // <NBSP> ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ <SOFTHYPEN> ® ¯
|
||||
/* 0xB0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
|
||||
/* 0xC0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
|
||||
/* 0xD0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
|
||||
/* 0xE0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // à á â ã ä å æ ç è é ê ë ì í î ï
|
||||
/* 0xF0 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
|
||||
};
|
||||
|
||||
/* The three bytes EF BF BD: U+FFFD REPLACEMENT CHARACTER encoded as UTF-8. Not NUL-terminated. */
static const char ucReplacementUTF8[] = { 0xef, 0xbf, 0xbd };
|
||||
|
||||
|
||||
/* Append `len` bytes starting at `pch` to the renderer's output buffer, without escaping. */
static inline void render_text(HtmlRenderer* r, const char* pch, size_t len) {
  WBuf* out = r->outbuf;
  WBufAppendBytes(out, pch, len);
}
|
||||
|
||||
/* Append the NUL-terminated string `cs` (terminator excluded) to the renderer's output buffer. */
static inline void render_literal(HtmlRenderer* r, const char* cs) {
  const size_t n = strlen(cs);
  WBufAppendBytes(r->outbuf, cs, n);
}
|
||||
|
||||
/* Append the single character `c` to the renderer's output buffer. */
static inline void render_char(HtmlRenderer* r, char c) {
  WBuf* out = r->outbuf;
  WBufAppendc(out, c);
}
|
||||
|
||||
|
||||
/*
 * Append `size` bytes of `data` to the renderer's output, replacing the
 * characters marked in htmlEscapeMap (& < > ") with HTML entity references.
 * All other bytes are copied through unchanged, in runs.
 *
 * r     renderer whose output buffer receives the text
 * data  input bytes; does not need to be NUL-terminated
 * size  number of bytes to read from data
 */
static void render_html_escaped(HtmlRenderer* r, const char* data, size_t size) {
  MD_OFFSET beg = 0;  /* start of the pending run of bytes that need no escaping */
  MD_OFFSET off = 0;  /* current scan position */

  /* Some characters need to be escaped in normal HTML text. */
  #define HTML_NEED_ESCAPE(ch)  (htmlEscapeMap[(unsigned char)(ch)] != 0)

  while (1) {
    /* Skip ahead four bytes at a time while none of them needs escaping. */
    while (
      off + 3 < size &&
      !HTML_NEED_ESCAPE(data[off+0]) &&
      !HTML_NEED_ESCAPE(data[off+1]) &&
      !HTML_NEED_ESCAPE(data[off+2]) &&
      !HTML_NEED_ESCAPE(data[off+3])
    ) {
      off += 4;
    }

    /* Finish the run one byte at a time. */
    while (off < size && !HTML_NEED_ESCAPE(data[off])) {
      off++;
    }

    /* Flush the run collected so far. */
    if (off > beg) {
      render_text(r, data + beg, off - beg);
    }

    if (off >= size) {
      break;
    }

    /* data[off] needs escaping: emit its entity reference, never the raw character. */
    switch (data[off]) {
      case '&': render_literal(r, "&amp;");  break;
      case '<': render_literal(r, "&lt;");   break;
      case '>': render_literal(r, "&gt;");   break;
      case '"': render_literal(r, "&quot;"); break;
    }
    off++;
    beg = off;
  }
}
|
||||
|
||||
|
||||
/*
 * Maps a Latin-1 code point (0x00..0xFF) to the byte used for it in a slug:
 * ASCII letters are lower-cased, digits, '.' and '_' are kept, accented
 * Latin-1 letters are folded to a plain ASCII letter, and everything else
 * becomes '-'. The table is read-only.
 */
static const char slugMap[256] = {
  /*          0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F   */
  /* 0x00 */ '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-', // <CTRL> ...
  /* 0x10 */ '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-', // <CTRL> ...
  /* 0x20 */ '-','-','-','-','-','-','-','-','-','-','-','-','-','-','.','-', // <SP> ! " # $ % & ' ( ) * + , - . /
  /* 0x30 */ '0','1','2','3','4','5','6','7','8','9','-','-','-','-','-','-', // 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
  /* 0x40 */ '-','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', // @ A B C D E F G H I J K L M N O
  /* 0x50 */ 'p','q','r','s','t','u','v','w','x','y','z','-','-','-','-','_', // P Q R S T U V W X Y Z [ \ ] ^ _
  /* 0x60 */ '-','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', // ` a b c d e f g h i j k l m n o
  /* 0x70 */ 'p','q','r','s','t','u','v','w','x','y','z','-','-','-','-','-', // p q r s t u v w x y z { | } ~ <DEL>
  /* 0x80 */ '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-', // <CTRL> ...
  /* 0x90 */ '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-', // <CTRL> ...
  /* 0xA0 */ '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-', // <NBSP> ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ <SOFTHYPHEN> ® ¯
  /* 0xB0 */ '-','-','-','-','-','-','-','-','-','-','-','-','-','-','-','-', // ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
  /* 0xC0 */ 'a','a','a','a','a','a','a','c','e','e','e','e','i','i','i','i', // À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
  /* 0xD0 */ 'd','n','o','o','o','o','o','x','o','u','u','u','u','y','-','s', // Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
  /* 0xE0 */ 'a','a','a','a','a','a','a','c','e','e','e','e','i','i','i','i', // à á â ã ä å æ ç è é ê ë ì í î ï
  /* 0xF0 */ 'd','n','o','o','o','o','o','-','o','u','u','u','u','y','-','y', // ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
};
|
||||
|
||||
|
||||
/*
 * Append a "slug" of the UTF-8 text pch[0..len) to `b` and return the number
 * of bytes appended.
 *
 * Every character is translated through slugMap. Two-byte UTF-8 sequences
 * that encode a Latin-1 code point (<= U+00FF) are decoded first; every other
 * non-ASCII byte is treated as code point 0, which maps to '-'. Consecutive
 * '-' are collapsed into one, and leading and trailing '-' are trimmed.
 */
static size_t WBufAppendSlug(WBuf* b, const char* pch, size_t len) {
  /* Each input byte produces at most one output byte, so `len` bytes suffice.
   * NOTE(review): assumes WBufReserve guarantees that much room at b->ptr. */
  WBufReserve(b, len);
  const char* start = b->ptr;
  char pc = 0;  /* last byte written; 0 until something has been written */

  for (size_t i = 0; i < len; i++) {
    u8 x = (u8)pch[i];
    if (x >= 0x80) {
      /* Decode a 2-byte sequence (110xxxxx 10xxxxxx). The second byte is only
       * consumed when it really is a continuation byte, so malformed input
       * cannot swallow the following character. */
      if ((x >> 5) == 0x6 && i + 1 < len && (((u8)pch[i + 1]) & 0xC0) == 0x80) {
        u32 cp = ((u32)(x & 0x1f) << 6) | (((u8)pch[++i]) & 0x3f);
        x = cp <= 0xFF ? (u8)cp : 0;
      } else {
        x = 0;
      }
    }

    char c = slugMap[x];
    /* Write '-' only after a non-'-' byte: this collapses runs and, because
     * pc starts at 0, also trims leading '-'. */
    if (c != '-' || (pc != '-' && pc != 0)) {
      *(b->ptr++) = c;
      pc = c;
    }
  }

  if (pc == '-') {
    /* trim trailing '-' */
    b->ptr--;
  }
  return (size_t)(b->ptr - start);
}
|
||||
|
||||
|
||||
/*
 * Render an attribute value made of typed sub-strings. NUL characters become
 * U+FFFD, entities are copied verbatim and all other text is HTML-escaped.
 * Iteration stops at the first sub-string offset that reaches attr->size.
 */
static void render_attribute(HtmlRenderer* r, const MD_ATTRIBUTE* attr) {
  int idx = 0;
  while (attr->substr_offsets[idx] < attr->size) {
    const MD_TEXTTYPE kind  = attr->substr_types[idx];
    const MD_OFFSET   begin = attr->substr_offsets[idx];
    const MD_SIZE     nbyte = attr->substr_offsets[idx + 1] - begin;
    const MD_CHAR*    chunk = attr->text + begin;

    if (kind == MD_TEXT_NULLCHAR) {
      render_text(r, ucReplacementUTF8, sizeof(ucReplacementUTF8));
    } else if (kind == MD_TEXT_ENTITY) {
      render_text(r, chunk, nbyte);
    } else {
      render_html_escaped(r, chunk, nbyte);
    }
    idx++;
  }
}
|
||||
|
||||
|
||||
/* Open an ordered list; the start attribute is emitted only when the list does not begin at 1. */
static void render_open_ol_block(HtmlRenderer* r, const MD_BLOCK_OL_DETAIL* det) {
  if (det->start != 1) {
    render_literal(r, "<ol start=\"");
    WBufAppendU32(r->outbuf, det->start, 10);
    render_literal(r, "\">\n");
    return;
  }
  render_literal(r, "<ol>\n");
}
|
||||
|
||||
/* Open a list item. Task-list items get a disabled checkbox, checked when the mark is 'x' or 'X'. */
static void render_open_li_block(HtmlRenderer* r, const MD_BLOCK_LI_DETAIL* det) {
  if (!det->is_task) {
    render_literal(r, "<li>");
    return;
  }

  render_literal(r, "<li class=\"task-list-item\"><input type=\"checkbox\" disabled");
  const int done = (det->task_mark == 'x' || det->task_mark == 'X');
  if (done) {
    render_literal(r, " checked");
  }
  render_char(r, '>');
}
|
||||
|
||||
/* Open a code block; when a language is present it is emitted as class="language-...". */
static void render_open_code_block(HtmlRenderer* r, const MD_BLOCK_CODE_DETAIL* det) {
  const int hasLang = (det->lang.text != NULL);

  render_literal(r, "<pre><code");
  if (hasLang) {
    render_literal(r, " class=\"language-");
    render_attribute(r, &det->lang);
    render_char(r, '"');
  }
  render_char(r, '>');
}
|
||||
|
||||
/* Open a table cell (<th> when isTH, otherwise <td>) with an optional align attribute. */
static void render_open_td_block(HtmlRenderer* r, bool isTH, const MD_BLOCK_TD_DETAIL* det) {
  if (isTH) {
    render_literal(r, "<th");
  } else {
    render_literal(r, "<td");
  }

  if (det->align == MD_ALIGN_LEFT) {
    render_literal(r, " align=\"left\">");
  } else if (det->align == MD_ALIGN_CENTER) {
    render_literal(r, " align=\"center\">");
  } else if (det->align == MD_ALIGN_RIGHT) {
    render_literal(r, " align=\"right\">");
  } else {
    /* no explicit alignment */
    render_char(r, '>');
  }
}
|
||||
|
||||
/* Open a link: <a href="..."> with an optional title attribute. */
static void render_open_a_span(HtmlRenderer* r, const MD_SPAN_A_DETAIL* det) {
  const int hasTitle = (det->title.text != NULL);

  render_literal(r, "<a href=\"");
  render_attribute(r, &det->href);
  if (hasTitle) {
    render_literal(r, "\" title=\"");
    render_attribute(r, &det->title);
  }
  render_literal(r, "\">");
}
|
||||
|
||||
/*
 * Begin an image tag up to and including the opening quote of its alt
 * attribute, then bump the image nesting counter; the tag is completed by
 * render_close_img_span.
 */
static void render_open_img_span(HtmlRenderer* r, const MD_SPAN_IMG_DETAIL* det) {
  render_literal(r, "<img src=\"");
  render_attribute(r, &det->src);
  render_literal(r, "\" alt=\"");
  r->imgnest += 1;
}
|
||||
|
||||
/* Finish an image tag: optional title attribute, closing quote and '>', then decrement the nesting counter. */
static void render_close_img_span(HtmlRenderer* r, const MD_SPAN_IMG_DETAIL* det) {
  const int hasTitle = (det->title.text != NULL);
  if (hasTitle) {
    render_literal(r, "\" title=\"");
    render_attribute(r, &det->title);
  }
  render_literal(r, "\">");
  r->imgnest -= 1;
}
|
||||
|
||||
/* Open a wiki link as the custom element <x-wikilink data-target="...">. */
static void render_open_wikilink_span(HtmlRenderer* r, const MD_SPAN_WIKILINK_DETAIL* det)
{
  render_literal(r, "<x-wikilink data-target=\"");
  render_attribute(r, &det->target);
  render_literal(r, "\">");
}
|
||||
|
||||
|
||||
/**************************************
|
||||
*** HTML renderer implementation ***
|
||||
**************************************/
|
||||
|
||||
|
||||
|
||||
/*
 * Parser callback invoked when a block starts: writes the block's opening
 * tag. Entering a heading also sets r->addanchor, which text_callback
 * consumes on the next text it receives. Always returns 0.
 */
static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
  static const MD_CHAR* headingOpen[6] = { "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" };
  HtmlRenderer* r = (HtmlRenderer*) userdata;

  switch(type) {
    case MD_BLOCK_DOC:
    case MD_BLOCK_HTML:
      /* nothing to emit */
      break;

    case MD_BLOCK_QUOTE: render_literal(r, "<blockquote>\n"); break;
    case MD_BLOCK_UL:    render_literal(r, "<ul>\n"); break;
    case MD_BLOCK_HR:    render_literal(r, "<hr>\n"); break;
    case MD_BLOCK_P:     render_literal(r, "<p>"); break;
    case MD_BLOCK_TABLE: render_literal(r, "<table>\n"); break;
    case MD_BLOCK_THEAD: render_literal(r, "<thead>\n"); break;
    case MD_BLOCK_TBODY: render_literal(r, "<tbody>\n"); break;
    case MD_BLOCK_TR:    render_literal(r, "<tr>\n"); break;

    case MD_BLOCK_OL:
      render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail);
      break;

    case MD_BLOCK_LI:
      render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail);
      break;

    case MD_BLOCK_CODE:
      render_open_code_block(r, (const MD_BLOCK_CODE_DETAIL*) detail);
      break;

    case MD_BLOCK_H: {
      const MD_BLOCK_H_DETAIL* h = (MD_BLOCK_H_DETAIL*)detail;
      render_literal(r, headingOpen[h->level - 1]);
      r->addanchor = 1;
      break;
    }

    case MD_BLOCK_TH:
      render_open_td_block(r, true, (MD_BLOCK_TD_DETAIL*)detail);
      break;

    case MD_BLOCK_TD:
      render_open_td_block(r, false, (MD_BLOCK_TD_DETAIL*)detail);
      break;
  }

  return 0;
}
|
||||
|
||||
/* Parser callback invoked when a block ends: writes the block's closing tag. Always returns 0. */
static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
  static const MD_CHAR* headingClose[6] = {
    "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n"
  };
  HtmlRenderer* r = (HtmlRenderer*) userdata;
  const MD_CHAR* closing = NULL;

  switch(type) {
    case MD_BLOCK_DOC:   /* no tag */ break;
    case MD_BLOCK_HR:    /* no tag */ break;
    case MD_BLOCK_HTML:  /* no tag */ break;
    case MD_BLOCK_QUOTE: closing = "</blockquote>\n"; break;
    case MD_BLOCK_UL:    closing = "</ul>\n"; break;
    case MD_BLOCK_OL:    closing = "</ol>\n"; break;
    case MD_BLOCK_LI:    closing = "</li>\n"; break;
    case MD_BLOCK_H:     closing = headingClose[((MD_BLOCK_H_DETAIL*)detail)->level - 1]; break;
    case MD_BLOCK_CODE:  closing = "</code></pre>\n"; break;
    case MD_BLOCK_P:     closing = "</p>\n"; break;
    case MD_BLOCK_TABLE: closing = "</table>\n"; break;
    case MD_BLOCK_THEAD: closing = "</thead>\n"; break;
    case MD_BLOCK_TBODY: closing = "</tbody>\n"; break;
    case MD_BLOCK_TR:    closing = "</tr>\n"; break;
    case MD_BLOCK_TH:    closing = "</th>\n"; break;
    case MD_BLOCK_TD:    closing = "</td>\n"; break;
  }

  if (closing != NULL) {
    render_literal(r, closing);
  }
  return 0;
}
|
||||
|
||||
/*
 * Parser callback invoked when an inline span starts: writes its opening tag.
 *
 * While r->imgnest > 0 we are inside the alt="..." attribute of an <img>, so
 * no markup is written. Images nested inside an alt text still have to be
 * counted, otherwise the inner image's "leave" event would be mistaken for
 * the end of the outer image and close the tag too early.
 * Always returns 0.
 */
static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
  HtmlRenderer* r = (HtmlRenderer*) userdata;

  if(r->imgnest > 0) {
    /* We are inside an image, i.e. rendering the ALT attribute of
     * <IMG> tag. Only track nesting depth. */
    if(type == MD_SPAN_IMG)
      r->imgnest++;
    return 0;
  }

  switch(type) {
    case MD_SPAN_EM:                render_literal(r, "<em>"); break;
    case MD_SPAN_STRONG:            render_literal(r, "<b>"); break;
    case MD_SPAN_A:                 render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
    case MD_SPAN_IMG:               render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
    case MD_SPAN_CODE:              render_literal(r, "<code>"); break;
    case MD_SPAN_DEL:               render_literal(r, "<del>"); break;
    case MD_SPAN_LATEXMATH:         render_literal(r, "<x-equation>"); break;
    case MD_SPAN_LATEXMATH_DISPLAY: render_literal(r, "<x-equation type=\"display\">"); break;
    case MD_SPAN_WIKILINK:          render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL*) detail); break;
  }

  return 0;
}


/*
 * Parser callback invoked when an inline span ends: writes its closing tag.
 *
 * Inside an image's alt text only image spans matter: leaving the outermost
 * image (depth 1) completes the <img> tag via render_close_img_span, which
 * also brings r->imgnest back to 0; leaving a nested image just decrements
 * the depth. Always returns 0.
 */
static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
  HtmlRenderer* r = (HtmlRenderer*) userdata;

  if(r->imgnest > 0) {
    /* We are inside an image, i.e. rendering the ALT attribute of
     * <IMG> tag. */
    if(type == MD_SPAN_IMG) {
      if(r->imgnest == 1)
        render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
      else
        r->imgnest--;
    }
    return 0;
  }

  switch(type) {
    case MD_SPAN_EM:                render_literal(r, "</em>"); break;
    case MD_SPAN_STRONG:            render_literal(r, "</b>"); break;
    case MD_SPAN_A:                 render_literal(r, "</a>"); break;
    case MD_SPAN_IMG:               /*noop, handled above*/ break;
    case MD_SPAN_CODE:              render_literal(r, "</code>"); break;
    case MD_SPAN_DEL:               render_literal(r, "</del>"); break;
    case MD_SPAN_LATEXMATH:         /*fall through*/
    case MD_SPAN_LATEXMATH_DISPLAY: render_literal(r, "</x-equation>"); break;
    case MD_SPAN_WIKILINK:          render_literal(r, "</x-wikilink>"); break;
  }

  return 0;
}
|
||||
|
||||
/*
 * Parser callback invoked for every piece of text.
 *
 * The first text received after a heading was opened (r->addanchor set) is
 * additionally turned into an anchor: <a id="SLUG" class="anchor"
 * aria-hidden="true" href="#SLUG"></a>, where SLUG is produced by
 * WBufAppendSlug from that text. Line breaks and NUL characters consume the
 * flag without producing an anchor.
 *
 * The text itself is then written according to its type. Always returns 0.
 */
static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) {
  HtmlRenderer* r = (HtmlRenderer*) userdata;

  if (r->addanchor) {
    r->addanchor = 0;
    if (type != MD_TEXT_NULLCHAR && type != MD_TEXT_BR && type != MD_TEXT_SOFTBR) {
      render_literal(r, "<a id=\"");

      /* Remember where the slug starts as an OFFSET, not a pointer: every
       * append/reserve below may grow the buffer and move its storage, which
       * would leave a saved pointer dangling. */
      const size_t slugoff = (size_t)(r->outbuf->ptr - r->outbuf->start);
      const size_t sluglen = WBufAppendSlug(r->outbuf, text, size);

      render_literal(r, "\" class=\"anchor\" aria-hidden=\"true\" href=\"#");

      if (sluglen > 0) {
        /* Repeat the slug for the href by copying it from earlier in the
         * same buffer; the source address is resolved after reserving. */
        WBufReserve(r->outbuf, sluglen);
        memcpy(r->outbuf->ptr, r->outbuf->start + slugoff, sluglen);
        r->outbuf->ptr += sluglen;
      }

      render_literal(r, "\"></a>");
    }
  }

  switch(type) {
    case MD_TEXT_NULLCHAR: render_text(r, ucReplacementUTF8, sizeof(ucReplacementUTF8)); break;
    /* Inside an image's alt attribute line breaks degrade to a space. */
    case MD_TEXT_BR:       render_literal(r, (r->imgnest == 0 ? "<br>\n" : " ")); break;
    case MD_TEXT_SOFTBR:   render_literal(r, (r->imgnest == 0 ? "\n" : " ")); break;
    case MD_TEXT_HTML:     render_text(r, text, size); break;
    case MD_TEXT_ENTITY:   render_text(r, text, size); break;
    default:               render_html_escaped(r, text, size); break;
  }

  return 0;
}
|
||||
|
||||
// static void debug_log_callback(const char* msg, void* userdata) {
|
||||
// dlog("MD4C: %s\n", msg);
|
||||
// }
|
||||
|
||||
int
|
||||
fmt_html(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, unsigned parser_flags) {
|
||||
HtmlRenderer render = { outbuf, 0, 0 };
|
||||
|
||||
MD_PARSER parser = {
|
||||
0,
|
||||
parser_flags,
|
||||
enter_block_callback,
|
||||
leave_block_callback,
|
||||
enter_span_callback,
|
||||
leave_span_callback,
|
||||
text_callback,
|
||||
NULL, // debug_log_callback,
|
||||
NULL
|
||||
};
|
||||
|
||||
return md_parse(input, input_size, &parser, (void*) &render);
|
||||
}
|
||||
4
src/fmt_html.h
Normal file
4
src/fmt_html.h
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
#pragma once
|
||||
#include "wbuf.h"
|
||||
|
||||
// Renders `inputlen` bytes of Markdown at `input` as HTML appended to `outbuf`.
// Returns the parser's status; callers treat any non-zero value as failure.
int fmt_html(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags);
|
||||
542
src/fmt_json.c
Normal file
542
src/fmt_json.c
Normal file
|
|
@ -0,0 +1,542 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "fmt_json.h"
|
||||
#include "md4c.h"
|
||||
// #include "md4c_render_html.h"
|
||||
// #include "entity.h"
|
||||
|
||||
//
|
||||
//
|
||||
// -------------- WORK IN PROGRESS
|
||||
//
|
||||
//
|
||||
|
||||
#ifdef _WIN32
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef DEBUG
|
||||
#define DEBUG 1
|
||||
#endif
|
||||
#if DEBUG
|
||||
#include <stdio.h>
|
||||
# define dlog(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
# define dlog(...)
|
||||
#endif /* DEBUG > 0 */
|
||||
|
||||
|
||||
typedef struct JsonFormatter_st {
|
||||
WBuf* outbuf;
|
||||
} JsonFormatter;
|
||||
|
||||
|
||||
/* Locale-independent ASCII character classes. Each macro evaluates `ch` more than once. */
#define ISDIGIT(ch)   ((ch) >= '0' && (ch) <= '9')
#define ISLOWER(ch)   ((ch) >= 'a' && (ch) <= 'z')
#define ISUPPER(ch)   ((ch) >= 'A' && (ch) <= 'Z')
#define ISALNUM(ch)   (ISLOWER(ch) || ISUPPER(ch) || ISDIGIT(ch))
|
||||
|
||||
|
||||
// static inline void render_text(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
|
||||
// // r->process_output(text, size, r->userdata);
|
||||
// WBufAppendBytes(r->outbuf, text, size);
|
||||
// }
|
||||
|
||||
/* Append `textlen` bytes at `textptr` to the formatter's output buffer.
 * The first parameter is the formatter; it used to be named `f` while the
 * body referred to `r`, so the macro only worked when the caller's variable
 * happened to be called `r`. */
#define render_text(r, textptr, textlen) \
  WBufAppendBytes((r)->outbuf, (textptr), (textlen))
|
||||
|
||||
// #define RENDER_LITERAL(r, literal) \
|
||||
// WBufAppendBytes((r)->outbuf, (literal), (MD_SIZE)strlen(literal))
|
||||
|
||||
|
||||
static char jsonEscapeMap[256];
|
||||
|
||||
static void __attribute__((constructor)) init() {
|
||||
jsonEscapeMap[(unsigned char)'"'] = 1;
|
||||
jsonEscapeMap[(unsigned char)'\n'] = 1;
|
||||
jsonEscapeMap[(unsigned char)'\r'] = 1;
|
||||
jsonEscapeMap[(unsigned char)'\t'] = 1;
|
||||
}
|
||||
|
||||
|
||||
static void writeJsonEscaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size) {
|
||||
MD_OFFSET beg = 0;
|
||||
MD_OFFSET off = 0;
|
||||
|
||||
#define NEED_ESCAPE(ch) (jsonEscapeMap[(unsigned char)(ch)] != 0)
|
||||
|
||||
while(1) {
|
||||
/* Optimization: Use some loop unrolling. */
|
||||
while (
|
||||
off + 3 < size &&
|
||||
!NEED_ESCAPE(data[off+0]) &&
|
||||
!NEED_ESCAPE(data[off+1]) &&
|
||||
!NEED_ESCAPE(data[off+2]) &&
|
||||
!NEED_ESCAPE(data[off+3])
|
||||
) {
|
||||
off += 4;
|
||||
}
|
||||
while (off < size && !NEED_ESCAPE(data[off])) {
|
||||
off++;
|
||||
}
|
||||
|
||||
if (off > beg) {
|
||||
WBufAppendBytes(r->outbuf, data + beg, off - beg);
|
||||
}
|
||||
|
||||
if (off < size) {
|
||||
switch (data[off]) {
|
||||
case '"': WBufAppendCStr(r->outbuf, "\\\""); break;
|
||||
case '\n': WBufAppendCStr(r->outbuf, "\\n"); break;
|
||||
case '\r': WBufAppendCStr(r->outbuf, "\\r"); break;
|
||||
case '\t': WBufAppendCStr(r->outbuf, "\\t"); break;
|
||||
}
|
||||
off++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
beg = off;
|
||||
}
|
||||
|
||||
#undef NEED_ESCAPE
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
render_url_escaped(JsonFormatter* r, const MD_CHAR* data, MD_SIZE size)
|
||||
{
|
||||
static const MD_CHAR hex_chars[] = "0123456789ABCDEF";
|
||||
MD_OFFSET beg = 0;
|
||||
MD_OFFSET off = 0;
|
||||
|
||||
#define URL_NEED_ESCAPE(ch) \
|
||||
(!ISALNUM(ch) && strchr("-_.+!*'(),%#@?=;:/,+$", ch) == NULL)
|
||||
|
||||
while(1) {
|
||||
while(off < size && !URL_NEED_ESCAPE(data[off]))
|
||||
off++;
|
||||
if(off > beg)
|
||||
render_text(r, data + beg, off - beg);
|
||||
|
||||
if(off < size) {
|
||||
char hex[3];
|
||||
|
||||
switch(data[off]) {
|
||||
case '&': WBufAppendCStr(r->outbuf, "&"); break;
|
||||
case '\'': WBufAppendCStr(r->outbuf, "'"); break;
|
||||
default:
|
||||
hex[0] = '%';
|
||||
hex[1] = hex_chars[((unsigned)data[off] >> 4) & 0xf];
|
||||
hex[2] = hex_chars[((unsigned)data[off] >> 0) & 0xf];
|
||||
render_text(r, hex, 3);
|
||||
break;
|
||||
}
|
||||
off++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
beg = off;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Numeric value of the hexadecimal digit `ch`. The argument is not
 * validated: anything that is neither '0'..'9' nor 'A'..'Z' is treated as a
 * lower-case digit.
 */
static unsigned
hex_val(char ch)
{
  const int isDecimal = (ch >= '0' && ch <= '9');
  const int isUpper   = (ch >= 'A' && ch <= 'Z');

  if(isDecimal)
    return ch - '0';
  return isUpper ? (ch - 'A' + 10) : (ch - 'a' + 10);
}
|
||||
|
||||
/*
 * Append `codepoint` to `b` encoded as UTF-8.
 *
 * Code points that must not appear in the output - U+0000, the UTF-16
 * surrogate range U+D800..U+DFFF and anything above U+10FFFF - are replaced
 * by U+FFFD REPLACEMENT CHARACTER. Validation happens first so that the
 * single-byte fast path cannot let a NUL byte through.
 */
static void WBufAppendUTF8Codepoint(WBuf* b, u32 codepoint) {
  const int isSurrogate = (0xd800 <= codepoint && codepoint <= 0xdfff);
  if (codepoint == 0 || codepoint > 0x10ffff || isSurrogate) {
    static const char utf8_replacement_char[] = { (char)0xef, (char)0xbf, (char)0xbd };
    WBufAppendBytes(b, utf8_replacement_char, sizeof(utf8_replacement_char));
    return;
  }

  if (codepoint <= 0x7f) {
    WBufAppendc(b, (char)codepoint);
    return;
  }

  unsigned char utf8[4];
  size_t n;
  if (codepoint <= 0x7ff) {
    n = 2;
    utf8[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
    utf8[1] = 0x80 + ((codepoint >> 0) & 0x3f);
  } else if (codepoint <= 0xffff) {
    n = 3;
    utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
    utf8[1] = 0x80 + ((codepoint >> 6) & 0x3f);
    utf8[2] = 0x80 + ((codepoint >> 0) & 0x3f);
  } else {
    n = 4;
    utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
    utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
    utf8[2] = 0x80 + ((codepoint >> 6) & 0x3f);
    utf8[3] = 0x80 + ((codepoint >> 0) & 0x3f);
  }

  WBufAppendBytes(b, (const char*)utf8, n);
}
|
||||
|
||||
/* Translate entity to its UTF-8 equivalent, or output the verbatim one
|
||||
* if such entity is unknown (or if the translation is disabled). */
|
||||
static void writeDecodeXmlEntity(JsonFormatter* r, const MD_CHAR* text, MD_SIZE size) {
|
||||
if (size > 3 && text[1] == '#') {
|
||||
unsigned codepoint = 0;
|
||||
|
||||
if(text[2] == 'x' || text[2] == 'X') {
|
||||
// Hexadecimal entity (e.g. "�")).
|
||||
for (MD_SIZE i = 3; i < size-1; i++) {
|
||||
codepoint = 16 * codepoint + hex_val(text[i]);
|
||||
}
|
||||
} else {
|
||||
// Decimal entity (e.g. "&1234;")
|
||||
for (MD_SIZE i = 2; i < size-1; i++) {
|
||||
codepoint = 10 * codepoint + (text[i] - '0');
|
||||
}
|
||||
}
|
||||
|
||||
WBufAppendUTF8Codepoint(r->outbuf, codepoint);
|
||||
} else {
|
||||
WBufAppendBytes(r->outbuf, text, size);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_attribute(JsonFormatter* r, const MD_ATTRIBUTE* attr,
|
||||
void (*fn_append)(JsonFormatter*, const MD_CHAR*, MD_SIZE))
|
||||
{
|
||||
int i;
|
||||
|
||||
for(i = 0; attr->substr_offsets[i] < attr->size; i++) {
|
||||
MD_TEXTTYPE type = attr->substr_types[i];
|
||||
MD_OFFSET off = attr->substr_offsets[i];
|
||||
MD_SIZE size = attr->substr_offsets[i+1] - off;
|
||||
const MD_CHAR* text = attr->text + off;
|
||||
|
||||
switch(type) {
|
||||
case MD_TEXT_NULLCHAR: WBufAppendc(r->outbuf, 0); break;
|
||||
case MD_TEXT_ENTITY: writeDecodeXmlEntity(r, text, size); break;
|
||||
default: WBufAppendBytes(r->outbuf, text, size); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
render_open_ol_block(JsonFormatter* r, const MD_BLOCK_OL_DETAIL* det)
|
||||
{
|
||||
char buf[64];
|
||||
|
||||
if(det->start == 1) {
|
||||
WBufAppendCStr(r->outbuf, "<ol>\n");
|
||||
return;
|
||||
}
|
||||
|
||||
snprintf(buf, sizeof(buf), "<ol start=\"%u\">\n", det->start);
|
||||
WBufAppendCStr(r->outbuf, buf);
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_li_block(JsonFormatter* r, const MD_BLOCK_LI_DETAIL* det)
|
||||
{
|
||||
if(det->is_task) {
|
||||
WBufAppendCStr(r->outbuf,
|
||||
"<li class=\"task-list-item\">"
|
||||
"<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");
|
||||
if (det->task_mark == 'x' || det->task_mark == 'X') {
|
||||
WBufAppendCStr(r->outbuf, " checked");
|
||||
}
|
||||
WBufAppendc(r->outbuf, '>');
|
||||
} else {
|
||||
WBufAppendCStr(r->outbuf, "<li>");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_code_block(JsonFormatter* r, const MD_BLOCK_CODE_DETAIL* det)
|
||||
{
|
||||
WBufAppendCStr(r->outbuf, "<pre><code");
|
||||
|
||||
/* If known, output the HTML 5 attribute class="language-LANGNAME". */
|
||||
if(det->lang.text != NULL) {
|
||||
WBufAppendCStr(r->outbuf, " class=\"language-");
|
||||
render_attribute(r, &det->lang, writeJsonEscaped);
|
||||
WBufAppendc(r->outbuf, '"');
|
||||
}
|
||||
|
||||
WBufAppendc(r->outbuf, '>');
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_td_block(JsonFormatter* r, const MD_CHAR* cell_type, const MD_BLOCK_TD_DETAIL* det)
|
||||
{
|
||||
WBufAppendc(r->outbuf, '<');
|
||||
WBufAppendCStr(r->outbuf, cell_type);
|
||||
|
||||
switch (det->align) {
|
||||
case MD_ALIGN_LEFT: WBufAppendCStr(r->outbuf, " align=\"left\">"); break;
|
||||
case MD_ALIGN_CENTER: WBufAppendCStr(r->outbuf, " align=\"center\">"); break;
|
||||
case MD_ALIGN_RIGHT: WBufAppendCStr(r->outbuf, " align=\"right\">"); break;
|
||||
default: WBufAppendCStr(r->outbuf, ">"); break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_a_span(JsonFormatter* r, const MD_SPAN_A_DETAIL* det)
|
||||
{
|
||||
WBufAppendCStr(r->outbuf, "<a href=\"");
|
||||
render_attribute(r, &det->href, render_url_escaped);
|
||||
|
||||
if(det->title.text != NULL) {
|
||||
WBufAppendCStr(r->outbuf, "\" title=\"");
|
||||
render_attribute(r, &det->title, writeJsonEscaped);
|
||||
}
|
||||
|
||||
WBufAppendCStr(r->outbuf, "\">");
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
|
||||
{
|
||||
WBufAppendCStr(r->outbuf, "<img src=\"");
|
||||
render_attribute(r, &det->src, render_url_escaped);
|
||||
|
||||
WBufAppendCStr(r->outbuf, "\" alt=\"");
|
||||
}
|
||||
|
||||
static void
|
||||
render_close_img_span(JsonFormatter* r, const MD_SPAN_IMG_DETAIL* det)
|
||||
{
|
||||
if(det->title.text != NULL) {
|
||||
WBufAppendCStr(r->outbuf, "\" title=\"");
|
||||
render_attribute(r, &det->title, writeJsonEscaped);
|
||||
}
|
||||
|
||||
WBufAppendCStr(r->outbuf, "\">");
|
||||
}
|
||||
|
||||
static void
|
||||
render_open_wikilink_span(JsonFormatter* r, const MD_SPAN_WIKILINK_DETAIL* det)
|
||||
{
|
||||
WBufAppendCStr(r->outbuf, "<x-wikilink data-target=\"");
|
||||
render_attribute(r, &det->target, writeJsonEscaped);
|
||||
WBufAppendCStr(r->outbuf, "\">");
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
/* Begin a JSON object for a node: writes {"_":"<typename>" and leaves the object open. */
static void writeTypeStart(JsonFormatter* r, const char* typename, size_t typenamelen) {
  WBuf* out = r->outbuf;
  WBufAppendCStr(out, "{\"_\":\"");
  WBufAppendBytes(out, typename, typenamelen);
  WBufAppendc(out, '"');
}
|
||||
|
||||
|
||||
/*
 * Write an attribute value as the contents of a JSON string (the caller
 * writes the surrounding quotes).
 *
 * NUL characters are written as \u0000 ("\0" is not a valid JSON escape),
 * entities are decoded by writeDecodeXmlEntity, and ordinary text is
 * JSON-escaped so that quotes or line breaks in it cannot break the string.
 */
static void writeAttribute(JsonFormatter* r, const MD_ATTRIBUTE* attr) {
  for (u32 i = 0; attr->substr_offsets[i] < attr->size; i++) {
    MD_TEXTTYPE type = attr->substr_types[i];
    MD_OFFSET off = attr->substr_offsets[i];
    MD_SIZE size = attr->substr_offsets[i+1] - off;
    const MD_CHAR* text = attr->text + off;
    switch (type) {
      case MD_TEXT_NULLCHAR:  WBufAppendCStr(r->outbuf, "\\u0000"); break;
      case MD_TEXT_ENTITY:    writeDecodeXmlEntity(r, text, size); break;
      default:                writeJsonEscaped(r, text, size); break;
    }
  }
}
|
||||
|
||||
|
||||
/*
 * Parser callback invoked when a block starts. Opens a JSON object of the
 * form {"_":"<type>", <block-specific properties>, "children":[ and leaves
 * the array open for the block's content. Always returns 0.
 */
static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
  static const MD_CHAR* head[6] = { "h1", "h2", "h3", "h4", "h5", "h6" };
  JsonFormatter* r = (JsonFormatter*) userdata;
  #define WRITE_TYPE_START(name) writeTypeStart(r, (name), strlen((name)))

  switch (type) {
    case MD_BLOCK_DOC:    WRITE_TYPE_START("doc"); break;
    case MD_BLOCK_QUOTE:  WRITE_TYPE_START("quote"); break;
    case MD_BLOCK_UL:     WRITE_TYPE_START("ul"); break;
    case MD_BLOCK_HTML:   WRITE_TYPE_START("html"); break;
    case MD_BLOCK_P:      WRITE_TYPE_START("p"); break;
    case MD_BLOCK_TABLE:  WRITE_TYPE_START("table"); break;
    case MD_BLOCK_THEAD:  WRITE_TYPE_START("thead"); break;
    case MD_BLOCK_TBODY:  WRITE_TYPE_START("tbody"); break;
    case MD_BLOCK_TR:     WRITE_TYPE_START("tr"); break;
    case MD_BLOCK_HR:     WRITE_TYPE_START("hr"); break;

    case MD_BLOCK_H: {
      WRITE_TYPE_START(head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]);
      break;
    }

    case MD_BLOCK_OL: {
      WRITE_TYPE_START("ol");
      const MD_BLOCK_OL_DETAIL* d = (const MD_BLOCK_OL_DETAIL*)detail;
      if (d->start != 1) {
        /* 10 bytes of key text + at most 10 digits + NUL fit in 24 bytes */
        char buf[24];
        snprintf(buf, sizeof(buf), ", \"start\":%u", d->start);
        WBufAppendCStr(r->outbuf, buf);
      }
      if (d->is_tight) {
        WBufAppendCStr(r->outbuf, ", \"tight\":true");
      }
      WBufAppendCStr(r->outbuf, ", \"delimiter\":\"");
      WBufAppendc(r->outbuf, d->mark_delimiter);
      WBufAppendc(r->outbuf, '"');
      break;
    }

    case MD_BLOCK_LI: {
      const MD_BLOCK_LI_DETAIL* d = (const MD_BLOCK_LI_DETAIL*)detail;
      if (d->is_task) {
        WBufAppendCStr(r->outbuf, "{\"_\":\"task\"");
        if (d->task_mark == 'x' || d->task_mark == 'X') {
          WBufAppendCStr(r->outbuf, ", \"complete\":true");
        }
      } else {
        WRITE_TYPE_START("li");
      }
      break;
    }

    case MD_BLOCK_CODE: {
      WRITE_TYPE_START("code");
      const MD_BLOCK_CODE_DETAIL* d = (const MD_BLOCK_CODE_DETAIL*)detail;
      if (d->lang.text != NULL) {
        WBufAppendCStr(r->outbuf, ", \"lang\":\"");
        writeAttribute(r, &d->lang);
        WBufAppendc(r->outbuf, '"');
      }
      if (d->info.text != NULL) {
        WBufAppendCStr(r->outbuf, ", \"info\":\"");
        writeAttribute(r, &d->info);
        WBufAppendc(r->outbuf, '"');
      }
      break;
    }

    case MD_BLOCK_TH:
    case MD_BLOCK_TD: {
      writeTypeStart(r, type == MD_BLOCK_TH ? "th" : "td", 2);
      const MD_BLOCK_TD_DETAIL* d = (const MD_BLOCK_TD_DETAIL*)detail;
      switch (d->align) {
        case MD_ALIGN_LEFT:   WBufAppendCStr(r->outbuf, ", \"align\":\"left\""); break;
        case MD_ALIGN_CENTER: WBufAppendCStr(r->outbuf, ", \"align\":\"center\""); break;
        case MD_ALIGN_RIGHT:  WBufAppendCStr(r->outbuf, ", \"align\":\"right\""); break;
        default: break; // unspecified alignment
      }
      break;
    }
  }

  #undef WRITE_TYPE_START

  WBufAppendCStr(r->outbuf, ", \"children\":[\n  ");
  return 0;
}
|
||||
|
||||
|
||||
/* Parser callback invoked when a block ends: closes the children array and the node object. Always returns 0. */
static int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) {
  JsonFormatter* fmt = (JsonFormatter*)userdata;
  WBuf* out = fmt->outbuf;
  WBufAppendCStr(out, "]},\n");
  return 0;
}
|
||||
|
||||
|
||||
/* Parser callback invoked when an inline span starts: writes the span's opening markup. Always returns 0. */
static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
  JsonFormatter* r = (JsonFormatter*) userdata;
  const char* tag = NULL;  /* fixed opening tag, when the span has one */

  switch(type) {
    case MD_SPAN_EM:                tag = "<em>"; break;
    case MD_SPAN_STRONG:            tag = "<b>"; break;
    case MD_SPAN_CODE:              tag = "<code>"; break;
    case MD_SPAN_DEL:               tag = "<del>"; break;
    case MD_SPAN_LATEXMATH:         tag = "<x-equation>"; break;
    case MD_SPAN_LATEXMATH_DISPLAY: tag = "<x-equation type=\"display\">"; break;

    /* spans whose opening tag carries attributes */
    case MD_SPAN_A:
      render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail);
      break;
    case MD_SPAN_IMG:
      render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail);
      break;
    case MD_SPAN_WIKILINK:
      render_open_wikilink_span(r, (MD_SPAN_WIKILINK_DETAIL*) detail);
      break;
  }

  if (tag != NULL) {
    WBufAppendCStr(r->outbuf, tag);
  }
  return 0;
}
|
||||
|
||||
/*
 * Parser callback invoked when an inline span ends: writes the span's
 * closing markup. Always returns 0.
 *
 * Images are completed here through render_close_img_span; without that
 * call the tag started by render_open_img_span (which stops right after
 * alt=") was never terminated.
 */
static int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
  JsonFormatter* r = (JsonFormatter*) userdata;

  switch(type) {
    case MD_SPAN_EM:                WBufAppendCStr(r->outbuf, "</em>"); break;
    case MD_SPAN_STRONG:            WBufAppendCStr(r->outbuf, "</b>"); break;
    case MD_SPAN_A:                 WBufAppendCStr(r->outbuf, "</a>"); break;
    case MD_SPAN_IMG:               render_close_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
    case MD_SPAN_CODE:              WBufAppendCStr(r->outbuf, "</code>"); break;
    case MD_SPAN_DEL:               WBufAppendCStr(r->outbuf, "</del>"); break;
    case MD_SPAN_LATEXMATH:         /*fall through*/
    case MD_SPAN_LATEXMATH_DISPLAY: WBufAppendCStr(r->outbuf, "</x-equation>"); break;
    case MD_SPAN_WIKILINK:          WBufAppendCStr(r->outbuf, "</x-wikilink>"); break;
  }

  return 0;
}
|
||||
|
||||
/*
 * Parser callback invoked for every piece of text: writes it as a JSON
 * string preceded by ", ". Always returns 0.
 *
 * Everything written between the quotes is kept valid inside a JSON string:
 * NUL becomes \u0000 ("\0" is not a JSON escape), a soft break becomes the
 * escape \n rather than a raw line feed, and raw HTML is JSON-escaped like
 * ordinary text because it may contain quotes and line breaks.
 */
static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata) {
  JsonFormatter* r = (JsonFormatter*)userdata;

  WBufAppendCStr(r->outbuf, ", \"");

  switch (type) {
    case MD_TEXT_NULLCHAR: WBufAppendCStr(r->outbuf, "\\u0000"); break;
    case MD_TEXT_BR:       WBufAppendCStr(r->outbuf, "<br>"); break;
    case MD_TEXT_SOFTBR:   WBufAppendCStr(r->outbuf, "\\n"); break;
    case MD_TEXT_HTML:     writeJsonEscaped(r, text, size); break;
    case MD_TEXT_ENTITY:   writeDecodeXmlEntity(r, text, size); break;
    default:               writeJsonEscaped(r, text, size); break;
  }

  WBufAppendc(r->outbuf, '"');

  return 0;
}
|
||||
|
||||
/*
 * Parse `input_size` bytes of Markdown at `input` and append the JSON
 * formatter's output to `outbuf`. `parser_flags` is handed to the parser
 * unchanged. Returns whatever md_parse returns.
 */
int fmt_json(const MD_CHAR* input, MD_SIZE input_size, WBuf* outbuf, u32 parser_flags) {
  JsonFormatter state = { outbuf };

  MD_PARSER callbacks = {
    0,
    parser_flags,
    enter_block_callback,
    leave_block_callback,
    enter_span_callback,
    leave_span_callback,
    text_callback,
    NULL, //debug_log_callback,
    NULL
  };

  const int status = md_parse(input, input_size, &callbacks, (void*) &state);
  return status;
}
|
||||
4
src/fmt_json.h
Normal file
4
src/fmt_json.h
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
#pragma once
|
||||
#include "wbuf.h"
|
||||
|
||||
// Formats `inputlen` bytes of Markdown at `input` as JSON appended to `outbuf`.
// Returns the status reported by the Markdown parser.
int fmt_json(const char* input, u32 inputlen, WBuf* outbuf, u32 parserFlags);
|
||||
66
src/md.c
Normal file
66
src/md.c
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
#include "wlib.h"
|
||||
#include "wbuf.h"
|
||||
#include "fmt_html.h"
|
||||
// #include "fmt_json.h"
|
||||
|
||||
// #include "md4c.h"
|
||||
/* If set, debug output from md_parse() is sent to stderr. */
|
||||
#define MD_RENDER_FLAG_DEBUG 0x0001
|
||||
#define MD_RENDER_FLAG_VERBATIM_ENTITIES 0x0002
|
||||
|
||||
// OutputFlags is a bitmask selecting the output format of parseUTF8.
typedef enum OutputFlags {
  OutputFlagsHTML = 0x1, // render HTML
} OutputFlags;
|
||||
|
||||
// ErrorCode enumerates the error codes this module passes to WErrSet.
typedef enum ErrorCode {
  ERR_NONE     = 0, // no error
  ERR_MD_PARSE = 1, // the markdown parser reported a failure
  ERR_OUTFLAGS = 2, // no output format was selected in the output flags
} ErrorCode;
|
||||
|
||||
|
||||
#if DEBUG
// Debug builds only: log a message from a constructor function.
__attribute__((constructor)) void init() {
  dlog("WASM INIT\n");
}
#endif
|
||||
|
||||
|
||||
// Shared, reusable output buffer.
|
||||
// Must make sure to never use this across calls from WASM host.
|
||||
static WBuf outbuf;
|
||||
|
||||
|
||||
// parseUTF8 renders the Markdown text at inbufptr (inbuflen bytes) into the
// shared output buffer.
//
// On success *outptr is set to the start of the rendered output and the number
// of output bytes is returned. On failure an error is recorded with WErrSet,
// *outptr is set to 0 and 0 is returned.
export size_t parseUTF8(
  const char*  inbufptr,
  u32          inbuflen,
  u32          parser_flags,
  OutputFlags  outflags,
  const char** outptr
) {
  dlog("parseUTF8 called with inbufptr=%p inbuflen=%u\n", inbufptr, inbuflen);

  WBufReset(&outbuf);

  // HTML is the only output format; anything else is an error.
  if ((outflags & OutputFlagsHTML) == 0) {
    WErrSet(ERR_OUTFLAGS, "no output format set in output flags");
    *outptr = 0;
    return 0;
  }

  // approximate the output size to minimize reallocations
  WBufReserve(&outbuf, inbuflen * 2);

  int status = fmt_html(inbufptr, inbuflen, &outbuf, parser_flags);
  if (status != 0) {
    // fmt_html returns the status of md_parse, which only fails in extreme
    // cases such as running out of memory. md4c does not provide error codes
    // or error messages.
    WErrSet(ERR_MD_PARSE, "md parser error");
    *outptr = 0;
    return 0;
  }

  *outptr = outbuf.start;
  return WBufLen(&outbuf);
}
|
||||
77
src/md.js
Normal file
77
src/md.js
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import { utf8, withTmpBytePtr, withOutPtr, werrCheck } from "./wlib"
|
||||
|
||||
export const ready = Module.ready
|
||||
|
||||
// console.time('wasm load')
|
||||
// Module.postRun.push(() => {
|
||||
// console.timeEnd('wasm load')
|
||||
// })
|
||||
|
||||
// ParseFlags are the parser option bits passed to the wasm parser.
// DEFAULT and NO_HTML are convenience combinations of the individual bits.
export const ParseFlags = (() => {
  const flags = {
    COLLAPSE_WHITESPACE:         0x0001, // In TEXT, collapse non-trivial whitespace into single ' '
    PERMISSIVE_ATX_HEADERS:      0x0002, // Do not require space in ATX headers ( ###header )
    PERMISSIVE_URL_AUTO_LINKS:   0x0004, // Recognize URLs as links even without <...>
    PERMISSIVE_EMAIL_AUTO_LINKS: 0x0008, // Recognize e-mails as links even without <...>
    NO_INDENTED_CODE_BLOCKS:     0x0010, // Disable indented code blocks. (Only fenced code works.)
    NO_HTML_BLOCKS:              0x0020, // Disable raw HTML blocks.
    NO_HTML_SPANS:               0x0040, // Disable raw HTML (inline).
    TABLES:                      0x0100, // Enable tables extension.
    STRIKETHROUGH:               0x0200, // Enable strikethrough extension.
    PERMISSIVE_WWW_AUTOLINKS:    0x0400, // Enable WWW autolinks (without proto; just 'www.')
    TASK_LISTS:                  0x0800, // Enable task list extension.
    LATEX_MATH_SPANS:            0x1000, // Enable $ and $$ containing LaTeX equations.
    WIKI_LINKS:                  0x2000, // Enable wiki links extension.
  }

  // Github style default flags
  flags.DEFAULT = (
    flags.COLLAPSE_WHITESPACE |
    flags.PERMISSIVE_ATX_HEADERS |
    flags.PERMISSIVE_URL_AUTO_LINKS |
    flags.STRIKETHROUGH |
    flags.TABLES |
    flags.TASK_LISTS
  )

  flags.NO_HTML = flags.NO_HTML_BLOCKS | flags.NO_HTML_SPANS

  return flags
})()
|
||||
|
||||
// OutputFlags are the output-format bits passed to the wasm parser.
const OutputFlags = {
  HTML: 0x1, // Output HTML
}
|
||||
|
||||
// defaultOptions holds the option values parse() uses for anything the
// caller does not specify.
const defaultOptions = {
  // parser option bits (see ParseFlags)
  parseFlags: ParseFlags.DEFAULT,

  // output format
  format: "html",

  // When true, parse() returns a Uint8Array view of heap memory instead of
  // a string.
  //
  // That view is only valid until the next call to parse(), since each call
  // reuses the same underlying memory. Call Uint8Array.slice() to make a copy
  // if you need to keep the data around.
  asMemoryView: false,
}
|
||||
|
||||
// parse renders Markdown to the format selected in options.
//
// source is either a string (encoded as UTF-8 before parsing) or a byte buffer.
// options overrides individual entries of defaultOptions.
// Returns a string, or the raw output buffer when options.asMemoryView is set.
// Throws if the wasm module reported an error.
export function parse(source, options) {
  if (options) {
    options = {__proto__:defaultOptions, ...options}
  } else {
    options = defaultOptions
  }

  let outflags = 0
  if (options.format == "html") {
    outflags |= OutputFlags.HTML
  }

  const inbuf = typeof source == "string" ? utf8.encode(source) : source

  const callParser = outptr => withTmpBytePtr(inbuf, (inptr, inlen) =>
    _parseUTF8(inptr, inlen, options.parseFlags, outflags, outptr)
  )
  const outbuf = withOutPtr(callParser)

  // check for error and throw if needed
  werrCheck()

  return options.asMemoryView ? outbuf : utf8.decode(outbuf)
}
|
||||
6286
src/md4c.c
Normal file
6286
src/md4c.c
Normal file
File diff suppressed because it is too large
Load diff
383
src/md4c.h
Normal file
383
src/md4c.h
Normal file
|
|
@ -0,0 +1,383 @@
|
|||
/*
|
||||
* MD4C: Markdown parser for C
|
||||
* (http://github.com/mity/md4c)
|
||||
*
|
||||
* Copyright (c) 2016-2019 Martin Mitas
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef MD4C_MARKDOWN_H
|
||||
#define MD4C_MARKDOWN_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined MD4C_USE_UTF16
|
||||
/* Magic to support UTF-16. Note that in order to use it, you have to define
|
||||
* the macro MD4C_USE_UTF16 both when building MD4C as well as when
|
||||
* including this header in your code. */
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
typedef WCHAR MD_CHAR;
|
||||
#else
|
||||
#error MD4C_USE_UTF16 is only supported on Windows.
|
||||
#endif
|
||||
#else
|
||||
typedef char MD_CHAR;
|
||||
#endif
|
||||
|
||||
typedef unsigned MD_SIZE;
|
||||
typedef unsigned MD_OFFSET;
|
||||
|
||||
|
||||
/* Block represents a part of document hierarchy structure like a paragraph
|
||||
* or list item.
|
||||
*/
|
||||
typedef enum MD_BLOCKTYPE {
|
||||
/* <body>...</body> */
|
||||
MD_BLOCK_DOC = 0,
|
||||
|
||||
/* <blockquote>...</blockquote> */
|
||||
MD_BLOCK_QUOTE,
|
||||
|
||||
/* <ul>...</ul>
|
||||
* Detail: Structure MD_BLOCK_UL_DETAIL. */
|
||||
MD_BLOCK_UL,
|
||||
|
||||
/* <ol>...</ol>
|
||||
* Detail: Structure MD_BLOCK_OL_DETAIL. */
|
||||
MD_BLOCK_OL,
|
||||
|
||||
/* <li>...</li>
|
||||
* Detail: Structure MD_BLOCK_LI_DETAIL. */
|
||||
MD_BLOCK_LI,
|
||||
|
||||
/* <hr> */
|
||||
MD_BLOCK_HR,
|
||||
|
||||
/* <h1>...</h1> (for levels up to 6)
|
||||
* Detail: Structure MD_BLOCK_H_DETAIL. */
|
||||
MD_BLOCK_H,
|
||||
|
||||
/* <pre><code>...</code></pre>
|
||||
* Note the text lines within code blocks are terminated with '\n'
|
||||
* instead of explicit MD_TEXT_BR. */
|
||||
MD_BLOCK_CODE,
|
||||
|
||||
/* Raw HTML block. This itself does not correspond to any particular HTML
|
||||
* tag. The contents of it _is_ raw HTML source intended to be put
|
||||
* in verbatim form to the HTML output. */
|
||||
MD_BLOCK_HTML,
|
||||
|
||||
/* <p>...</p> */
|
||||
MD_BLOCK_P,
|
||||
|
||||
/* <table>...</table> and its contents.
|
||||
* Detail: Structure MD_BLOCK_TD_DETAIL (used with MD_BLOCK_TH and MD_BLOCK_TD)
|
||||
* Note all of these are used only if extension MD_FLAG_TABLES is enabled. */
|
||||
MD_BLOCK_TABLE,
|
||||
MD_BLOCK_THEAD,
|
||||
MD_BLOCK_TBODY,
|
||||
MD_BLOCK_TR,
|
||||
MD_BLOCK_TH,
|
||||
MD_BLOCK_TD
|
||||
} MD_BLOCKTYPE;
|
||||
|
||||
/* Span represents an in-line piece of a document which should be rendered with
|
||||
* the same font, color and other attributes. A sequence of spans forms a block
|
||||
* like paragraph or list item. */
|
||||
typedef enum MD_SPANTYPE {
|
||||
/* <em>...</em> */
|
||||
MD_SPAN_EM,
|
||||
|
||||
/* <strong>...</strong> */
|
||||
MD_SPAN_STRONG,
|
||||
|
||||
/* <a href="xxx">...</a>
|
||||
* Detail: Structure MD_SPAN_A_DETAIL. */
|
||||
MD_SPAN_A,
|
||||
|
||||
/* <img src="xxx">...</a>
|
||||
* Detail: Structure MD_SPAN_IMG_DETAIL.
|
||||
* Note: Image text can contain nested spans and even nested images.
|
||||
* If rendered into ALT attribute of HTML <IMG> tag, it's responsibility
|
||||
* of the renderer to deal with it.
|
||||
*/
|
||||
MD_SPAN_IMG,
|
||||
|
||||
/* <code>...</code> */
|
||||
MD_SPAN_CODE,
|
||||
|
||||
/* <del>...</del>
|
||||
* Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
|
||||
*/
|
||||
MD_SPAN_DEL,
|
||||
|
||||
/* For recognizing inline ($) and display ($$) equations
|
||||
* Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
|
||||
*/
|
||||
MD_SPAN_LATEXMATH,
|
||||
MD_SPAN_LATEXMATH_DISPLAY,
|
||||
|
||||
/* Wiki links
|
||||
* Note: Recognized only when MD_FLAG_WIKILINKS is enabled.
|
||||
*/
|
||||
MD_SPAN_WIKILINK
|
||||
} MD_SPANTYPE;
|
||||
|
||||
/* Text is the actual textual contents of span. */
|
||||
typedef enum MD_TEXTTYPE {
|
||||
/* Normal text. */
|
||||
MD_TEXT_NORMAL = 0,
|
||||
|
||||
/* NULL character. CommonMark requires replacing NULL character with
|
||||
* the replacement char U+FFFD, so this allows caller to do that easily. */
|
||||
MD_TEXT_NULLCHAR,
|
||||
|
||||
/* Line breaks.
|
||||
* Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
|
||||
* or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */
|
||||
MD_TEXT_BR, /* <br> (hard break) */
|
||||
MD_TEXT_SOFTBR, /* '\n' in source text where it is not semantically meaningful (soft break) */
|
||||
|
||||
/* Entity.
|
||||
* (a) Named entity, e.g. &nbsp;
|
||||
* (Note MD4C does not have a list of known entities.
|
||||
* Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
|
||||
* treated as a named entity.)
|
||||
* (b) Numerical entity, e.g. &#1234;
|
||||
* (c) Hexadecimal entity, e.g. &#x12AB;
|
||||
*
|
||||
* As MD4C is mostly encoding agnostic, application gets the verbatim
|
||||
* entity text into the MD_RENDERER::text_callback(). */
|
||||
MD_TEXT_ENTITY,
|
||||
|
||||
/* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
|
||||
* If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
|
||||
* '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this
|
||||
* kind of text. */
|
||||
MD_TEXT_CODE,
|
||||
|
||||
/* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
|
||||
* an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used.
|
||||
* The text contains verbatim '\n' for the new lines. */
|
||||
MD_TEXT_HTML,
|
||||
|
||||
/* Text is inside an equation. This is processed the same way as inlined code
|
||||
* spans (`code`). */
|
||||
MD_TEXT_LATEXMATH
|
||||
} MD_TEXTTYPE;
|
||||
|
||||
|
||||
/* Alignment enumeration. */
|
||||
typedef enum MD_ALIGN {
|
||||
MD_ALIGN_DEFAULT = 0, /* When unspecified. */
|
||||
MD_ALIGN_LEFT,
|
||||
MD_ALIGN_CENTER,
|
||||
MD_ALIGN_RIGHT
|
||||
} MD_ALIGN;
|
||||
|
||||
|
||||
/* String attribute.
|
||||
*
|
||||
* This wraps strings which are outside of a normal text flow and which are
|
||||
* propagated within various detailed structures, but which still may contain
|
||||
* string portions of different types like e.g. entities.
|
||||
*
|
||||
* So, for example, lets consider an image has a title attribute string
|
||||
* set to "foo &quot; bar". (Note the string size is 14.)
|
||||
*
|
||||
* Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
|
||||
* -- [0]: "foo " (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
|
||||
* -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
|
||||
* -- [2]: " bar" (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
|
||||
* -- [3]: (n/a) (n/a ; substr_offsets[3] == 14)
|
||||
*
|
||||
* Note that these conditions are guaranteed:
|
||||
* -- substr_offsets[0] == 0
|
||||
* -- substr_offsets[LAST+1] == size
|
||||
* -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR substrings can appear.
|
||||
*/
|
||||
typedef struct MD_ATTRIBUTE {
|
||||
const MD_CHAR* text;
|
||||
MD_SIZE size;
|
||||
const MD_TEXTTYPE* substr_types;
|
||||
const MD_OFFSET* substr_offsets;
|
||||
} MD_ATTRIBUTE;
|
||||
|
||||
|
||||
/* Detailed info for MD_BLOCK_UL. */
|
||||
typedef struct MD_BLOCK_UL_DETAIL {
|
||||
int is_tight; /* Non-zero if tight list, zero if loose. */
|
||||
MD_CHAR mark; /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */
|
||||
} MD_BLOCK_UL_DETAIL;
|
||||
|
||||
/* Detailed info for MD_BLOCK_OL. */
|
||||
typedef struct MD_BLOCK_OL_DETAIL {
|
||||
unsigned start; /* Start index of the ordered list. */
|
||||
int is_tight; /* Non-zero if tight list, zero if loose. */
|
||||
MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */
|
||||
} MD_BLOCK_OL_DETAIL;
|
||||
|
||||
/* Detailed info for MD_BLOCK_LI. */
|
||||
typedef struct MD_BLOCK_LI_DETAIL {
|
||||
int is_task; /* Can be non-zero only with MD_FLAG_TASKLISTS */
|
||||
MD_CHAR task_mark; /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */
|
||||
MD_OFFSET task_mark_offset; /* If is_task, then offset in the input of the char between '[' and ']'. */
|
||||
} MD_BLOCK_LI_DETAIL;
|
||||
|
||||
/* Detailed info for MD_BLOCK_H. */
|
||||
typedef struct MD_BLOCK_H_DETAIL {
|
||||
unsigned level; /* Header level (1 - 6) */
|
||||
} MD_BLOCK_H_DETAIL;
|
||||
|
||||
/* Detailed info for MD_BLOCK_CODE. */
|
||||
typedef struct MD_BLOCK_CODE_DETAIL {
|
||||
MD_ATTRIBUTE info;
|
||||
MD_ATTRIBUTE lang;
|
||||
MD_CHAR fence_char; /* The character used for fenced code block; or zero for indented code block. */
|
||||
} MD_BLOCK_CODE_DETAIL;
|
||||
|
||||
/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
|
||||
typedef struct MD_BLOCK_TD_DETAIL {
|
||||
MD_ALIGN align;
|
||||
} MD_BLOCK_TD_DETAIL;
|
||||
|
||||
/* Detailed info for MD_SPAN_A. */
|
||||
typedef struct MD_SPAN_A_DETAIL {
|
||||
MD_ATTRIBUTE href;
|
||||
MD_ATTRIBUTE title;
|
||||
} MD_SPAN_A_DETAIL;
|
||||
|
||||
/* Detailed info for MD_SPAN_IMG. */
|
||||
typedef struct MD_SPAN_IMG_DETAIL {
|
||||
MD_ATTRIBUTE src;
|
||||
MD_ATTRIBUTE title;
|
||||
} MD_SPAN_IMG_DETAIL;
|
||||
|
||||
/* Detailed info for MD_SPAN_WIKILINK. */
|
||||
typedef struct MD_SPAN_WIKILINK {
|
||||
MD_ATTRIBUTE target;
|
||||
} MD_SPAN_WIKILINK_DETAIL;
|
||||
|
||||
/* Flags specifying extensions/deviations from CommonMark specification.
|
||||
*
|
||||
* By default (when MD_RENDERER::flags == 0), we follow CommonMark specification.
|
||||
* The following flags may allow some extensions or deviations from it.
|
||||
*/
|
||||
#define MD_FLAG_COLLAPSEWHITESPACE 0x0001 /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
|
||||
#define MD_FLAG_PERMISSIVEATXHEADERS 0x0002 /* Do not require space in ATX headers ( ###header ) */
|
||||
#define MD_FLAG_PERMISSIVEURLAUTOLINKS 0x0004 /* Recognize URLs as autolinks even without '<', '>' */
|
||||
#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS 0x0008 /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
|
||||
#define MD_FLAG_NOINDENTEDCODEBLOCKS 0x0010 /* Disable indented code blocks. (Only fenced code works.) */
|
||||
#define MD_FLAG_NOHTMLBLOCKS 0x0020 /* Disable raw HTML blocks. */
|
||||
#define MD_FLAG_NOHTMLSPANS 0x0040 /* Disable raw HTML (inline). */
|
||||
#define MD_FLAG_TABLES 0x0100 /* Enable tables extension. */
|
||||
#define MD_FLAG_STRIKETHROUGH 0x0200 /* Enable strikethrough extension. */
|
||||
#define MD_FLAG_PERMISSIVEWWWAUTOLINKS 0x0400 /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
|
||||
#define MD_FLAG_TASKLISTS 0x0800 /* Enable task list extension. */
|
||||
#define MD_FLAG_LATEXMATHSPANS 0x1000 /* Enable $ and $$ containing LaTeX equations. */
|
||||
#define MD_FLAG_WIKILINKS 0x2000 /* Enable wiki links extension. */
|
||||
|
||||
#define MD_FLAG_PERMISSIVEAUTOLINKS (MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS)
|
||||
#define MD_FLAG_NOHTML (MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS)
|
||||
|
||||
/* Convenient sets of flags corresponding to well-known Markdown dialects.
|
||||
*
|
||||
* Note we may only support subset of features of the referred dialect.
|
||||
* The constant just enables those extensions which bring us as close as
|
||||
* possible given what features we implement.
|
||||
*
|
||||
* ABI compatibility note: Meaning of these can change in time as new
|
||||
* extensions, bringing the dialect closer to the original, are implemented.
|
||||
*/
|
||||
#define MD_DIALECT_COMMONMARK 0
|
||||
#define MD_DIALECT_GITHUB (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS)
|
||||
|
||||
/* Renderer structure.
|
||||
*/
|
||||
typedef struct MD_PARSER {
|
||||
/* Reserved. Set to zero.
|
||||
*/
|
||||
unsigned abi_version;
|
||||
|
||||
/* Dialect options. Bitmask of MD_FLAG_xxxx values.
|
||||
*/
|
||||
unsigned flags;
|
||||
|
||||
/* Caller-provided rendering callbacks.
|
||||
*
|
||||
* For some block/span types, more detailed information is provided in a
|
||||
* type-specific structure pointed by the argument 'detail'.
|
||||
*
|
||||
* The last argument of all callbacks, 'userdata', is just propagated from
|
||||
* md_parse() and is available for any use by the application.
|
||||
*
|
||||
* Note any strings provided to the callbacks as their arguments or as
|
||||
* members of any detail structure are generally not zero-terminated.
|
||||
* Application has to take the respective size information into account.
|
||||
*
|
||||
* Callbacks may abort further parsing of the document by returning non-zero.
|
||||
*/
|
||||
int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
|
||||
int (*enter_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
int (*leave_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
|
||||
int (*text)(MD_TEXTTYPE /*type*/, const MD_CHAR* /*text*/, MD_SIZE /*size*/, void* /*userdata*/);
|
||||
|
||||
/* Debug callback. Optional (may be NULL).
|
||||
*
|
||||
* If provided and something goes wrong, this function gets called.
|
||||
* This is intended for debugging and problem diagnosis for developers;
|
||||
* it is not intended to provide any errors suitable for displaying to an
|
||||
* end user.
|
||||
*/
|
||||
void (*debug_log)(const char* /*msg*/, void* /*userdata*/);
|
||||
|
||||
/* Reserved. Set to NULL.
|
||||
*/
|
||||
void (*syntax)(void);
|
||||
} MD_PARSER;
|
||||
|
||||
|
||||
/* For backward compatibility. Do not use in new code. */
|
||||
typedef MD_PARSER MD_RENDERER;
|
||||
|
||||
|
||||
/* Parse the Markdown document stored in the string 'text' of size 'size'.
|
||||
* The renderer provides callbacks to be called during the parsing so the
|
||||
* caller can render the document on the screen or convert the Markdown
|
||||
* to another format.
|
||||
*
|
||||
* Zero is returned on success. If a runtime error occurs (e.g. a memory
|
||||
* allocation fails), -1 is returned. If the processing is aborted due to any callback
|
||||
* returning non-zero, the return value of that callback is returned by md_parse().
|
||||
*/
|
||||
int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" { */
|
||||
#endif
|
||||
|
||||
#endif /* MD4C_MARKDOWN_H */
|
||||
187
src/wbuf.c
Normal file
187
src/wbuf.c
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
#include "wbuf.h"
#include <stdlib.h>
|
||||
|
||||
// WBufInit puts b into the empty state: all three pointers are zero.
void WBufInit(WBuf* b) {
  b->start = b->end = b->ptr = 0;
}
|
||||
|
||||
// WBufFree releases the memory owned by b and returns b to the empty state,
// so that the buffer can be safely reused (or freed again) afterwards instead
// of holding pointers into freed memory.
void WBufFree(WBuf* b) {
  free(b->start);
  b->start = 0;
  b->end = 0;
  b->ptr = 0;
}
|
||||
|
||||
// WBufReset discards the contents of b but keeps its allocated memory.
//
// Only the write cursor is rewound. b->end is left pointing at the end of the
// allocation so that WBufCap/WBufAvail keep reporting the real capacity and
// the next writes reuse the existing memory instead of reallocating.
void WBufReset(WBuf* b) {
  b->ptr = b->start;
}
|
||||
|
||||
// WBufCap returns the total capacity (size) of b in bytes.
inline size_t WBufCap(WBuf* b) {
  return b->end - b->start;
}
|
||||
// WBufLen returns the number of valid bytes at the start of b.
inline size_t WBufLen(WBuf* b) {
  return b->ptr - b->start;
}
|
||||
// WBufAvail returns the number of bytes that can be written before b is full.
inline size_t WBufAvail(WBuf* b) {
  return b->end - b->ptr;
}
|
||||
|
||||
// grows buffer so that there is at least minspace available space
|
||||
static void WBufGrow(WBuf* b, size_t minspace) {
|
||||
// size_t avail = b->end - b->ptr;
|
||||
size_t len = WBufLen(b); // store len before changing b
|
||||
size_t cap = WBufCap(b);
|
||||
do {
|
||||
if (cap == 0) {
|
||||
cap = 512;
|
||||
} else {
|
||||
cap *= 2;
|
||||
}
|
||||
} while (cap - len < minspace);
|
||||
b->start = realloc(b->start, cap);
|
||||
b->end = b->start + cap;
|
||||
b->ptr = b->start + len;
|
||||
}
|
||||
|
||||
// WBufReserve makes sure at least minspace bytes can be appended to b
// without further growth.
void WBufReserve(WBuf* b, size_t minspace) {
  if (WBufAvail(b) >= minspace) {
    return;
  }
  WBufGrow(b, minspace);
}
|
||||
|
||||
// WBufAppendc appends the single byte c to b, growing b when it is full.
void WBufAppendc(WBuf* b, char c) {
  if (WBufAvail(b) == 0) {
    WBufGrow(b, 1);
  }
  *b->ptr = c;
  b->ptr++;
}
|
||||
|
||||
// WBufAppendBytes appends len bytes read from bytes to b, growing b as needed.
void WBufAppendBytes(WBuf* b, const void* bytes, size_t len) {
  size_t avail = WBufAvail(b);
  if (len > avail) {
    WBufGrow(b, len);
  }
  char* dst = b->ptr;
  memcpy(dst, bytes, len);
  b->ptr = dst + len;
}
|
||||
|
||||
// WBufAppendStr appends the nul-terminated string pch to b
// (the terminating nul is not copied).
void WBufAppendStr(WBuf* b, const char* pch) {
  size_t n = strlen(pch);
  WBufAppendBytes(b, pch, n);
}
|
||||
|
||||
// _WBufAppendHtml appends the nul-terminated string pch to b with HTML
// escaping applied:
//
//   '&' -> "&amp;"   '<' -> "&lt;"   '>' -> "&gt;"
//   '"' -> "&quot;"  (only when isattr is true; copied verbatim otherwise)
//
// Every write goes through a bounds-checked append. The up-front reservation
// only covers the unescaped length, so entity expansion may need more room
// than was reserved.
void _WBufAppendHtml(WBuf* b, const char* pch, bool isattr) {
  // one reservation covers the common case of few or no escapes
  WBufReserve(b, strlen(pch));

  for (; *pch; pch++) {
    const char* entity = NULL;

    switch (*pch) {
      case '&': entity = "&amp;"; break;
      case '<': entity = "&lt;"; break;
      case '>': entity = "&gt;"; break;
      case '"':
        if (isattr) {
          entity = "&quot;";
        }
        break;
      default:
        break;
    }

    if (entity != NULL) {
      WBufAppendBytes(b, entity, strlen(entity));
    } else {
      WBufAppendc(b, *pch);
    }
  }
}
|
||||
|
||||
|
||||
// fmtu32 writes the textual representation of n in the given radix to buf
// and returns the number of bytes written. No terminating nul is written.
//
// Digits above 9 are written as upper-case letters A-Z.
// n == 0 always produces "0". For any other n, a radix outside [2-36]
// produces no output and returns 0.
//
// At most 32 bytes are written (0xFFFFFFFF in radix 2), so buf must have room
// for 32 bytes.
static inline size_t fmtu32(u32 n, u32 radix, char* buf) {
  static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

  if (n == 0) {
    *buf = '0';
    return 1;
  }

  if (radix < 2 || radix > 36) {
    return 0;
  }

  // Digits come out least-significant first, so build the string backwards
  // in a scratch buffer and then copy it to the start of buf in one go.
  char tmp[32];
  char* const tmpend = tmp + sizeof(tmp);
  char* p = tmpend;

  for (; n; n /= radix) {
    *--p = digits[n % radix];
  }

  size_t len = (size_t)(tmpend - p);
  memcpy(buf, p, len);
  return len;
}
|
||||
|
||||
|
||||
// WBufAppendU32 appends the textual form of n in the given radix to b.
void WBufAppendU32(WBuf* b, u32 n, u32 radix) {
  // reserve the 32 bytes fmtu32 may use
  WBufReserve(b, 32);
  size_t written = fmtu32(n, radix, b->ptr);
  b->ptr += written;
}
|
||||
|
||||
|
||||
// static void WBufAppendSlug(WBuf* b, const char* text) {
|
||||
// size_t len = strlen(text);
|
||||
// WBufReserve(b, len);
|
||||
// while (*text) {
|
||||
// char c = *text++;
|
||||
// if ((c >= '0' && c <= '9') ||
|
||||
// (c >= 'a' && c <= 'z') ||
|
||||
// c == '.' ||
|
||||
// c == '-'
|
||||
// ) {
|
||||
// *(b->ptr++) = c;
|
||||
// } else if ((c >= '0' && c <= '9') ||
|
||||
// (c >= 'a' && c <= 'z') ||
|
||||
// (c >= 'A' && c <= 'Z') ||
|
||||
// c == '.' ||
|
||||
// c == '-'
|
||||
// ) {
|
||||
// *(b->ptr++) = tolower(c);
|
||||
// } else if (c == ' ') {
|
||||
// *(b->ptr++) = '-';
|
||||
// } // else: ignore
|
||||
// }
|
||||
// }
|
||||
29
src/wbuf.h
Normal file
29
src/wbuf.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#pragma once
|
||||
#include "common.h"
|
||||
|
||||
// WBuf is a growable byte buffer described by three pointers:
// capacity is end - start, length is ptr - start.
typedef struct WBuf_s {
  char* start; // first byte of the buffer's memory
  char* end;   // one past the last byte of the buffer's memory (capacity limit)
  char* ptr;   // one past the last valid byte (write cursor)
} WBuf;
|
||||
|
||||
void WBufInit(WBuf*);
|
||||
void WBufFree(WBuf*);
|
||||
void WBufReset(WBuf*);
|
||||
|
||||
size_t WBufCap(WBuf*); // total capacity (size)
|
||||
size_t WBufLen(WBuf*); // valid bytes at start
|
||||
size_t WBufAvail(WBuf*); // bytes available
|
||||
|
||||
void WBufReserve(WBuf*, size_t minspace);
|
||||
|
||||
void WBufAppendc(WBuf*, char c);
|
||||
void WBufAppendBytes(WBuf*, const void* bytes, size_t len);
|
||||
void WBufAppendStr(WBuf*, const char* pch);
|
||||
#define WBufAppendCStr(b, cstr) WBufAppendBytes((b), (cstr), strlen(cstr))
|
||||
#define WBufAppendHtml(b, pch) _WBufAppendHtml(b, pch, false)
|
||||
#define WBufAppendHtmlAttr(b, pch) _WBufAppendHtml(b, pch, true)
|
||||
void _WBufAppendHtml(WBuf*, const char* pch, bool isattr);
|
||||
|
||||
// append u32 integer n. radix must be in range [2-36]
|
||||
void WBufAppendU32(WBuf*, u32 n, u32 radix);
|
||||
46
src/wlib.c
Normal file
46
src/wlib.c
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
#include "common.h"
|
||||
#include "wlib.h"
|
||||
|
||||
// globals of last error
|
||||
static u32 errcode = 0;
|
||||
static const char* errmsg = "";
|
||||
static bool errmsg_free = false;
|
||||
|
||||
|
||||
// wrealloc is an exported wrapper around realloc.
export void* wrealloc(void* ptr, size_t size) {
  void* mem = realloc(ptr, size);
  return mem;
}
|
||||
|
||||
// wfree is an exported wrapper around free.
export void wfree(void* ptr) {
  free(ptr);
  return;
}
|
||||
|
||||
|
||||
// WErrGetCode returns the code of the last error (0 when there is none).
export u32 WErrGetCode() {
  u32 code = errcode;
  return code;
}
|
||||
|
||||
export const char* WErrGetMsg() {
|
||||
return errmsg;
|
||||
}
|
||||
|
||||
|
||||
export void WErrClear() {
|
||||
errcode = 0;
|
||||
if (errmsg_free) {
|
||||
free((void*)errmsg);
|
||||
errmsg_free = false;
|
||||
}
|
||||
errmsg = NULL;
|
||||
}
|
||||
|
||||
|
||||
// WErrSet replaces the current error state with code and msg.
// Any previous error is cleared first. A code of 0 means "no error": the
// state stays cleared and false is returned. Otherwise true is returned.
// msg is stored as-is (not copied).
bool WErrSet(u32 code, const char* msg) {
  WErrClear();
  if (code == 0) {
    return false;
  }
  errcode = code;
  errmsg = msg;
  return true;
}
|
||||
17
src/wlib.h
Normal file
17
src/wlib.h
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
#pragma once
|
||||
|
||||
// WErrOK == no error
|
||||
#define WErrOK 0
|
||||
|
||||
// WErrSet sets the code and message. Returns false if code == 0, true otherwise.
|
||||
bool WErrSet(u32 code, const char* msg);
|
||||
|
||||
// clear error state
|
||||
// sets code to WErrOK and message to ""
|
||||
void WErrClear();
|
||||
|
||||
// read code
|
||||
u32 WErrGetCode();
|
||||
|
||||
// read message
|
||||
const char* WErrGetMsg();
|
||||
382
src/wlib.js
Normal file
382
src/wlib.js
Normal file
|
|
@ -0,0 +1,382 @@
|
|||
// WError is the Error subclass used for errors reported by a wasm module.
// The numeric error code is available as the "code" property.
//
export class WError extends Error {
  constructor(code, message, file, line) {
    const srcFile = file || "wasm"
    const srcLine = line || 0
    super(message, srcFile, srcLine)
    this.name = "WError"
    this.code = code
  }
}
|
||||
|
||||
// Get & clear the last WErr. Returns null if there was no error.
// Uses a descriptive name to help in stack traces.
export function error_from_wasm() { // :WError|null
  const code = _WErrGetCode()
  if (code === 0) {
    return null
  }
  // read the message before clearing the error state
  const msgptr = _WErrGetMsg()
  const message = msgptr !== 0 ? UTF8ArrayToString(HEAPU8, msgptr) : ""
  _WErrClear()
  return new WError(code, message)
}
|
||||
|
||||
// werrCheck throws the pending wasm error, if there is one.
export function werrCheck() {
  const pending = error_from_wasm()
  if (!pending) {
    return
  }
  throw pending
}
|
||||
|
||||
// bytebuf converts its argument to a Uint8Array.
// A Uint8Array is returned as-is; anything else is passed to the
// Uint8Array constructor.
//
// bytebuf(buf :ArrayBuffer|Iterable<byte>|byte[]) : Uint8Array
//
export function bytebuf(buf) {
  return buf instanceof Uint8Array ? buf : new Uint8Array(buf)
}
|
||||
|
||||
// mallocbuf allocates length bytes in the WASM heap and copies byteArray
// to the allocated location.
// Returns the address of the allocated memory.
//
export function mallocbuf(byteArray, length) {
  const addr = _wrealloc(0, length)
  HEAPU8.set(byteArray, addr)
  return addr
}
|
||||
|
||||
// malloc32 allocates at least size bytes on a 32-bit boundary.
// Returns two values: original_address and aligned_address.
// You should call free() with original_address.
//
// Only size + 3 bytes are allocated, so the aligned address may be at most
// 3 bytes past the original one; an already aligned address is used as-is.
//
export function malloc32(size) {
  const ptr_orig = _wrealloc(0, size + 3)
  const padding = (4 - (ptr_orig % 4)) % 4
  return [ptr_orig, ptr_orig + padding]
}
|
||||
|
||||
// malloc16 allocates at least size bytes on a 16-bit boundary.
// Returns two values: original_address and aligned_address.
// You should call free() with original_address.
//
export function malloc16(size) {
  const ptr_orig = _wrealloc(0, size + 1)
  // an odd address is moved up by one byte
  const aligned = ptr_orig % 2 === 0 ? ptr_orig : ptr_orig + 1
  return [ptr_orig, aligned]
}
|
||||
|
||||
// free releases wasm heap memory at address ptr
export function free(ptr) {
  const addr = ptr
  _wfree(addr)
}
|
||||
|
||||
|
||||
// writeUTF16Str writes the UTF-16 code units of str to address ptr.
// ptr must be aligned on a 16-bit boundary.
//
export function writeUTF16Str(str, ptr) {
  let i = 0
  while (i < str.length) {
    // HEAP16 is indexed in 16-bit units, hence the shift of the byte address
    HEAP16[(ptr + i * 2) >> 1] = str.charCodeAt(i)
    i++
  }
}
|
||||
|
||||
|
||||
// withTmpBytePtr takes an ArrayBuffer or Uint8Array and:
// 1. copies it into the WASM module memory
// 2. calls fn(pointer, size)
// 3. calls free(pointer), also when fn throws
//
// Returns whatever fn returns.
//
export function withTmpBytePtr(buf, fn) {
  const u8buf = bytebuf(buf)
  const size = u8buf.length
  const ptr = mallocbuf(u8buf, size)
  try {
    return fn(ptr, size)
  } finally {
    // release the temporary copy even if fn threw, so it is never leaked
    free(ptr)
  }
}
|
||||
|
||||
|
||||
// withUTF16Str takes a JavaScript string and:
// 1. copies it into the WASM module memory as UTF16, 16-bit aligned
// 2. calls fn(aligned_pointer, bytesize)
// 3. calls free(original_pointer), also when fn throws
//
// Returns whatever fn returns.
//
export function withUTF16Str(str, fn) {
  const bytesize = str.length * 2
  const ptr = _wrealloc(0, bytesize + 1) // +1 for alignment
  try {
    const aligned_ptr = (ptr % 2 !== 0) ? ptr + 1 : ptr
    writeUTF16Str(str, aligned_ptr)
    return fn(aligned_ptr, bytesize)
  } finally {
    // release the temporary copy even if fn threw, so it is never leaked
    free(ptr)
  }
}
|
||||
|
||||
|
||||
// cstrlen returns the number of bytes in HEAP8 between address ptr and the
// first zero byte at or after it (the terminator itself is not counted).
//
export function cstrlen(ptr) {
  // ">>> 0" (not ">> 0") so addresses at or above 2 GiB stay positive
  const start = ptr >>> 0
  let end = start
  while (HEAP8[end]) { end++ }
  return end - start
}
|
||||
|
||||
|
||||
// asciicstr interprets memory in buf starting at offset as an ASCII-encoded,
// nul-terminated string and returns it as a JavaScript string.
//
// Reading stops at the first zero byte or at the end of buf, whichever comes
// first, so a missing terminator cannot make the loop run forever.
//
export function asciicstr(buf, offset) {
  let str = ''
  let i = offset >>> 0
  while (true) {
    const b = buf[i++]
    // undefined means we ran off the end of buf
    if (b === 0 || b === undefined) { break }
    str += String.fromCharCode(b)
  }
  return str
}
|
||||
|
||||
// cstrStack writes str, UTF-8 encoded and nul-terminated ("C string"), into
// memory obtained from stackAlloc and returns its address.
// Returns 0 without allocating when str is null, undefined or 0.
//
export function cstrStack(str) {
  if (str === null || str === undefined || str === 0) {
    return 0
  }
  // reserve 4 bytes per UTF-16 code unit plus one for the terminator
  const capacity = (str.length << 2) + 1
  const addr = stackAlloc(capacity)
  stringToUTF8Array(str, HEAPU8, addr, capacity)
  return addr
}
|
||||
|
||||
// tmpPtr is the address of a 4-byte scratch slot in the wasm heap.
// strFromUTF8Ptr and withOutPtr pass it to their callback as an "out" pointer
// and then read the address the callback stored there.
// It stays 0 until the postRun hook below has run.
let tmpPtr = 0

Module.postRun.push(function allocTmpPtr() {
  tmpPtr = _wrealloc(0, 4)
})
|
||||
|
||||
|
||||
// strFromUTF8Ptr provides a pointer-sized integer that can be written
// to by fn. fn is expected to return the number of bytes written to the
// address pointed to by p. The address is dereferenced and the written
// number of bytes are interpreted as UTF8, returning a JS string.
// When fn returns 0 the result is "" and the address is not read.
//
// This is useful for efficiently converting UTF8 strings that are
// already allocated inside the library to JavaScript strings.
//
// Example:
//   strFromUTF8Ptr((p)=> _FooGetName(ptr, p))
//   ...
//   u32 EXPORT FooGetName(Foo* f, const char** p) {
//     *p = f->name_ptr;
//     return f->name_len;
//   }
//
// Synopsis:
//   strFromUTF8Ptr( fn :(p:int)=>int )
//
export function strFromUTF8Ptr(fn) {
  const len = fn(tmpPtr)
  if (!len) {
    return ""
  }
  // HEAP32 yields signed values; ">>> 0" reinterprets the stored address as
  // unsigned so that addresses at or above 2 GiB do not become negative.
  const offs = HEAP32[tmpPtr >>> 2] >>> 0
  return utf8.decode(HEAPU8.subarray(offs, offs + len))
}
|
||||
|
||||
// withOutPtr facilitates the following:
//
// 1. calls fn with an address to memory that fits a pointer.
//    fn(outptr) is expected to:
//    a. Write some data into heap memory
//    b. Write the address of that data at outptr (i.e. *outptr = heapaddr)
//    c. Return the length of data written
//
// 2. withOutPtr reads the address from outptr
//    a. If the address is 0 (NULL), returns null
//    b. Else a slice of the heap memory is created, starting at *outptr
//       and ending at ((*outptr) + length_returned_by_fn).
//       The address is stored on the buffer as "heapAddr" and the buffer
//       is returned.
//
// The returned buffer is created with HEAPU8.subarray, i.e. it refers to the
// heap memory rather than holding a copy of it.
//
// It is important to free the memory of the returned buffer when the caller is done.
// How to free it is implementation specific, so this function can not help you
// with that; use heapAddr to hand the address back to the library.
//
// The return type is as follows:
//   interface HeapData extends Uint8Array {
//     readonly heapAddr :number // address in heap == *outptr
//   }
//
// Example:
//
//   // WASM module, in C:
//   typedef struct Color_ { char r, g, b; } Color;
//   size_t newColor(const Color** outp) {
//     Color* c = (Color*)malloc(sizeof(Color));
//     c->r = 0xFF;
//     c->g = 0xCA;
//     c->b = 0x0;
//     *outp = c;
//     return sizeof(Color);
//   }
//   void freeColor(const Color* p) {
//     free(p);
//   }
//
//   // JavaScript
//   let color = withOutPtr(_newColor)
//   console.log("RGB:", color[0], color[1], color[2])
//   _freeColor(color.heapAddr)
//
export function withOutPtr(fn) {
  const len = fn(tmpPtr)
  // HEAP32 yields signed values; ">>> 0" reinterprets the stored address as
  // unsigned so that addresses at or above 2 GiB do not become negative.
  const addr = HEAP32[tmpPtr >>> 2] >>> 0
  if (addr === 0) {
    return null
  }
  const buf = HEAPU8.subarray(addr, addr + len)
  buf.heapAddr = addr
  return buf
}
|
||||
|
||||
|
||||
|
||||
// withStackFrame records the current stack position with stackSave, runs fn
// (which may allocate stack memory), and hands the recorded position back to
// stackRestore afterwards, whether fn returned normally or threw.
// Returns the return value of fn.
//
export function withStackFrame(fn) {
  const savedStack = stackSave()
  let result
  try {
    result = fn()
  } finally {
    stackRestore(savedStack)
  }
  return result
}
|
||||
|
||||
|
||||
// ureadU16 combines buf[addr] (low byte) and buf[addr + 1] (high byte) into
// a little-endian unsigned 16-bit integer.
//
export function ureadU16(buf, addr) {
  const lo = buf[addr]
  const hi = buf[addr + 1]
  return ((hi << 8) | lo) >>> 0
}
|
||||
|
||||
// ureadI16 combines buf[addr] (low byte) and buf[addr + 1] (high byte) into
// a little-endian signed 16-bit integer.
//
export function ureadI16(buf, addr) {
  const lo = buf[addr]
  const hi = buf[addr + 1]
  const unsigned = (hi << 8) | lo
  if (unsigned >= 0x8000) {
    // top bit set: map 0x8000..0xFFFF onto -32768..-1
    return unsigned - 0x10000
  }
  return unsigned
}
|
||||
|
||||
// ureadU32 combines the four values buf[addr] .. buf[addr + 3] (least
// significant byte first, i.e. little endian) into an unsigned 32-bit integer.
//
export function ureadU32(buf, addr) {
  const b0 = buf[addr]
  const b1 = buf[addr + 1]
  const b2 = buf[addr + 2]
  const b3 = buf[addr + 3]
  // bitwise OR produces a signed int32; ">>> 0" reinterprets it as unsigned
  return ((b3 << 24) | (b2 << 16) | (b1 << 8) | b0) >>> 0
}
|
||||
|
||||
// ureadI32 combines the four values buf[addr] .. buf[addr + 3] (least
// significant byte first, i.e. little endian) into a signed 32-bit integer.
//
export function ureadI32(buf, addr) {
  let n = 0
  // fold from the most significant byte (addr + 3) down to the least (addr)
  for (let i = 3; i >= 0; i--) {
    n = (n << 8) | buf[addr + i]
  }
  return n
}
|
||||
|
||||
|
||||
// export function ureadI16be(buf, addr) {
|
||||
// let n = ((buf[addr] << 8) | (buf[addr + 1]))
|
||||
// return n >= 0x8000 ? n - 0x10000 : n
|
||||
// }
|
||||
|
||||
// export function ureadU16be(buf, addr) {
|
||||
// return ((buf[addr] << 8) | (buf[addr + 1])) >>> 0
|
||||
// }
|
||||
|
||||
// export function ureadU32be(buf, addr) {
|
||||
// return (
|
||||
// (buf[addr] << 24) |
|
||||
// (buf[addr + 1] << 16) |
|
||||
// (buf[addr + 2] << 8) |
|
||||
// (buf[addr + 3])
|
||||
// ) >>> 0
|
||||
// }
|
||||
|
||||
// export function ureadI32be(buf, addr) {
|
||||
// return (
|
||||
// (buf[addr] << 24) |
|
||||
// (buf[addr + 1] << 16) |
|
||||
// (buf[addr + 2] << 8) |
|
||||
// (buf[addr + 3])
|
||||
// )
|
||||
// }
|
||||
|
||||
|
||||
// asciiStrToU32 converts a <=4 character string to a u32.
// For example, string -> hb_tag_t
//
// The first character ends up in the most significant byte. Positions past
// the end of a shorter string contribute a zero byte.
// The result is always a non-negative (unsigned) number.
//
export function asciiStrToU32(s) {
  // Note: Should match #define HB_TAG(c1,c2,c3,c4) in hb-common.h
  return (
    ((s.charCodeAt(0) & 0xff) << 24) |
    ((s.charCodeAt(1) & 0xff) << 16) |
    ((s.charCodeAt(2) & 0xff) << 8) |
    (s.charCodeAt(3) & 0xff)
  ) >>> 0 // "|" yields a signed int32; ">>> 0" must come last to get a u32
}

// hbtag is an alias of asciiStrToU32
export const hbtag = asciiStrToU32
|
||||
|
||||
// u32ToAsciiStr converts a u32 to a ASCII string
|
||||
// For example, hb_tag_t -> string
|
||||
//
|
||||
export function u32ToAsciiStr(u) {
|
||||
return String.fromCharCode(
|
||||
((u >> 24) & 0xff),
|
||||
((u >> 16) & 0xff),
|
||||
((u >> 8) & 0xff),
|
||||
((u >> 0) & 0xff)
|
||||
)
|
||||
}
|
||||
|
||||
// interface utf8 {
//   encode(s :string) :Uint8Array
//   decode(b :Uint8Array) :string
// }
//
// The implementation is chosen once, when this module is evaluated:
//   1. TextEncoder/TextDecoder when TextEncoder is available
//   2. else Buffer when it is available
//   3. else a byte-per-character fallback (NOT real UTF-8: only the low
//      8 bits of each UTF-16 code unit survive a round trip)
//
export const utf8 = typeof TextEncoder != 'undefined' ? (() => {
  // Modern browsers
  const enc = new TextEncoder("utf-8")
  const dec = new TextDecoder("utf-8")
  return {
    encode: s => enc.encode(s),
    decode: b => dec.decode(b),
  }
})() : typeof Buffer != 'undefined' ? {
  // Nodejs
  encode: s => new Uint8Array(Buffer.from(s, 'utf-8')),
  decode: b =>
    Buffer.from(b.buffer, b.byteOffset, b.byteLength).toString('utf8'),
} : {
  // Some other pesky JS environment
  encode: utf8FallbackEncode,
  decode: utf8FallbackDecode,
}

// utf8FallbackEncode maps every UTF-16 code unit of s to one byte, keeping
// only its low 8 bits.
function utf8FallbackEncode(s) {
  const bytes = new Uint8Array(s.length)
  for (let i = 0; i < s.length; ++i) {
    bytes[i] = 0xff & s.charCodeAt(i)
  }
  return bytes
}

// utf8FallbackDecode maps every byte of b to the character with that code.
// (String(b) would instead produce the decimal values joined by commas.)
function utf8FallbackDecode(b) {
  let s = ''
  for (let i = 0; i < b.length; ++i) {
    s += String.fromCharCode(b[i])
  }
  return s
}
|
||||
|
||||
|
||||
// fixedToFloat / floatToFixed convert between 16.16 fixed-point numbers and
// 64-bit floating-point numbers.
//
// fixedToFloat divides the fixed-point value by 2^16.
export function fixedToFloat(i) {
  const ONE = 0x10000 // 1.0 in 16.16 fixed point
  return i / ONE
}
|
||||
// floatToFixed multiplies f by 2^16 and converts the product to a 32-bit
// integer, dropping the fractional part (16.16 fixed point).
export function floatToFixed(f) {
  const scaled = f * 0x10000
  return scaled | 0
}
|
||||
|
||||
43
wasmc.js
Normal file
43
wasmc.js
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
// Build configuration. `debug`, `builddir` and `module` are not defined in
// this file; they are expected to be supplied by the tool that evaluates it.
const pkg = require("./package.json")
const outdir = debug ? builddir : "dist"

// settings shared by every build target below
const common = {
  name: "markdown",
  out: `${outdir}/markdown.js`,
  jsentry: "src/md.js",
  sources: [
    "src/wlib.c",
    "src/wbuf.c",
    "src/md.c",
    "src/md4c.c",
    "src/fmt_html.c",
    // "src/fmt_json.c",
  ],
  cflags: debug ? ["-DDEBUG=1"] : [],
  constants: {
    VERSION: pkg.version,
  },
}

// the first two targets are only declared for non-debug builds
if (!debug) {
  // generic js module
  module({ ...common })

  // node cjs module
  module({
    ...common,
    name: "markdown-node",
    out: `${outdir}/markdown.node.js`,
    target: "node",
    embed: true,
  })
}

// node es module (declared for debug and non-debug builds alike)
module({
  ...common,
  name: "markdown-es",
  out: `${outdir}/markdown.es.js`,
  format: "es",
  target: "node",
  embed: true,
})
|
||||
Loading…
Reference in a new issue