From 4b48783c3e209d900a459a2bd76aa314cb62f7e1 Mon Sep 17 00:00:00 2001
From: Rasmus Andersson
Date: Wed, 30 Jun 2021 20:01:49 -0700
Subject: [PATCH] Disallow "javascript:" URIs in links. Adds option allowJSURIs
to explicitly allow it. Closes #14
---
example/example.html | 1 +
example/example.md | 2 ++
markdown.d.ts | 5 ++++-
src/common.h | 7 +++++++
src/fmt_html.c | 27 ++++++++++++++++++++-------
src/fmt_html.h | 8 +++-----
src/md.c | 13 ++-----------
src/md.js | 10 ++++++----
8 files changed, 45 insertions(+), 28 deletions(-)
diff --git a/example/example.html b/example/example.html
index e46b151..7ecb278 100644
--- a/example/example.html
+++ b/example/example.html
@@ -79,3 +79,4 @@ breaks.
Anöt######her!
?!Anöt//her!!
?!!
+XSS test
diff --git a/example/example.md b/example/example.md
index 5d5ce8d..5beb1ea 100644
--- a/example/example.md
+++ b/example/example.md
@@ -82,3 +82,5 @@ function codeBlocks() {
## ?!Anöt//her!!
## ?!!
+
+[XSS test](javAscRipt:alert("xss"))
diff --git a/markdown.d.ts b/markdown.d.ts
index 6e7d1a5..638fd82 100644
--- a/markdown.d.ts
+++ b/markdown.d.ts
@@ -28,8 +28,11 @@ export interface ParseOptions {
*/
bytes? :boolean
+ /** Allow "javascript:" in links */
+ allowJSURIs? :boolean
+
/**
- * onCodeBlock is an optional callback which if provided is called for each code block.
+ * Optional callback which if provided is called for each code block.
* langname holds the "language tag", if any, of the block.
*
* The returned value is inserted into the resulting HTML verbatim, without HTML escaping.
diff --git a/src/common.h b/src/common.h
index 836b2a5..5a0c8f3 100644
--- a/src/common.h
+++ b/src/common.h
@@ -61,6 +61,13 @@ typedef int32_t i32;
#include "wbuf.h"
+// Bit flags selecting output behavior; keep in sync with "OutputFlags" in md.js
+typedef enum OutputFlags {
+ OutputFlagHTML = 1 << 0, // produce HTML output
+ OutputFlagXHTML = 1 << 1, // produce XHTML output (void elements are closed with "/>")
+ OutputFlagAllowJSURI = 1 << 2, // allow "javascript:" URIs in links
+} OutputFlags;
+
typedef int(*JSTextFilterFun)(
const char* metaptr, u32 metalen,
const char* inptr, u32 inlen,
diff --git a/src/fmt_html.c b/src/fmt_html.c
index 2e71b04..6894ef4 100644
--- a/src/fmt_html.c
+++ b/src/fmt_html.c
@@ -25,6 +25,7 @@
#include
#include
+#include
#include "common.h"
#include "fmt_html.h"
@@ -257,9 +258,21 @@ static void render_open_td_block(FmtHTML* r, bool isTH, const MD_BLOCK_TD_DETAIL
}
}
+static bool is_javascript_uri(const MD_CHAR* text, size_t len) { // true if text begins with "javascript:" in any letter case
+ return (
+ len >= strlen("javascript:") && // length guard: the compare below never looks past len bytes
+ strncasecmp(text, "javascript:", strlen("javascript:")) == 0 // NOTE(review): raw prefix match only; leading whitespace or entity-encoded schemes are not detected -- confirm the href is normalized before this check
+ );
+}
+
static void render_open_a_span(FmtHTML* r, const MD_SPAN_A_DETAIL* det) {
render_literal(r, "href);
+ // skip "javascript:" URIs unless explicitly allowed
+ if ((r->flags & OutputFlagAllowJSURI) != 0 ||
+ !is_javascript_uri(det->href.text, det->href.size))
+ {
+ render_attribute(r, &det->href);
+ }
if (det->title.text != NULL) {
render_literal(r, "\" title=\"");
render_attribute(r, &det->title);
@@ -279,7 +292,7 @@ static void render_close_img_span(FmtHTML* r, const MD_SPAN_IMG_DETAIL* det) {
render_literal(r, "\" title=\"");
render_attribute(r, &det->title);
}
- render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "\"/>" : "\">");
+ render_literal(r, (r->flags & OutputFlagXHTML) ? "\"/>" : "\">");
r->imgnest--;
}
@@ -306,7 +319,7 @@ static int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
case MD_BLOCK_UL: render_literal(r, "\n"); break;
case MD_BLOCK_OL: render_open_ol_block(r, (const MD_BLOCK_OL_DETAIL*)detail); break;
case MD_BLOCK_LI: render_open_li_block(r, (const MD_BLOCK_LI_DETAIL*)detail); break;
- case MD_BLOCK_HR: render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "
\n" : "
\n"); break;
+ case MD_BLOCK_HR: render_literal(r, (r->flags & OutputFlagXHTML) ? "
\n" : "
\n"); break;
case MD_BLOCK_H:
{
render_literal(r, head[((MD_BLOCK_H_DETAIL*)detail)->level - 1]);
@@ -379,8 +392,8 @@ static int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) {
case MD_SPAN_EM: render_literal(r, ""); break;
case MD_SPAN_STRONG: render_literal(r, ""); break;
case MD_SPAN_U: render_literal(r, ""); break;
- case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*) detail); break;
- case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*) detail); break;
+ case MD_SPAN_A: render_open_a_span(r, (MD_SPAN_A_DETAIL*)detail); break;
+ case MD_SPAN_IMG: render_open_img_span(r, (MD_SPAN_IMG_DETAIL*)detail); break;
case MD_SPAN_CODE: render_literal(r, ""); break;
case MD_SPAN_DEL: render_literal(r, ""); break;
case MD_SPAN_LATEXMATH: render_literal(r, ""); break;
@@ -452,12 +465,12 @@ static int text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, vo
render_literal(
r,
r->imgnest == 0 ?
- ((r->flags & MD_HTML_FLAG_XHTML) ? "
\n" : "
\n") :
+ ((r->flags & OutputFlagXHTML) ? "
\n" : "
\n") :
" "
);
break;
- render_literal(r, (r->flags & MD_HTML_FLAG_XHTML) ? "
\n" : "
\n"); break;
+ render_literal(r, (r->flags & OutputFlagXHTML) ? "
\n" : "
\n"); break;
case MD_TEXT_SOFTBR: render_literal(r, (r->imgnest == 0 ? "\n" : " ")); break;
case MD_TEXT_HTML: render_text(r, text, size); break;
diff --git a/src/fmt_html.h b/src/fmt_html.h
index a903981..cfe8215 100644
--- a/src/fmt_html.h
+++ b/src/fmt_html.h
@@ -1,11 +1,9 @@
#pragma once
-#define MD_HTML_FLAG_XHTML 0x0008 // instead of e.g.
, generate
-
typedef struct FmtHTML {
- u32 flags; // MD_HTML_FLAG_*
- u32 parserFlags; // passed along to md_parse
- WBuf* outbuf;
+ OutputFlags flags;
+ u32 parserFlags; // passed along to md_parse
+ WBuf* outbuf;
// optional callbacks
JSTextFilterFun onCodeBlock;
diff --git a/src/md.c b/src/md.c
index b0c6225..f17f8cc 100644
--- a/src/md.c
+++ b/src/md.c
@@ -4,12 +4,6 @@
#include "fmt_html.h"
// #include "fmt_json.h"
-// these should be in sync with "OutputFlags" in md.js
-typedef enum OutputFlags {
- OutputFlagHTML = 1 << 0,
- OutputFlagXHTML = 1 << 1,
-} OutputFlags;
-
typedef enum ErrorCode {
ERR_NONE,
ERR_MD_PARSE,
@@ -41,19 +35,16 @@ export size_t parseUTF8(
WBufReset(&outbuf);
- if (outflags & OutputFlagHTML) {
+ if ((outflags & OutputFlagHTML) || (outflags & OutputFlagXHTML)) {
WBufReserve(&outbuf, inbuflen * 2); // approximate output size to minimize reallocations
FmtHTML fmt = {
- .flags = 0,
+ .flags = outflags,
.parserFlags = parser_flags,
.outbuf = &outbuf,
.onCodeBlock = onCodeBlock,
};
- if (outflags & OutputFlagXHTML)
- fmt.flags |= MD_HTML_FLAG_XHTML;
-
if (fmt_html(inbufptr, inbuflen, &fmt) != 0) {
// fmt_html returns status of md_parse which only fails in extreme cases
// like when out of memory. md4c does not provide error codes or error messages.
diff --git a/src/md.js b/src/md.js
index c5d05cd..c85a52f 100644
--- a/src/md.js
+++ b/src/md.js
@@ -41,10 +41,11 @@ export const ParseFlags = {
NO_HTML: 0x0020 | 0x0040, // NO_HTML_BLOCKS | NO_HTML_SPANS
}
-// these should be in sync with "OutputFlags" in md.c
+// these should be in sync with "OutputFlags" in common.h
const OutputFlags = {
- HTML: 1 << 0, // Output HTML
- XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set)
+ HTML: 1 << 0, // Output HTML
+ XHTML: 1 << 1, // Output XHTML (only has effect with HTML flag set)
+ AllowJSURI: 1 << 2, // Allow "javascript:" URIs
}
@@ -56,7 +57,8 @@ export function parse(source, options) {
options.parseFlags
)
- let outputFlags = 0
+ let outputFlags = options.allowJSURIs ? OutputFlags.AllowJSURI : 0
+
switch (options.format) {
case "xhtml":
outputFlags |= OutputFlags.HTML | OutputFlags.XHTML