From ee97545974891251f72eb3eba87ec87a184c65ea Mon Sep 17 00:00:00 2001 From: taoqf Date: Thu, 9 Apr 2020 10:36:16 +0800 Subject: [PATCH 1/3] :bug: fix issue #38 --- src/nodes/html.ts | 13 ++++++++++--- test/html.js | 10 ++++++---- test/parse.js | 13 +++++++++++++ 3 files changed, 29 insertions(+), 7 deletions(-) create mode 100644 test/parse.js diff --git a/src/nodes/html.ts b/src/nodes/html.ts index 2f796bb..1c128e8 100644 --- a/src/nodes/html.ts +++ b/src/nodes/html.ts @@ -598,6 +598,8 @@ export interface Options { comment?: boolean; } +const frameflag = 'documentfragmentcontainer'; + /** * Parses HTML and returns a root element * Parse a chuck of HTML source. @@ -610,6 +612,8 @@ export function parse(data: string, options = {} as Options) { const stack = [root]; let lastTextPos = -1; let match: RegExpExecArray; + // https://github.com/taoqf/node-html-parser/issues/38 + data = `<${frameflag}>${data}`; while (match = kMarkupPattern.exec(data)) { if (lastTextPos > -1) { if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) { @@ -645,9 +649,12 @@ export function parse(data: string, options = {} as Options) { currentParent = arr_back(stack); } } - currentParent = currentParent.appendChild( - new HTMLElement(match[2], attrs, match[3])); - stack.push(currentParent); + if (match[2] !== frameflag) { + // ignore container tag we add above + // https://github.com/taoqf/node-html-parser/issues/38 + currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3])); + stack.push(currentParent); + } if (kBlockTextElements[match[2]]) { // a little test to find next or ... const closeMarkup = ''; diff --git a/test/html.js b/test/html.js index 26cc404..ee4b84e 100644 --- a/test/html.js +++ b/test/html.js @@ -429,10 +429,12 @@ describe('HTML Parser', function () { }); describe('#removeAttribute', function () { - const root = parseHTML(''); - const input = root.firstChild; - input.removeAttribute('required'); - input.toString().should.eql(''); + it('should remove attribute required', function () { + const root = parseHTML(''); + const input = root.firstChild; + input.removeAttribute('required'); + input.toString().should.eql(''); + }); }); describe('#hasAttribute', function () { diff --git a/test/parse.js b/test/parse.js new file mode 100644 index 0000000..b6aa8e0 --- /dev/null +++ b/test/parse.js @@ -0,0 +1,13 @@ +const { parse } = require('../dist'); + +// https://github.com/taoqf/node-html-parser/issues/38 +describe('HTML Parser', function () { + it('should parse text element', function () { + const root = parse('foo bar
aaa
'); + root.toString().should.eql('foo bar
aaa
'); + }); + it('should parse pure text element', function () { + const root = parse('foo bar'); + root.toString().should.eql('foo bar'); + }); +}) From d266fb999a0d5c4db5526e4f796e547c3fc04b0a Mon Sep 17 00:00:00 2001 From: taoqf Date: Thu, 9 Apr 2020 10:41:42 +0800 Subject: [PATCH 2/3] speed up --- src/nodes/html.ts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/nodes/html.ts b/src/nodes/html.ts index 1c128e8..dff0364 100644 --- a/src/nodes/html.ts +++ b/src/nodes/html.ts @@ -623,6 +623,9 @@ export function parse(data: string, options = {} as Options) { } } lastTextPos = kMarkupPattern.lastIndex; + if (match[2] === frameflag) { + continue; + } if (match[0][1] === '!') { // this is a comment if (options.comment) { @@ -649,12 +652,10 @@ export function parse(data: string, options = {} as Options) { currentParent = arr_back(stack); } } - if (match[2] !== frameflag) { - // ignore container tag we add above - // https://github.com/taoqf/node-html-parser/issues/38 - currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3])); - stack.push(currentParent); - } + // ignore container tag we add above + // https://github.com/taoqf/node-html-parser/issues/38 + currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3])); + stack.push(currentParent); if (kBlockTextElements[match[2]]) { // a little test to find next or ... const closeMarkup = ''; From 7827ae965f44404eca2aa120fb556912a6424751 Mon Sep 17 00:00:00 2001 From: taoqf Date: Thu, 9 Apr 2020 10:42:21 +0800 Subject: [PATCH 3/3] v1.2.15 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 705f10f..88c5ebf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "node-html-parser", - "version": "1.2.14", + "version": "1.2.15", "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.", "main": "dist/index.js", "types": "dist/index.d.ts",