diff --git a/package.json b/package.json index 705f10f..88c5ebf 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "node-html-parser", - "version": "1.2.14", + "version": "1.2.15", "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/src/nodes/html.ts b/src/nodes/html.ts index 4f41a13..704fb50 100644 --- a/src/nodes/html.ts +++ b/src/nodes/html.ts @@ -595,6 +595,8 @@ export interface Options { comment?: boolean; } +const frameflag = 'documentfragmentcontainer'; + /** * Parses HTML and returns a root element * Parse a chuck of HTML source. @@ -607,6 +609,8 @@ export function parse(data: string, options = {} as Options) { const stack = [root]; let lastTextPos = -1; let match: RegExpExecArray; + // https://github.com/taoqf/node-html-parser/issues/38 + data = `<${frameflag}>${data}${frameflag}>`; while (match = kMarkupPattern.exec(data)) { if (lastTextPos > -1) { if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) { @@ -616,6 +620,9 @@ export function parse(data: string, options = {} as Options) { } } lastTextPos = kMarkupPattern.lastIndex; + if (match[2] === frameflag) { + continue; + } if (match[0][1] === '!') { // this is a comment if (options.comment) { @@ -642,8 +649,9 @@ export function parse(data: string, options = {} as Options) { currentParent = arr_back(stack); } } - currentParent = currentParent.appendChild( - new HTMLElement(match[2], attrs, match[3])); + // ignore container tag we add above + // https://github.com/taoqf/node-html-parser/issues/38 + currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3])); stack.push(currentParent); if (kBlockTextElements[match[2]]) { // a little test to find next or ... diff --git a/test/html.js b/test/html.js index 10bab7e..a00ac35 100644 --- a/test/html.js +++ b/test/html.js @@ -429,10 +429,12 @@ describe('HTML Parser', function () { }); describe('#removeAttribute', function () { - const root = parseHTML(''); - const input = root.firstChild; - input.removeAttribute('required'); - input.toString().should.eql(''); + it('should remove attribute required', function () { + const root = parseHTML(''); + const input = root.firstChild; + input.removeAttribute('required'); + input.toString().should.eql(''); + }); }); describe('#hasAttribute', function () { diff --git a/test/parse.js b/test/parse.js new file mode 100644 index 0000000..b6aa8e0 --- /dev/null +++ b/test/parse.js @@ -0,0 +1,13 @@ +const { parse } = require('../dist'); + +// https://github.com/taoqf/node-html-parser/issues/38 +describe('HTML Parser', function () { + it('should parse text element', function () { + const root = parse('foo bar