Merge branch 'master' into fix-unclosed

This commit is contained in:
taoqf 2020-04-14 09:04:33 +08:00 committed by GitHub
commit 8f418ce085
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 30 additions and 7 deletions

View file

@ -1,6 +1,6 @@
{
"name": "node-html-parser",
"version": "1.2.14",
"version": "1.2.15",
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View file

@ -595,6 +595,8 @@ export interface Options {
comment?: boolean;
}
const frameflag = 'documentfragmentcontainer';
/**
* Parses HTML and returns a root element
* Parse a chunk of HTML source.
@ -607,6 +609,8 @@ export function parse(data: string, options = {} as Options) {
const stack = [root];
let lastTextPos = -1;
let match: RegExpExecArray;
// https://github.com/taoqf/node-html-parser/issues/38
data = `<${frameflag}>${data}</${frameflag}>`;
while (match = kMarkupPattern.exec(data)) {
if (lastTextPos > -1) {
if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) {
@ -616,6 +620,9 @@ export function parse(data: string, options = {} as Options) {
}
}
lastTextPos = kMarkupPattern.lastIndex;
if (match[2] === frameflag) {
continue;
}
if (match[0][1] === '!') {
// this is a comment
if (options.comment) {
@ -642,8 +649,9 @@ export function parse(data: string, options = {} as Options) {
currentParent = arr_back(stack);
}
}
currentParent = currentParent.appendChild(
new HTMLElement(match[2], attrs, match[3]));
// ignore container tag we add above
// https://github.com/taoqf/node-html-parser/issues/38
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
stack.push(currentParent);
if (kBlockTextElements[match[2]]) {
// a little test to find next </script> or </style> ...

View file

@ -429,10 +429,12 @@ describe('HTML Parser', function () {
});
describe('#removeAttribute', function () {
const root = parseHTML('<input required>');
const input = root.firstChild;
input.removeAttribute('required');
input.toString().should.eql('<input>');
it('should remove attribute required', function () {
const root = parseHTML('<input required>');
const input = root.firstChild;
input.removeAttribute('required');
input.toString().should.eql('<input>');
});
});
describe('#hasAttribute', function () {

13
test/parse.js Normal file
View file

@ -0,0 +1,13 @@
// Regression tests for https://github.com/taoqf/node-html-parser/issues/38
// Input that starts with, or consists only of, bare text must serialize back
// to exactly the string that was parsed.
// NOTE(review): relies on `describe`/`it` and the `.should` getter being
// provided globally by the test runner setup — confirm.
const { parse } = require('../dist');

describe('HTML Parser', function () {
	// Each entry: [test title, HTML source expected to round-trip verbatim].
	const cases = [
		['should parse text element', 'foo bar<div>aaa</div>'],
		['should parse pure text element', 'foo bar'],
	];

	cases.forEach(function ([title, html]) {
		it(title, function () {
			parse(html).toString().should.eql(html);
		});
	});
});