mirror of
https://github.com/danbulant/node-html-parser
synced 2026-06-19 14:41:10 +00:00
Update files to work with deno
This commit is contained in:
parent
170ef84b18
commit
fd966fcd39
4 changed files with 336 additions and 23 deletions
305
src/he.ts
Normal file
305
src/he.ts
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -17,7 +17,7 @@ let pMatchFunctionCache = {} as { [name: string]: MatherFunction };
|
||||||
/**
|
/**
|
||||||
* Function cache
|
* Function cache
|
||||||
*/
|
*/
|
||||||
const functionCache = {
|
const functionCache: { [key: string]: any } = {
|
||||||
f145(el: HTMLElement, tagName: string, classes: string[]) {
|
f145(el: HTMLElement, tagName: string, classes: string[]) {
|
||||||
'use strict';
|
'use strict';
|
||||||
tagName = tagName || '';
|
tagName = tagName || '';
|
||||||
|
|
@ -164,7 +164,7 @@ export default class Matcher {
|
||||||
let attr_key = '';
|
let attr_key = '';
|
||||||
let value = '';
|
let value = '';
|
||||||
if (tagName && tagName !== '*') {
|
if (tagName && tagName !== '*') {
|
||||||
let reg: RegExpMatchArray;
|
let reg: RegExpMatchArray | null;
|
||||||
if (tagName.startsWith('#')) {
|
if (tagName.startsWith('#')) {
|
||||||
// source += 'if (el.id != ' + JSON.stringify(tagName.substr(1)) + ') return false;';// 1
|
// source += 'if (el.id != ' + JSON.stringify(tagName.substr(1)) + ') return false;';// 1
|
||||||
function_name += '1';
|
function_name += '1';
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
import { decode } from 'https://raw.githubusercontent.com/mathiasbynens/he/master/src/he.js';
|
import { decode } from '../he.ts';
|
||||||
import Node from './node.ts';
|
import Node from './node.ts';
|
||||||
import NodeType from './type.ts';
|
import NodeType from './type.ts';
|
||||||
import TextNode from './text.ts';
|
import TextNode from './text.ts';
|
||||||
|
|
@ -16,12 +16,12 @@ export interface Attributes {
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface RawAttributes {
|
export interface RawAttributes {
|
||||||
[key: string]: string;
|
[key: string]: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export type InsertPosition = 'beforebegin' | 'afterbegin' | 'beforeend' | 'afterend';
|
export type InsertPosition = 'beforebegin' | 'afterbegin' | 'beforeend' | 'afterend';
|
||||||
|
|
||||||
const kBlockElements = {
|
const kBlockElements: { [key: string]: boolean } = {
|
||||||
div: true,
|
div: true,
|
||||||
p: true,
|
p: true,
|
||||||
// ul: true,
|
// ul: true,
|
||||||
|
|
@ -44,9 +44,9 @@ const kBlockElements = {
|
||||||
* @extends {Node}
|
* @extends {Node}
|
||||||
*/
|
*/
|
||||||
export default class HTMLElement extends Node {
|
export default class HTMLElement extends Node {
|
||||||
private _attrs: Attributes;
|
private _attrs?: Attributes;
|
||||||
private _rawAttrs: RawAttributes;
|
private _rawAttrs?: RawAttributes;
|
||||||
public id: string;
|
public id?: string;
|
||||||
public classNames = [] as string[];
|
public classNames = [] as string[];
|
||||||
/**
|
/**
|
||||||
* Node Type declaration.
|
* Node Type declaration.
|
||||||
|
|
@ -59,7 +59,7 @@ export default class HTMLElement extends Node {
|
||||||
*
|
*
|
||||||
* @memberof HTMLElement
|
* @memberof HTMLElement
|
||||||
*/
|
*/
|
||||||
public constructor(public tagName: string, keyAttrs: KeyAttributes, private rawAttrs = '', public parentNode = null as Node) {
|
public constructor(public tagName: string, keyAttrs: KeyAttributes, private rawAttrs = '', public parentNode: null | Node = null) {
|
||||||
super();
|
super();
|
||||||
this.rawAttrs = rawAttrs || '';
|
this.rawAttrs = rawAttrs || '';
|
||||||
this.parentNode = parentNode || null;
|
this.parentNode = parentNode || null;
|
||||||
|
|
@ -180,9 +180,14 @@ export default class HTMLElement extends Node {
|
||||||
content = [content];
|
content = [content];
|
||||||
} else if (typeof content == 'string') {
|
} else if (typeof content == 'string') {
|
||||||
const r = parse(content, options);
|
const r = parse(content, options);
|
||||||
|
if(!r) return console.trace("R undefined");
|
||||||
content = r.childNodes.length ? r.childNodes : [new TextNode(content)];
|
content = r.childNodes.length ? r.childNodes : [new TextNode(content)];
|
||||||
}
|
}
|
||||||
this.childNodes = content;
|
if(Array.isArray(content)) {
|
||||||
|
this.childNodes = content;
|
||||||
|
} else {
|
||||||
|
this.childNodes = [content as Node];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public get outerHTML() {
|
public get outerHTML() {
|
||||||
|
|
@ -428,7 +433,7 @@ export default class HTMLElement extends Node {
|
||||||
const attrs = {} as RawAttributes;
|
const attrs = {} as RawAttributes;
|
||||||
if (this.rawAttrs) {
|
if (this.rawAttrs) {
|
||||||
const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
|
const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
|
||||||
let match: RegExpExecArray;
|
let match: RegExpExecArray | null;
|
||||||
while (match = re.exec(this.rawAttrs)) {
|
while (match = re.exec(this.rawAttrs)) {
|
||||||
attrs[match[1]] = match[2] || match[3] || match[4] || null;
|
attrs[match[1]] = match[2] || match[3] || match[4] || null;
|
||||||
}
|
}
|
||||||
|
|
@ -545,7 +550,7 @@ export default class HTMLElement extends Node {
|
||||||
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
|
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
|
||||||
const kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
|
const kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
|
||||||
const kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]+)"|'([^']+)'|(\S+))/ig;
|
const kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]+)"|'([^']+)'|(\S+))/ig;
|
||||||
const kSelfClosingElements = {
|
const kSelfClosingElements: { [key: string]: boolean } = {
|
||||||
area: true,
|
area: true,
|
||||||
base: true,
|
base: true,
|
||||||
br: true,
|
br: true,
|
||||||
|
|
@ -557,7 +562,7 @@ const kSelfClosingElements = {
|
||||||
meta: true,
|
meta: true,
|
||||||
source: true
|
source: true
|
||||||
};
|
};
|
||||||
const kElementsClosedByOpening = {
|
const kElementsClosedByOpening: { [key: string]: any } = {
|
||||||
li: { li: true },
|
li: { li: true },
|
||||||
p: { p: true, div: true },
|
p: { p: true, div: true },
|
||||||
b: { div: true },
|
b: { div: true },
|
||||||
|
|
@ -570,7 +575,7 @@ const kElementsClosedByOpening = {
|
||||||
h5: { h5: true },
|
h5: { h5: true },
|
||||||
h6: { h6: true }
|
h6: { h6: true }
|
||||||
};
|
};
|
||||||
const kElementsClosedByClosing = {
|
const kElementsClosedByClosing: { [key: string]: any } = {
|
||||||
li: { ul: true, ol: true },
|
li: { ul: true, ol: true },
|
||||||
a: { div: true },
|
a: { div: true },
|
||||||
b: { div: true },
|
b: { div: true },
|
||||||
|
|
@ -579,7 +584,7 @@ const kElementsClosedByClosing = {
|
||||||
td: { tr: true, table: true },
|
td: { tr: true, table: true },
|
||||||
th: { tr: true, table: true }
|
th: { tr: true, table: true }
|
||||||
};
|
};
|
||||||
const kBlockTextElements = {
|
const kBlockTextElements: { [key: string]: boolean } = {
|
||||||
script: true,
|
script: true,
|
||||||
noscript: true,
|
noscript: true,
|
||||||
style: true,
|
style: true,
|
||||||
|
|
@ -603,12 +608,12 @@ const frameflag = 'documentfragmentcontainer';
|
||||||
* @param {string} data html
|
* @param {string} data html
|
||||||
* @return {HTMLElement} root element
|
* @return {HTMLElement} root element
|
||||||
*/
|
*/
|
||||||
export function parse(data: string, options = {} as Options) {
|
export function parse(data: string, options: any = {} as Options) {
|
||||||
const root = new HTMLElement(null, {});
|
const root = new HTMLElement("", {});
|
||||||
let currentParent = root;
|
let currentParent = root;
|
||||||
const stack = [root];
|
const stack = [root];
|
||||||
let lastTextPos = -1;
|
let lastTextPos = -1;
|
||||||
let match: RegExpExecArray;
|
let match: RegExpExecArray | any;
|
||||||
// https://github.com/taoqf/node-html-parser/issues/38
|
// https://github.com/taoqf/node-html-parser/issues/38
|
||||||
data = `<${frameflag}>${data}</${frameflag}>`;
|
data = `<${frameflag}>${data}</${frameflag}>`;
|
||||||
while (match = kMarkupPattern.exec(data)) {
|
while (match = kMarkupPattern.exec(data)) {
|
||||||
|
|
@ -637,7 +642,7 @@ export function parse(data: string, options = {} as Options) {
|
||||||
}
|
}
|
||||||
if (!match[1]) {
|
if (!match[1]) {
|
||||||
// not </ tags
|
// not </ tags
|
||||||
const attrs = {};
|
const attrs: { [key: string]: any } = {};
|
||||||
for (let attMatch; attMatch = kAttributePattern.exec(match[3]);) {
|
for (let attMatch; attMatch = kAttributePattern.exec(match[3]);) {
|
||||||
attrs[attMatch[2]] = attMatch[4] || attMatch[5] || attMatch[6];
|
attrs[attMatch[2]] = attMatch[4] || attMatch[5] || attMatch[6];
|
||||||
}
|
}
|
||||||
|
|
@ -714,19 +719,20 @@ export function parse(data: string, options = {} as Options) {
|
||||||
while (stack.length > 1) {
|
while (stack.length > 1) {
|
||||||
// Handle each error elements.
|
// Handle each error elements.
|
||||||
const last = stack.pop();
|
const last = stack.pop();
|
||||||
|
if(!last) return;
|
||||||
const oneBefore = arr_back(stack);
|
const oneBefore = arr_back(stack);
|
||||||
if (last.parentNode && (last.parentNode as HTMLElement).parentNode) {
|
if (last.parentNode && (last.parentNode as HTMLElement).parentNode) {
|
||||||
if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
|
if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
|
||||||
// Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
|
// Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
|
||||||
oneBefore.removeChild(last);
|
oneBefore.removeChild(last);
|
||||||
last.childNodes.forEach((child) => {
|
last.childNodes.forEach((child: any) => {
|
||||||
(oneBefore.parentNode as HTMLElement).appendChild(child);
|
(oneBefore.parentNode as HTMLElement).appendChild(child);
|
||||||
});
|
});
|
||||||
stack.pop();
|
stack.pop();
|
||||||
} else {
|
} else {
|
||||||
// Single error <div> <h3> </div> handle: Just removes <h3>
|
// Single error <div> <h3> </div> handle: Just removes <h3>
|
||||||
oneBefore.removeChild(last);
|
oneBefore.removeChild(last);
|
||||||
last.childNodes.forEach((child) => {
|
last.childNodes.forEach((child: any) => {
|
||||||
oneBefore.appendChild(child);
|
oneBefore.appendChild(child);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -734,7 +740,7 @@ export function parse(data: string, options = {} as Options) {
|
||||||
// If it's final element just skip.
|
// If it's final element just skip.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
response.childNodes.forEach((node) => {
|
response.childNodes.forEach((node: any) => {
|
||||||
if (node instanceof HTMLElement) {
|
if (node instanceof HTMLElement) {
|
||||||
node.parentNode = null;
|
node.parentNode = null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,9 +4,11 @@ import NodeType from './type.ts';
|
||||||
* Node Class as base class for TextNode and HTMLElement.
|
* Node Class as base class for TextNode and HTMLElement.
|
||||||
*/
|
*/
|
||||||
export default abstract class Node {
|
export default abstract class Node {
|
||||||
nodeType: NodeType;
|
nodeType: NodeType | null = null;
|
||||||
childNodes = [] as Node[];
|
childNodes = [] as Node[];
|
||||||
|
// @ts-ignore
|
||||||
text: string;
|
text: string;
|
||||||
|
// @ts-ignore
|
||||||
rawText: string;
|
rawText: string;
|
||||||
abstract toString(): string;
|
abstract toString(): string;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue