mirror of
https://github.com/danbulant/node-html-parser
synced 2026-06-18 06:01:05 +00:00
lint ts
This commit is contained in:
parent
5ae3604472
commit
cba17a7439
11 changed files with 1005 additions and 817 deletions
142
.eslintrc.js
Normal file
142
.eslintrc.js
Normal file
|
|
@ -0,0 +1,142 @@
|
||||||
|
module.exports = {
|
||||||
|
"env": {
|
||||||
|
"browser": true,
|
||||||
|
"es6": true,
|
||||||
|
"node": true
|
||||||
|
},
|
||||||
|
"extends": [
|
||||||
|
"plugin:@typescript-eslint/recommended",
|
||||||
|
"plugin:@typescript-eslint/recommended-requiring-type-checking"
|
||||||
|
],
|
||||||
|
"parser": "@typescript-eslint/parser",
|
||||||
|
"parserOptions": {
|
||||||
|
"project": "tsconfig.json",
|
||||||
|
"sourceType": "module"
|
||||||
|
},
|
||||||
|
"plugins": [
|
||||||
|
"@typescript-eslint",
|
||||||
|
"@typescript-eslint/tslint"
|
||||||
|
],
|
||||||
|
"rules": {
|
||||||
|
"@typescript-eslint/prefer-includes": "off",
|
||||||
|
"@typescript-eslint/unbound-method": "off",
|
||||||
|
"@typescript-eslint/no-non-null-assertion": "off",
|
||||||
|
"@typescript-eslint/member-delimiter-style": "error",
|
||||||
|
"@typescript-eslint/no-misused-promises": "error",
|
||||||
|
"@typescript-eslint/explicit-function-return-type": "off",
|
||||||
|
"@typescript-eslint/camelcase": "off",
|
||||||
|
"@typescript-eslint/adjacent-overload-signatures": "error",
|
||||||
|
"@typescript-eslint/array-type": "error",
|
||||||
|
"@typescript-eslint/ban-types": "error",
|
||||||
|
"@typescript-eslint/class-name-casing": "error",
|
||||||
|
"@typescript-eslint/consistent-type-assertions": "error",
|
||||||
|
"@typescript-eslint/indent": [
|
||||||
|
"error",
|
||||||
|
"tab",
|
||||||
|
{
|
||||||
|
"ArrayExpression": "first",
|
||||||
|
"ObjectExpression": "first"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"@typescript-eslint/interface-name-prefix": "off",
|
||||||
|
"@typescript-eslint/no-empty-function": "off",
|
||||||
|
"@typescript-eslint/no-empty-interface": "error",
|
||||||
|
"@typescript-eslint/no-explicit-any": "off",
|
||||||
|
"@typescript-eslint/no-misused-new": "error",
|
||||||
|
"@typescript-eslint/no-namespace": "error",
|
||||||
|
"@typescript-eslint/no-parameter-properties": "off",
|
||||||
|
"@typescript-eslint/no-use-before-define": "off",
|
||||||
|
"@typescript-eslint/no-var-requires": "error",
|
||||||
|
"@typescript-eslint/prefer-for-of": "error",
|
||||||
|
"@typescript-eslint/prefer-function-type": "error",
|
||||||
|
"@typescript-eslint/prefer-namespace-keyword": "error",
|
||||||
|
"@typescript-eslint/quotes": [
|
||||||
|
"error",
|
||||||
|
"single",
|
||||||
|
{
|
||||||
|
"avoidEscape": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"@typescript-eslint/triple-slash-reference": "error",
|
||||||
|
"@typescript-eslint/unified-signatures": "off",
|
||||||
|
"camelcase": "off",
|
||||||
|
"comma-dangle": "error",
|
||||||
|
"complexity": "off",
|
||||||
|
"constructor-super": "error",
|
||||||
|
"dot-notation": "error",
|
||||||
|
"eqeqeq": [
|
||||||
|
"error",
|
||||||
|
"smart"
|
||||||
|
],
|
||||||
|
"guard-for-in": "off",
|
||||||
|
"id-blacklist": [
|
||||||
|
"error",
|
||||||
|
"any",
|
||||||
|
"Number",
|
||||||
|
"number",
|
||||||
|
"String",
|
||||||
|
"string",
|
||||||
|
"Boolean",
|
||||||
|
"boolean",
|
||||||
|
"Undefined"
|
||||||
|
],
|
||||||
|
"id-match": "error",
|
||||||
|
"max-classes-per-file": [
|
||||||
|
"error",
|
||||||
|
1
|
||||||
|
],
|
||||||
|
"max-len": "off",
|
||||||
|
"new-parens": "error",
|
||||||
|
"no-bitwise": "off",
|
||||||
|
"no-caller": "error",
|
||||||
|
"no-cond-assign": "off",
|
||||||
|
"no-console": "off",
|
||||||
|
"no-debugger": "error",
|
||||||
|
"no-empty": "off",
|
||||||
|
"no-eval": "error",
|
||||||
|
"no-fallthrough": "off",
|
||||||
|
"no-invalid-this": "off",
|
||||||
|
"no-multiple-empty-lines": "off",
|
||||||
|
"no-new-wrappers": "error",
|
||||||
|
"no-shadow": [
|
||||||
|
"error",
|
||||||
|
{
|
||||||
|
"hoist": "all"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"no-throw-literal": "error",
|
||||||
|
"no-trailing-spaces": "error",
|
||||||
|
"no-undef-init": "error",
|
||||||
|
"no-underscore-dangle": "off",
|
||||||
|
"no-unsafe-finally": "error",
|
||||||
|
"no-unused-expressions": "error",
|
||||||
|
"no-unused-labels": "error",
|
||||||
|
"no-var": "error",
|
||||||
|
"object-shorthand": "error",
|
||||||
|
"one-var": [
|
||||||
|
"error",
|
||||||
|
"never"
|
||||||
|
],
|
||||||
|
"prefer-arrow/prefer-arrow-functions": "off",
|
||||||
|
"prefer-const": "error",
|
||||||
|
"radix": "error",
|
||||||
|
"spaced-comment": "error",
|
||||||
|
"use-isnan": "error",
|
||||||
|
"valid-typeof": "off",
|
||||||
|
"@typescript-eslint/tslint/config": [
|
||||||
|
"error",
|
||||||
|
{
|
||||||
|
"rules": {
|
||||||
|
"jsdoc-format": true,
|
||||||
|
"no-reference-import": true,
|
||||||
|
"no-unsafe-any": true,
|
||||||
|
"whitespace": [
|
||||||
|
true,
|
||||||
|
"check-branch",
|
||||||
|
"check-operator"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "mocha",
|
"test": "mocha",
|
||||||
"lint": "eslint ./src/*.ts",
|
"lint": "eslint ./src/*.ts ./src/**/*.ts",
|
||||||
"clean": "del-cli ./dist/",
|
"clean": "del-cli ./dist/",
|
||||||
"ts:cjs": "tsc -m commonjs",
|
"ts:cjs": "tsc -m commonjs",
|
||||||
"ts:umd": "tsc -t es5 -m umd -d false --outDir ./dist/umd/",
|
"ts:umd": "tsc -t es5 -m umd -d false --outDir ./dist/umd/",
|
||||||
|
|
|
||||||
3
src/back.ts
Normal file
3
src/back.ts
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
/**
 * Return the last element of an array without modifying it.
 *
 * The parameter is declared `readonly` so that frozen / readonly arrays can be
 * passed as well as ordinary mutable ones.
 *
 * @param arr array to read from
 * @return the element at index `arr.length - 1`; `undefined` when `arr` is empty
 */
export default function arr_back<T>(arr: readonly T[]) {
	return arr[arr.length - 1];
}
|
||||||
841
src/index.ts
841
src/index.ts
|
|
@ -1,791 +1,9 @@
|
||||||
import { decode } from 'he';
|
import arr_back from './back';
|
||||||
|
import CommentNode from './nodes/comment';
|
||||||
/**
 * Numeric tag stored in a node's `nodeType` field to tell the node kinds apart.
 * The values 1, 3 and 8 are the same numbers the DOM standard assigns to
 * Node.ELEMENT_NODE, Node.TEXT_NODE and Node.COMMENT_NODE.
 */
// NOTE(review): in this side-by-side diff rendering the enum (old side) is
// interleaved with import/export lines that belong to the new side of the file.
export enum NodeType {
|
export { default as HTMLElement } from './nodes/html';
|
||||||
/** Element node. */
ELEMENT_NODE = 1,
|
import HTMLElement from './nodes/html';
|
||||||
/** Text node. */
TEXT_NODE = 3,
|
import TextNode from './nodes/text';
|
||||||
/** Comment node. */
COMMENT_NODE = 8
|
export { default as Node } from './nodes/node';
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Node Class as base class for TextNode, CommentNode and HTMLElement.
 */
export abstract class Node {
	/** Kind discriminator; each concrete subclass assigns its own NodeType member. */
	nodeType: NodeType;
	/** Direct children of this node; starts out as an empty array. */
	childNodes = [] as Node[];
	/** Text content of the node. */
	text: string;
	/** Text content of the node as written in the source markup. */
	rawText: string;
	/**
	 * Serialize the node.
	 * @return {string} a primitive string (the boxed `String` wrapper type is never wanted here)
	 */
	abstract toString(): string;
}
|
|
||||||
/**
 * TextNode to contain a text element in DOM tree.
 */
export class TextNode extends Node {
	/**
	 * @param {string} value text exactly as it appears in the markup; stored as `rawText`
	 */
	constructor(value: string) {
		super();
		this.rawText = value;
	}

	/**
	 * Node Type declaration.
	 */
	nodeType = NodeType.TEXT_NODE;

	/**
	 * Get unescaped text value of current node.
	 * @return {string} `rawText` passed through `decode`
	 */
	get text() {
		return decode(this.rawText);
	}

	/**
	 * Detect if the node contains only white space.
	 * @return {boolean} true when `rawText` is empty or made up solely of whitespace
	 */
	get isWhitespace() {
		// The two alternatives must not overlap: a pattern such as `(\s| )*` lets the
		// engine match every blank in two ways and backtracks exponentially on text
		// like a long run of indentation followed by a word.
		// NOTE(review): the second alternative was rendered as a bare blank in the
		// scraped source; it is restored here as the literal `&nbsp;` entity, which
		// is presumably what the upstream file contains (rawText is still escaped)
		// - confirm against the repository.
		return /^(?:\s|&nbsp;)*$/.test(this.rawText);
	}

	/**
	 * Serialize the node.
	 * @return {string} the unescaped text
	 */
	toString() {
		return this.text;
	}
}
|
|
||||||
|
|
||||||
/**
 * CommentNode keeps the body of an HTML comment (the part between the
 * comment delimiters) as its `rawText`.
 */
export class CommentNode extends Node {
	/**
	 * Node Type declaration.
	 */
	nodeType = NodeType.COMMENT_NODE;

	/**
	 * @param {string} value comment body, stored unchanged as `rawText`
	 */
	constructor(value: string) {
		super();
		this.rawText = value;
	}

	/**
	 * Get unescaped text value of current node.
	 * @return {string} `rawText` passed through `decode`
	 */
	get text() {
		const raw = this.rawText;
		return decode(raw);
	}

	/**
	 * Serialize the node back to comment markup.
	 * @return {string} `rawText` wrapped in the comment delimiters
	 */
	toString() {
		return '<!--' + this.rawText + '-->';
	}
}
|
|
||||||
|
|
||||||
/**
 * Tag names that are treated as block elements: a truthy lookup by tag name
 * means the element starts and ends its own block of text.
 *
 * The table has no prototype, so a tag that happens to be named like an
 * Object.prototype member (`constructor`, `toString`, ...) is not mistaken
 * for a block element.
 */
const kBlockElements: Record<string, boolean> = Object.assign(Object.create(null), {
	div: true,
	p: true,
	// ul: true,
	// ol: true,
	li: true,
	// table: true,
	// tr: true,
	td: true,
	section: true,
	br: true
});
|
|
||||||
|
|
||||||
/**
 * The two attributes an element constructor receives separately from the raw
 * attribute string. Both are optional.
 */
export interface KeyAttributes {
	/** Value of the `id` attribute. */
	id?: string;
	/** Value of the `class` attribute (whitespace-separated class names). */
	class?: string;
}
|
|
||||||
|
|
||||||
/**
 * Map from attribute name to attribute value.
 */
export interface Attributes {
	[name: string]: string;
}
|
|
||||||
|
|
||||||
/**
 * Map from attribute name to the attribute value as it was written in the
 * markup.
 */
export interface RawAttributes {
	[name: string]: string;
}
|
|
||||||
|
|
||||||
/**
 * Peek at the last element of an array.
 * @param arr array to read from; it is not modified
 * @return the final element, or `undefined` when the array is empty
 */
function arr_back<T>(arr: T[]) {
	const lastIndex = arr.length - 1;
	return arr[lastIndex];
}
|
|
||||||
|
|
||||||
/**
 * HTMLElement, which contains a set of children.
 *
 * Note: this is a minimalist implementation, no complete tree
 *   structure provided (no nextSibling, previousSibling etc).
 *   Elements do keep a reference to their parent in `parentNode`.
 * @class HTMLElement
 * @extends {Node}
 */
export class HTMLElement extends Node {
	/** Lazily built cache of the decoded attributes; dropped whenever attributes are written. */
	private _attrs: Attributes;
	/** Lazily built cache of the attributes as written in the markup. */
	private _rawAttrs: RawAttributes;
	/** Value of the `id` key attribute; stays unset when none was supplied. */
	public id: string;
	/** Class names taken from the `class` key attribute. */
	public classNames = [] as string[];
	/**
	 * Node Type declaration.
	 */
	public nodeType = NodeType.ELEMENT_NODE;

	/**
	 * Creates an instance of HTMLElement.
	 * @param tagName tag name of the element; a falsy name makes the element serialize as its children only
	 * @param keyAttrs id and class attribute
	 * @param [rawAttrs] attributes in string
	 * @param [parentNode] parent of this element, or null
	 *
	 * @memberof HTMLElement
	 */
	constructor(public tagName: string, keyAttrs: KeyAttributes, private rawAttrs = '', public parentNode = null as Node) {
		super();
		// The parameter defaults only cover `undefined`; normalize other falsy input too.
		this.rawAttrs = rawAttrs || '';
		this.parentNode = parentNode || null;
		this.childNodes = [];
		if (keyAttrs.id) {
			this.id = keyAttrs.id;
		}
		if (keyAttrs.class) {
			// Leading/trailing whitespace would otherwise yield empty class names.
			this.classNames = keyAttrs.class.split(/\s+/).filter((name) => name !== '');
		}
	}

	/**
	 * Remove Child element from childNodes array
	 * @param {Node} node node to remove
	 */
	public removeChild(node: Node) {
		this.childNodes = this.childNodes.filter((child) => child !== node);
	}

	/**
	 * Exchanges given child with new child.
	 * Does nothing when `oldNode` is not a direct child of this element.
	 * @param {Node} oldNode node to exchange
	 * @param {Node} newNode new node
	 */
	public exchangeChild(oldNode: Node, newNode: Node) {
		const idx = this.childNodes.indexOf(oldNode);
		if (idx === -1) {
			// Writing to index -1 would only add a stray "-1" property to the array.
			return;
		}
		this.childNodes[idx] = newNode;
	}

	/**
	 * Get escaped (as-is) text value of current node and its children.
	 * @return {string} text content
	 */
	get rawText() {
		let res = '';
		for (const child of this.childNodes) {
			res += child.rawText;
		}
		return res;
	}

	/**
	 * Get unescaped text value of current node and its children.
	 * @return {string} text content
	 */
	get text() {
		return decode(this.rawText);
	}

	/**
	 * Get structured Text (with '\n' etc.)
	 * @return {string} structured text
	 */
	get structuredText() {
		// A block is the list of text pieces of one output line, plus a flag that
		// remembers a pending whitespace-only text node.
		type Block = string[] & { prependWhitespace?: boolean };
		let currentBlock: Block = [];
		const blocks: Block[] = [currentBlock];
		const startNewBlock = () => {
			if (currentBlock.length > 0) {
				blocks.push(currentBlock = []);
			}
		};
		function dfs(node: Node) {
			if (node.nodeType === NodeType.ELEMENT_NODE) {
				if (kBlockElements[(node as HTMLElement).tagName]) {
					// Block elements get a line of their own.
					startNewBlock();
					node.childNodes.forEach(dfs);
					startNewBlock();
				} else {
					node.childNodes.forEach(dfs);
				}
			} else if (node.nodeType === NodeType.TEXT_NODE) {
				if ((node as TextNode).isWhitespace) {
					// Whitespace node, postponed output
					currentBlock.prependWhitespace = true;
				} else {
					let text = node.text;
					if (currentBlock.prependWhitespace) {
						text = ' ' + text;
						currentBlock.prependWhitespace = false;
					}
					currentBlock.push(text);
				}
			}
		}
		dfs(this);
		return blocks
			// Normalize each line's whitespace
			.map((block) => block.join('').trim().replace(/\s{2,}/g, ' '))
			.join('\n')
			.replace(/\s+$/, ''); // trimRight;
	}

	/**
	 * Serialize this element and its subtree to markup.
	 * `meta` is written without a closing tag, the tags listed in the second
	 * pattern are written self-closed, and an element without tag name is
	 * written as its children only.
	 * @return {string} markup
	 */
	public toString() {
		const tag = this.tagName;
		if (!tag) {
			return this.innerHTML;
		}
		const attrs = this.rawAttrs ? ' ' + this.rawAttrs : '';
		if (/^meta$/i.test(tag)) {
			return `<${tag}${attrs}>`;
		}
		if (/^(img|br|hr|area|base|input|doctype|link)$/i.test(tag)) {
			return `<${tag}${attrs} />`;
		}
		return `<${tag}${attrs}>${this.innerHTML}</${tag}>`;
	}

	/**
	 * Markup of all children, concatenated.
	 * @return {string} markup
	 */
	get innerHTML() {
		return this.childNodes.map((child) => child.toString()).join('');
	}

	/**
	 * Replace all children of this element.
	 * @param content a node, an array of nodes, or markup; markup is run through
	 *   `parse`, and becomes a single TextNode when parsing yields no child nodes
	 */
	public set_content(content: string | Node | Node[]) {
		if (content instanceof Node) {
			content = [content];
		} else if (typeof content === 'string') {
			const r = parse(content);
			content = r.childNodes.length ? r.childNodes : [new TextNode(content)];
		}
		this.childNodes = content as Node[];
	}

	/**
	 * Same as toString().
	 * @return {string} markup
	 */
	get outerHTML() {
		return this.toString();
	}

	/**
	 * Trim element from right (in block) after seeing pattern in a TextNode.
	 * @param {RegExp} pattern pattern to find
	 * @return {HTMLElement} reference to current node
	 */
	public trimRight(pattern: RegExp) {
		// Index loop on purpose: the array is truncated while it is being walked.
		for (let i = 0; i < this.childNodes.length; i++) {
			const childNode = this.childNodes[i];
			if (childNode.nodeType === NodeType.ELEMENT_NODE) {
				(childNode as HTMLElement).trimRight(pattern);
			} else {
				const index = childNode.rawText.search(pattern);
				if (index > -1) {
					childNode.rawText = childNode.rawText.slice(0, index);
					// trim all following nodes.
					this.childNodes.length = i + 1;
				}
			}
		}
		return this;
	}

	/**
	 * Get DOM structure
	 * @return {string} structure: one line per element (tag, `#id`, `.classes`)
	 *   or `#text` per non-whitespace text node, indented by depth
	 */
	get structure() {
		const res = [] as string[];
		let indention = 0;
		function write(str: string) {
			res.push(' '.repeat(indention) + str);
		}
		function dfs(node: HTMLElement) {
			const idStr = node.id ? ('#' + node.id) : '';
			const classStr = node.classNames.length ? ('.' + node.classNames.join('.')) : '';
			write(node.tagName + idStr + classStr);
			indention++;
			for (const childNode of node.childNodes) {
				if (childNode.nodeType === NodeType.ELEMENT_NODE) {
					dfs(childNode as HTMLElement);
				} else if (childNode.nodeType === NodeType.TEXT_NODE) {
					if (!(childNode as TextNode).isWhitespace) {
						write('#text');
					}
				}
			}
			indention--;
		}
		dfs(this);
		return res.join('\n');
	}

	/**
	 * Remove whitespaces in this sub tree: whitespace-only text nodes are
	 * dropped and the remaining text nodes are trimmed.
	 * @return {HTMLElement} pointer to this
	 */
	public removeWhitespace() {
		// Compact the array in place; `o` is the next write position.
		let o = 0;
		for (let i = 0; i < this.childNodes.length; i++) {
			const node = this.childNodes[i];
			if (node.nodeType === NodeType.TEXT_NODE) {
				if ((node as TextNode).isWhitespace) {
					continue;
				}
				node.rawText = node.rawText.trim();
			} else if (node.nodeType === NodeType.ELEMENT_NODE) {
				(node as HTMLElement).removeWhitespace();
			}
			this.childNodes[o++] = node;
		}
		this.childNodes.length = o;
		return this;
	}

	/**
	 * Query CSS selector to find matching nodes.
	 * A selector string containing commas is split, each part is queried on its
	 * own and the results are merged without duplicates.
	 * @param {string|Matcher} selector Simplified CSS selector, or a Matcher instance (which is reset first)
	 * @return {HTMLElement[]} matching elements
	 */
	public querySelectorAll(selector: string | Matcher): HTMLElement[] {
		let matcher: Matcher;
		if (selector instanceof Matcher) {
			matcher = selector;
			matcher.reset();
		} else {
			if (selector.includes(',')) {
				const unique = new Set<HTMLElement>();
				for (const part of selector.split(',')) {
					for (const found of this.querySelectorAll(part.trim())) {
						unique.add(found);
					}
				}
				return Array.from(unique);
			}
			matcher = new Matcher(selector);
		}
		const res = [] as HTMLElement[];
		// Explicit depth-first stack. Each entry is
		// [node, index of the next child to visit, whether the matcher advanced on this node].
		const stack: [Node, number, boolean][] = [];
		for (const root of this.childNodes) {
			stack.push([root, 0, false]);
			while (stack.length) {
				const state = arr_back(stack);
				const el = state[0];
				if (state[1] === 0) {
					// Seen for first time.
					if (el.nodeType !== NodeType.ELEMENT_NODE) {
						stack.pop();
						continue;
					}
					state[2] = matcher.advance(el);
					if (state[2] && matcher.matched) {
						res.push(el as HTMLElement);
						// no need to go further.
						matcher.rewind();
						stack.pop();
						continue;
					}
				}
				if (state[1] < el.childNodes.length) {
					stack.push([el.childNodes[state[1]++], 0, false]);
				} else {
					// Leaving the node: undo the advance made when it was entered.
					if (state[2]) {
						matcher.rewind();
					}
					stack.pop();
				}
			}
		}
		return res;
	}

	/**
	 * Query CSS Selector to find matching node.
	 * Unlike querySelectorAll, a selector string is not split on commas here.
	 * @param {string|Matcher} selector Simplified CSS selector, or a Matcher instance (which is reset first)
	 * @return {HTMLElement} first matching node in depth-first order, or null
	 */
	public querySelector(selector: string | Matcher) {
		let matcher: Matcher;
		if (selector instanceof Matcher) {
			matcher = selector;
			matcher.reset();
		} else {
			matcher = new Matcher(selector);
		}
		// Same stack layout as in querySelectorAll:
		// [node, index of the next child to visit, whether the matcher advanced on this node].
		const stack: [Node, number, boolean][] = [];
		for (const root of this.childNodes) {
			stack.push([root, 0, false]);
			while (stack.length) {
				const state = arr_back(stack);
				const el = state[0];
				if (state[1] === 0) {
					// Seen for first time.
					if (el.nodeType !== NodeType.ELEMENT_NODE) {
						stack.pop();
						continue;
					}
					state[2] = matcher.advance(el);
					if (state[2] && matcher.matched) {
						return el as HTMLElement;
					}
				}
				if (state[1] < el.childNodes.length) {
					stack.push([el.childNodes[state[1]++], 0, false]);
				} else {
					if (state[2]) {
						matcher.rewind();
					}
					stack.pop();
				}
			}
		}
		return null;
	}

	/**
	 * Append a child node to childNodes
	 * @param {Node} node node to append
	 * @return {Node} node appended
	 */
	public appendChild<T extends Node = Node>(node: T) {
		this.childNodes.push(node);
		// Only elements carry a parentNode field.
		if (node instanceof HTMLElement) {
			node.parentNode = this;
		}
		return node;
	}

	/**
	 * Get first child node
	 * @return {Node} first child node
	 */
	get firstChild() {
		return this.childNodes[0];
	}

	/**
	 * Get last child node
	 * @return {Node} last child node
	 */
	get lastChild() {
		return arr_back(this.childNodes);
	}

	/**
	 * Get attributes
	 * @return {Object} parsed and unescaped attributes (cached until the next attribute write)
	 */
	get attributes() {
		if (this._attrs) {
			return this._attrs;
		}
		this._attrs = {};
		const attrs = this.rawAttributes;
		for (const key in attrs) {
			const val = attrs[key] || '';
			// Strip one surrounding quote on each side, then decode entities.
			this._attrs[key] = decode(val.replace(/^['"]/, '').replace(/['"]$/, ''));
		}
		return this._attrs;
	}

	/**
	 * Get escaped (as-is) attributes
	 * @return {Object} parsed attributes; an attribute written without a value maps to null
	 */
	get rawAttributes() {
		if (this._rawAttrs) {
			return this._rawAttrs;
		}
		const attrs = {} as RawAttributes;
		if (this.rawAttrs) {
			const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*("(?:[^"]*)"|'(?:[^']*)'|(?:\S+)))?/ig;
			let match: RegExpExecArray | null;
			while ((match = re.exec(this.rawAttrs)) !== null) {
				attrs[match[1]] = match[2] || null;
			}
		}
		this._rawAttrs = attrs;
		return attrs;
	}

	/**
	 * Set an attribute value to the HTMLElement
	 * @param {string} key The attribute name
	 * @param {string|number} value The value to set, or null / undefined to remove an attribute
	 */
	setAttribute(key: string, value: string | number) {
		// Update the this.attributes
		if (this._attrs) {
			delete this._attrs;
		}
		const attrs = this.rawAttributes; // ref this._rawAttrs
		if (value === undefined || value === null) {
			delete attrs[key];
		} else {
			attrs[key] = JSON.stringify(value);
			// if (typeof value === 'string') {
			// 	attrs[key] = JSON.stringify(encode(value));//??? should we encode value here?
			// } else {
			// 	attrs[key] = JSON.stringify(value);
			// }
		}
		// Update rawString
		this.rawAttrs = Object.keys(attrs).map((name) => {
			const val = attrs[name];
			if (val === undefined || val === null) {
				return name;
			}
			return name + '=' + val;
		}).join(' ');
	}

	/**
	 * Replace all the attributes of the HTMLElement by the provided attributes
	 * @param {Attributes} attributes the new attribute set
	 */
	setAttributes(attributes: Attributes) {
		// Update the this.attributes
		if (this._attrs) {
			delete this._attrs;
		}
		// Update the raw attributes map
		if (this._rawAttrs) {
			delete this._rawAttrs;
		}
		// Update rawString
		this.rawAttrs = Object.keys(attributes).map((name) => {
			const val = attributes[name];
			if (val === undefined || val === null) {
				return name;
			}
			return name + '=' + JSON.stringify(val);
			// if (typeof val === 'string') {
			// 	return name + '=' + JSON.stringify(encode(val)); //??? should we encode value here?
			// } else {
			// 	return name + '=' + JSON.stringify(val);
			// }
		}).join(' ');
	}
}
|
|
||||||
|
|
||||||
/**
 * One step of a selector: the predicate to run plus the arguments it is
 * called with after the element.
 */
interface MatherFunction {
	/** Predicate invoked as `func(el, tagName, classes, attr_key, value)`. */
	func: any;
	tagName: string;
	classes: string | string[];
	attr_key: any;
	value: any;
}
|
|
||||||
|
|
||||||
/**
 * Cache to store generated match functions, keyed by selector fragment.
 *
 * Created without a prototype: with a plain `{}` a fragment such as
 * `constructor` or `toString` would find the inherited Object.prototype
 * member and be handed back as if it were a cached matcher.
 *
 * NOTE(review): left as `let` because code outside this excerpt may reassign
 * the cache - confirm before switching to `const`.
 * @type {Object}
 */
let pMatchFunctionCache: { [name: string]: MatherFunction } = Object.create(null);
|
|
||||||
|
|
||||||
/**
 * Function cache
 *
 * Pre-built match predicates. The digits in a key name the checks it performs:
 * 1 = id, 2 = attribute value, 3 = tag name, 4 = class list, 5 = final `return true`.
 * Every predicate takes `(el, tagName, classes, attr_key, value)` and returns
 * a boolean; arguments a predicate does not need are ignored, and missing
 * ones are treated as empty.
 */
const functionCache = (() => {
	type Predicate = (el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) => boolean;

	/** True when the element's id equals `tagName` without its leading `#`. */
	const idMatches = (el: HTMLElement, tagName: string) => el.id === (tagName || '').slice(1);

	/** True when the element's tag name equals `tagName`. */
	const tagMatches = (el: HTMLElement, tagName: string) => el.tagName === (tagName || '');

	/** True when every requested class is present on the element. */
	const hasAllClasses = (el: HTMLElement, classes: string[]) => {
		const wanted = classes || [];
		for (let i = 0; i < wanted.length; i++) {
			if (el.classNames.indexOf(wanted[i]) === -1) {
				return false;
			}
		}
		return true;
	};

	/** True when the element has attribute `attr_key` whose value equals `value`. */
	const hasAttributeValue = (el: HTMLElement, attr_key: string, value: string) => {
		const wantedKey = attr_key || '';
		const wantedValue = value || '';
		const attrs = el.attributes;
		for (const key in attrs) {
			if (key === wantedKey && attrs[key] === wantedValue) {
				return true;
			}
		}
		return false;
	};

	const f145: Predicate = (el, tagName, classes) => idMatches(el, tagName) && hasAllClasses(el, classes);
	const f45: Predicate = (el, tagName, classes) => hasAllClasses(el, classes);
	const f15: Predicate = (el, tagName) => idMatches(el, tagName);
	// Used to fall off the end (returning undefined) on a successful match.
	const f1: Predicate = (el, tagName) => idMatches(el, tagName);
	const f5: Predicate = () => true;
	// Key 245 means attribute + classes; the class list used to be ignored here.
	const f245: Predicate = (el, tagName, classes, attr_key, value) =>
		hasAttributeValue(el, attr_key, value) && hasAllClasses(el, classes);
	const f25: Predicate = (el, tagName, classes, attr_key, value) => hasAttributeValue(el, attr_key, value);
	const f2: Predicate = (el, tagName, classes, attr_key, value) => hasAttributeValue(el, attr_key, value);
	const f345: Predicate = (el, tagName, classes) => tagMatches(el, tagName) && hasAllClasses(el, classes);
	const f35: Predicate = (el, tagName) => tagMatches(el, tagName);
	// Used to fall off the end (returning undefined) on a successful match.
	const f3: Predicate = (el, tagName) => tagMatches(el, tagName);

	return { f145, f45, f15, f1, f5, f245, f25, f2, f345, f35, f3 };
})();
|
|
||||||
/**
|
|
||||||
* Matcher class to make CSS match
|
|
||||||
*
|
|
||||||
* @class Matcher
|
|
||||||
*/
|
|
||||||
export class Matcher {
	// One compiled step per whitespace-separated part of the selector.
	private matchers: MatherFunction[];
	// Index of the step that the next advance() call will test.
	private nextMatch = 0;

	/**
	 * Creates an instance of Matcher.
	 *
	 * The selector is split on runs of whitespace; each part becomes one step.
	 * Compiled steps are memoised in `pMatchFunctionCache`, keyed by the part.
	 *
	 * @param {string} selector simplified CSS selector
	 * @memberof Matcher
	 */
	constructor(selector: string) {
		// Splitting on /\s+/ (after trimming) avoids empty parts, which would
		// otherwise compile to a step that matches every element.
		this.matchers = selector.trim().split(/\s+/).map((matcher) => {
			// Own-property check: a part such as "constructor" must not pick up
			// a value inherited from Object.prototype.
			if (Object.prototype.hasOwnProperty.call(pMatchFunctionCache, matcher)) {
				return pMatchFunctionCache[matcher];
			}
			const parts = matcher.split('.');
			const tagName = parts[0];
			const classes = parts.slice(1).sort();
			// The function name encodes which checks apply:
			// 1 = id, 2 = attribute, 3 = tag name, 4 = classes, 5 = always appended.
			let function_name = 'f';
			let attr_key = "";
			let value = "";
			if (tagName && tagName != '*') {
				if (tagName[0] == '#') {
					function_name += '1';
				} else {
					// NOTE(review): the pattern also accepts `!=`, but the captured
					// operator is not used anywhere; only equality is tested.
					const attrMatch = tagName.match(/^\[\s*(\S+)\s*(=|!=)\s*((((["'])([^\6]*)\6))|(\S*?))\]\s*/);
					if (attrMatch) {
						attr_key = attrMatch[1];
						// Group 7 is the quoted value, group 8 the unquoted one.
						value = attrMatch[7] || attrMatch[8];
						function_name += '2';
					} else {
						function_name += '3';
					}
				}
			}
			if (classes.length > 0) {
				function_name += '4';
			}
			function_name += '5';
			const obj = {
				func: functionCache[function_name],
				tagName: tagName || "",
				classes: classes || "",
				attr_key: attr_key || "",
				value: value || ""
			};
			return pMatchFunctionCache[matcher] = obj as MatherFunction;
		});
	}

	/**
	 * Trying to advance match pointer
	 * @param {HTMLElement} el element to make the match
	 * @return {boolean} true when pointer advanced.
	 */
	advance(el: Node) {
		if (this.nextMatch >= this.matchers.length) {
			return false;
		}
		const step = this.matchers[this.nextMatch];
		if (step.func(el, step.tagName, step.classes, step.attr_key, step.value)) {
			this.nextMatch++;
			return true;
		}
		return false;
	}

	/**
	 * Rewind the match pointer by one step.
	 */
	rewind() {
		this.nextMatch--;
	}

	/**
	 * Trying to determine if match made.
	 * @return {boolean} true when every step has matched
	 */
	get matched() {
		return this.nextMatch == this.matchers.length;
	}

	/**
	 * Reset the match pointer to the first step.
	 */
	reset() {
		this.nextMatch = 0;
	}

	/**
	 * Flush the compiled-step cache to free memory.
	 */
	flushCache() {
		pMatchFunctionCache = {};
	}
}
|
|
||||||
|
|
||||||
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
|
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
|
||||||
const kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
|
const kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
|
||||||
|
|
@ -831,25 +49,26 @@ const kBlockTextElements = {
|
||||||
pre: true
|
pre: true
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
export interface Options {
|
||||||
* Parses HTML and returns a root element
|
|
||||||
* Parse a chuck of HTML source.
|
|
||||||
* @param {string} data html
|
|
||||||
* @return {HTMLElement} root element
|
|
||||||
*/
|
|
||||||
export function parse(data: string, options?: {
|
|
||||||
lowerCaseTagName?: boolean;
|
lowerCaseTagName?: boolean;
|
||||||
noFix?: boolean;
|
noFix?: boolean;
|
||||||
script?: boolean;
|
script?: boolean;
|
||||||
style?: boolean;
|
style?: boolean;
|
||||||
pre?: boolean;
|
pre?: boolean;
|
||||||
comment?: boolean;
|
comment?: boolean;
|
||||||
}) {
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses HTML and returns a root element
|
||||||
|
* Parse a chuck of HTML source.
|
||||||
|
* @param {string} data html
|
||||||
|
* @return {HTMLElement} root element
|
||||||
|
*/
|
||||||
|
export function parse(data: string, options = {} as Options) {
|
||||||
const root = new HTMLElement(null, {});
|
const root = new HTMLElement(null, {});
|
||||||
let currentParent = root;
|
let currentParent = root;
|
||||||
const stack = [root];
|
const stack = [root];
|
||||||
let lastTextPos = -1;
|
let lastTextPos = -1;
|
||||||
options = options || {} as any;
|
|
||||||
let match: RegExpExecArray;
|
let match: RegExpExecArray;
|
||||||
while (match = kMarkupPattern.exec(data)) {
|
while (match = kMarkupPattern.exec(data)) {
|
||||||
if (lastTextPos > -1) {
|
if (lastTextPos > -1) {
|
||||||
|
|
@ -860,7 +79,7 @@ export function parse(data: string, options?: {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lastTextPos = kMarkupPattern.lastIndex;
|
lastTextPos = kMarkupPattern.lastIndex;
|
||||||
if (match[0][1] == '!') {
|
if (match[0][1] === '!') {
|
||||||
// this is a comment
|
// this is a comment
|
||||||
if (options.comment) {
|
if (options.comment) {
|
||||||
// Only keep what is in between <!-- and -->
|
// Only keep what is in between <!-- and -->
|
||||||
|
|
@ -873,13 +92,14 @@ export function parse(data: string, options?: {
|
||||||
match[2] = match[2].toLowerCase();
|
match[2] = match[2].toLowerCase();
|
||||||
if (!match[1]) {
|
if (!match[1]) {
|
||||||
// not </ tags
|
// not </ tags
|
||||||
let attrs = {};
|
const attrs = {};
|
||||||
for (let attMatch; attMatch = kAttributePattern.exec(match[3]);) {
|
for (let attMatch; attMatch = kAttributePattern.exec(match[3]);) {
|
||||||
attrs[attMatch[2]] = attMatch[4] || attMatch[5] || attMatch[6];
|
attrs[attMatch[2]] = attMatch[4] || attMatch[5] || attMatch[6];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!match[4] && kElementsClosedByOpening[currentParent.tagName]) {
|
const tagName = currentParent.tagName as 'li' | 'p' | 'b' | 'td' | 'th' | 'h1' | 'h2' | 'h3' | 'h4' | 'h5' | 'h6';
|
||||||
if (kElementsClosedByOpening[currentParent.tagName][match[2]]) {
|
if (!match[4] && kElementsClosedByOpening[tagName]) {
|
||||||
|
if (kElementsClosedByOpening[tagName][match[2]]) {
|
||||||
stack.pop();
|
stack.pop();
|
||||||
currentParent = arr_back(stack);
|
currentParent = arr_back(stack);
|
||||||
}
|
}
|
||||||
|
|
@ -889,11 +109,11 @@ export function parse(data: string, options?: {
|
||||||
stack.push(currentParent);
|
stack.push(currentParent);
|
||||||
if (kBlockTextElements[match[2]]) {
|
if (kBlockTextElements[match[2]]) {
|
||||||
// a little test to find next </script> or </style> ...
|
// a little test to find next </script> or </style> ...
|
||||||
let closeMarkup = '</' + match[2] + '>';
|
const closeMarkup = '</' + match[2] + '>';
|
||||||
let index = data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
const index = data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
||||||
if (options[match[2]]) {
|
if (options[match[2]]) {
|
||||||
let text: string;
|
let text: string;
|
||||||
if (index == -1) {
|
if (index === -1) {
|
||||||
// there is no matching ending for the text element.
|
// there is no matching ending for the text element.
|
||||||
text = data.substr(kMarkupPattern.lastIndex);
|
text = data.substr(kMarkupPattern.lastIndex);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -903,7 +123,7 @@ export function parse(data: string, options?: {
|
||||||
currentParent.appendChild(new TextNode(text));
|
currentParent.appendChild(new TextNode(text));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (index == -1) {
|
if (index === -1) {
|
||||||
lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
|
lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
|
||||||
} else {
|
} else {
|
||||||
lastTextPos = kMarkupPattern.lastIndex = index + closeMarkup.length;
|
lastTextPos = kMarkupPattern.lastIndex = index + closeMarkup.length;
|
||||||
|
|
@ -915,14 +135,15 @@ export function parse(data: string, options?: {
|
||||||
kSelfClosingElements[match[2]]) {
|
kSelfClosingElements[match[2]]) {
|
||||||
// </ or /> or <br> etc.
|
// </ or /> or <br> etc.
|
||||||
while (true) {
|
while (true) {
|
||||||
if (currentParent.tagName == match[2]) {
|
if (currentParent.tagName === match[2]) {
|
||||||
stack.pop();
|
stack.pop();
|
||||||
currentParent = arr_back(stack);
|
currentParent = arr_back(stack);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
|
const tagName = currentParent.tagName as 'li' | 'a' | 'b' | 'i' | 'p' | 'td' | 'th';
|
||||||
// Trying to close current tag, and move on
|
// Trying to close current tag, and move on
|
||||||
if (kElementsClosedByClosing[currentParent.tagName]) {
|
if (kElementsClosedByClosing[tagName]) {
|
||||||
if (kElementsClosedByClosing[currentParent.tagName][match[2]]) {
|
if (kElementsClosedByClosing[tagName][match[2]]) {
|
||||||
stack.pop();
|
stack.pop();
|
||||||
currentParent = arr_back(stack);
|
currentParent = arr_back(stack);
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -934,7 +155,7 @@ export function parse(data: string, options?: {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
type Response = (HTMLElement | TextNode) & { valid: boolean; };
|
type Response = (HTMLElement | TextNode) & { valid: boolean };
|
||||||
const valid = !!(stack.length === 1);
|
const valid = !!(stack.length === 1);
|
||||||
if (!options.noFix) {
|
if (!options.noFix) {
|
||||||
const response = root as Response;
|
const response = root as Response;
|
||||||
|
|
@ -974,3 +195,5 @@ export function parse(data: string, options?: {
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export default parse;
|
||||||
|
|
|
||||||
240
src/matcher.ts
Normal file
240
src/matcher.ts
Normal file
|
|
@ -0,0 +1,240 @@
|
||||||
|
import HTMLElement from './nodes/html';
|
||||||
|
|
||||||
|
/**
 * One compiled selector step: the data extracted from a selector part
 * together with the predicate that tests an element against that data.
 */
interface MatherFunction {
	/** First dot-separated segment of the selector part (tag, `#id` or `[attr=value]`). */
	tagName: string;
	/** Class names required by the selector part. */
	classes: string | string[];
	/** Attribute name for attribute selectors, otherwise empty. */
	attr_key: string;
	/** Attribute value for attribute selectors, otherwise empty. */
	value: string;
	/** Predicate invoked with the element followed by the four fields above. */
	func(el: HTMLElement, tagName: string, classes: string[] | string, attr_key: string, value: string): boolean;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cache to store generated match functions
|
||||||
|
* @type {Object}
|
||||||
|
*/
|
||||||
|
// Keyed by selector part; reassigned to a fresh object by Matcher#flushCache.
let pMatchFunctionCache: { [name: string]: MatherFunction } = {};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function cache
|
||||||
|
*/
|
||||||
|
// Predicates keyed by the checks they perform. The digits in each key mean:
//   1 = id, 2 = attribute equality, 3 = tag name, 4 = class names,
//   5 = terminator that the Matcher constructor always appends.
// Every predicate returns a boolean. `f1` and `f3` used to fall through and
// return `undefined` on success, and `f245` ignored the class list.
const functionCache = {
	// `#id.cls` — id and every class must match.
	f145(el: HTMLElement, tagName: string, classes: string[]) {
		return matchesId(el, tagName) && matchesAllClasses(el, classes);
	},
	// `.cls` or `*.cls` — every class must match.
	f45(el: HTMLElement, tagName: string, classes: string[]) {
		return matchesAllClasses(el, classes);
	},
	// `#id`
	f15(el: HTMLElement, tagName: string) {
		return matchesId(el, tagName);
	},
	// `#id` without the terminator digit.
	f1(el: HTMLElement, tagName: string) {
		return matchesId(el, tagName);
	},
	// `*` or an empty part — matches every element.
	f5() {
		return true;
	},
	// `[key=value].cls` — attribute and every class must match.
	f245(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) {
		return matchesAttribute(el, attr_key, value) && matchesAllClasses(el, classes);
	},
	// `[key=value]`
	f25(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) {
		return matchesAttribute(el, attr_key, value);
	},
	// `[key=value]` without the terminator digit.
	f2(el: HTMLElement, tagName: string, classes: string[], attr_key: string, value: string) {
		return matchesAttribute(el, attr_key, value);
	},
	// `tag.cls` — tag name and every class must match.
	f345(el: HTMLElement, tagName: string, classes: string[]) {
		return el.tagName === (tagName || '') && matchesAllClasses(el, classes);
	},
	// `tag`
	f35(el: HTMLElement, tagName: string) {
		return el.tagName === (tagName || '');
	},
	// `tag` without the terminator digit.
	f3(el: HTMLElement, tagName: string) {
		return el.tagName === (tagName || '');
	}
};

// True when the element's id equals `selectorPart` with its first character
// (the leading `#`) dropped.
function matchesId(el: HTMLElement, selectorPart: string) {
	return el.id === (selectorPart || '').substr(1);
}

// True when the element carries every class name in `classes`.
function matchesAllClasses(el: HTMLElement, classes: string[]) {
	return (classes || []).every((cls) => el.classNames.indexOf(cls) !== -1);
}

// True when the element has an attribute named `attr_key` whose value is
// exactly `value`. Missing arguments are compared as empty strings.
function matchesAttribute(el: HTMLElement, attr_key: string, value: string) {
	const attrs = el.attributes;
	const wantedKey = attr_key || '';
	const wantedValue = value || '';
	return Object.keys(attrs).some((key) => key === wantedKey && attrs[key] === wantedValue);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matcher class to make CSS match
|
||||||
|
*
|
||||||
|
* @class Matcher
|
||||||
|
*/
|
||||||
|
export default class Matcher {
	// One compiled step per whitespace-separated part of the selector.
	private matchers: MatherFunction[];
	// Index of the step that the next advance() call will test.
	private nextMatch = 0;

	/**
	 * Creates an instance of Matcher.
	 *
	 * The selector is split on runs of whitespace; each part becomes one step.
	 * Compiled steps are memoised in `pMatchFunctionCache`, keyed by the part.
	 *
	 * @param {string} selector simplified CSS selector
	 * @memberof Matcher
	 */
	constructor(selector: string) {
		// Splitting on /\s+/ (after trimming) avoids empty parts, which would
		// otherwise compile to `f5`, a step that matches every element.
		this.matchers = selector.trim().split(/\s+/).map((matcher) => {
			// Own-property check: a part such as "constructor" must not pick up
			// a value inherited from Object.prototype.
			if (Object.prototype.hasOwnProperty.call(pMatchFunctionCache, matcher)) {
				return pMatchFunctionCache[matcher];
			}
			const parts = matcher.split('.');
			const tagName = parts[0];
			const classes = parts.slice(1).sort();
			// The function name encodes which checks apply:
			// 1 = id, 2 = attribute, 3 = tag name, 4 = classes, 5 = always appended.
			let function_name = 'f';
			let attr_key = '';
			let value = '';
			if (tagName && tagName !== '*') {
				if (tagName.startsWith('#')) {
					function_name += '1';
				} else {
					// NOTE(review): the pattern also accepts `!=`, but the captured
					// operator is not used anywhere; only equality is tested.
					const reg = /^\[\s*(\S+)\s*(=|!=)\s*((((["'])([^\6]*)\6))|(\S*?))\]\s*/.exec(tagName);
					if (reg) {
						attr_key = reg[1];
						// Group 7 is the quoted value, group 8 the unquoted one.
						value = reg[7] || reg[8];
						function_name += '2';
					} else {
						function_name += '3';
					}
				}
			}
			if (classes.length > 0) {
				function_name += '4';
			}
			function_name += '5';
			const obj = {
				func: functionCache[function_name],
				tagName: tagName || '',
				classes: classes || '',
				attr_key: attr_key || '',
				value: value || ''
			};
			return pMatchFunctionCache[matcher] = obj as MatherFunction;
		});
	}

	/**
	 * Trying to advance match pointer
	 * @param {HTMLElement} el element to make the match
	 * @return {boolean} true when pointer advanced.
	 */
	advance(el: HTMLElement) {
		if (this.nextMatch >= this.matchers.length) {
			return false;
		}
		const step = this.matchers[this.nextMatch];
		if (step.func(el, step.tagName, step.classes, step.attr_key, step.value)) {
			this.nextMatch++;
			return true;
		}
		return false;
	}

	/**
	 * Rewind the match pointer by one step.
	 */
	rewind() {
		this.nextMatch--;
	}

	/**
	 * Trying to determine if match made.
	 * @return {boolean} true when every step has matched
	 */
	get matched() {
		return this.nextMatch === this.matchers.length;
	}

	/**
	 * Reset the match pointer to the first step.
	 */
	reset() {
		this.nextMatch = 0;
	}

	/**
	 * Flush the compiled-step cache to free memory.
	 */
	flushCache() {
		pMatchFunctionCache = {};
	}
}
|
||||||
28
src/nodes/comment.ts
Normal file
28
src/nodes/comment.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
import { decode } from 'he';
|
||||||
|
import Node from './node';
|
||||||
|
import NodeType from './type';
|
||||||
|
|
||||||
|
export default class CommentNode extends Node {
	/**
	 * Node type tag identifying this node as a comment.
	 */
	nodeType = NodeType.COMMENT_NODE;

	/**
	 * @param value text stored as the node's raw text
	 */
	constructor(value: string) {
		super();
		this.rawText = value;
	}

	/**
	 * Raw text passed through `decode` from the `he` package.
	 * @return {string} text content
	 */
	get text() {
		const raw = this.rawText;
		return decode(raw);
	}

	/**
	 * Serialises the node with the HTML comment delimiters around its raw text.
	 */
	toString() {
		return '<!--' + this.rawText + '-->';
	}
}
|
||||||
494
src/nodes/html.ts
Normal file
494
src/nodes/html.ts
Normal file
|
|
@ -0,0 +1,494 @@
|
||||||
|
import { decode } from 'he';
|
||||||
|
import Node from './node';
|
||||||
|
import NodeType from './type';
|
||||||
|
import TextNode from './text';
|
||||||
|
import Matcher from '../matcher';
|
||||||
|
import { parse } from '../index';
|
||||||
|
import arr_back from '../back';
|
||||||
|
|
||||||
|
/** The two attributes the HTMLElement constructor reads directly. */
export interface KeyAttributes {
	class?: string;
	id?: string;
}

/** Attribute map as returned by the `attributes` getter. */
export interface Attributes {
	[key: string]: string;
}

/** Attribute map as returned by the `rawAttributes` getter. */
export interface RawAttributes {
	[key: string]: string;
}
|
||||||
|
|
||||||
|
// Tag names looked up by `structuredText` to decide where a new text block
// starts. ul, ol, table and tr are deliberately left out.
const kBlockElements = {
	'div': true,
	'p': true,
	'li': true,
	'td': true,
	'section': true,
	'br': true
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* HTMLElement, which contains a set of children.
|
||||||
|
*
|
||||||
|
* Note: this is a minimalist implementation, no complete tree
|
||||||
|
* structure provided (no parentNode, nextSibling,
|
||||||
|
* previousSibling etc).
|
||||||
|
* @class HTMLElement
|
||||||
|
* @extends {Node}
|
||||||
|
*/
|
||||||
|
export default class HTMLElement extends Node {
|
||||||
|
private _attrs: Attributes;
|
||||||
|
private _rawAttrs: RawAttributes;
|
||||||
|
public id: string;
|
||||||
|
public classNames = [] as string[];
|
||||||
|
/**
|
||||||
|
* Node Type declaration.
|
||||||
|
*/
|
||||||
|
public nodeType = NodeType.ELEMENT_NODE;
|
||||||
|
/**
|
||||||
|
* Creates an instance of HTMLElement.
|
||||||
|
* @param keyAttrs id and class attribute
|
||||||
|
* @param [rawAttrs] attributes in string
|
||||||
|
*
|
||||||
|
* @memberof HTMLElement
|
||||||
|
*/
|
||||||
|
constructor(public tagName: string, keyAttrs: KeyAttributes, private rawAttrs = '', public parentNode = null as Node) {
	super();
	// Start with no children and normalise falsy arguments.
	this.childNodes = [];
	this.parentNode = parentNode ? parentNode : null;
	this.rawAttrs = rawAttrs ? rawAttrs : '';
	const { id, class: classAttr } = keyAttrs;
	if (id) {
		this.id = id;
	}
	// The class attribute is split on runs of whitespace.
	if (classAttr) {
		this.classNames = classAttr.split(/\s+/);
	}
}
|
||||||
|
/**
|
||||||
|
* Remove Child element from childNodes array
|
||||||
|
* @param {HTMLElement} node node to remove
|
||||||
|
*/
|
||||||
|
public removeChild(node: Node) {
	// Build a new child list that excludes every occurrence of `node`.
	const remaining = this.childNodes.filter((child) => child !== node);
	this.childNodes = remaining;
}
|
||||||
|
/**
|
||||||
|
* Exchanges given child with new child
|
||||||
|
* @param {HTMLElement} oldNode node to exchange
|
||||||
|
* @param {HTMLElement} newNode new node
|
||||||
|
*/
|
||||||
|
public exchangeChild(oldNode: Node, newNode: Node) {
	const idx = this.childNodes.indexOf(oldNode);
	// Nothing to exchange when `oldNode` is not a child. The previous code
	// wrote to index -1 in that case, adding a stray "-1" property to the array.
	if (idx === -1) {
		return;
	}
	this.childNodes[idx] = newNode;
}
|
||||||
|
/**
|
||||||
|
* Get escaped (as-is) text value of current node and its children.
|
||||||
|
* @return {string} text content
|
||||||
|
*/
|
||||||
|
get rawText() {
	// Concatenate the raw text of every child, in order.
	let combined = '';
	for (const child of this.childNodes) {
		combined += child.rawText;
	}
	return combined;
}
|
||||||
|
/**
|
||||||
|
* Get unescaped text value of current node and its children.
|
||||||
|
* @return {string} text content
|
||||||
|
*/
|
||||||
|
get text() {
	const raw = this.rawText;
	return decode(raw);
}
|
||||||
|
/**
|
||||||
|
* Get structured Text (with '\n' etc.)
|
||||||
|
* @return {string} structured text
|
||||||
|
*/
|
||||||
|
get structuredText() {
	let currentBlock = [] as string[];
	const blocks = [currentBlock];
	// Open a fresh block, but only when the current one already holds text.
	const startBlockIfNeeded = () => {
		if (currentBlock.length > 0) {
			currentBlock = [];
			blocks.push(currentBlock);
		}
	};
	const visit = (node: Node): void => {
		if (node.nodeType === NodeType.TEXT_NODE) {
			if ((node as TextNode).isWhitespace) {
				// Whitespace node, postponed output: the flag lives on the block
				// itself, so it is dropped when a new block starts.
				(currentBlock as any).prependWhitespace = true;
				return;
			}
			let text = node.text;
			if ((currentBlock as any).prependWhitespace) {
				text = ' ' + text;
				(currentBlock as any).prependWhitespace = false;
			}
			currentBlock.push(text);
			return;
		}
		if (node.nodeType !== NodeType.ELEMENT_NODE) {
			return;
		}
		// Elements listed in kBlockElements get their own block before and after.
		const isBlock = kBlockElements[(node as HTMLElement).tagName];
		if (isBlock) {
			startBlockIfNeeded();
		}
		node.childNodes.forEach((child) => visit(child));
		if (isBlock) {
			startBlockIfNeeded();
		}
	};
	visit(this);
	// Normalize each line's whitespace, then drop trailing whitespace overall.
	const lines = blocks.map((block) => block.join('').trim().replace(/\s{2,}/g, ' '));
	return lines.join('\n').replace(/\s+$/, '');
}
|
||||||
|
|
||||||
|
/**
 * Serialise this element and its children to HTML.
 * An element without a tag name serialises to its inner HTML only.
 * @return {string} HTML markup
 */
public toString() {
	const tag = this.tagName;
	if (!tag) {
		return this.innerHTML;
	}
	const attrs = this.rawAttrs ? ' ' + this.rawAttrs : '';
	// meta is emitted as an opening tag only.
	if (/^meta$/i.test(tag)) {
		return `<${tag}${attrs}>`;
	}
	// These tags are emitted in self-closing form.
	if (/^(img|br|hr|area|base|input|doctype|link)$/i.test(tag)) {
		return `<${tag}${attrs} />`;
	}
	return `<${tag}${attrs}>${this.innerHTML}</${tag}>`;
}
|
||||||
|
|
||||||
|
/**
 * Serialised markup of all children, concatenated in order.
 */
get innerHTML() {
	const rendered = this.childNodes.map((child) => child.toString());
	return rendered.join('');
}
|
||||||
|
|
||||||
|
/**
 * Replace all children of this element.
 * @param content a single node, a list of nodes, or an HTML string to parse
 */
public set_content(content: string | Node | Node[]) {
	if (content instanceof Node) {
		this.childNodes = [content];
		return;
	}
	if (typeof content == 'string') {
		const parsed = parse(content);
		// If parsing produced no nodes, keep the string as a single text node.
		this.childNodes = parsed.childNodes.length ? parsed.childNodes : [new TextNode(content)];
		return;
	}
	this.childNodes = content;
}
|
||||||
|
|
||||||
|
/**
 * Same markup as toString().
 */
get outerHTML() {
	const html = this.toString();
	return html;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trim element from right (in block) after seeing pattern in a TextNode.
|
||||||
|
* @param {RegExp} pattern pattern to find
|
||||||
|
* @return {HTMLElement} reference to current node
|
||||||
|
*/
|
||||||
|
public trimRight(pattern: RegExp) {
	let i = 0;
	while (i < this.childNodes.length) {
		const childNode = this.childNodes[i];
		if (childNode.nodeType === NodeType.ELEMENT_NODE) {
			(childNode as HTMLElement).trimRight(pattern);
		} else {
			const cut = childNode.rawText.search(pattern);
			if (cut > -1) {
				childNode.rawText = childNode.rawText.substr(0, cut);
				// Trim all following nodes; shortening the list also ends the loop.
				this.childNodes.length = i + 1;
			}
		}
		i++;
	}
	return this;
}
|
||||||
|
/**
|
||||||
|
* Get DOM structure
|
||||||
|
* @return {string} structure
|
||||||
|
*/
|
||||||
|
get structure() {
	const lines = [] as string[];
	// Emit one line per element, and one `#text` line per non-whitespace text
	// node, indented according to depth.
	const walk = (node: HTMLElement, depth: number) => {
		const idStr = node.id ? ('#' + node.id) : '';
		const classStr = node.classNames.length ? ('.' + node.classNames.join('.')) : '';
		const label = node.tagName + idStr + classStr;
		lines.push(' '.repeat(depth) + label);
		for (const childNode of node.childNodes) {
			if (childNode.nodeType === NodeType.ELEMENT_NODE) {
				walk(childNode as HTMLElement, depth + 1);
			} else if (childNode.nodeType === NodeType.TEXT_NODE && !(childNode as TextNode).isWhitespace) {
				lines.push(' '.repeat(depth + 1) + '#text');
			}
		}
	};
	walk(this, 0);
	return lines.join('\n');
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove whitespaces in this sub tree.
|
||||||
|
* @return {HTMLElement} pointer to this
|
||||||
|
*/
|
||||||
|
public removeWhitespace() {
	// Compact the child list in place: `kept` is the next write position and
	// never overtakes the read position.
	let kept = 0;
	for (const node of this.childNodes) {
		if (node.nodeType === NodeType.TEXT_NODE) {
			if ((node as TextNode).isWhitespace) {
				continue; // drop whitespace-only text nodes
			}
			node.rawText = node.rawText.trim();
		} else if (node.nodeType === NodeType.ELEMENT_NODE) {
			(node as HTMLElement).removeWhitespace();
		}
		this.childNodes[kept] = node;
		kept += 1;
	}
	this.childNodes.length = kept;
	return this;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Query CSS selector to find matching nodes.
|
||||||
|
* @param {string} selector Simplified CSS selector
|
||||||
|
* @param {Matcher} selector A Matcher instance
|
||||||
|
* @return {HTMLElement[]} matching elements
|
||||||
|
*/
|
||||||
|
public querySelectorAll(selector: string | Matcher): HTMLElement[] {
	let matcher: Matcher;
	if (selector instanceof Matcher) {
		matcher = selector;
		matcher.reset();
	} else if (selector.includes(',')) {
		// Comma-separated list: query each part and merge without duplicates.
		const unique = new Set<HTMLElement>();
		for (const part of selector.split(',')) {
			for (const hit of this.querySelectorAll(part.trim())) {
				unique.add(hit);
			}
		}
		return Array.from(unique);
	} else {
		matcher = new Matcher(selector);
	}
	const found = [] as HTMLElement[];
	// Each stack entry is [node, index of next child to visit, whether the
	// matcher advanced on this node].
	const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
	for (const child of this.childNodes) {
		stack.push([child, 0, false]);
		while (stack.length) {
			const state = arr_back(stack);
			const el = state[0];
			if (state[1] === 0) {
				// Seen for first time.
				if (el.nodeType !== NodeType.ELEMENT_NODE) {
					stack.pop();
					continue;
				}
				state[2] = matcher.advance(el as HTMLElement);
				if (state[2] && matcher.matched) {
					found.push(el as HTMLElement);
					// No need to go further: undo the advance and skip the subtree.
					matcher.rewind();
					stack.pop();
					continue;
				}
			}
			if (state[1] < el.childNodes.length) {
				stack.push([el.childNodes[state[1]++], 0, false]);
			} else {
				// Subtree done: undo this node's advance, if any.
				if (state[2]) {
					matcher.rewind();
				}
				stack.pop();
			}
		}
	}
	return found;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Query CSS Selector to find matching node.
|
||||||
|
* @param {string} selector Simplified CSS selector
|
||||||
|
* @param {Matcher} selector A Matcher instance
|
||||||
|
* @return {HTMLElement} matching node
|
||||||
|
*/
|
||||||
|
public querySelector(selector: string | Matcher) {
	let matcher: Matcher;
	if (selector instanceof Matcher) {
		matcher = selector;
		matcher.reset();
	} else {
		matcher = new Matcher(selector);
	}
	// Each stack entry is [node, index of next child to visit, whether the
	// matcher advanced on this node].
	const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
	for (let i = 0; i < this.childNodes.length; i++) {
		stack.push([this.childNodes[i], 0, false]);
		while (stack.length) {
			const state = arr_back(stack);
			const el = state[0];
			if (state[1] === 0) {
				// Seen for first time.
				if (el.nodeType !== NodeType.ELEMENT_NODE) {
					stack.pop();
					continue;
				}
				state[2] = matcher.advance(el as HTMLElement);
				if (state[2] && matcher.matched) {
					return el;
				}
			}
			if (state[1] < el.childNodes.length) {
				stack.push([el.childNodes[state[1]++], 0, false]);
			} else {
				// Subtree done: undo this node's advance, if any.
				if (state[2]) {
					matcher.rewind();
				}
				stack.pop();
			}
		}
	}
	return null;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append a child node to childNodes
|
||||||
|
* @param {Node} node node to append
|
||||||
|
* @return {Node} node appended
|
||||||
|
*/
|
||||||
|
public appendChild<T extends Node = Node>(node: T) {
	// Only element nodes get their parent link set.
	if (node instanceof HTMLElement) {
		node.parentNode = this;
	}
	this.childNodes.push(node);
	return node;
}
|
||||||
|
|
||||||
|
/**
 * First entry of the child list.
 * @return {Node} first child node (undefined when there are no children)
 */
get firstChild() {
	const children = this.childNodes;
	return children[0];
}
|
||||||
|
|
||||||
|
/**
 * Last entry of the child list, obtained through the arr_back helper.
 * @return {Node} last child node
 */
get lastChild() {
	const children = this.childNodes;
	return arr_back(children);
}
|
||||||
|
|
||||||
|
/**
 * Attributes with surrounding quotes removed and entities decoded.
 * The result is computed once from rawAttributes and cached in _attrs.
 * @return {Object} parsed and unescaped attributes
 */
get attributes() {
	if (!this._attrs) {
		this._attrs = {};
		const raw = this.rawAttributes;
		for (const name in raw) {
			// Valueless attributes are stored as null; expose them as ''.
			const quoted = raw[name] || '';
			// Drop one leading and one trailing quote character, if present.
			const unquoted = quoted.replace(/^['"]/, '').replace(/['"]$/, '');
			this._attrs[name] = decode(unquoted);
		}
	}
	return this._attrs;
}
|
||||||
|
|
||||||
|
/**
 * Attributes exactly as written in the tag (values keep their quotes and
 * entities). Parsed once from the rawAttrs string and cached in _rawAttrs.
 * @return {Object} parsed attributes
 */
get rawAttributes() {
	if (!this._rawAttrs) {
		const parsed = {} as RawAttributes;
		if (this.rawAttrs) {
			// Group 1: attribute name. Group 2 (optional): double-quoted, single-quoted or bare value.
			const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*("(?:[^"]*)"|'(?:[^']*)'|(?:\S+)))?/ig;
			for (let found = re.exec(this.rawAttrs); found; found = re.exec(this.rawAttrs)) {
				// An attribute without a value is recorded as null.
				parsed[found[1]] = found[2] || null;
			}
		}
		this._rawAttrs = parsed;
	}
	return this._rawAttrs;
}
|
||||||
|
|
||||||
|
/**
 * Set, replace or remove a single attribute on this element.
 * @param {string} key The attribute name
 * @param {string|number} value The value to set, or null / undefined to remove an attribute
 */
setAttribute(key: string, value: string | number) {
	// Invalidate the decoded cache; it is rebuilt on the next read of `attributes`.
	if (this._attrs) {
		delete this._attrs;
	}
	// The getter hands back the cached _rawAttrs map, so the edit below updates it in place.
	const raw = this.rawAttributes;
	if (value !== undefined && value !== null) {
		// NOTE(review): JSON.stringify supplies the surrounding quotes for strings but
		// escapes embedded quotes/backslashes JSON-style, not HTML-style - confirm intended.
		raw[key] = JSON.stringify(value);
	} else {
		delete raw[key];
	}
	// Re-serialise the whole map back into the raw attribute string.
	const pieces: string[] = [];
	for (const name of Object.keys(raw)) {
		const stored = raw[name];
		if (stored === undefined || stored === null) {
			pieces.push(name);
		} else {
			pieces.push(name + '=' + stored);
		}
	}
	this.rawAttrs = pieces.join(' ');
}
|
||||||
|
|
||||||
|
/**
 * Discard every current attribute and install the given set instead.
 * @param {Attributes} attributes the new attribute set
 */
setAttributes(attributes: Attributes) {
	// Both caches describe the old attribute string, so drop them.
	if (this._attrs) {
		delete this._attrs;
	}
	if (this._rawAttrs) {
		delete this._rawAttrs;
	}
	// Serialise the new set: bare name for null/undefined values, name=<JSON value> otherwise.
	const pieces: string[] = [];
	for (const name of Object.keys(attributes)) {
		const val = attributes[name];
		if (val === undefined || val === null) {
			pieces.push(name);
		} else {
			pieces.push(name + '=' + JSON.stringify(val));
		}
	}
	this.rawAttrs = pieces.join(' ');
}
|
||||||
|
}
|
||||||
12
src/nodes/node.ts
Normal file
12
src/nodes/node.ts
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
import NodeType from './type';
|
||||||
|
|
||||||
|
/**
 * Abstract base class shared by the concrete node kinds (TextNode, HTMLElement).
 */
export default abstract class Node {
	/** Kind of node, one of the NodeType values. */
	nodeType: NodeType;

	/** Child nodes; a fresh empty array for every instance. */
	childNodes: Node[] = [];

	/** Text content of the node. */
	text: string;

	/** Text content as it appeared in the source. */
	rawText: string;

	/** Serialise the node to a string; each subclass supplies its own form. */
	abstract toString(): string;
}
|
||||||
40
src/nodes/text.ts
Normal file
40
src/nodes/text.ts
Normal file
|
|
@ -0,0 +1,40 @@
|
||||||
|
import { decode } from 'he';
|
||||||
|
import NodeType from './type';
|
||||||
|
import Node from './node';
|
||||||
|
|
||||||
|
/**
 * TextNode to contain a text element in DOM tree.
 * @param {string} value raw text of the node (entities are not decoded on construction)
 */
export default class TextNode extends Node {
	constructor(value: string) {
		super();
		this.rawText = value;
	}

	/**
	 * Node Type declaration.
	 * @type {Number}
	 */
	nodeType = NodeType.TEXT_NODE;

	/**
	 * Get the unescaped text value of this node (rawText with entities decoded).
	 * @return {string} text content
	 */
	get text() {
		return decode(this.rawText);
	}

	/**
	 * Detect if the node contains only white space.
	 * @return {boolean} true when rawText is empty or consists solely of whitespace / `&nbsp;` entities
	 */
	get isWhitespace() {
		// rawText is not entity-decoded, so a non-breaking space may be present as the
		// literal "&nbsp;" entity, which \s alone does not match.
		return /^(\s|&nbsp;)*$/.test(this.rawText);
	}

	/**
	 * String form of the node: the decoded text.
	 * @return {string} same value as `text`
	 */
	toString() {
		return this.text;
	}
}
|
||||||
7
src/nodes/type.ts
Normal file
7
src/nodes/type.ts
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
/**
 * Numeric codes identifying the kind of a node. The values coincide with the
 * DOM `Node.nodeType` constants of the same names.
 */
enum NodeType {
	/** An element (tag) node. */
	ELEMENT_NODE = 1,
	/** A text node. */
	TEXT_NODE = 3,
	/** A comment node. */
	COMMENT_NODE = 8,
}
|
||||||
|
|
||||||
|
export default NodeType;
|
||||||
13
test/html.js
13
test/html.js
|
|
@ -3,14 +3,12 @@ var fs = require('fs');
|
||||||
var util = require('util');
|
var util = require('util');
|
||||||
|
|
||||||
var HTMLParser = require('../dist');
|
var HTMLParser = require('../dist');
|
||||||
|
var Matcher = require('../dist/matcher').default;
|
||||||
|
var HTMLElement = require('../dist/nodes/html').default;
|
||||||
|
var TextNode = require('../dist/nodes/text').default;
|
||||||
|
var CommentNode = require('../dist/nodes/comment').default;
|
||||||
|
|
||||||
describe('HTML Parser', function () {
|
describe('HTML Parser', function () {
|
||||||
|
|
||||||
var Matcher = HTMLParser.Matcher;
|
|
||||||
var HTMLElement = HTMLParser.HTMLElement;
|
|
||||||
var TextNode = HTMLParser.TextNode;
|
|
||||||
var CommentNode = HTMLParser.CommentNode;
|
|
||||||
|
|
||||||
describe('Matcher', function () {
|
describe('Matcher', function () {
|
||||||
it('should match corrent elements', function () {
|
it('should match corrent elements', function () {
|
||||||
var matcher = new Matcher('#id .a a.b *.a.b .a.b * a');
|
var matcher = new Matcher('#id .a a.b *.a.b .a.b * a');
|
||||||
|
|
@ -372,7 +370,8 @@ describe('HTML Parser', function () {
|
||||||
'c': '12',
|
'c': '12',
|
||||||
d: '&&<>foo'
|
d: '&&<>foo'
|
||||||
});
|
});
|
||||||
root.firstChild.toString().should.eql('<p c=12 d="&&<>foo"></p>');
|
root.firstChild.toString().should.eql('<p c=12 d="&&<>foo"></p>');
|
||||||
|
// root.firstChild.toString().should.eql('<p c=12 d="&&<>foo"></p>');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue