diff --git a/package.json b/package.json index 58770b8..0d069f0 100644 --- a/package.json +++ b/package.json @@ -6,10 +6,11 @@ "types": "dist/index.d.ts", "scripts": { "test": "mocha", + "lint": "eslint ./src/*.ts", "clean": "del-cli ./dist/", "ts:cjs": "tsc -m commonjs", "ts:umd": "tsc -t es5 -m umd -d false --outDir ./dist/umd/", - "build": "npm run clean && npm run ts:cjs && npm run ts:umd", + "build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:umd", "dev": "tsc -w", "pretest": "tsc -m commonjs" }, @@ -25,15 +26,20 @@ "he": "1.1.1" }, "devDependencies": { + "@typescript-eslint/eslint-plugin": "latest", + "@typescript-eslint/eslint-plugin-tslint": "latest", + "@typescript-eslint/parser": "latest", "@types/entities": "latest", "@types/he": "latest", "@types/node": "latest", "blanket": "latest", "del-cli": "latest", + "eslint": "latest", "mocha": "latest", "should": "latest", "spec": "latest", "travis-cov": "latest", + "tslint": "latest", "typescript": "next" }, "config": { @@ -58,4 +64,4 @@ "url": "https://github.com/taoqf/node-fast-html-parser/issues" }, "homepage": "https://github.com/taoqf/node-fast-html-parser" -} +} \ No newline at end of file diff --git a/src/index.ts b/src/index.ts index b0156ee..ecd7147 100755 --- a/src/index.ts +++ b/src/index.ts @@ -1,4 +1,4 @@ -import { decode, encode } from 'he'; +import { decode } from 'he'; export enum NodeType { ELEMENT_NODE = 1, @@ -481,7 +481,8 @@ export class HTMLElement extends Node { this._attrs = {}; const attrs = this.rawAttributes; for (const key in attrs) { - this._attrs[key] = decode(attrs[key]); + const val = attrs[key] || ''; + this._attrs[key] = decode(val.replace(/^['"]/, '').replace(/['"]$/, '')); } return this._attrs; } @@ -495,10 +496,10 @@ export class HTMLElement extends Node { return this._rawAttrs; const attrs = {} as RawAttributes; if (this.rawAttrs) { - const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig; + const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*("(?:[^"]*)"|'(?:[^']*)'|(?:\S+)))?/ig; let match: RegExpExecArray; while (match = re.exec(this.rawAttrs)) { - attrs[match[1]] = match[2] || match[3] || match[4] || ""; + attrs[match[1]] = match[2] || null; } } this._rawAttrs = attrs; @@ -508,20 +509,33 @@ export class HTMLElement extends Node { /** * Set an attribute value to the HTMLElement * @param {string} key The attribute name - * @param {string} value The value to set, or null / undefined to remove an attribute + * @param {string|number} value The value to set, or null / undefined to remove an attribute */ - setAttribute(key: string, value: string) { - //Update the attributes map - const attrs = this.attributes; - if(value===undefined || value===null) delete attrs[key]; - else attrs[key] = value+''; - //Update the raw attributes - if(this._rawAttrs) { - if(value===undefined || value===null) delete this._rawAttrs[key]; - else this._rawAttrs[key] = encode(value+''); + setAttribute(key: string, value: string | number) { + // Update the this.attributes + if (this._attrs) { + delete this._attrs; } - //Update rawString - this.rawAttrs = Object.keys(attrs).map(attr => attr+'='+encode(attrs[attr])).join(' '); + const attrs = this.rawAttributes; // ref this._rawAttrs + if (value === undefined || value === null) { + delete attrs[key]; + } else { + attrs[key] = JSON.stringify(value); + // if (typeof value === 'string') { + // attrs[key] = JSON.stringify(encode(value));//??? should we encode value here? + // } else { + // attrs[key] = JSON.stringify(value); + // } + } + // Update rawString + this.rawAttrs = Object.keys(attrs).map((name) => { + const val = attrs[name]; + if (val === undefined || val === null) { + return name; + } else { + return name + '=' + val; + } + }).join(' '); } /** @@ -529,18 +543,28 @@ export class HTMLElement extends Node { * @param {Attributes} attributes the new attribute set */ setAttributes(attributes: Attributes) { - //Update the attributes map - if(this.attributes) { - Object.keys(this.attributes).forEach(key => delete this.attributes[key]); - Object.keys(attributes).forEach(key => this.attributes[key] = attributes[key]+''); + // Update the this.attributes + if (this._attrs) { + delete this._attrs; } - //Update the raw attributes map - if(this.rawAttributes) { - Object.keys(this.rawAttributes).forEach(key => delete this.rawAttributes[key]); - Object.keys(attributes).forEach(key => this.rawAttributes[key] = encode(attributes[key]+'')); + // Update the raw attributes map + if (this._rawAttrs) { + delete this._rawAttrs; } - //Update rawString - this.rawAttrs = Object.keys(attributes).map(attr => attr+'='+encode(attributes[attr]+'')).join(' '); + // Update rawString + this.rawAttrs = Object.keys(attributes).map((name) => { + const val = attributes[name]; + if (val === undefined || val === null) { + return name; + } else { + return name + '=' + JSON.stringify(val); + // if (typeof val === 'string') { + // return name + '=' + JSON.stringify(encode(val)); //??? should we encode value here? + // } else { + // return name + '=' + JSON.stringify(val); + // } + } + }).join(' '); } } diff --git a/t.js b/t.js new file mode 100644 index 0000000..df42ae4 --- /dev/null +++ b/t.js @@ -0,0 +1,49 @@ +function rawAttributes(rawAttrs) { + const attrs = {}; + if (rawAttrs) { + // const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*(?:("[^"]*")|('[^']*')|(\S+)))?/ig; + const re = /\b([a-z][a-z0-9\-]*)(?:\s*=\s*("(?:[^"]*)"|'(?:[^']*)'|(?:\S+)))?/ig; + let match; + console.debug('0000', rawAttrs); + while (match = re.exec(rawAttrs)) { + console.debug('1111', match[1]); + const v = match[2] || ''; + console.debug('2222', v.replace(/^['"]/, '').replace(/['"]$/, '')); + attrs[match[1]] = v.replace(/^['"]/, '').replace(/['"]$/, ''); + } + } + return attrs; +} + +function attr2str(attrs) { + return Object.keys(attrs).map((name) => { + const val = attrs[name]; + if (val === undefined || val === null) { + return name; + } else { + return name + '=' + val + } + }).join(' ') +} + +function main() { + let r; + // r = rawAttributes('a="1"'); + // r = rawAttributes('a=\'1\''); + // r = rawAttributes('a='); + // r = rawAttributes('a'); + // r = rawAttributes('a=1'); + // r = rawAttributes('a=aa b="bb" c= \'cc\' d="\'dd\'" e=e\'e\"e f'); + r = attr2str({ + a: 'aa', + b: '"bb"', + c: "'cc'", + d: "'dd'", + e: `e'e"e`, + f: null + }); + console.debug(r); +} + +main(); + diff --git a/test/html.js b/test/html.js index ab2dfa6..bb1ed38 100644 --- a/test/html.js +++ b/test/html.js @@ -302,8 +302,8 @@ describe('HTML Parser', function () { var root = parseHTML('
'); root.firstChild.rawAttributes.should.eql({ 'a': '12', - 'data-id': '!$$&', - 'yAz': '1' + 'data-id': '"!$$&"', + 'yAz': '\'1\'' }); }); }); @@ -348,16 +348,31 @@ describe('HTML Parser', function () { }); root.firstChild.toString().should.eql(''); }); + it('should keep quotes arount value', function () { + var root = parseHTML(''); + root.firstChild.setAttribute('b', 13); + root.firstChild.setAttribute('c', '2'); + root.firstChild.attributes.should.eql({ + 'a': '12', + 'b': '13', + 'c': '2' + }); + root.firstChild.toString().should.eql(''); + }); }); describe('#setAttributes', function () { it('should return attributes of the element', function () { var root = parseHTML(''); - root.firstChild.setAttributes({c: 12}); + root.firstChild.setAttributes({ + c: 12, + d: '&&<>foo' + }); root.firstChild.attributes.should.eql({ 'c': '12', + d: '&&<>foo' }); - root.firstChild.toString().should.eql(''); + root.firstChild.toString().should.eql(''); }); });