const should = require('should');
const fs = require('fs');
const util = require('util');

const HTMLParser = require('../dist');

// NOTE(review): many fixture strings in this file contain only newlines or
// plain text even though the assertions next to them expect elements,
// comments and attributes (e.g. parseHTML('') is expected to yield script
// and style children). The markup appears to have been stripped from this
// copy of the file -- confirm against version control and restore the
// original literals before trusting any result. String contents are kept
// character-for-character; embedded line breaks are written as \n escapes
// so that the file parses.

/**
 * Mocha suite for the HTML parser exported by `../dist`.
 *
 * Exercises the selector Matcher, parse() (with and without options),
 * the `valid` flag returned by parse(), TextNode, the HTMLElement
 * attribute / query / content helpers, and toString() serialisation.
 * Assertions rely on should.js extending Object.prototype.
 */
describe('HTML Parser', function () {
  const Matcher = HTMLParser.Matcher;
  const HTMLElement = HTMLParser.HTMLElement;
  const TextNode = HTMLParser.TextNode;
  const CommentNode = HTMLParser.CommentNode;
  const parseHTML = HTMLParser.parse;

  /**
   * Reads a fixture from the `html` directory next to this file.
   * @param {string} name - file name inside `html/`
   * @returns {string} the file content
   */
  function readFixture(name) {
    return fs.readFileSync(__dirname + '/html/' + name).toString();
  }

  describe('Matcher', function () {
    it('should match correct elements', function () {
      const matcher = new Matcher('#id .a a.b *.a.b .a.b * a');
      const matchesNothingButStarEl = new HTMLElement('_', {});
      const withIdEl = new HTMLElement('p', { id: 'id' });
      const withClassNameEl = new HTMLElement('a', { class: 'a b' });

      // The trailing comment names the selector part the matcher is
      // currently waiting for.
      matcher.advance(matchesNothingButStarEl).should.not.be.ok; // #id
      matcher.advance(withClassNameEl).should.not.be.ok; // #id
      matcher.advance(withIdEl).should.be.ok; // #id

      matcher.advance(matchesNothingButStarEl).should.not.be.ok; // .a
      matcher.advance(withIdEl).should.not.be.ok; // .a
      matcher.advance(withClassNameEl).should.be.ok; // .a

      matcher.advance(matchesNothingButStarEl).should.not.be.ok; // a.b
      matcher.advance(withIdEl).should.not.be.ok; // a.b
      matcher.advance(withClassNameEl).should.be.ok; // a.b

      matcher.advance(withIdEl).should.not.be.ok; // *.a.b
      matcher.advance(matchesNothingButStarEl).should.not.be.ok; // *.a.b
      matcher.advance(withClassNameEl).should.be.ok; // *.a.b

      matcher.advance(withIdEl).should.not.be.ok; // .a.b
      matcher.advance(matchesNothingButStarEl).should.not.be.ok; // .a.b
      matcher.advance(withClassNameEl).should.be.ok; // .a.b

      // rewind() steps back so the same `*` part is tried with each element.
      matcher.advance(withIdEl).should.be.ok; // *
      matcher.rewind();
      matcher.advance(matchesNothingButStarEl).should.be.ok; // *
      matcher.rewind();
      matcher.advance(withClassNameEl).should.be.ok; // *

      matcher.advance(withIdEl).should.not.be.ok; // a
      matcher.advance(matchesNothingButStarEl).should.not.be.ok; // a
      matcher.advance(withClassNameEl).should.be.ok; // a

      matcher.matched.should.be.ok;
    });
  });

  describe('parse()', function () {
    it('should parse "\n\nHello\n\n" and return root element', function () {
      const root = parseHTML('\n\nHello\n\n');

      const p = new HTMLElement('p', { id: 'id' }, 'id="id"');
      p.appendChild(new HTMLElement('a', { class: 'cls' }, 'class=\'cls\''))
        .appendChild(new TextNode('Hello'));
      const ul = p.appendChild(new HTMLElement('ul', {}, ''));
      ul.appendChild(new HTMLElement('li', {}, ''));
      ul.appendChild(new HTMLElement('li', {}, ''));
      p.appendChild(new HTMLElement('span', {}, ''));

      root.firstChild.should.eql(p);
    });

    it('should parse "\n\n" and return root element', function () {
      const root = parseHTML('\n\n', { lowerCaseTagName: true });

      const div = new HTMLElement('div', {}, '');
      const a = div.appendChild(new HTMLElement('a', {}, ''));
      a.appendChild(new HTMLElement('img', {}, ''));
      div.appendChild(new HTMLElement('p', {}, ''));

      root.firstChild.should.eql(div);
    });

    it('should parse "\n\n" and return root element', function () {
      const root = parseHTML('\n\n');

      const div = new HTMLElement('div', {}, '');
      const a = div.appendChild(new HTMLElement('a', {}, ''));
      a.appendChild(new HTMLElement('img', {}, ''));
      div.appendChild(new HTMLElement('p', {}, ''));

      root.firstChild.should.eql(div);
    });

    it('should parse "\n" and return root element without comments', function () {
      const root = parseHTML('\n');

      const div = new HTMLElement('div', {}, '');
      div.appendChild(new HTMLElement('a', {}, ''));

      root.firstChild.should.eql(div);
    });

    it('should parse "\n" and return root element with comments', function () {
      const root = parseHTML('\n', { comment: true });

      const div = new HTMLElement('div', {}, '');
      const a = div.appendChild(new HTMLElement('a', {}, ''));
      a.appendChild(new CommentNode(' my comment '));

      root.firstChild.should.eql(div);
    });

    it('should not parse HTML inside comments', function () {
      const root = parseHTML('\n', { comment: true });

      const div = new HTMLElement('div', {}, '');
      div.appendChild(new CommentNode(''));

      root.firstChild.should.eql(div);
    });

    it('should parse picture element', function () {
      const root = parseHTML('Example');

      const picture = new HTMLElement('picture', {}, '');
      picture.appendChild(new HTMLElement('source', {}, 'srcset="/images/example-1.jpg 1200w, /images/example-2.jpg 1600w" sizes="100vw"'));
      picture.appendChild(new HTMLElement('img', {}, 'src="/images/example.jpg" alt="Example"'));

      root.firstChild.should.eql(picture);
    });

    it('should not extract text in script and style by default', function () {
      const root = parseHTML('');

      root.firstChild.childNodes.should.be.empty;
      root.lastChild.childNodes.should.be.empty;
    });

    it('should extract text in script and style when ask so', function () {
      const root = parseHTML('', { script: true, style: true });

      root.firstChild.childNodes.should.not.be.empty;
      root.firstChild.childNodes.should.eql([new TextNode('1')]);
      root.firstChild.text.should.eql('1');

      root.lastChild.childNodes.should.not.be.empty;
      root.lastChild.childNodes.should.eql([new TextNode('2&')]);
      root.lastChild.text.should.eql('2&');
      root.lastChild.rawText.should.eql('2&');
    });

    it('should be able to parse "html/incomplete-script" file', function () {
      // Passes as long as parsing does not throw.
      parseHTML(readFixture('incomplete-script'), { script: true });
    });

    it('should parse "\n\n.." very fast', function () {
      for (let i = 0; i < 100; i++) {
        parseHTML('\n\n');
      }
    });

    it('should parse "\n\n.." fast', function () {
      for (let i = 0; i < 100; i++) {
        parseHTML('\n\n', { lowerCaseTagName: true });
      }
    });

    // Test for broken tags.
    // something
    // NOTE(review): "something" looks like the remainder of the comment
    // above after its markup was lost -- confirm against the original.
    it('should parse "\n\ncontent\n\nother\n\n" (fix h3, span closing tag) very fast', function () {
      // Passes as long as parsing does not throw.
      parseHTML(readFixture('incomplete-script'));
    });
  });

  describe('parseWithValidation', function () {
    // parse with validation tests
    it('should return Object with valid: true. does not count\n\nas error. instead fixes it to\n\n', function () {
      const result = parseHTML('\n\n');
      result.valid.should.eql(true);
    });

    it('should return Object with valid: true. does not count\n\nas error. instead fixes it to\n\n', function () {
      const result = parseHTML('\n\n');
      result.valid.should.eql(true);
    });

    it('should return Object with valid: false. does not count\n\nas error', function () {
      const result = parseHTML('\n\n');
      result.valid.should.eql(false);
    });

    it('hillcrestpartyrentals.html should return Object with valid: false. not closing\n\ntag on line 476', function () {
      const result = parseHTML(readFixture('hillcrestpartyrentals.html'), { noFix: true });
      result.valid.should.eql(false);
    });

    it('google.html should return Object with valid: true', function () {
      const result = parseHTML(readFixture('google.html'), { noFix: true });
      result.valid.should.eql(true);
    });

    it('gmail.html should return Object with valid: true', function () {
      const result = parseHTML(readFixture('gmail.html'), { noFix: true });
      result.valid.should.eql(true);
    });

    it('ffmpeg.html should return Object with valid: false (extra opening\n\n', function () {
      const result = parseHTML(readFixture('ffmpeg.html'), { noFix: true });
      result.valid.should.eql(false);
    });

    // fix issue speed test
    it('should fix "\n\n" to "\n\n"', function () {
      const result = parseHTML('\n\n');
      result.valid.should.eql(false);
      result.toString().should.eql('\n\n');
    });

    it('should fix "\n\n" to "\n\n"', function () {
      const result = parseHTML('\n\n');
      result.valid.should.eql(false);
      result.toString().should.eql('\n\n');
    });

    // The next two tests turn every closing tag into an opening tag
    // ("</" -> "<") and expect the document to be reported as invalid.
    it('gmail.html with closing tags turned into opening tags should return Object with valid: false', function () {
      const result = parseHTML(readFixture('gmail.html').replace(/<\//gi, '<'));
      result.valid.should.eql(false);
    });

    it('nice.html with closing tags turned into opening tags should return Object with valid: false', function () {
      const result = parseHTML(readFixture('nice.html').replace(/<\//gi, '<'));
      result.valid.should.eql(false);
    });
  });

  describe('TextNode', function () {
    describe('#isWhitespace', function () {
      // Assertions live inside it() so a failure is reported as a failing
      // test instead of running while Mocha is still collecting suites.
      it('should be truthy for empty and whitespace-only text', function () {
        const samples = ['', ' \t', ' \t  \t'];
        samples.forEach(function (text) {
          new TextNode(text).isWhitespace.should.be.ok;
        });
      });
    });
  });

  describe('HTMLElement', function () {
    describe('#removeWhitespace()', function () {
      it('should remove whitespaces while preserving nodes with content', function () {
        const root = parseHTML('\n\n\r \n \t\n\n123\n\n');

        const p = new HTMLElement('p', {}, '');
        p.appendChild(new HTMLElement('h5', {}, ''))
          .appendChild(new TextNode('123'));

        root.firstChild.removeWhitespace().should.eql(p);
      });
    });

    describe('#rawAttributes', function () {
      it('should return escaped attributes of the element', function () {
        const root = parseHTML('\n\n');
        root.firstChild.rawAttributes.should.eql({
          'a': '12',
          'data-id': '!$$&',
          'yAz': '1'
        });
      });
    });

    describe('#attributes', function () {
      it('should return attributes of the element', function () {
        const root = parseHTML('\n\n');
        root.firstChild.attributes.should.eql({
          'a': '12',
          'data-id': '!$$&',
          'yAz': '1',
          'disabled': '',
          'class': ''
        });
      });
    });

    describe('#setAttribute', function () {
      it('should edit the attributes of the element', function () {
        const root = parseHTML('\n\n');
        root.firstChild.setAttribute('a', 13);
        root.firstChild.attributes.should.eql({
          'a': '13',
        });
        root.firstChild.toString().should.eql('\n\n');
      });

      it('should add an attribute to the element', function () {
        const root = parseHTML('\n\n');
        root.firstChild.setAttribute('b', 13);
        root.firstChild.attributes.should.eql({
          'a': '12',
          'b': '13',
        });
        root.firstChild.toString().should.eql('\n\n');
      });

      it('should remove an attribute from the element', function () {
        const root = parseHTML('\n\n');
        // Both a null value and an omitted value are expected to remove.
        root.firstChild.setAttribute('b', null);
        root.firstChild.setAttribute('c');
        root.firstChild.attributes.should.eql({
          'a': '12',
        });
        root.firstChild.toString().should.eql('\n\n');
      });
    });

    describe('#setAttributes', function () {
      it('should return attributes of the element', function () {
        const root = parseHTML('\n\n');
        root.firstChild.setAttributes({c: 12});
        root.firstChild.attributes.should.eql({
          'c': '12',
        });
        root.firstChild.toString().should.eql('\n\n');
      });
    });

    describe('#querySelector()', function () {
      it('should return correct elements in DOM tree', function () {
        const root = parseHTML('\n');
        const outer = root.firstChild;
        const firstSpan = root.firstChild.firstChild.firstChild;

        root.querySelector('#id').should.eql(outer);
        root.querySelector('span.a').should.eql(firstSpan);
        root.querySelector('span.b').should.eql(firstSpan);
        root.querySelector('span.a.b').should.eql(firstSpan);
        root.querySelector('#id .b').should.eql(firstSpan);
        root.querySelector('#id span').should.eql(firstSpan);
        root.querySelector('[data-id=myid]').should.eql(outer);
        root.querySelector('[data-id="myid"]').should.eql(outer);
      });
    });

    describe('#querySelectorAll()', function () {
      it('should return correct elements in DOM tree', function () {
        const root = parseHTML('\n');
        const outer = root.firstChild;
        const firstSpan = root.firstChild.firstChild.firstChild;

        root.querySelectorAll('#id').should.eql([outer]);
        root.querySelectorAll('span.a').should.eql([firstSpan]);
        root.querySelectorAll('span.b').should.eql([firstSpan]);
        root.querySelectorAll('span.a.b').should.eql([firstSpan]);
        root.querySelectorAll('#id .b').should.eql([firstSpan]);
        root.querySelectorAll('#id span').should.eql(root.firstChild.firstChild.childNodes);
        root.querySelectorAll('#id, #id .b').should.eql([outer, firstSpan]);
      });
    });

    describe('#structuredText', function () {
      it('should return correct structured text', function () {
        const root = parseHTML('o\n\na\n\nb\n\nc\n');
        root.structuredText.should.eql('o\na\nb\nc');
      });

      it('should not return comments in structured text', function () {
        const root = parseHTML('o\n\na\n\n', { comment: true });
        root.structuredText.should.eql('o\na');
      });
    });

    describe('#set_content', function () {
      it('set content string', function () {
        const root = parseHTML('\n');
        root.childNodes[0].set_content('\nabc\nbla\n');
        root.toString().should.eql('\nabc\nbla\n');
      });

      it('set content nodes', function () {
        const root = parseHTML('\n');
        root.childNodes[0].set_content(parseHTML('\nabc\nbla\n').childNodes);
        root.toString().should.eql('\nabc\nbla\n');
      });

      it('set content node', function () {
        const root = parseHTML('\n');
        root.childNodes[0].set_content(parseHTML('\nabc\nbla\n').childNodes[0]);
        root.toString().should.eql('\nabc\nbla\n');
      });

      it('set content text', function () {
        const root = parseHTML('\n');
        root.childNodes[0].set_content('abc');
        root.toString().should.eql('\nabc\n');
      });
    });
  });

  describe('stringify', function () {
    it('#toString()', function () {
      const html = '\n\nHello\n\n  • aaaaa\nbbb\n\n';
      const root = parseHTML(html);
      root.toString().should.eql(html);
    });

    it('#toString() should not return comments by default', function () {
      const html = '\n\n';
      const result = '\n\n';
      const root = parseHTML(html);
      root.toString().should.eql(result);
    });

    it('#toString() should return comments when specified', function () {
      const html = '\n\n';
      const root = parseHTML(html, { comment: true });
      root.toString().should.eql(html);
    });
  });

  describe('Comment Element', function () {
    it('comment nodeType should be 8', function () {
      const root = parseHTML('', { comment: true });
      root.firstChild.nodeType.should.eql(8);
    });
  });

  describe('Custom Element', function () {
    it('parse "" tagName should be "my-widget"', function () {
      const root = parseHTML('');
      root.firstChild.tagName.should.eql('my-widget');
    });
  });

  describe('Custom Element multiple dash', function () {
    it('parse "" tagName should be "my-new-widget"', function () {
      const root = parseHTML('');
      root.firstChild.tagName.should.eql('my-new-widget');
    });
  });
});