var should = require('should'); var fs = require('fs'); var util = require('util'); var HTMLParser = require('../dist'); var Matcher = require('../dist/matcher').default; var HTMLElement = require('../dist/nodes/html').default; var TextNode = require('../dist/nodes/text').default; var CommentNode = require('../dist/nodes/comment').default; describe('HTML Parser', function () { describe('Matcher', function () { it('should match corrent elements', function () { var matcher = new Matcher('#id .a a.b *.a.b .a.b * a'); var MatchesNothingButStarEl = new HTMLElement('_', {}); var withIdEl = new HTMLElement('p', { id: 'id' }); var withClassNameEl = new HTMLElement('a', { class: 'a b' }); matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // #id matcher.advance(withClassNameEl).should.not.be.ok; // #id matcher.advance(withIdEl).should.be.ok; // #id matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // .a matcher.advance(withIdEl).should.not.be.ok; // .a matcher.advance(withClassNameEl).should.be.ok; // .a matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // a.b matcher.advance(withIdEl).should.not.be.ok; // a.b matcher.advance(withClassNameEl).should.be.ok; // a.b matcher.advance(withIdEl).should.not.be.ok; // *.a.b matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // *.a.b matcher.advance(withClassNameEl).should.be.ok; // *.a.b matcher.advance(withIdEl).should.not.be.ok; // .a.b matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // .a.b matcher.advance(withClassNameEl).should.be.ok; // .a.b matcher.advance(withIdEl).should.be.ok; // * matcher.rewind(); matcher.advance(MatchesNothingButStarEl).should.be.ok; // * matcher.rewind(); matcher.advance(withClassNameEl).should.be.ok; // * matcher.advance(withIdEl).should.not.be.ok; // a matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // a matcher.advance(withClassNameEl).should.be.ok; // a matcher.matched.should.be.ok; }); }); var parseHTML = HTMLParser.parse; describe('parse()', 
function () { it('should parse "

Hello

" and return root element', function () { var root = parseHTML('

Hello

'); var p = new HTMLElement('p', { id: 'id' }, 'id="id"'); p.appendChild(new HTMLElement('a', { class: 'cls' }, 'class=\'cls\'')) .appendChild(new TextNode('Hello')); var ul = p.appendChild(new HTMLElement('ul', {}, '')); ul.appendChild(new HTMLElement('li', {}, '')); ul.appendChild(new HTMLElement('li', {}, '')); p.appendChild(new HTMLElement('span', {}, '')); root.firstChild.should.eql(p); }); it('should parse "

" and return root element', function () { var root = parseHTML('

', { lowerCaseTagName: true }); var div = new HTMLElement('div', {}, ''); var a = div.appendChild(new HTMLElement('a', {}, '')); var img = a.appendChild(new HTMLElement('img', {}, '')); var p = div.appendChild(new HTMLElement('p', {}, '')); root.firstChild.should.eql(div); }); it('should deal uppercase', function () { const html = 'SISREG III

CONSULTA AO CADASTRO DE PACIENTES SUS



Processando...


'; var root = parseHTML(html, { lowerCaseTagName: true }); root.toString().should.eql('SISREG III

CONSULTA AO CADASTRO DE PACIENTES SUS



Processando...


'); // root.toString().firstChild.should.eql(div); }); it('should parse "

" and return root element', function () { var root = parseHTML('

'); var div = new HTMLElement('div', {}, ''); var a = div.appendChild(new HTMLElement('a', {}, '')); var img = a.appendChild(new HTMLElement('img', {}, '')); var p = div.appendChild(new HTMLElement('p', {}, '')); root.firstChild.should.eql(div); }); it('should parse "
" and return root element without comments', function () { var root = parseHTML('
'); var div = new HTMLElement('div', {}, ''); var a = div.appendChild(new HTMLElement('a', {}, '')); root.firstChild.should.eql(div); }); it('should parse "
" and return root element with comments', function () { var root = parseHTML('
', { comment: true }); var div = new HTMLElement('div', {}, ''); var a = div.appendChild(new HTMLElement('a', {}, '')); var comment = a.appendChild(new CommentNode(' my comment ')); root.firstChild.should.eql(div); }); it('should not parse HTML inside comments', function () { var root = parseHTML('
', { comment: true }); var div = new HTMLElement('div', {}, ''); var comment = div.appendChild(new CommentNode('')); root.firstChild.should.eql(div); }); it('should parse picture element', function () { var root = parseHTML('Example'); var picture = new HTMLElement('picture', {}, ''); var source = picture.appendChild(new HTMLElement('source', {}, 'srcset="/images/example-1.jpg 1200w, /images/example-2.jpg 1600w" sizes="100vw"')); var img = picture.appendChild(new HTMLElement('img', {}, 'src="/images/example.jpg" alt="Example"')); root.firstChild.should.eql(picture); }); it('should not extract text in script and style by default', function () { var root = parseHTML(''); root.firstChild.childNodes.should.be.empty; root.lastChild.childNodes.should.be.empty; }); it('should extract text in script and style when ask so', function () { var root = parseHTML('', { script: true, style: true }); root.firstChild.childNodes.should.not.be.empty; root.firstChild.childNodes.should.eql([new TextNode('1')]); root.firstChild.text.should.eql('1'); root.lastChild.childNodes.should.not.be.empty; root.lastChild.childNodes.should.eql([new TextNode('2&')]); root.lastChild.text.should.eql('2&'); root.lastChild.rawText.should.eql('2&'); }); it('should be able to parse "html/incomplete-script" file', function () { var root = parseHTML(fs.readFileSync(__dirname + '/html/incomplete-script').toString(), { script: true }); }); it('should be able to parse namespaces', function () { const namespacedXML = 'content'; parseHTML(namespacedXML).toString().should.eql(namespacedXML); }); it('should parse "

.." very fast', function () { for (var i = 0; i < 100; i++) parseHTML('

'); }); it('should parse "

.." fast', function () { for (var i = 0; i < 100; i++) parseHTML('

', { lowerCaseTagName: true }); }); // Test for broken tags.

// something

it('should parse "

content

other

" (fix h3, span closing tag) very fast', function () { var root = parseHTML(fs.readFileSync(__dirname + '/html/incomplete-script').toString()); }); }); describe('parseWithValidation', function () { // parse with validation tests it('should return Object with valid: true. does not count

as error. instead fixes it to

', function () { var result = parseHTML('

'); result.valid.should.eql(true); }) it('should return Object with valid: true. does not count

as error. instead fixes it to

', function () { var result = parseHTML('

'); result.valid.should.eql(true); }) it('should return Object with valid: false. does not count

as error', function () { var result = parseHTML('

'); result.valid.should.eql(false); }) it('hillcrestpartyrentals.html should return Object with valid: false. not closing

tag on line 476', function () { var result = parseHTML(fs.readFileSync(__dirname + '/html/hillcrestpartyrentals.html').toString(), { noFix: true }); result.valid.should.eql(false); }) it('google.html should return Object with valid: true', function () { var result = parseHTML(fs.readFileSync(__dirname + '/html/google.html').toString(), { noFix: true }); result.valid.should.eql(true); }) it('gmail.html should return Object with valid: true', function () { var result = parseHTML(fs.readFileSync(__dirname + '/html/gmail.html').toString(), { noFix: true }); result.valid.should.eql(true); }) it('ffmpeg.html should return Object with valid: false (extra opening

', function () { var result = parseHTML(fs.readFileSync(__dirname + '/html/ffmpeg.html').toString(), { noFix: true }); result.valid.should.eql(false); }) // fix issue speed test it('should fix "

" to "

"', function () { var result = parseHTML('

'); result.valid.should.eql(false); result.toString().should.eql('

'); }) it('should fix "

" to "

"', function () { var result = parseHTML('

'); result.valid.should.eql(false); result.toString().should.eql('

'); }) it('gmail.html should return Object with valid: true', function () { var result = parseHTML(fs.readFileSync(__dirname + '/html/gmail.html').toString().replace(/<\//gi, '<')); result.valid.should.eql(false); }) it('gmail.html should return Object with valid: true', function () { var result = parseHTML(fs.readFileSync(__dirname + '/html/nice.html').toString().replace(/<\//gi, '<')); result.valid.should.eql(false); }) }); describe('TextNode', function () { describe('#isWhitespace', function () { var node = new TextNode(''); node.isWhitespace.should.be.ok; node = new TextNode(' \t'); node.isWhitespace.should.be.ok; node = new TextNode(' \t  \t'); node.isWhitespace.should.be.ok; }); }); describe('HTMLElement', function () { describe('#removeWhitespace()', function () { it('should remove whitespaces while preserving nodes with content', function () { var root = parseHTML('

\r \n \t

123

'); var p = new HTMLElement('p', {}, ''); p.appendChild(new HTMLElement('h5', {}, '')) .appendChild(new TextNode('123')); root.firstChild.removeWhitespace().should.eql(p); }); }); describe('#rawAttributes', function () { it('should return escaped attributes of the element', function () { var root = parseHTML('

'); root.firstChild.rawAttributes.should.eql({ 'a': '12', 'data-id': '!$$&', 'yAz': '1' }); }); }); describe('#attributes', function () { it('should return attributes of the element', function () { var root = parseHTML('

'); root.firstChild.attributes.should.eql({ 'a': '12', 'data-id': '!$$&', 'yAz': '1', 'disabled': '', 'class': '' }); }); }); describe('#getAttribute', function () { it('should return value of the attribute', function () { var root = parseHTML('

'); root.firstChild.getAttribute('a').should.eql('a1b'); }); it('should return null when there is no such attribute', function () { var root = parseHTML('

'); should.equal(root.firstChild.getAttribute('b'), null); }); it('should return empty string as broser behavior', function () { var root = parseHTML(''); var input = root.firstChild; input.getAttribute('required').should.eql(''); }); it('should return null as broser behavior', function () { var root = parseHTML(''); var input = root.firstChild; input.setAttribute('readonly', null); input.getAttribute('readonly').should.eql('null'); }); }); describe('#setAttribute', function () { it('should edit the attributes of the element', function () { var root = parseHTML('

'); var attr = root.firstChild.attributes; root.firstChild.setAttribute('a', 13); attr.should.eql({ 'a': '13', }); root.firstChild.getAttribute('a').should.eql('13'); root.firstChild.toString().should.eql('

'); }); it('should add an attribute to the element', function () { var root = parseHTML('

'); root.firstChild.setAttribute('b', 13); root.firstChild.attributes.should.eql({ 'a': '12', 'b': '13', }); root.firstChild.toString().should.eql('

'); }); it('should convert value to string', function () { var root = parseHTML('

'); var p = root.firstChild; p.setAttribute('b', null); p.setAttribute('c', undefined); p.getAttribute('b').should.eql('null'); p.getAttribute('c').should.eql('undefined'); p.toString().should.eql('

'); }); it('should throw type Error', function () { var root = parseHTML('

'); var p = root.firstChild; should.throws(function () { p.setAttribute('b') }); should.throws(function () { p.setAttribute() }); }); it('should keep quotes arount value', function () { var root = parseHTML('

'); root.firstChild.setAttribute('b', 13); root.firstChild.setAttribute('c', '2'); root.firstChild.attributes.should.eql({ 'a': '12', 'b': '13', 'c': '2' }); root.firstChild.toString().should.eql('

'); }); }); describe('#setAttributes', function () { it('should return attributes of the element', function () { var root = parseHTML('

'); root.firstChild.setAttributes({ c: 12, d: '&&<>foo' }); root.firstChild.attributes.should.eql({ 'c': '12', d: '&&<>foo' }); root.firstChild.toString().should.eql('

'); // root.firstChild.toString().should.eql('

'); }); }); describe('#removeAttribute', function () { var root = parseHTML(''); var input = root.firstChild; input.removeAttribute('required'); input.toString().should.eql(''); }); describe('#hasAttribute', function () { it('should return true or false when has or has not some attribute', function () { var root = parseHTML(''); var input = root.firstChild; input.hasAttribute('required').should.eql(true); input.removeAttribute('required'); input.hasAttribute('required').should.eql(false); }); }); describe('#querySelector()', function () { it('should return correct elements in DOM tree', function () { var root = parseHTML('
'); root.querySelector('#id').should.eql(root.firstChild); root.querySelector('span.a').should.eql(root.firstChild.firstChild.firstChild); root.querySelector('span.b').should.eql(root.firstChild.firstChild.firstChild); root.querySelector('span.a.b').should.eql(root.firstChild.firstChild.firstChild); root.querySelector('#id .b').should.eql(root.firstChild.firstChild.firstChild); root.querySelector('#id span').should.eql(root.firstChild.firstChild.firstChild); root.querySelector('[data-id=myid]').should.eql(root.firstChild); root.querySelector('[data-id="myid"]').should.eql(root.firstChild); }); }); describe('#querySelectorAll()', function () { it('should return correct elements in DOM tree', function () { var root = parseHTML('
'); root.querySelectorAll('#id').should.eql([root.firstChild]); root.querySelectorAll('span.a').should.eql([root.firstChild.firstChild.firstChild]); root.querySelectorAll('span.b').should.eql([root.firstChild.firstChild.firstChild]); root.querySelectorAll('span.a.b').should.eql([root.firstChild.firstChild.firstChild]); root.querySelectorAll('#id .b').should.eql([root.firstChild.firstChild.firstChild]); root.querySelectorAll('#id span').should.eql(root.firstChild.firstChild.childNodes); root.querySelectorAll('#id, #id .b').should.eql([root.firstChild, root.firstChild.firstChild.firstChild]); }); it('should return just one element', function () { var root = parseHTML('