var should = require('should');
var fs = require('fs');
var util = require('util');
var HTMLParser = require('../');
describe('HTML Parser', function() {
var Matcher = HTMLParser.Matcher;
var HTMLElement = HTMLParser.HTMLElement;
var TextNode = HTMLParser.TextNode;
describe('Matcher', function() {
// Drives one Matcher through the selector '#id .a a.b *.a.b .a.b * a', one
// segment at a time. For every segment the elements that must be rejected are
// offered first and the accepted one last; the trailing comment on each line
// names the segment under test at that point.
it('should match correct elements', function() {
  var matcher = new Matcher('#id .a a.b *.a.b .a.b * a');
  // Tag '_' with no attributes: the test expects it to satisfy only '*'.
  var matchesNothingButStarEl = new HTMLElement('_', {});
  var withIdEl = new HTMLElement('p', { id: 'id' });
  var withClassNameEl = new HTMLElement('a', { class: 'a b' });

  matcher.advance(matchesNothingButStarEl).should.not.be.ok; // #id
  matcher.advance(withClassNameEl).should.not.be.ok; // #id
  matcher.advance(withIdEl).should.be.ok; // #id

  matcher.advance(matchesNothingButStarEl).should.not.be.ok; // .a
  matcher.advance(withIdEl).should.not.be.ok; // .a
  matcher.advance(withClassNameEl).should.be.ok; // .a

  matcher.advance(matchesNothingButStarEl).should.not.be.ok; // a.b
  matcher.advance(withIdEl).should.not.be.ok; // a.b
  matcher.advance(withClassNameEl).should.be.ok; // a.b

  matcher.advance(withIdEl).should.not.be.ok; // *.a.b
  matcher.advance(matchesNothingButStarEl).should.not.be.ok; // *.a.b
  matcher.advance(withClassNameEl).should.be.ok; // *.a.b

  matcher.advance(withIdEl).should.not.be.ok; // .a.b
  matcher.advance(matchesNothingButStarEl).should.not.be.ok; // .a.b
  matcher.advance(withClassNameEl).should.be.ok; // .a.b

  // '*' must accept all three elements. rewind() is called between attempts,
  // presumably to step back to the '*' segment after a successful advance —
  // inferred from this sequence; confirm against Matcher.
  matcher.advance(withIdEl).should.be.ok; // *
  matcher.rewind();
  matcher.advance(matchesNothingButStarEl).should.be.ok; // *
  matcher.rewind();
  matcher.advance(withClassNameEl).should.be.ok; // *

  matcher.advance(withIdEl).should.not.be.ok; // a
  matcher.advance(matchesNothingButStarEl).should.not.be.ok; // a
  matcher.advance(withClassNameEl).should.be.ok; // a

  // Every segment has been consumed, so the matcher must report a full match.
  matcher.matched.should.be.ok;
});
});
var parseHTML = HTMLParser.parse;
describe('parse', function() {
// Parses a small nested document and compares the first child of the returned
// root against a hand-built element tree.
// NOTE(review): the markup in the title and in the parseHTML() call was
// reconstructed from the expected tree below (tag nesting plus the raw
// attribute strings 'id="id"' and "class='cls'"); confirm it against the
// project's original fixture.
it('should parse "<p id="id"><a class=\'cls\'>Hello</a><ul><li></li><li></li></ul><span></span></p>" and return root element', function() {
  var root = parseHTML('<p id="id"><a class=\'cls\'>Hello</a><ul><li></li><li></li></ul><span></span></p>');

  // Expected tree: p#id > [ a.cls > "Hello", ul > [ li, li ], span ]
  var p = new HTMLElement('p', { id: 'id' }, 'id="id"');
  p.appendChild(new HTMLElement('a', { class: 'cls' }, 'class=\'cls\''))
    .appendChild(new TextNode('Hello'));
  var ul = p.appendChild(new HTMLElement('ul', {}, ''));
  ul.appendChild(new HTMLElement('li', {}, ''));
  ul.appendChild(new HTMLElement('li', {}, ''));
  p.appendChild(new HTMLElement('span', {}, ''));

  root.firstChild.should.eql(p);
});
// Parses mixed-case markup with the lowerCaseTagName option and expects the
// resulting tree to use lower-case tag names.
// NOTE(review): the markup in the title and in the parseHTML() call was
// reconstructed from the expected tree below (div > [ a > img, p ]); the
// mixed-case tags are an assumption chosen to exercise the option — confirm
// against the project's original fixture.
it('should parse "<DIV><a><img/></A><p></P></div>" and return root element', function() {
  var root = parseHTML('<DIV><a><img/></A><p></P></div>', {
    lowerCaseTagName: true
  });

  // Expected tree: div > [ a > img, p ]
  var div = new HTMLElement('div', {}, '');
  var a = div.appendChild(new HTMLElement('a', {}, ''));
  a.appendChild(new HTMLElement('img', {}, ''));
  div.appendChild(new HTMLElement('p', {}, ''));

  root.firstChild.should.eql(div);
});
// Parses a small lower-case document with default options and compares the
// first child of the returned root against a hand-built element tree.
// NOTE(review): the markup in the title and in the parseHTML() call was
// reconstructed from the expected tree below (div > [ a > img, p ]); confirm
// against the project's original fixture.
it('should parse "<div><a><img/></a><p></p></div>" and return root element', function() {
  var root = parseHTML('<div><a><img/></a><p></p></div>');

  // Expected tree: div > [ a > img, p ]
  var div = new HTMLElement('div', {}, '');
  var a = div.appendChild(new HTMLElement('a', {}, ''));
  a.appendChild(new HTMLElement('img', {}, ''));
  div.appendChild(new HTMLElement('p', {}, ''));

  root.firstChild.should.eql(div);
});
// With default options the first and last top-level elements are expected to
// have no child nodes, i.e. their text content is not turned into TextNodes.
// NOTE(review): the markup passed to parseHTML() was reconstructed from the
// test title (a script element followed by a style element); confirm against
// the project's original fixture.
it('should not extract text in script and style by default', function() {
  var root = parseHTML('<script>1</script><style>2</style>');
  root.firstChild.childNodes.should.be.empty;
  root.lastChild.childNodes.should.be.empty;
});
// With the script and style options enabled, the text inside the first and
// last top-level elements must be exposed as a single TextNode each.
// NOTE(review): the markup passed to parseHTML() was reconstructed from the
// assertions below (first element contains '1', last element contains '2')
// and the test title; confirm against the project's original fixture.
it('should extract text in script and style when ask so', function() {
  var root = parseHTML('<script>1</script><style>2</style>', {
    script: true,
    style: true
  });
  root.firstChild.childNodes.should.not.be.empty;
  root.firstChild.childNodes.should.eql([new TextNode('1')]);
  root.lastChild.childNodes.should.not.be.empty;
  root.lastChild.childNodes.should.eql([new TextNode('2')]);
});
// Smoke test: reads the 'html/incomplete-script' fixture next to this file and
// parses it with script extraction enabled. Nothing is asserted on the result;
// the test passes as long as reading and parsing do not throw.
it('should be able to parse "html/incomplete-script" file', function() {
  var fixturePath = __dirname + '/html/incomplete-script';
  var markup = fs.readFileSync(fixturePath).toString();
  var options = {
    script: true
  };
  parseHTML(markup, options);
});
// Repetition check: calls the parser 100 times with default options. There is
// no assertion; the test passes as long as none of the calls throws.
// NOTE(review): the parsed literal is an empty string although the title
// reads ".." — presumably a markup fixture is missing here; confirm.
it('should parse ".." very fast', function() {
  var remaining = 100;
  while (remaining > 0) {
    parseHTML('');
    remaining -= 1;
  }
});
// Repetition check: calls the parser 100 times with lowerCaseTagName enabled.
// There is no assertion; the test passes as long as none of the calls throws.
// NOTE(review): the parsed literal is an empty string although the title
// reads ".." — presumably a markup fixture is missing here; confirm.
it('should parse ".." fast', function() {
  var round = 0;
  while (round < 100) {
    // A fresh options object is passed on every call, as before.
    parseHTML('', {
      lowerCaseTagName: true
    });
    round += 1;
  }
});
});
describe('TextNode', function() {
  describe('isWhitespace', function() {
    // The assertions live inside an it() so they run as a reported test case
    // rather than while the suite is being collected, where a failure would
    // abort loading instead of failing a single test.
    it('should be truthy for empty and whitespace-only text', function() {
      var node = new TextNode('');
      node.isWhitespace.should.be.ok;
      node = new TextNode(' \t');
      node.isWhitespace.should.be.ok;
      node = new TextNode(' \t \t');
      node.isWhitespace.should.be.ok;
    });
  });
});
describe('HTMLElement', function() {
describe('removeWhitespace', function() {
// removeWhitespace() must drop the whitespace-only text in front of the h5
// while keeping the h5 and its '123' text, and must return the element so the
// result can be compared directly.
// NOTE(review): the markup passed to parseHTML() was reconstructed from the
// surviving whitespace escapes and the expected tree below (p > h5 > "123");
// confirm against the project's original fixture.
it('should remove whitespaces while preserving nodes with content', function() {
  var root = parseHTML('<p> \r \n \t <h5>123</h5></p>');

  // Expected tree after cleanup: p > h5 > "123"
  var p = new HTMLElement('p', {}, '');
  p.appendChild(new HTMLElement('h5', {}, ''))
    .appendChild(new TextNode('123'));

  root.firstChild.removeWhitespace().should.eql(p);
});
});
describe('rawAttributes', function() {
// rawAttributes is expected to expose attribute values exactly as written in
// the markup, i.e. with character references still escaped.
// NOTE(review): the markup passed to parseHTML() was reconstructed from the
// expected map below. The expected 'data-id' value was restored to its
// escaped form ('&amp;') because the title asks for escaped attributes —
// this is an inference; confirm against the project's original fixture.
it('should return escaped attributes of the element', function() {
  var root = parseHTML('<p a="12" data-id="!$$&amp;" yAz="1"></p>');
  root.firstChild.rawAttributes.should.eql({
    'a': '12',
    'data-id': '!$$&amp;',
    'yAz': '1'
  });
});
});
describe('attributes', function() {
// attributes is expected to expose attribute values keyed by the attribute
// name as written (note the preserved case of 'yAz').
// NOTE(review): the markup passed to parseHTML() was reconstructed from the
// expected map below; writing the ampersand as '&amp;' assumes the parser
// decodes character references for this accessor — confirm against the
// project's original fixture.
it('should return attributes of the element', function() {
  var root = parseHTML('<p a="12" data-id="!$$&amp;" yAz="1"></p>');
  root.firstChild.attributes.should.eql({
    'a': '12',
    'data-id': '!$$&',
    'yAz': '1'
  });
});
});
describe('querySelectorAll', function() {
// Exercises id, tag.class, multi-class and descendant selectors against a
// three-level tree: an element with id "id", one wrapper child, and spans
// inside the wrapper of which only the first carries classes "a" and "b".
// NOTE(review): the markup passed to parseHTML() was reconstructed from the
// selector assertions below; the outer tag names are an assumption — confirm
// against the project's original fixture.
it('should return correct elements in DOM tree', function() {
  var root = parseHTML('<a id="id"><div><span class="a b"></span><span></span><span></span></div></a>');
  root.querySelectorAll('#id').should.eql([root.firstChild]);
  root.querySelectorAll('span.a').should.eql([root.firstChild.firstChild.firstChild]);
  root.querySelectorAll('span.b').should.eql([root.firstChild.firstChild.firstChild]);
  root.querySelectorAll('span.a.b').should.eql([root.firstChild.firstChild.firstChild]);
  root.querySelectorAll('#id .b').should.eql([root.firstChild.firstChild.firstChild]);
  root.querySelectorAll('#id span').should.eql(root.firstChild.firstChild.childNodes);
});
});
describe('structuredText', function() {
// structuredText is expected to put each of the pieces 'o', 'a', 'b' and 'c'
// on its own line.
// NOTE(review): the markup passed to parseHTML() was reconstructed from the
// surviving text ('o', 'a', 'b', 'c') and the expected output below; the
// choice of <span> and <p> assumes <p> is treated as a block element and
// <span> as inline — confirm against the project's original fixture.
it('should return correct structured text', function() {
  var root = parseHTML('<span>o<p>a</p><p>b</p>c</span>');
  root.structuredText.should.eql('o\na\nb\nc');
});
});
});
});