var should = require('should'); var fs = require('fs'); var util = require('util'); var HTMLParser = require('../dist'); describe('HTML Parser', function () { var Matcher = HTMLParser.Matcher; var HTMLElement = HTMLParser.HTMLElement; var TextNode = HTMLParser.TextNode; describe('Matcher', function () { it('should match corrent elements', function () { var matcher = new Matcher('#id .a a.b *.a.b .a.b * a'); var MatchesNothingButStarEl = new HTMLElement('_', {}); var withIdEl = new HTMLElement('p', { id: 'id' }); var withClassNameEl = new HTMLElement('a', { class: 'a b' }); matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // #id matcher.advance(withClassNameEl).should.not.be.ok; // #id matcher.advance(withIdEl).should.be.ok; // #id matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // .a matcher.advance(withIdEl).should.not.be.ok; // .a matcher.advance(withClassNameEl).should.be.ok; // .a matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // a.b matcher.advance(withIdEl).should.not.be.ok; // a.b matcher.advance(withClassNameEl).should.be.ok; // a.b matcher.advance(withIdEl).should.not.be.ok; // *.a.b matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // *.a.b matcher.advance(withClassNameEl).should.be.ok; // *.a.b matcher.advance(withIdEl).should.not.be.ok; // .a.b matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // .a.b matcher.advance(withClassNameEl).should.be.ok; // .a.b matcher.advance(withIdEl).should.be.ok; // * matcher.rewind(); matcher.advance(MatchesNothingButStarEl).should.be.ok; // * matcher.rewind(); matcher.advance(withClassNameEl).should.be.ok; // * matcher.advance(withIdEl).should.not.be.ok; // a matcher.advance(MatchesNothingButStarEl).should.not.be.ok; // a matcher.advance(withClassNameEl).should.be.ok; // a matcher.matched.should.be.ok; }); }); var parseHTML = HTMLParser.parse; describe('parse()', function () { it('should parse "
tag on line 476', function () { var result = parseHTML(fs.readFileSync(__dirname + '/html/hillcrestpartyrentals.html').toString(), { noFix: true }); result.valid.should.eql(false); }) it('google.html should return Object with valid: true', function () { var result = parseHTML(fs.readFileSync(__dirname + '/html/google.html').toString(), { noFix: true }); result.valid.should.eql(true); }) it('gmail.html should return Object with valid: true', function () { var result = parseHTML(fs.readFileSync(__dirname + '/html/gmail.html').toString(), { noFix: true }); result.valid.should.eql(true); }) it('ffmpeg.html should return Object with valid: false (extra opening
\r \n \t
a
b
c'); root.structuredText.should.eql('o\na\nb\nc'); }); }); describe('#set_content', function () { it('set content string', function () { var root = parseHTML(''); root.childNodes[0].set_content('