mirror of
https://github.com/danbulant/robotparser-rs
synced 2026-06-18 22:21:05 +00:00
Bug fix and add more tests
This commit is contained in:
parent
6153773f24
commit
e8928ec198
2 changed files with 62 additions and 1 deletions
|
|
@ -71,7 +71,7 @@ impl Entry {
|
|||
|
||||
/// check if this entry applies to the specified agent
|
||||
fn applies_to(&self, useragent: &str) -> bool {
|
||||
let ua = useragent.split("/").nth(0).unwrap_or("");
|
||||
let ua = useragent.split("/").nth(0).unwrap_or("").to_lowercase();
|
||||
let useragents = self.useragents.borrow();
|
||||
for agent in &*useragents {
|
||||
if agent == "*" {
|
||||
|
|
|
|||
61
tests/lib.rs
61
tests/lib.rs
|
|
@ -53,4 +53,65 @@ fn test_robots_txt_2() {
|
|||
let good = vec!["/","/test.html"];
|
||||
let bad = vec!["/cyberworld/map/index.html"];
|
||||
robot_test_simple(doc, good, bad);
|
||||
|
||||
let good = vec!["/cyberworld/map/index.html"];
|
||||
robot_test(doc, good, vec![], "cybermapper");
|
||||
}
|
||||
|
||||
/// Runs `robot_test_simple` on a robots.txt document made of a `# go away`
/// comment line followed by a single record: `User-agent: *` / `Disallow: /`.
/// The `good` list is empty and `bad` holds three paths, including `/` itself.
/// NOTE(review): presumably `robot_test_simple` asserts that `good` paths are
/// fetchable and `bad` paths are not — confirm against the helper's definition.
#[test]
|
||||
fn test_robots_txt_3() {
|
||||
let doc = "\n\
|
||||
# go away\n\
|
||||
User-agent: *\n\
|
||||
Disallow: /\n\
|
||||
";
|
||||
let good = vec![];
|
||||
let bad = vec!["/cyberworld/map/index.html","/","/tmp/"];
|
||||
robot_test_simple(doc, good, bad);
|
||||
}
|
||||
|
||||
/// Runs `robot_test` twice on a robots.txt document with one record for
/// `User-agent: figtree` and four `Disallow` rules, three of which contain
/// percent-escapes (`%3c`, `%2f`, `%7e`).
/// The `good` list is empty; `bad` lists seven paths, among them `/tmp.html`
/// and `/tmp/a.html` (which share the `/tmp` prefix), both `%3c` and `%3C`
/// spellings, and the unescaped `/~joe/index.html`.
/// The same lists are checked for the agent strings `figtree` and
/// `FigTree Robot libwww-perl/5.04`.
/// NOTE(review): the second agent string presumably exercises case-insensitive
/// agent matching and the handling of the `/version` suffix — confirm against
/// the matching code and the `robot_test` helper.
#[test]
|
||||
fn test_robots_txt_4() {
|
||||
let doc = "\n\
|
||||
User-agent: figtree\n\
|
||||
Disallow: /tmp\n\
|
||||
Disallow: /a%3cd.html\n\
|
||||
Disallow: /a%2fb.html\n\
|
||||
Disallow: /%7ejoe/index.html\n\
|
||||
";
|
||||
let good = vec![];
|
||||
let bad = vec![
|
||||
"/tmp", "/tmp.html", "/tmp/a.html", "/a%3cd.html", "/a%3Cd.html",
|
||||
"/a%2fb.html", "/~joe/index.html",
|
||||
];
|
||||
// The lists are cloned here because they are passed by value again below.
robot_test(doc, good.clone(), bad.clone(), "figtree");
|
||||
robot_test(doc, good, bad, "FigTree Robot libwww-perl/5.04");
|
||||
}
|
||||
|
||||
/// Runs `robot_test_simple` on a robots.txt document with one `User-agent: *`
/// record and four `Disallow` rules: `/tmp/`, `/a%3Cd.html`, `/a/b.html` and
/// `/%7ejoe/index.html`.
/// `good` contains only `/tmp` (no trailing slash, unlike the `/tmp/` rule).
/// `bad` contains six paths, including escapes whose hex-digit case differs
/// from the rules (`%3c` vs `%3C`, `%7E` vs `%7e`).
/// NOTE(review): presumably `robot_test_simple` asserts that `good` paths are
/// fetchable and `bad` paths are not — confirm against the helper's definition.
#[test]
|
||||
fn test_robots_txt_5() {
|
||||
let doc = "\n\
|
||||
User-agent: *\n\
|
||||
Disallow: /tmp/\n\
|
||||
Disallow: /a%3Cd.html\n\
|
||||
Disallow: /a/b.html\n\
|
||||
Disallow: /%7ejoe/index.html\n\
|
||||
";
|
||||
let good = vec!["/tmp"];
|
||||
let bad = vec![
|
||||
"/tmp/", "/tmp/a.html", "/a%3cd.html", "/a%3Cd.html",
|
||||
"/a/b.html", "/%7Ejoe/index.html",
|
||||
];
|
||||
robot_test_simple(doc, good, bad);
|
||||
}
|
||||
|
||||
/// Runs `robot_test_simple` on a robots.txt document whose single record is
/// `User-Agent: *` (capital `A` in the field name) with the rule `Disallow: /.`.
/// `good` contains `/foo.html`, which does not start with `/.`; `bad` is empty.
/// NOTE(review): presumably `robot_test_simple` asserts that `good` paths are
/// fetchable and `bad` paths are not — confirm against the helper's definition.
#[test]
|
||||
fn test_robots_txt_6() {
|
||||
let doc = "\n\
|
||||
User-Agent: *\n\
|
||||
Disallow: /.\n\
|
||||
";
|
||||
let good = vec!["/foo.html"];
|
||||
let bad = vec![];
|
||||
robot_test_simple(doc, good, bad);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue