From e8928ec198e9c1d473516f642915f28640085472 Mon Sep 17 00:00:00 2001 From: messense Date: Sun, 28 Jun 2015 09:59:52 +0800 Subject: [PATCH] Bug fix and add more tests --- src/lib.rs | 2 +- tests/lib.rs | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 721bff2..8192535 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,7 @@ impl Entry { /// check if this entry applies to the specified agent fn applies_to(&self, useragent: &str) -> bool { - let ua = useragent.split("/").nth(0).unwrap_or(""); + let ua = useragent.split("/").nth(0).unwrap_or("").to_lowercase(); let useragents = self.useragents.borrow(); for agent in &*useragents { if agent == "*" { diff --git a/tests/lib.rs b/tests/lib.rs index a5a3544..221d807 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -53,4 +53,65 @@ fn test_robots_txt_2() { let good = vec!["/","/test.html"]; let bad = vec!["/cyberworld/map/index.html"]; robot_test_simple(doc, good, bad); + + let good = vec!["/cyberworld/map/index.html"]; + robot_test(doc, good, vec![], "cybermapper"); +} + +#[test] +fn test_robots_txt_3() { + let doc = "\n\ + # go away\n\ + User-agent: *\n\ + Disallow: /\n\ + "; + let good = vec![]; + let bad = vec!["/cyberworld/map/index.html","/","/tmp/"]; + robot_test_simple(doc, good, bad); +} + +#[test] +fn test_robots_txt_4() { + let doc = "\n\ + User-agent: figtree\n\ + Disallow: /tmp\n\ + Disallow: /a%3cd.html\n\ + Disallow: /a%2fb.html\n\ + Disallow: /%7ejoe/index.html\n\ + "; + let good = vec![]; + let bad = vec![ + "/tmp", "/tmp.html", "/tmp/a.html", "/a%3cd.html", "/a%3Cd.html", + "/a%2fb.html", "/~joe/index.html", + ]; + robot_test(doc, good.clone(), bad.clone(), "figtree"); + robot_test(doc, good, bad, "FigTree Robot libwww-perl/5.04"); +} + +#[test] +fn test_robots_txt_5() { + let doc = "\n\ + User-agent: *\n\ + Disallow: /tmp/\n\ + Disallow: /a%3Cd.html\n\ + Disallow: /a/b.html\n\ + Disallow: /%7ejoe/index.html\n\ + "; + let good = vec!["/tmp"]; + let bad = vec![ + "/tmp/", "/tmp/a.html", "/a%3cd.html", "/a%3Cd.html", + "/a/b.html", "/%7Ejoe/index.html", + ]; + robot_test_simple(doc, good, bad); +} + +#[test] +fn test_robots_txt_6() { + let doc = "\n\ + User-Agent: *\n\ + Disallow: /.\n\ + "; + let good = vec!["/foo.html"]; + let bad = vec![]; + robot_test_simple(doc, good, bad); }