diff --git a/README.md b/README.md index c7a6b95..44e40e8 100644 --- a/README.md +++ b/README.md @@ -31,9 +31,9 @@ use url::Url; fn main() { let client = Client::new(); - let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap(); let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result(); - let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap(); assert!(robots_txt.can_fetch("*", &fetch_url)); } ``` diff --git a/src/lib.rs b/src/lib.rs index f22bb11..f5692e4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ //! robots.txt parser for Rust //! //! The robots.txt Exclusion Protocol is implemented as specified in -//! +//! //! //! # Installation //! @@ -23,9 +23,9 @@ //! //! fn main() { //! let client = Client::new(); -//! let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap(); +//! let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap(); //! let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result(); -//! let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap(); +//! let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap(); //! assert!(robots_txt.can_fetch("*", &fetch_url)); //! } //! ``` diff --git a/src/parser.rs b/src/parser.rs index 94dcba0..ee4bbb7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -16,13 +16,13 @@ //! use url::Url; //! //! fn main() { -//! let robots_txt_url = Url::parse("http://google.com/robots.txt").unwrap(); +//! let robots_txt_url = Url::parse("https://google.com/robots.txt").unwrap(); //! let robots_txt = "User-agent: *\nDisallow: /search"; //! let robots_txt = parse_robots_txt(robots_txt_url.origin(), robots_txt); //! assert_eq!(robots_txt.get_warnings().len(), 0); //! let robots_txt = robots_txt.get_result(); -//! let good_url = Url::parse("http://google.com/test").unwrap(); -//! let bad_url = Url::parse("http://google.com/search/vvv").unwrap(); +//! let good_url = Url::parse("https://google.com/test").unwrap(); +//! let bad_url = Url::parse("https://google.com/search/vvv").unwrap(); //! assert_eq!(robots_txt.can_fetch("*", &bad_url), false); //! assert_eq!(robots_txt.can_fetch("*", &good_url), true); //! } diff --git a/tests/test_lib.rs b/tests/test_lib.rs index a50a8be..1cfc1bf 100644 --- a/tests/test_lib.rs +++ b/tests/test_lib.rs @@ -6,15 +6,15 @@ use url::Url; const AGENT: &'static str = "test_robotparser"; fn robot_test(doc: &str, good_urls: Vec<&str>, bad_urls: Vec<&str>, agent: &str) { - let url = Url::parse("http://www.baidu.com/robots.txt").unwrap(); + let url = Url::parse("https://www.baidu.com/robots.txt").unwrap(); let parser = parse_robots_txt(url.origin(), doc).get_result(); for url in &good_urls { - let url = format!("http://www.baidu.com{}", url); + let url = format!("https://www.baidu.com{}", url); let url = Url::parse(&url).unwrap(); assert!(parser.can_fetch(agent, &url)); } for url in &bad_urls { - let url = format!("http://www.baidu.com{}", url); + let url = format!("https://www.baidu.com{}", url); let url = Url::parse(&url).unwrap(); assert!(!parser.can_fetch(agent, &url)); } @@ -56,7 +56,7 @@ fn test_robots_txt_1() { #[test] fn test_robots_txt_2() { let doc = "\n\ - # robots.txt for http://www.example.com/\n\ + # robots.txt for https://www.example.com/\n\ \n\ User-agent: *\n\ Disallow: /cyberworld/map/ # This is an infinite virtual URL space\n\ @@ -249,7 +249,7 @@ fn test_robots_txt_read() { use robotparser::http::{CreateRobotsTxtRequest, ParseRobotsTxtResponse}; use reqwest::{Client, Request}; let http_client = Client::new(); - let url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let url = Url::parse("https://www.python.org/robots.txt").unwrap(); let request = Request::create_robots_txt_request(url.origin()); let mut response = http_client.execute(request).unwrap(); let parser = response.parse_robots_txt_response().unwrap().get_result(); @@ -258,7 +258,7 @@ fn test_robots_txt_read() { #[test] fn test_robots_text_crawl_delay() { - let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap(); let doc = "User-agent: Yandex\n\ Crawl-delay: 2.35\n\ Disallow: /search/\n"; @@ -268,18 +268,18 @@ fn test_robots_text_crawl_delay() { #[test] fn test_robots_text_sitemaps() { - let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap(); let doc = "User-agent: Yandex\n\ - Sitemap \t : http://example.com/sitemap1.xml\n - Sitemap: http://example.com/sitemap2.xml\n - Sitemap: http://example.com/sitemap3.xml\n + Sitemap \t : https://example.com/sitemap1.xml\n + Sitemap: https://example.com/sitemap2.xml\n + Sitemap: https://example.com/sitemap3.xml\n Disallow: /search/\n"; let parser = parse_robots_txt(robots_txt_url.origin(), doc).get_result(); assert_eq!( &[ - Url::parse("http://example.com/sitemap1.xml").unwrap(), - Url::parse("http://example.com/sitemap2.xml").unwrap(), - Url::parse("http://example.com/sitemap3.xml").unwrap() + Url::parse("https://example.com/sitemap1.xml").unwrap(), + Url::parse("https://example.com/sitemap2.xml").unwrap(), + Url::parse("https://example.com/sitemap3.xml").unwrap() ], parser.get_sitemaps() ); @@ -287,7 +287,7 @@ fn test_robots_text_sitemaps() { #[test] fn test_robots_text_request_rate() { - let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap(); let doc = "User-agent: Yandex\n\ Request-rate: 3/15\n\ @@ -313,15 +313,15 @@ Clean-param: gid\n\ Clean-param: tm\n\ Clean-param: amp\n\ "; - let url = Url::parse("http://www.baidu.com/robots.txt").unwrap(); + let url = Url::parse("https://www.baidu.com/robots.txt").unwrap(); let parser = parse_robots_txt(url.origin(), doc).get_result(); - let mut site_url = Url::parse("http://www.baidu.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap(); + let mut site_url = Url::parse("https://www.baidu.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap(); let was_updated = parser.normalize_url(&mut site_url); assert_eq!(was_updated, true); - assert_eq!(site_url.as_str(), "http://www.baidu.com/test?post_id=7777"); + assert_eq!(site_url.as_str(), "https://www.baidu.com/test?post_id=7777"); - let mut site_url = Url::parse("http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap(); + let mut site_url = Url::parse("https://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1").unwrap(); let was_updated = parser.normalize_url(&mut site_url); assert_eq!(was_updated, false); - assert_eq!(site_url.as_str(), "http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1"); + assert_eq!(site_url.as_str(), "https://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&=1"); } \ No newline at end of file diff --git a/tests/test_reqwest_async.rs b/tests/test_reqwest_async.rs index e286b0b..a5ecd08 100644 --- a/tests/test_reqwest_async.rs +++ b/tests/test_reqwest_async.rs @@ -8,9 +8,9 @@ use tokio::runtime::Runtime; fn test_reqwest_async() { let mut runtime = Runtime::new().unwrap(); let client = Client::new(); - let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap(); let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin())); let robots_txt = robots_txt_response.unwrap().get_result(); - let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap(); assert!(robots_txt.can_fetch("*", &fetch_url)); } \ No newline at end of file diff --git a/tests/test_reqwest_blocking.rs b/tests/test_reqwest_blocking.rs index 129f14d..1c38c4e 100644 --- a/tests/test_reqwest_blocking.rs +++ b/tests/test_reqwest_blocking.rs @@ -6,8 +6,8 @@ use url::Url; #[test] fn test_reqwest_blocking() { let client = Client::new(); - let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap(); let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result(); - let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap(); + let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap(); assert!(robots_txt.can_fetch("*", &fetch_url)); } diff --git a/tests/test_warnings.rs b/tests/test_warnings.rs index 30b2d8e..b85620f 100644 --- a/tests/test_warnings.rs +++ b/tests/test_warnings.rs @@ -106,7 +106,7 @@ fn test_warning_request_rate() { #[test] fn test_warning_parsing_url() { - let input = "User-Agent: *\nSitemap: http://python.org/sitemap.xml"; + let input = "User-Agent: *\nSitemap: https://python.org/sitemap.xml"; validate_warnings(input, &[]); let input = "User-Agent: *\nSitemap: http$$$://python.org/sitemap.xml"; validate_warnings(input, &[WarningReasonKind::ParseUrl]);