Refactoring of robotparser-rs (#20)

* Migrated sites into robotsparser file. * Robots.txt refactoring. * Migrated to new version of url and reqwest.
2026-06-14 04:01:22 +00:00 · 2020-01-31 16:00:58 +07:00 · 2020-01-31 16:00:58 +07:00 · 2d19755779
commit 2d19755779
parent cb7df85b83
33 changed files with 1789 additions and 511 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 target
 Cargo.lock
 .vscode/
+.idea/
--- a/Cargo.toml
+++ b/Cargo.toml
@ -8,16 +8,25 @@ license = "MIT"
 name = "robotparser"
 readme = "README.md"
 repository = "https://github.com/messense/robotparser-rs"
-version = "0.10.2"
+version = "0.11.0"
+edition = "2018"

 [dependencies]
-url = "1"
+url = "2"
+percent-encoding = "2.1"

 [dependencies.reqwest]
-version = "0.9"
+version = "0.10.1"
+optional = true
+features = ["blocking"]
+
+[dependencies.futures]
+version = "0.3"
 optional = true

 [features]
-default = ["http"]
-http = ["reqwest"]
+default = ["reqwest", "futures"]
 unstable = []
+
+[dev-dependencies]
+tokio = "0.2.11"
--- a/README.md
+++ b/README.md
@ -15,7 +15,7 @@ Add it to your ``Cargo.toml``:

 ```toml
 [dependencies]
-robotparser = "0.10"
+robotparser = "0.11"
 ```

 Add ``extern crate robotparser`` to your crate root and your're good to go!
@ -24,14 +24,17 @@ Add ``extern crate robotparser`` to your crate root and your're good to go!
 ## Examples

 ```rust
-extern crate robotparser;
-
-use robotparser::RobotFileParser;
+use robotparser::http::RobotsTxtClient;
+use robotparser::service::RobotsTxtService;
+use reqwest::blocking::Client;
+use url::Url;

 fn main() {
-    let parser = RobotFileParser::new("http://www.python.org/robots.txt");
-    parser.read();
-    assert!(parser.can_fetch("*", "http://www.python.org/robots.txt"));
+    let client = Client::new();
+    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
+    let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    assert!(robots_txt.can_fetch("*", &fetch_url));
 }
 ```

--- a/src/http.rs
+++ b/src/http.rs
@ -0,0 +1,19 @@
+//! # Supported libraries
+//! To enable support for an HTTP library, enable the corresponding feature in your `Cargo.toml`.
+//! Currently only one library is supported: `reqwest`.
+//! Support for other libraries can be added by implementing `RobotsTxtClient` for their clients.
+
+use url::Origin;
+#[cfg(feature = "reqwest")]
+/// Support for reqwest library.
+pub mod reqwest;
+
+/// User agent of this crate.
+pub const DEFAULT_USER_AGENT: &str = "robotparser-rs (https://crates.io/crates/robotparser)";
+
+/// Trait for fetching and parsing the robots.txt file of an origin.
+/// It is meant to be implemented for HTTP client types.
+pub trait RobotsTxtClient {
+    type Result;
+    fn fetch_robots_txt(&self, origin: Origin) -> Self::Result;
+}
--- a/src/http/reqwest.rs
+++ b/src/http/reqwest.rs
@ -0,0 +1,4 @@
+mod sync_reqwest;
+pub use self::sync_reqwest::*;
+mod async_reqwest;
+pub use self::async_reqwest::*;
--- a/src/http/reqwest/async_reqwest.rs
+++ b/src/http/reqwest/async_reqwest.rs
@ -0,0 +1,76 @@
+use reqwest::{Client, Request};
+use reqwest::{Method, Error};
+use reqwest::header::HeaderValue;
+use url::{Origin, Url};
+use reqwest::header::USER_AGENT;
+use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
+use crate::parser::{ParseResult, parse_fetched_robots_txt};
+use crate::model::FetchedRobotsTxt;
+use std::pin::Pin;
+use futures::task::{Context, Poll};
+use futures::Future;
+use futures::future::TryFutureExt;
+use futures::future::ok as future_ok;
+
+type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>;
+
+impl RobotsTxtClient for Client {
+    type Result = RobotsTxtResponse;
+    fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
+        let url = format!("{}/robots.txt", origin.unicode_serialization());
+        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let mut request = Request::new(Method::GET, url);
+        let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
+        let response = self
+            .execute(request)
+            .and_then(|response| {
+                let response_info = ResponseInfo {status_code: response.status().as_u16()};
+                return response.text().and_then(|response_text| {
+                    return future_ok((response_info, response_text));
+                });
+            });
+        let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>> = Box::pin(response);
+        return RobotsTxtResponse {
+            origin,
+            response,
+        }
+    }
+}
+
+struct ResponseInfo {
+    status_code: u16,
+}
+
+/// Future for fetching robots.txt result.
+pub struct RobotsTxtResponse {
+    origin: Origin,
+    response: Pin<FetchFuture>,
+}
+
+impl RobotsTxtResponse {
+    /// Returns origin of robots.txt
+    pub fn get_origin(&self) -> &Origin {
+        return &self.origin;
+    }
+}
+
+impl Future for RobotsTxtResponse {
+    type Output = Result<ParseResult<FetchedRobotsTxt>, Error>;
+
+    fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
+        let self_mut = self.get_mut();
+        let response_pin = self_mut.response.as_mut();
+        match response_pin.poll(cx) {
+            Poll::Ready(Ok((response_info, text))) => {
+                let robots_txt = parse_fetched_robots_txt(self_mut.origin.clone(), response_info.status_code, &text);
+                return Poll::Ready(Ok(robots_txt));
+            },
+            Poll::Ready(Err(error)) => {
+                return Poll::Ready(Err(error));
+            },
+            Poll::Pending => {
+                return Poll::Pending;
+            },
+        }
+    }
+}
--- a/src/http/reqwest/sync_reqwest.rs
+++ b/src/http/reqwest/sync_reqwest.rs
@ -0,0 +1,23 @@
+use reqwest::blocking::{Client, Request};
+use reqwest::{Method, Error};
+use reqwest::header::HeaderValue;
+use url::{Origin, Url};
+use reqwest::header::USER_AGENT;
+use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
+use crate::parser::{ParseResult, parse_fetched_robots_txt};
+use crate::model::FetchedRobotsTxt;
+
+impl RobotsTxtClient for Client {
+    type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
+    fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
+        let url = format!("{}/robots.txt", origin.unicode_serialization());
+        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let mut request = Request::new(Method::GET, url);
+        let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
+        let response = self.execute(request)?;
+        let status_code = response.status().as_u16();
+        let text = response.text()?;
+        let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
+        return Ok(robots_txt);
+    }
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -9,482 +9,32 @@
 //!
 //! ```toml
 //! [dependencies]
-//! robotparser = "0.10"
+//! robotparser = "0.11"
 //! ```
 //!
-//! Add ``extern crate robotparser`` to your crate root and your're good to go!
 //!
 //! # Examples
 //!
-//! ```rust,ignore
-//! extern crate robotparser;
-//!
-//! use robotparser::RobotFileParser;
+//! ```rust
+//! use robotparser::http::RobotsTxtClient;
+//! use robotparser::service::RobotsTxtService;
+//! use reqwest::blocking::Client;
+//! use url::Url;
 //!
 //! fn main() {
-//!     let parser = RobotFileParser::new("http://www.python.org/robots.txt");
-//!     parser.read();
-//!     assert!(parser.can_fetch("*", "http://www.python.org/robots.txt"));
+//!     let client = Client::new();
+//!     let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+//!     let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
+//!     let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+//!     assert!(robots_txt.can_fetch("*", &fetch_url));
 //! }
 //! ```

-extern crate url;
-#[cfg(feature = "http")]
-extern crate reqwest;
-
-#[cfg(feature = "http")]
-use std::io::Read;
-use std::cell::{Cell, RefCell};
-use std::borrow::Cow;
-use std::time::{Duration, SystemTime, UNIX_EPOCH};
-
-use url::Url;
-
-#[cfg(feature = "http")]
-use reqwest::Client;
-#[cfg(feature = "http")]
-use reqwest::header::USER_AGENT;
-#[cfg(feature = "http")]
-use reqwest::StatusCode;
-#[cfg(feature = "http")]
-use reqwest::Response;
-
-#[cfg(feature = "http")]
-const RP_USER_AGENT: &str = "robotparser-rs (https://crates.io/crates/robotparser)";
-
-/// A rule line is a single "Allow:" (allowance==True) or "Disallow:"
-/// (allowance==False) followed by a path."""
-#[derive(Debug, Eq, PartialEq, Clone)]
-struct RuleLine<'a> {
-    path: Cow<'a, str>,
-    allowance: bool,
-}
-
-#[derive(Debug, Eq, PartialEq, Clone)]
-pub struct RequestRate {
-    pub requests: usize,
-    pub seconds: usize,
-}
-
-/// An entry has one or more user-agents and zero or more rulelines
-#[derive(Debug, Eq, PartialEq, Clone)]
-struct Entry<'a> {
-    useragents: RefCell<Vec<String>>,
-    rulelines: RefCell<Vec<RuleLine<'a>>>,
-    crawl_delay: Option<Duration>,
-    sitemaps: Vec<Url>,
-    req_rate: Option<RequestRate>,
-}
-
-/// robots.txt file parser
-#[derive(Debug, Eq, PartialEq, Clone)]
-pub struct RobotFileParser<'a> {
-    entries: RefCell<Vec<Entry<'a>>>,
-    default_entry: RefCell<Entry<'a>>,
-    disallow_all: Cell<bool>,
-    allow_all: Cell<bool>,
-    url: Url,
-    host: String,
-    path: String,
-    last_checked: Cell<i64>,
-}
-
-
-impl<'a> RuleLine<'a> {
-    fn new<S>(path: S, allowance: bool) -> RuleLine<'a>
-        where S: Into<Cow<'a, str>>
-    {
-        let path = path.into();
-        let mut allow = allowance;
-        if path == "" && !allowance {
-            // an empty value means allow all
-            allow = true;
-        }
-        RuleLine {
-            path: path,
-            allowance: allow,
-        }
-    }
-
-    fn applies_to(&self, filename: &str) -> bool {
-        self.path == "*" || filename.starts_with(&self.path[..])
-    }
-}
-
-
-impl<'a> Entry<'a> {
-    fn new() -> Entry<'a> {
-        Entry {
-            useragents: RefCell::new(vec![]),
-            rulelines: RefCell::new(vec![]),
-            crawl_delay: None,
-            sitemaps: Vec::new(),
-            req_rate: None,
-        }
-    }
-
-    /// check if this entry applies to the specified agent
-    fn applies_to(&self, useragent: &str) -> bool {
-        let ua = useragent.split('/').nth(0).unwrap_or("").to_lowercase();
-        let useragents = self.useragents.borrow();
-        for agent in &*useragents {
-            if agent == "*" {
-                return true;
-            }
-            if ua.contains(agent) {
-                return true;
-            }
-        }
-        false
-    }
-
-
-    /// Preconditions:
-    /// - our agent applies to this entry
-    /// - filename is URL decoded
-    fn allowance(&self, filename: &str) -> bool {
-        let rulelines = self.rulelines.borrow();
-        for line in &*rulelines {
-            if line.applies_to(filename) {
-                return line.allowance;
-            }
-        }
-        true
-    }
-
-    fn push_useragent(&self, useragent: &str) {
-        let mut useragents = self.useragents.borrow_mut();
-        useragents.push(useragent.to_lowercase().to_owned());
-    }
-
-    fn push_ruleline(&self, ruleline: RuleLine<'a>) {
-        let mut rulelines = self.rulelines.borrow_mut();
-        rulelines.push(ruleline);
-    }
-
-    fn has_useragent(&self, useragent: &str) -> bool {
-        let useragents = self.useragents.borrow();
-        useragents.contains(&useragent.to_owned())
-    }
-
-    fn is_empty(&self) -> bool {
-        let useragents = self.useragents.borrow();
-        let rulelines = self.rulelines.borrow();
-        useragents.is_empty() && rulelines.is_empty()
-    }
-
-    fn set_crawl_delay(&mut self, delay: Duration) {
-        self.crawl_delay = Some(delay);
-    }
-
-    fn get_crawl_delay(&self) -> Option<Duration> {
-        self.crawl_delay
-    }
-
-    fn add_sitemap(&mut self, url: &str) {
-        if let Ok(url) = Url::parse(url) {
-            self.sitemaps.push(url);
-        }
-    }
-
-    fn get_sitemaps(&self) -> Vec<Url> {
-        self.sitemaps.clone()
-    }
-
-    fn set_req_rate(&mut self, req_rate: RequestRate) {
-        self.req_rate = Some(req_rate);
-    }
-
-    fn get_req_rate(&self) -> Option<RequestRate> {
-        self.req_rate.clone()
-    }
-}
-
-
-impl<'a> Default for Entry<'a> {
-    fn default() -> Entry<'a> {
-        Entry::new()
-    }
-}
-
-
-impl<'a> RobotFileParser<'a> {
-    pub fn new<T: AsRef<str>>(url: T) -> RobotFileParser<'a> {
-        let parsed_url = Url::parse(url.as_ref()).unwrap();
-        RobotFileParser {
-            entries: RefCell::new(vec![]),
-            default_entry: RefCell::new(Entry::new()),
-            disallow_all: Cell::new(false),
-            allow_all: Cell::new(false),
-            url: parsed_url.clone(),
-            host: parsed_url.host_str().unwrap().to_owned(),
-            path: parsed_url.path().to_owned(),
-            last_checked: Cell::new(0i64),
-        }
-    }
-
-    /// Returns the time the robots.txt file was last fetched.
-    ///
-    /// This is useful for long-running web spiders that need to
-    /// check for new robots.txt files periodically.
-    pub fn mtime(&self) -> i64 {
-        self.last_checked.get()
-    }
-
-    /// Sets the time the robots.txt file was last fetched to the
-    /// current time.
-    pub fn modified(&self) {
-        let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs() as i64;
-        self.last_checked.set(now);
-    }
-
-    /// Sets the URL referring to a robots.txt file.
-    pub fn set_url<T: AsRef<str>>(&mut self, url: T) {
-        let parsed_url = Url::parse(url.as_ref()).unwrap();
-        self.url = parsed_url.clone();
-        self.host = parsed_url.host_str().unwrap().to_owned();
-        self.path = parsed_url.path().to_owned();
-        self.last_checked.set(0i64);
-    }
-
-    #[cfg(feature = "http")]
-    /// Reads the robots.txt URL and feeds it to the parser.
-    pub fn read(&self) {
-        let client = Client::new();
-        let request = client.get(self.url.clone());
-        let request = request.header(USER_AGENT, RP_USER_AGENT.to_owned());
-        let mut res = match request.send() {
-            Ok(res) => res,
-            Err(_) => {
-                return;
-            }
-        };
-        let status = res.status();
-        match status {
-            StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
-                self.disallow_all.set(true);
-            }
-            status if status >= StatusCode::BAD_REQUEST && status < StatusCode::INTERNAL_SERVER_ERROR => {
-                self.allow_all.set(true);
-            }
-            StatusCode::OK => self.from_response(&mut res),
-            _ => {}
-        }
-    }
-
-    #[cfg(feature = "http")]
-    /// Reads the HTTP response and feeds it to the parser.
-    pub fn from_response(&self, response: &mut Response) {
-        let mut buf = String::new();
-        response.read_to_string(&mut buf).unwrap();
-        let lines: Vec<&str> = buf.split('\n').collect();
-        self.parse(&lines);
-    }
-
-    fn _add_entry(&self, entry: Entry<'a>) {
-        if entry.has_useragent("*") {
-            // the default entry is considered last
-            let mut default_entry = self.default_entry.borrow_mut();
-            if default_entry.is_empty() {
-                // the first default entry wins
-                *default_entry = entry;
-            }
-        } else {
-            let mut entries = self.entries.borrow_mut();
-            entries.push(entry);
-        }
-    }
-
-    ///
-    /// Parse the input lines from a robots.txt file
-    ///
-    /// We allow that a user-agent: line is not preceded by
-    /// one or more blank lines.
-    ///
-    pub fn parse<T: AsRef<str>>(&self, lines: &[T]) {
-        use url::percent_encoding::percent_decode;
-
-        // states:
-        //   0: start state
-        //   1: saw user-agent line
-        //   2: saw an allow or disallow line
-        let mut state = 0;
-        let mut entry = Entry::new();
-
-        self.modified();
-        for line in lines {
-            let mut ln = line.as_ref();
-            if ln.is_empty() {
-                match state {
-                    1 => {
-                        entry = Entry::new();
-                        state = 0;
-                    }
-                    2 => {
-                        self._add_entry(entry);
-                        entry = Entry::new();
-                        state = 0;
-                    }
-                    _ => {}
-                }
-            }
-            // remove optional comment and strip line
-            if let Some(i) = ln.find('#') {
-                ln = &ln[0..i];
-            }
-            ln = ln.trim();
-            if ln.is_empty() {
-                continue;
-            }
-            let parts: Vec<&str> = ln.splitn(2, ':').collect();
-            if parts.len() == 2 {
-                let part0 = parts[0].trim().to_lowercase();
-                let part1 = String::from_utf8(percent_decode(parts[1].trim().as_bytes()).collect())
-                    .unwrap_or("".to_owned());
-                match part0 {
-                    ref x if x == "user-agent" => {
-                        if state == 2 {
-                            self._add_entry(entry);
-                            entry = Entry::new();
-                        }
-                        entry.push_useragent(&part1);
-                        state = 1;
-                    }
-                    ref x if x == "disallow" => {
-                        if state != 0 {
-                            entry.push_ruleline(RuleLine::new(part1, false));
-                            state = 2;
-                        }
-                    }
-                    ref x if x == "allow" => {
-                        if state != 0 {
-                            entry.push_ruleline(RuleLine::new(part1, true));
-                            state = 2;
-                        }
-                    }
-                    ref x if x == "crawl-delay" => {
-                        if state != 0 {
-                            if let Ok(delay) = part1.parse::<f64>() {
-                                let delay_seconds = delay.trunc();
-                                let delay_nanoseconds = delay.fract() * 10f64.powi(9);
-                                let delay = Duration::new(delay_seconds as u64, delay_nanoseconds as u32);
-                                entry.set_crawl_delay(delay);
-                            }
-                            state = 2;
-                        }
-                    }
-                    ref x if x == "sitemap" => {
-                        if state != 0 {
-                            entry.add_sitemap(&part1);
-                            state = 2;
-                        }
-                    }
-                    ref x if x == "request-rate" => {
-                        if state != 0 {
-                            let numbers: Vec<Result<usize, _>> = part1.split('/').map(|x| x.parse::<usize>()).collect();
-                            if numbers.len() == 2 && numbers[0].is_ok() && numbers[1].is_ok() {
-                                let req_rate = RequestRate {
-                                    requests: numbers[0].clone().unwrap(),
-                                    seconds: numbers[1].clone().unwrap(),
-                                };
-                                entry.set_req_rate(req_rate);
-                            }
-                            state = 2;
-                        }
-                    }
-                    _ => {}
-                }
-            }
-        }
-        if state == 2 {
-            self._add_entry(entry);
-        }
-    }
-
-    /// Using the parsed robots.txt decide if useragent can fetch url
-    pub fn can_fetch<T: AsRef<str>>(&self, useragent: T, url: T) -> bool {
-        use url::percent_encoding::percent_decode;
-
-        let useragent = useragent.as_ref();
-        let url = url.as_ref();
-
-        if self.disallow_all.get() {
-            return false;
-        }
-        if self.allow_all.get() {
-            return true;
-        }
-        // Until the robots.txt file has been read or found not
-        // to exist, we must assume that no url is allowable.
-        // This prevents false positives when a user erronenously
-        // calls can_fetch() before calling read().
-        if self.last_checked.get() == 0 {
-            return false;
-        }
-        // search for given user agent matches
-        // the first match counts
-        let decoded_url = String::from_utf8(percent_decode(url.trim().as_bytes()).collect()).unwrap_or("".to_owned());
-        let url_str = match decoded_url {
-            ref u if !u.is_empty() => u.to_owned(),
-            _ => "/".to_owned(),
-        };
-        let entries = self.entries.borrow();
-        for entry in &*entries {
-            if entry.applies_to(useragent) {
-                return entry.allowance(&url_str);
-            }
-        }
-        // try the default entry last
-        let default_entry = self.default_entry.borrow();
-        if !default_entry.is_empty() {
-            return default_entry.allowance(&url_str);
-        }
-        // agent not found ==> access granted
-        true
-    }
-
-    /// Returns the crawl delay for this user agent as a `Duration`, or None if no crawl delay is defined.
-    pub fn get_crawl_delay<T: AsRef<str>>(&self, useragent: T) -> Option<Duration> {
-        let useragent = useragent.as_ref();
-        if self.last_checked.get() == 0 {
-            return None;
-        }
-        let entries = self.entries.borrow();
-        for entry in &*entries {
-            if entry.applies_to(useragent) {
-                return entry.get_crawl_delay();
-            }
-        }
-        None
-    }
-
-    /// Returns the sitemaps for this user agent as a `Vec<Url>`.
-    pub fn get_sitemaps<T: AsRef<str>>(&self, useragent: T) -> Vec<Url> {
-        let useragent = useragent.as_ref();
-        if self.last_checked.get() == 0 {
-            return Vec::new();
-        }
-        let entries = self.entries.borrow();
-        for entry in &*entries {
-            if entry.applies_to(useragent) {
-                return entry.get_sitemaps();
-            }
-        }
-        vec![]
-    }
-
-    /// Returns the request rate for this user agent as a `RequestRate`, or None if not request rate is defined
-    pub fn get_req_rate<T: AsRef<str>>(&self, useragent: T) -> Option<RequestRate> {
-        let useragent = useragent.as_ref();
-        if self.last_checked.get() == 0 {
-            return None;
-        }
-        let entries = self.entries.borrow();
-        for entry in &*entries {
-            if entry.applies_to(useragent) {
-                return entry.get_req_rate();
-            }
-        }
-        None
-    }
-}
+/// Contains models of robots.txt file.
+pub mod model;
+/// Contains robots.txt parsers.
+pub mod parser;
+/// Contains robots.txt services.
+pub mod service;
+/// Request builder & response parsers for other http libraries.
+pub mod http;
--- a/src/model.rs
+++ b/src/model.rs
@ -0,0 +1,17 @@
+mod path_pattern;
+pub (crate) use self::path_pattern::PathPattern;
+mod group;
+pub (crate) use self::group::Group;
+mod rule;
+pub (crate) use self::rule::Rule;
+mod clean_params;
+pub (crate) use self::clean_params::CleanParams;
+mod request_rate;
+pub use self::request_rate::RequestRate;
+mod robots_txt;
+pub use self::fetched_robots_txt::FetchedRobotsTxt;
+pub (crate) use self::fetched_robots_txt::FetchedRobotsTxtContainer;
+mod fetched_robots_txt;
+pub use self::robots_txt::RobotsTxt;
+mod path;
+pub (crate) use self::path::Path;
--- a/src/model/clean_params.rs
+++ b/src/model/clean_params.rs
@ -0,0 +1,24 @@
+use crate::model::PathPattern;
+
+#[derive(Debug, Clone)]
+pub struct CleanParams {
+    path_pattern: PathPattern,
+    params: Vec<String>,
+}
+
+impl CleanParams {
+    pub fn new(path_pattern: PathPattern, params: Vec<String>) -> CleanParams {
+        return CleanParams {
+            path_pattern,
+            params,
+        }
+    }
+
+    pub fn get_path_pattern(&self) -> &PathPattern {
+        return &self.path_pattern;
+    }
+
+    pub fn get_params(&self) -> &Vec<String> {
+        return &self.params;
+    }
+}
--- a/src/model/fetched_robots_txt.rs
+++ b/src/model/fetched_robots_txt.rs
@ -0,0 +1,36 @@
+use crate::model::robots_txt::RobotsTxt;
+use std::time::SystemTime;
+
+#[derive(Debug, Clone)]
+pub (crate) enum FetchedRobotsTxtContainer {
+    FetchDenied,
+    FetchFailed,
+    Fetched(RobotsTxt),
+}
+
+#[derive(Debug, Clone)]
+/// A model of the robots.txt file that was downloaded over the network.
+/// This model takes into account HTTP response codes when loading the robots.txt file.
+/// To work with this model you should use the trait `robotparser::service::RobotsTxtService`.
+/// To create this structure you should use the `robotparser::parser::parse_fetched_robots_txt`.
+pub struct FetchedRobotsTxt {
+    fetched_at: SystemTime,
+    container: FetchedRobotsTxtContainer,
+}
+
+impl FetchedRobotsTxt {
+    pub (crate) fn new(container: FetchedRobotsTxtContainer) -> FetchedRobotsTxt {
+        FetchedRobotsTxt {
+            fetched_at: SystemTime::now(),
+            container,
+        }
+    }
+    pub (crate) fn get_container(&self) -> &FetchedRobotsTxtContainer {
+        return &self.container;
+    }
+
+    /// Returns the system time when the robots.txt file was downloaded over the network.
+    pub fn get_fetched_at(&self) -> &SystemTime {
+        return &self.fetched_at;
+    }
+}
--- a/src/model/group.rs
+++ b/src/model/group.rs
@ -0,0 +1,93 @@
+use std::time::Duration;
+use crate::model::request_rate::RequestRate;
+use crate::model::rule::Rule;
+
+/// A group has one or more user-agents and zero or more rules.
+#[derive(Debug, Clone)]
+pub struct Group {
+    user_agents: Vec<String>,
+    rules: Vec<Rule>,
+    crawl_delay: Option<Duration>,
+    req_rate: Option<RequestRate>,
+}
+
+impl Group {
+    pub (crate) fn new() -> Group {
+        Group {
+            user_agents: vec![],
+            rules: vec![],
+            crawl_delay: None,
+            req_rate: None,
+        }
+    }
+
+    /// check if this group applies to the specified agent
+    pub (crate) fn applies_to(&self, useragent: &str) -> bool {
+        let ua = useragent.split('/').nth(0).unwrap_or("").to_lowercase();
+        for agent in self.user_agents.iter() {
+            if ua.contains(agent) {
+                return true;
+            }
+        }
+        false
+    }
+
+    pub (crate) fn push_useragent(&mut self, useragent: &str) {
+        self.user_agents.push(useragent.to_lowercase().to_owned());
+    }
+
+    pub (crate) fn push_rule(&mut self, rule: Rule) {
+        self.rules.push(rule);
+    }
+
+    pub (crate) fn get_rules_sorted_by_path_len_desc(&self) -> Vec<&Rule> {
+        let mut rules: Vec<&Rule> = self.rules.iter().collect();
+        rules.sort_by(|a, b| {
+            let a = a.get_path_pattern().len();
+            let b = b.get_path_pattern().len();
+           return b.cmp(&a);
+        });
+        return rules;
+    }
+
+    pub (crate) fn contains_user_agent(&self, user_agent: &str) -> bool {
+        return self
+            .user_agents
+            .iter()
+            .find(|item| {
+                return *item == user_agent;
+            }).is_some();
+    }
+
+    pub (crate) fn set_crawl_delay(&mut self, delay: Duration) {
+        self.crawl_delay = Some(delay);
+    }
+
+    pub (crate) fn get_crawl_delay(&self) -> Option<Duration> {
+        return self.crawl_delay.clone();
+    }
+
+    pub (crate) fn set_req_rate(&mut self, req_rate: RequestRate) {
+        self.req_rate = Some(req_rate);
+    }
+
+    pub (crate) fn get_req_rate(&self) -> Option<RequestRate> {
+        return self.req_rate.clone();
+    }
+
+    pub (crate) fn is_default(&self) -> bool {
+        for user_agent in self.user_agents.iter() {
+            if user_agent == "*" {
+                return true;
+            }
+        }
+        return false;
+    }
+}
+
+
+impl Default for Group {
+    fn default() -> Group {
+        Group::new()
+    }
+}
--- a/src/model/path.rs
+++ b/src/model/path.rs
@ -0,0 +1,35 @@
+use url::Url;
+use percent_encoding::percent_decode;
+
+#[derive(Debug)]
+pub struct Path(String);
+
+impl Path {
+    pub fn from_url(url: &Url) -> Path {
+        let path = get_url_without_origin(&url);
+        let path = percent_decode(path.as_bytes()).decode_utf8_lossy();
+        if path.is_empty() {
+            return Path("/".into());
+        } else {
+            return Path(path.into());
+        }
+    }
+    pub fn as_str(&self) -> &str {
+        return &self.0;
+    }
+}
+
+fn get_url_without_origin(url: &Url) -> &str {
+    let origin = url.origin();
+    let url = url.as_str();
+    let unicode_origin = origin.unicode_serialization();
+    let ascii_origin = origin.ascii_serialization();
+    if url.starts_with(&unicode_origin) && unicode_origin.len() >= 1 {
+        return &url[unicode_origin.len()..];
+    }
+    if url.starts_with(&ascii_origin) && ascii_origin.len() >= 1 {
+        return &url[ascii_origin.len()..];
+    }
+    // Not expected to run. NOTE(review): URLs with an opaque origin (serialized as "null") may reach this panic - confirm.
+    panic!("Unable to get path from url");
+}
--- a/src/model/path_pattern.rs
+++ b/src/model/path_pattern.rs
@ -0,0 +1,127 @@
+use std::convert::From;
+use std::mem::replace;
+use percent_encoding::percent_decode;
+use crate::model::path::Path;
+
+#[derive(Debug, Clone)]
+pub struct PathPattern(Vec<PathPatternToken>);
+
+#[derive(Debug, Eq, PartialEq, Clone)]
+enum PathPatternToken {
+    Text(String),
+    AnyString,
+    TerminateString,
+}
+
+impl PathPatternToken {
+    fn from_path_pattern(path: String) -> PathPatternToken {
+        let path = percent_decode(path.as_bytes()).decode_utf8_lossy();
+        return PathPatternToken::Text(path.to_string());
+    }
+}
+
+impl PathPatternToken {
+    fn len(&self) -> usize {
+        return match self {
+            &PathPatternToken::Text(ref text) => {
+                text.len()
+            },
+            &PathPatternToken::AnyString => {
+                1
+            },
+            &PathPatternToken::TerminateString => {
+                1
+            },
+        }
+    }
+}
+
+impl PathPattern {
+    pub fn new(path: &str) -> PathPattern {
+        let mut text = String::new();
+        let mut tokens = Vec::new();
+        for c in path.chars() {
+            let prepared_token = match c {
+                '*' => {
+                    Some(PathPatternToken::AnyString)
+                },
+                '$' => {
+                    Some(PathPatternToken::TerminateString)
+                },
+                _ => {
+                    text.push(c);
+                    None
+                },
+            };
+            if let Some(prepared_token) = prepared_token {
+                if !text.is_empty() {
+                    tokens.push(PathPatternToken::from_path_pattern(replace(&mut text, "".into())));
+                }
+                tokens.push(prepared_token);
+            }
+        }
+        if !text.is_empty() {
+            tokens.push(PathPatternToken::from_path_pattern(text));
+        }
+        if let Some(&PathPatternToken::Text(..)) = tokens.last() {
+            tokens.push(PathPatternToken::AnyString);
+        }
+        tokens.dedup();
+        return PathPattern(tokens);
+    }
+
+    pub fn all() -> PathPattern {
+        return PathPattern(vec![PathPatternToken::AnyString]);
+    }
+
+    pub fn applies_to(&self, path: &Path) -> bool {
+        let mut filename = path.as_str();
+        for (index, token) in self.0.iter().enumerate() {
+            match token {
+                &PathPatternToken::Text(ref text) => {
+                    if !filename.starts_with(text) {
+                        return false;
+                    }
+                    filename = &filename[text.len() ..];
+                },
+                &PathPatternToken::AnyString => {
+                    if let Some(&PathPatternToken::Text(ref text)) = self.0.get(index + 1) {
+                        while filename.len() >= 1 {
+                            if filename.starts_with(text) {
+                                break;
+                            }
+                            // Search for next unicode char.
+                            if let Some((next_char_index, _)) = filename.char_indices().nth(1) {
+                                filename = &filename[next_char_index..];
+                            } else {
+                                break;
+                            }
+                        }
+                    } else {
+                        filename = &filename[filename.len()..];
+                    }
+                },
+                &PathPatternToken::TerminateString => {
+                    if filename.len() != 0 {
+                        return false;
+                    }
+                },
+            }
+        }
+        return true;
+    }
+
+    pub fn len(&self) -> usize {
+        let mut length = 0;
+        for path_token in self.0.iter() {
+            length += path_token.len();
+        }
+        return length;
+    }
+}
+
+impl From<&str> for PathPattern {
+    fn from(path: &str) -> Self {
+        return PathPattern::new(path);
+    }
+}
--- a/src/model/request_rate.rs
+++ b/src/model/request_rate.rs
@ -0,0 +1,9 @@
+#[derive(Debug, Clone)]
+/// Limit on the frequency of requests to the server.
+/// It is set by the `Request-Rate` directive.
+/// # Example
+/// The directive `Request-Rate: 1/5` is equivalent to the model `RequestRate {requests: 1, seconds: 5}`.
+pub struct RequestRate {
+    /// The number written before the `/` in the directive value.
+    pub requests: usize,
+    /// The number written after the `/` in the directive value.
+    pub seconds: usize,
+}
--- a/src/model/robots_txt.rs
+++ b/src/model/robots_txt.rs
@ -0,0 +1,75 @@
+use crate::model::group::Group;
+use crate::model::clean_params::CleanParams;
+use url::{Url, Origin};
+
+#[derive(Debug, Clone)]
+/// Model of a robots.txt file, produced by parsing its text.
+/// Query it through the trait `robotparser::service::RobotsTxtService`.
+/// Build it with `robotparser::parser::parse_robots_txt`.
+pub struct RobotsTxt {
+    origin: Origin,
+    groups: Vec<Group>,
+    sitemaps: Vec<Url>,
+    clean_params: Vec<CleanParams>,
+}
+
+impl RobotsTxt {
+    /// Creates a model for `origin` without groups, sitemaps or clean-params.
+    pub(crate) fn new(origin: Origin) -> RobotsTxt {
+        RobotsTxt {
+            origin,
+            groups: Vec::new(),
+            sitemaps: Vec::new(),
+            clean_params: Vec::new(),
+        }
+    }
+
+    /// Appends a sitemap URL.
+    pub(crate) fn add_sitemap(&mut self, url: Url) {
+        self.sitemaps.push(url);
+    }
+
+    /// Returns the sitemap URLs in insertion order.
+    pub(crate) fn get_sitemaps_slice(&self) -> &[Url] {
+        &self.sitemaps[..]
+    }
+
+    /// Appends one `Clean-Param` entry.
+    pub(crate) fn add_clean_params(&mut self, clean_params: CleanParams) {
+        self.clean_params.push(clean_params);
+    }
+
+    /// Returns the `Clean-Param` entries in insertion order.
+    pub(crate) fn get_clean_params(&self) -> &[CleanParams] {
+        &self.clean_params[..]
+    }
+
+    /// Appends a user-agent group.
+    pub(crate) fn add_group(&mut self, group: Group) {
+        self.groups.push(group);
+    }
+
+    /// Returns the origin this model was created for.
+    pub(crate) fn get_origin(&self) -> &Origin {
+        &self.origin
+    }
+
+    /// Runs `callback` on every group that applies to `user_agent`, in order, and
+    /// returns the first `Some` it produces. When no such group yields a value the
+    /// callback is tried once more on the default group, if there is one.
+    pub(crate) fn find_in_group<'a, T>(&'a self, user_agent: &str, callback: impl Fn(&'a Group) -> Option<T>) -> Option<T> {
+        self.groups
+            .iter()
+            .filter(|group| group.applies_to(user_agent))
+            .find_map(|group| callback(group))
+            .or_else(|| self.get_default_group().and_then(|group| callback(group)))
+    }
+
+    /// Returns the first group that reports itself as the default one.
+    pub(crate) fn get_default_group(&self) -> Option<&Group> {
+        self.groups.iter().find(|group| group.is_default())
+    }
+}
--- a/src/model/rule.rs
+++ b/src/model/rule.rs
@ -0,0 +1,31 @@
+use crate::model::path_pattern::PathPattern;
+use crate::model::path::Path;
+
+/// A single `Allow:` (`allowance == true`) or `Disallow:` (`allowance == false`)
+/// line together with the path pattern it refers to.
+#[derive(Debug, Clone)]
+pub struct Rule {
+    path_pattern: PathPattern,
+    allowance: bool,
+}
+
+impl Rule {
+    /// Creates a rule from anything convertible into a `PathPattern`.
+    pub fn new(path_pattern: impl Into<PathPattern>, allowance: bool) -> Rule {
+        let path_pattern = path_pattern.into();
+        Rule { path_pattern, allowance }
+    }
+
+    /// Tells whether the pattern of this rule matches `path`.
+    pub(crate) fn applies_to(&self, path: &Path) -> bool {
+        self.path_pattern.applies_to(path)
+    }
+
+    /// Returns `true` for an allowing rule and `false` for a disallowing one.
+    pub(crate) fn get_allowance(&self) -> bool {
+        self.allowance
+    }
+
+    /// Returns the pattern this rule was built with.
+    pub(crate) fn get_path_pattern(&self) -> &PathPattern {
+        &self.path_pattern
+    }
+}
--- a/src/parser.rs
+++ b/src/parser.rs
@ -0,0 +1,40 @@
+//! # Supported features and directives
+//!
+//! * Strips a leading Unicode byte order mark (BOM)
+//! * Directive `User-Agent`
+//! * Directive `Allow`
+//! * Directive `Disallow`
+//! * Directive `Crawl-Delay`
+//! * Directive `Request-Rate`
+//! * Directive `Sitemap`
+//! * Directive `Clean-Param`
+//!
+//! # Example
+//! ```rust
+//! use robotparser::parser::parse_robots_txt;
+//! use robotparser::service::RobotsTxtService;
+//! use url::Url;
+//!
+//! fn main() {
+//!     let robots_txt_url = Url::parse("http://google.com/robots.txt").unwrap();
+//!     let robots_txt = "User-agent: *\nDisallow: /search";
+//!     let robots_txt = parse_robots_txt(robots_txt_url.origin(), robots_txt);
+//!     assert_eq!(robots_txt.get_warnings().len(), 0);
+//!     let robots_txt = robots_txt.get_result();
+//!     let good_url = Url::parse("http://google.com/test").unwrap();
+//!     let bad_url = Url::parse("http://google.com/search/vvv").unwrap();
+//!     assert_eq!(robots_txt.can_fetch("*", &bad_url), false);
+//!     assert_eq!(robots_txt.can_fetch("*", &good_url), true);
+//! }
+//! ```
+mod robots_txt_parser;
+pub use self::robots_txt_parser::parse as parse_robots_txt;
+mod warning_reason;
+pub use self::warning_reason::WarningReason;
+mod warning;
+pub use self::warning::ParseWarning;
+mod parse_result;
+pub use self::parse_result::ParseResult;
+mod fetched_robots_txt_parser;
+pub use self::fetched_robots_txt_parser::parse as parse_fetched_robots_txt;
+mod line;
--- a/src/parser/fetched_robots_txt_parser.rs
+++ b/src/parser/fetched_robots_txt_parser.rs
@ -0,0 +1,28 @@
+use crate::model::{FetchedRobotsTxt, FetchedRobotsTxtContainer};
+use crate::parser::ParseResult;
+use crate::parser::parse_robots_txt;
+use url::Origin;
+
+const UNAUTHORIZED: u16 = 401;
+const FORBIDDEN: u16 = 403;
+const OK: u16 = 200;
+
+/// Parses the text of the robots.txt file located in the specified place of origin,
+/// taking into account the response status code of the HTTP-request.
+///
+/// * `200` - the text is parsed and wrapped into the `Fetched` container;
+/// * `401` and `403` - the text is ignored and the `FetchDenied` container is returned;
+/// * any other code - the text is ignored and the `FetchFailed` container is returned.
+///
+/// **IMPORTANT NOTE**: origin must point to robots.txt url **before redirects**.
+pub fn parse(origin: Origin, status_code: u16, input: &str) -> ParseResult<FetchedRobotsTxt> {
+    let container = match status_code {
+        OK => {
+            return parse_robots_txt(origin, input)
+                .map(|robots_txt| FetchedRobotsTxt::new(FetchedRobotsTxtContainer::Fetched(robots_txt)));
+        },
+        UNAUTHORIZED | FORBIDDEN => FetchedRobotsTxtContainer::FetchDenied,
+        _ => FetchedRobotsTxtContainer::FetchFailed,
+    };
+    ParseResult::new(FetchedRobotsTxt::new(container))
+}
--- a/src/parser/line.rs
+++ b/src/parser/line.rs
@ -0,0 +1,21 @@
+/// A borrowed line of text paired with the position number it was created with.
+pub struct Line<'a> {
+    line: &'a str,
+    position: usize,
+}
+
+impl<'a> Line<'a> {
+    /// Wraps `line` and remembers `position` as its line number.
+    pub fn new(line: &'a str, position: usize) -> Line<'a> {
+        Line { line, position }
+    }
+
+    /// Returns the text of the line.
+    pub fn get_line_text(&self) -> &str {
+        self.line
+    }
+
+    /// Returns the number that was passed to `new`.
+    pub fn get_line_number(&self) -> usize {
+        self.position
+    }
+}
--- a/src/parser/parse_result.rs
+++ b/src/parser/parse_result.rs
@ -0,0 +1,62 @@
+use crate::parser::warning::ParseWarning;
+use std::fmt::Debug;
+
+#[derive(Debug)]
+/// Outcome of the robots.txt parser: the produced value plus the warnings raised on the way.
+pub struct ParseResult<R> where R: Debug {
+    result: R,
+    warnings: Vec<ParseWarning>,
+}
+
+impl<R> ParseResult<R> where R: Debug {
+    /// Wraps `result` with an empty list of warnings.
+    pub(crate) fn new(result: R) -> ParseResult<R> {
+        Self::new_with_warnings(result, Vec::new())
+    }
+
+    /// Wraps `result` together with the given warnings.
+    pub(crate) fn new_with_warnings(result: R, warnings: Vec<ParseWarning>) -> ParseResult<R> {
+        ParseResult { result, warnings }
+    }
+
+    /// Consumes the structure and returns the parsed value; the warnings are dropped.
+    pub fn get_result(self) -> R {
+        self.result
+    }
+
+    /// Returns the warnings as a slice.
+    pub fn get_warnings(&self) -> &[ParseWarning] {
+        &self.warnings[..]
+    }
+
+    /// Returns a reference to the parsed value, or to the first warning if there is any.
+    pub fn ok_ref(&self) -> Result<&R, &ParseWarning> {
+        match self.warnings.first() {
+            Some(warning) => Err(warning),
+            None => Ok(&self.result),
+        }
+    }
+
+    /// Returns the parsed value, or the first warning if there is any.
+    pub fn ok(self) -> Result<R, ParseWarning> {
+        let ParseResult { result, warnings } = self;
+        match warnings.into_iter().next() {
+            Some(first_warning) => Err(first_warning),
+            None => Ok(result),
+        }
+    }
+
+    /// Transforms the parsed value with `callback` and keeps the warnings untouched.
+    pub(crate) fn map<T>(self, callback: impl Fn(R) -> T) -> ParseResult<T> where T: Debug {
+        let ParseResult { result, warnings } = self;
+        ParseResult {
+            result: callback(result),
+            warnings,
+        }
+    }
+}
--- a/src/parser/robots_txt_parser.rs
+++ b/src/parser/robots_txt_parser.rs
@ -0,0 +1,281 @@
+use url::{Origin, Url};
+use std::time::Duration;
+use crate::parser::parse_result::ParseResult;
+use crate::model::{RobotsTxt, Rule, PathPattern, CleanParams, RequestRate};
+use crate::parser::line::Line;
+use crate::parser::warning::ParseWarning;
+mod directive;
+use self::directive::Directive;
+mod group_builder;
+pub use self::group_builder::GroupBuilder;
+
+const COMMENT_BEGIN_CHAR: char = '#';
+const KV_SEPARATOR: &str = ":";
+
+/// Parses the text of the robots.txt file located in the specified origin.
+/// The returned structure carries the model and the warnings collected while parsing.
+pub fn parse(origin: Origin, input: &str) -> ParseResult<RobotsTxt> {
+    Parser::new(origin).parse(input)
+}
+
+/// State that is accumulated while the lines of a robots.txt file are processed.
+struct Parser {
+    /// The model being filled; sitemaps and clean-params are added to it directly.
+    result: RobotsTxt,
+    /// Collects the user-agent groups until they are moved into `result`.
+    group_builder: GroupBuilder,
+    /// Warnings raised so far, in the order they were raised.
+    warnings: Vec<ParseWarning>,
+}
+
+impl Parser {
+    /// Creates a parser with an empty model for `origin`, a fresh group builder and no warnings.
+    pub fn new(origin: Origin) -> Parser {
+        let result = RobotsTxt::new(origin);
+        let group_builder = GroupBuilder::new();
+        Parser {
+            result,
+            group_builder,
+            warnings: Vec::new(),
+        }
+    }
+
+    /// Processes `input` line by line (after skipping a leading BOM) and returns the
+    /// filled model together with every warning that was raised.
+    /// Line numbers reported in warnings start at 1.
+    pub fn parse(mut self, input: &str) -> ParseResult<RobotsTxt> {
+        for (index, text) in ignore_bom(input).lines().enumerate() {
+            let line = Line::new(text, index + 1);
+            match Self::parse_line(&line) {
+                Ok(Some(directive)) => self.process_line_value(&line, &directive),
+                // Nothing to process on this line.
+                Ok(None) => {},
+                Err(warning) => self.warnings.push(warning),
+            }
+        }
+        // Hand the collected groups over to the model before it is returned.
+        self.group_builder.fill_entries(&mut self.result);
+        ParseResult::new_with_warnings(self.result, self.warnings)
+    }
+
+    /// Splits one line into a `key: value` directive.
+    ///
+    /// Returns `Ok(None)` when the line holds nothing but whitespace and/or a comment,
+    /// so blank or indented comment lines do not produce warnings.
+    ///
+    /// Returns a warning when the remaining text has no `:` separator
+    /// (`InvalidDirectiveFormat`) or when the key before the separator is empty
+    /// (`DirectiveKeyIsEmpty`). Key and value are trimmed; the value may be empty.
+    fn parse_line<'a>(line: &'a Line) -> Result<Option<Directive<'a>>, ParseWarning> {
+        let text = line.get_line_text();
+        // Everything from the first `#` up to the end of the line is a comment.
+        let kv_part = match text.find(COMMENT_BEGIN_CHAR) {
+            Some(comment_start) => &text[..comment_start],
+            None => text,
+        };
+        // Trim before the emptiness check: a line such as "   " or "\t# note"
+        // carries no directive and must not be reported as malformed.
+        if kv_part.trim().is_empty() {
+            return Ok(None);
+        }
+        let separator_index = kv_part
+            .find(KV_SEPARATOR)
+            .ok_or_else(|| ParseWarning::invalid_directive_format(line))?;
+        let key = kv_part[..separator_index].trim();
+        if key.is_empty() {
+            return Err(ParseWarning::directive_key_is_empty(line));
+        }
+        let value = kv_part[separator_index + KV_SEPARATOR.len()..].trim();
+        Ok(Some(Directive::new(key, value)))
+    }
+
+    /// Dispatches the directive to its handler by the lower-cased key.
+    /// An unknown key is recorded as an `UnsupportedDirectiveKey` warning.
+    fn process_line_value(&mut self, line: &Line, directive: &Directive) {
+        let key = directive.get_key_lowercase();
+        match key.as_str() {
+            // Group specific directives
+            "user-agent" => self.process_directive_user_agent(line, directive),
+            "allow" => self.process_directive_allow(line, directive),
+            "disallow" => self.process_directive_disallow(line, directive),
+            "crawl-delay" => self.process_directive_crawl_delay(line, directive),
+            "request-rate" => self.process_directive_request_rate(line, directive),
+            // Non-group directives
+            "sitemap" => self.process_directive_sitemap(line, directive),
+            "clean-param" => self.process_directive_clean_param(line, directive),
+            _ => self.warnings.push(ParseWarning::unsupported_directive_key(line, key)),
+        }
+    }
+
+    /// Passes a non-empty user agent to the group builder; an empty one is reported as a warning.
+    fn process_directive_user_agent(&mut self, line: &Line, directive: &Directive) {
+        match directive.get_value() {
+            "" => self.warnings.push(ParseWarning::user_agent_cannot_be_empty(line)),
+            user_agent => self.group_builder.handle_user_agent(user_agent),
+        }
+    }
+
+    /// Adds an allowing rule to the active group.
+    ///
+    /// * no active group - `DirectiveWithoutUserAgent` warning;
+    /// * empty value - silently ignored;
+    /// * value that starts with neither `*` nor `/` - `WrongPathFormat` warning.
+    fn process_directive_allow(&mut self, line: &Line, directive: &Directive) {
+        let group = match self.group_builder.get_mut_active_group() {
+            Some(group) => group,
+            None => {
+                self.warnings.push(ParseWarning::directive_without_user_agent(line));
+                return;
+            },
+        };
+        let path = directive.get_value();
+        if path.is_empty() {
+            // Nothing to do. Ignoring.
+            return;
+        }
+        if path.starts_with('*') || path.starts_with('/') {
+            group.push_rule(Rule::new(path, true));
+        } else {
+            self.warnings.push(ParseWarning::wrong_path_format(line));
+        }
+    }
+
+    /// Adds a disallowing rule to the active group.
+    ///
+    /// * no active group - `DirectiveWithoutUserAgent` warning;
+    /// * empty value - an allowing rule for `PathPattern::all()` is added instead;
+    /// * value that starts with neither `*` nor `/` - `WrongPathFormat` warning.
+    fn process_directive_disallow(&mut self, line: &Line, directive: &Directive) {
+        let group = match self.group_builder.get_mut_active_group() {
+            Some(group) => group,
+            None => {
+                self.warnings.push(ParseWarning::directive_without_user_agent(line));
+                return;
+            },
+        };
+        let path = directive.get_value();
+        if path.is_empty() {
+            // Allow all.
+            group.push_rule(Rule::new(PathPattern::all(), true));
+            return;
+        }
+        if path.starts_with('*') || path.starts_with('/') {
+            group.push_rule(Rule::new(path, false));
+        } else {
+            self.warnings.push(ParseWarning::wrong_path_format(line));
+        }
+    }
+
+    /// Parses the value as a floating point number of seconds and stores it as the
+    /// crawl delay of the active group.
+    ///
+    /// * no active group - `DirectiveWithoutUserAgent` warning;
+    /// * value that is not a floating point number - `ParseCrawlDelayError` warning.
+    fn process_directive_crawl_delay(&mut self, line: &Line, directive: &Directive) {
+        let group = match self.group_builder.get_mut_active_group() {
+            Some(group) => group,
+            None => {
+                self.warnings.push(ParseWarning::directive_without_user_agent(line));
+                return;
+            },
+        };
+        match directive.get_value().parse::<f64>() {
+            Ok(delay) => {
+                // The integer part becomes seconds, the fractional part nanoseconds.
+                let whole_seconds = delay.trunc() as u64;
+                let nanoseconds = (delay.fract() * 10f64.powi(9)) as u32;
+                group.set_crawl_delay(Duration::new(whole_seconds, nanoseconds));
+            },
+            Err(error) => self.warnings.push(ParseWarning::parse_crawl_delay_error(line, error)),
+        }
+    }
+
+    /// Parses a `<requests>/<seconds>` value and stores it as the request rate of the active group.
+    ///
+    /// * no active group - `DirectiveWithoutUserAgent` warning;
+    /// * value that does not consist of exactly two `/`-separated parts - `WrongRequestRateFormat` warning;
+    /// * part that is not an unsigned integer - `ParseRequestRate` warning.
+    fn process_directive_request_rate(&mut self, line: &Line, directive: &Directive) {
+        let group = match self.group_builder.get_mut_active_group() {
+            Some(group) => group,
+            None => {
+                self.warnings.push(ParseWarning::directive_without_user_agent(line));
+                return;
+            },
+        };
+        let mut parts = directive.get_value().split('/');
+        // Exactly two parts are expected: a third one makes the value malformed.
+        let (requests_text, seconds_text) = match (parts.next(), parts.next(), parts.next()) {
+            (Some(requests_text), Some(seconds_text), None) => (requests_text, seconds_text),
+            _ => {
+                self.warnings.push(ParseWarning::wrong_request_rate_format(line));
+                return;
+            },
+        };
+        let requests = match requests_text.parse::<usize>() {
+            Ok(requests) => requests,
+            Err(error) => {
+                self.warnings.push(ParseWarning::parse_request_rate(line, error));
+                return;
+            },
+        };
+        let seconds = match seconds_text.parse::<usize>() {
+            Ok(seconds) => seconds,
+            Err(error) => {
+                self.warnings.push(ParseWarning::parse_request_rate(line, error));
+                return;
+            },
+        };
+        group.set_req_rate(RequestRate { requests, seconds });
+    }
+
+    /// Parses the value as a URL and adds it to the sitemaps of the model.
+    /// A value that is not a valid URL is reported as a `ParseUrl` warning.
+    fn process_directive_sitemap(&mut self, line: &Line, directive: &Directive) {
+        let parsed = Url::parse(directive.get_value());
+        match parsed {
+            Err(error) => self.warnings.push(ParseWarning::parse_url(line, error)),
+            Ok(sitemap_url) => self.result.add_sitemap(sitemap_url),
+        }
+    }
+
+    /// Handles `Clean-param: <params> [<path>]`.
+    ///
+    /// `<params>` is a `&`-separated list of query parameter names and comes first;
+    /// the optional `<path>` pattern follows it, e.g.
+    /// `Clean-param: ref1&ref2 /some_dir/get_book.pl`. Without a path the parameters
+    /// are attached to `PathPattern::all()`.
+    ///
+    /// * value with no parts or with more than two - `WrongCleanParamFormat` warning;
+    /// * parameter names with unsupported characters - reported in one
+    ///   `IgnoredCleanParams` warning, the remaining names are still stored.
+    fn process_directive_clean_param(&mut self, line: &Line, directive: &Directive) {
+        // `split_whitespace` never yields empty items, so only the number of parts matters.
+        let parts: Vec<&str> = directive.get_value().split_whitespace().collect();
+        let (clean_params, clean_params_path_pattern) = match parts.as_slice() {
+            [params] => (*params, PathPattern::all()),
+            [params, path] => (*params, PathPattern::new(*path)),
+            _ => {
+                self.warnings.push(ParseWarning::wrong_clean_param_format(line));
+                return;
+            },
+        };
+        let (valid_clean_params, invalid_clean_params) = Self::parse_clean_params(clean_params);
+        if !invalid_clean_params.is_empty() {
+            self.warnings.push(ParseWarning::ignored_clean_params(line, invalid_clean_params));
+        }
+        self.result.add_clean_params(CleanParams::new(clean_params_path_pattern, valid_clean_params));
+    }
+
+    /// Splits `clean_params` on `&`, drops empty items and sorts the rest into
+    /// `(valid, invalid)` according to `is_valid_clean_param`, keeping their order.
+    fn parse_clean_params(clean_params: &str) -> (Vec<String>, Vec<String>) {
+        clean_params
+            .split('&')
+            .filter(|clean_param| !clean_param.is_empty())
+            .map(String::from)
+            .partition(|clean_param| Self::is_valid_clean_param(clean_param))
+    }
+
+    /// Tells whether every character of `clean_param` is an ASCII letter, an ASCII
+    /// digit, `.`, `-` or `_`. An empty string is considered valid.
+    ///
+    /// The whole ranges `A-Z`, `a-z` and `0-9` are accepted, including their last
+    /// characters `Z`, `z` and `9`.
+    fn is_valid_clean_param(clean_param: &str) -> bool {
+        clean_param
+            .chars()
+            .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_')
+    }
+}
+
+/// Returns `input` without its leading byte order mark (U+FEFF).
+/// Only one mark is removed; text that does not start with it is returned unchanged.
+fn ignore_bom(input: &str) -> &str {
+    const BOM: char = '\u{feff}';
+    if input.starts_with(BOM) {
+        &input[BOM.len_utf8()..]
+    } else {
+        input
+    }
+}
--- a/src/parser/robots_txt_parser/directive.rs
+++ b/src/parser/robots_txt_parser/directive.rs
@ -0,0 +1,21 @@
+/// A borrowed key/value pair.
+pub struct Directive<'a> {
+    key: &'a str,
+    value: &'a str,
+}
+
+impl<'a> Directive<'a> {
+    /// Stores `key` and `value` as they are given.
+    pub fn new(key: &'a str, value: &'a str) -> Directive<'a> {
+        Directive { key, value }
+    }
+
+    /// Returns a lower-cased copy of the key.
+    pub fn get_key_lowercase(&self) -> String {
+        let key: &str = self.key;
+        key.to_lowercase()
+    }
+
+    /// Returns the value.
+    pub fn get_value(&self) -> &str {
+        self.value
+    }
+}
--- a/src/parser/robots_txt_parser/group_builder.rs
+++ b/src/parser/robots_txt_parser/group_builder.rs
@ -0,0 +1,54 @@
+use crate::model::{Group, RobotsTxt};
+/// Decides how `GroupBuilder::handle_user_agent` treats the next user agent.
+enum State {
+    /// The next user agent starts a new group.
+    WaitingForNewGroup,
+    /// The next user agent is added to the currently active group.
+    WaitingForAdditionalUserAgent,
+}
+
+/// Collects user-agent groups and remembers which of them is currently active.
+pub struct GroupBuilder {
+    state: State,
+    active_group: Option<usize>,
+    groups: Vec<Group>,
+}
+
+impl GroupBuilder {
+    /// Creates a builder without groups that waits for the first user agent.
+    pub fn new() -> GroupBuilder {
+        GroupBuilder {
+            state: State::WaitingForNewGroup,
+            active_group: None,
+            groups: Vec::new(),
+        }
+    }
+
+    /// Registers a user agent: depending on the state it either opens a new active
+    /// group or joins the active one (unless the group already contains it).
+    pub fn handle_user_agent(&mut self, user_agent: &str) {
+        match self.state {
+            State::WaitingForAdditionalUserAgent => {
+                let index = self.active_group.expect("Unable to get active group");
+                let group = self.groups.get_mut(index).expect("Unable to get group index");
+                if !group.contains_user_agent(user_agent) {
+                    group.push_useragent(user_agent);
+                }
+            },
+            State::WaitingForNewGroup => {
+                let mut group = Group::new();
+                group.push_useragent(user_agent);
+                // The new group is appended, so its index is the current length.
+                let index = self.groups.len();
+                self.groups.push(group);
+                self.active_group = Some(index);
+                self.state = State::WaitingForAdditionalUserAgent;
+            },
+        }
+    }
+
+    /// Returns the active group, if any.
+    /// As a side effect the next user agent will open a new group.
+    pub fn get_mut_active_group(&mut self) -> Option<&mut Group> {
+        self.state = State::WaitingForNewGroup;
+        let index = self.active_group?;
+        self.groups.get_mut(index)
+    }
+
+    /// Consumes the builder and moves all collected groups into `robots_txt`, in order.
+    pub fn fill_entries(self, robots_txt: &mut RobotsTxt) {
+        for group in self.groups {
+            robots_txt.add_group(group);
+        }
+    }
+}
--- a/src/parser/warning.rs
+++ b/src/parser/warning.rs
@ -0,0 +1,136 @@
+use super::line::Line;
+use super::warning_reason::WarningReason;
+use url::ParseError as ParseUrlError;
+use std::num::{ParseFloatError, ParseIntError};
+use std::fmt;
+use std::error::Error;
+
+#[derive(Clone, Debug)]
+/// A problem that the robots.txt parser reports for one line of the file.
+pub struct ParseWarning {
+    line_no: usize,
+    line: String,
+    reason: WarningReason,
+}
+
+impl Error for ParseWarning {}
+
+impl ParseWarning {
+    /// Copies the number and the text of `line` and attaches `reason` to them.
+    fn from_line(line: &Line, reason: WarningReason) -> ParseWarning {
+        ParseWarning {
+            line_no: line.get_line_number(),
+            line: line.get_line_text().into(),
+            reason,
+        }
+    }
+
+    /// Returns the line number in the text of the robots.txt file.
+    pub fn get_line_no(&self) -> usize {
+        self.line_no
+    }
+
+    /// Returns the text of the line the warning refers to.
+    pub fn get_line_text(&self) -> &String {
+        &self.line
+    }
+
+    /// Returns the reason of warning.
+    pub fn get_reason(&self) -> &WarningReason {
+        &self.reason
+    }
+
+    /// Warning with the reason `InvalidDirectiveFormat`.
+    pub(crate) fn invalid_directive_format(line: &Line) -> ParseWarning {
+        Self::from_line(line, WarningReason::InvalidDirectiveFormat)
+    }
+
+    /// Warning with the reason `DirectiveKeyIsEmpty`.
+    pub(crate) fn directive_key_is_empty(line: &Line) -> ParseWarning {
+        Self::from_line(line, WarningReason::DirectiveKeyIsEmpty)
+    }
+
+    /// Warning with the reason `UnsupportedDirectiveKey` that carries `key`.
+    pub(crate) fn unsupported_directive_key(line: &Line, key: String) -> ParseWarning {
+        Self::from_line(line, WarningReason::UnsupportedDirectiveKey(key))
+    }
+
+    /// Warning with the reason `UserAgentCannotBeEmpty`.
+    pub(crate) fn user_agent_cannot_be_empty(line: &Line) -> ParseWarning {
+        Self::from_line(line, WarningReason::UserAgentCannotBeEmpty)
+    }
+
+    /// Warning with the reason `WrongPathFormat`.
+    pub(crate) fn wrong_path_format(line: &Line) -> ParseWarning {
+        Self::from_line(line, WarningReason::WrongPathFormat)
+    }
+
+    /// Warning with the reason `DirectiveWithoutUserAgent`.
+    pub(crate) fn directive_without_user_agent(line: &Line) -> ParseWarning {
+        Self::from_line(line, WarningReason::DirectiveWithoutUserAgent)
+    }
+
+    /// Warning with the reason `ParseCrawlDelayError` that carries `error`.
+    pub(crate) fn parse_crawl_delay_error(line: &Line, error: ParseFloatError) -> ParseWarning {
+        Self::from_line(line, WarningReason::ParseCrawlDelayError(error))
+    }
+
+    /// Warning with the reason `WrongRequestRateFormat`.
+    pub(crate) fn wrong_request_rate_format(line: &Line) -> ParseWarning {
+        Self::from_line(line, WarningReason::WrongRequestRateFormat)
+    }
+
+    /// Warning with the reason `ParseRequestRate` that carries `error`.
+    pub(crate) fn parse_request_rate(line: &Line, error: ParseIntError) -> ParseWarning {
+        Self::from_line(line, WarningReason::ParseRequestRate(error))
+    }
+
+    /// Warning with the reason `ParseUrl` that carries `error`.
+    pub(crate) fn parse_url(line: &Line, error: ParseUrlError) -> ParseWarning {
+        Self::from_line(line, WarningReason::ParseUrl(error))
+    }
+
+    /// Warning with the reason `WrongCleanParamFormat`.
+    pub(crate) fn wrong_clean_param_format(line: &Line) -> ParseWarning {
+        Self::from_line(line, WarningReason::WrongCleanParamFormat)
+    }
+
+    /// Warning with the reason `IgnoredCleanParams` that carries the rejected names.
+    pub(crate) fn ignored_clean_params(line: &Line, ignored_clean_params: Vec<String>) -> ParseWarning {
+        Self::from_line(line, WarningReason::IgnoredCleanParams(ignored_clean_params))
+    }
+}
+
+/// Formats the warning as the line number, the line text and the reason.
+impl fmt::Display for ParseWarning {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let ParseWarning { line_no, line, reason } = self;
+        write!(f, "Line: {}. Text: `{}`. {}", line_no, line, reason)
+    }
+}
--- a/src/parser/warning_reason.rs
+++ b/src/parser/warning_reason.rs
@ -0,0 +1,80 @@
+use url::ParseError as ParseUrlError;
+use std::num::{ParseFloatError, ParseIntError};
+use std::fmt;
+
+#[derive(Clone, Debug)]
+/// The reason why the robots.txt parser raised a warning for a line.
+pub enum WarningReason {
+    /// Invalid directive format: the line has no `:` separator between key and value.
+    InvalidDirectiveFormat,
+    /// Directive key is empty. Invalid directive example: `: <Value>`
+    DirectiveKeyIsEmpty,
+    /// Directive key is not supported by this parser. Carries the lower-cased key.
+    UnsupportedDirectiveKey(String),
+    /// Passed directive key is `User-Agent` and passed value is empty.
+    UserAgentCannotBeEmpty,
+    /// The directive cannot be processed because no `User-Agent` directive has been processed before it.
+    DirectiveWithoutUserAgent,
+    /// It is impossible to process the `Crawl-Delay` directive because of an error when parsing a floating point number.
+    ParseCrawlDelayError(ParseFloatError),
+    /// Incorrect format of the `Request-Rate` directive: the value must consist of exactly two parts separated by `/`.
+    /// Example of the correct format: `Request-rate: 1/5`
+    WrongRequestRateFormat,
+    /// One of the two numbers of the `Request-Rate` directive could not be parsed as an unsigned integer.
+    /// Example of the correct format: `Request-rate: 1/5`
+    ParseRequestRate(ParseIntError),
+    /// Parsing URL error.
+    ParseUrl(ParseUrlError),
+    /// Incorrect format of the `Clean-Param` directive:
+    /// the value must consist of one or two parts separated by whitespace.
+    /// Example of the correct format: `Clean-param: ref1&ref2 /some_dir/get_book.pl`
+    WrongCleanParamFormat,
+    /// Some parameters of the `Clean-Param` directive contain unsupported characters and were ignored.
+    /// Parameters may only consist of the characters `A-Z`, `a-z`, `0-9`, `.`, `-` and `_`.
+    /// Example of the correct format: `Clean-param: ref1&ref2 /some_dir/get_book.pl`
+    IgnoredCleanParams(Vec<String>),
+    /// Error in URL path format.
+    WrongPathFormat,
+}
+
+/// Displays a human-readable description of the warning reason.
+/// Variants that carry a payload (key, underlying error, ignored parameters) include it in the text.
+impl fmt::Display for WarningReason {
+    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        match self {
+            Self::InvalidDirectiveFormat => {
+                write!(f, "Invalid directive format.")
+            },
+            Self::DirectiveKeyIsEmpty => {
+                write!(f, "Directive key is empty.")
+            },
+            Self::UnsupportedDirectiveKey(key) => {
+                // The message used to misspell "supported".
+                write!(f, "Directive key `{}` is not supported by this parser.", key)
+            },
+            Self::UserAgentCannotBeEmpty => {
+                write!(f, "Passed directive key is `User-Agent` and passed value is empty.")
+            },
+            Self::DirectiveWithoutUserAgent => {
+                write!(f, "It is impossible to process this directive before `User-Agent` directive has not been processed.")
+            },
+            Self::ParseCrawlDelayError(err) => {
+                write!(f, "It is impossible to process the `Crawl-Delay` directive because of an error when parsing a floating point number: {}", err)
+            },
+            Self::WrongRequestRateFormat => {
+                write!(f, "Incorrect format of the `Request-Rate` directive")
+            },
+            Self::ParseRequestRate(err) => {
+                write!(f, "Incorrect format of the `Request-Rate` directive: {}", err)
+            },
+            Self::ParseUrl(err) => {
+                write!(f, "Parsing URL error: {}", err)
+            },
+            Self::WrongCleanParamFormat => {
+                write!(f, "Incorrect format of the `Clean-Param` directive.")
+            },
+            Self::IgnoredCleanParams(params) => {
+                write!(f, "Directive `Clean-Param` directive has incorrect parameters: {:?}", params)
+            },
+            Self::WrongPathFormat => {
+                write!(f, "Error in URL path format.")
+            },
+        }
+    }
+}
--- a/src/service.rs
+++ b/src/service.rs
@ -0,0 +1,30 @@
+mod robots_txt;
+mod fetched_robots_txt;
+use url::Url;
+use std::time::Duration;
+use crate::model::RequestRate;
+
+/// Operations that can be performed on a robots.txt model.
+pub trait RobotsTxtService {
+    /// Using the parsed robots.txt, decides whether `user_agent` is allowed to fetch `url`.
+    fn can_fetch(&self, user_agent: &str, url: &Url) -> bool;
+
+    /// Returns the crawl delay for this user agent as a `Duration`, or `None` if no crawl delay is defined.
+    fn get_crawl_delay(&self, user_agent: &str) -> Option<Duration>;
+
+    /// Removes from the url the request parameters that are listed in the `Clean-param` directive.
+    /// This method CHECKS that the origin of the passed url matches the origin of robots.txt.
+    /// Returns true if the operation was applied to the passed url.
+    /// In other cases it returns false.
+    fn normalize_url(&self, url: &mut Url) -> bool;
+
+    /// Removes from the url the request parameters that are listed in the `Clean-param` directive.
+    /// This method DOES NOT CHECK that the origin of the passed url matches the origin of robots.txt.
+    fn normalize_url_ignore_origin(&self, url: &mut Url);
+
+    /// Returns the sitemap URLs that are listed in the robots.txt file.
+    fn get_sitemaps(&self) -> &[Url];
+
+    /// Returns the request rate limit defined for this user agent, or `None` if no limit is defined.
+    fn get_req_rate(&self, user_agent: &str) -> Option<RequestRate>;
+}
--- a/src/service/fetched_robots_txt.rs
+++ b/src/service/fetched_robots_txt.rs
@ -0,0 +1,51 @@
+use url::Url;
+use std::time::Duration;
+use crate::service::RobotsTxtService;
+use crate::model::{FetchedRobotsTxt, FetchedRobotsTxtContainer};
+use crate::model::RequestRate;
+
+/// Answers robots.txt queries for a fetch outcome that may or may not carry parsed rules.
+impl RobotsTxtService for FetchedRobotsTxt {
+    /// A denied fetch forbids every URL, a failed fetch permits every URL,
+    /// and a successful fetch defers to the parsed rules.
+    fn can_fetch(&self, user_agent: &str, url: &Url) -> bool {
+        match self.get_container() {
+            FetchedRobotsTxtContainer::Fetched(robots_txt) => robots_txt.can_fetch(user_agent, url),
+            FetchedRobotsTxtContainer::FetchFailed => true,
+            FetchedRobotsTxtContainer::FetchDenied => false,
+        }
+    }
+
+    /// Crawl delay from the parsed rules; `None` when no rules were fetched.
+    fn get_crawl_delay(&self, user_agent: &str) -> Option<Duration> {
+        match self.get_container() {
+            FetchedRobotsTxtContainer::Fetched(robots_txt) => robots_txt.get_crawl_delay(user_agent),
+            FetchedRobotsTxtContainer::FetchDenied | FetchedRobotsTxtContainer::FetchFailed => None,
+        }
+    }
+
+    /// Delegates to the parsed rules; reports `true` and leaves `url` untouched
+    /// when no rules were fetched.
+    fn normalize_url(&self, url: &mut Url) -> bool {
+        match self.get_container() {
+            FetchedRobotsTxtContainer::Fetched(robots_txt) => robots_txt.normalize_url(url),
+            FetchedRobotsTxtContainer::FetchDenied | FetchedRobotsTxtContainer::FetchFailed => true,
+        }
+    }
+
+    /// Delegates to the parsed rules; does nothing when no rules were fetched.
+    fn normalize_url_ignore_origin(&self, url: &mut Url) {
+        match self.get_container() {
+            FetchedRobotsTxtContainer::Fetched(robots_txt) => robots_txt.normalize_url_ignore_origin(url),
+            FetchedRobotsTxtContainer::FetchDenied | FetchedRobotsTxtContainer::FetchFailed => {}
+        }
+    }
+
+    /// Sitemaps from the parsed rules; an empty slice when no rules were fetched.
+    fn get_sitemaps(&self) -> &[Url] {
+        match self.get_container() {
+            FetchedRobotsTxtContainer::Fetched(robots_txt) => robots_txt.get_sitemaps(),
+            FetchedRobotsTxtContainer::FetchDenied | FetchedRobotsTxtContainer::FetchFailed => &[],
+        }
+    }
+
+    /// Request rate from the parsed rules; `None` when no rules were fetched.
+    fn get_req_rate(&self, user_agent: &str) -> Option<RequestRate> {
+        match self.get_container() {
+            FetchedRobotsTxtContainer::Fetched(robots_txt) => robots_txt.get_req_rate(user_agent),
+            FetchedRobotsTxtContainer::FetchDenied | FetchedRobotsTxtContainer::FetchFailed => None,
+        }
+    }
+}
--- a/src/service/robots_txt.rs
+++ b/src/service/robots_txt.rs
@ -0,0 +1,84 @@
+use url::Url;
+use std::time::Duration;
+use crate::service::RobotsTxtService;
+use crate::model::RobotsTxt;
+use crate::model::RequestRate;
+use crate::model::Path;
+
+/// Answers robots.txt queries directly from the parsed rules.
+impl RobotsTxtService for RobotsTxt {
+    /// Returns `false` for URLs of another origin; otherwise the first applicable rule
+    /// (rules are tried in the order given by `get_rules_sorted_by_path_len_desc`) decides.
+    fn can_fetch(&self, user_agent: &str, url: &Url) -> bool {
+        if url.origin() != *self.get_origin() {
+            return false;
+        }
+        let path = Path::from_url(url);
+        self.find_in_group(user_agent, |group| {
+            let sorted_rules = group.get_rules_sorted_by_path_len_desc();
+            let first_match = sorted_rules.iter().find(|rule| rule.applies_to(&path));
+            first_match.map(|rule| rule.get_allowance())
+        })
+        // Empty robots.txt allows crawling. Everything that was not denied must be allowed.
+        .unwrap_or(true)
+    }
+
+    /// Looks up the crawl delay in the group selected for `user_agent`.
+    fn get_crawl_delay(&self, user_agent: &str) -> Option<Duration> {
+        self.find_in_group(user_agent, |group| group.get_crawl_delay())
+    }
+
+    /// Cleans `url` only when its origin equals the origin of this robots.txt.
+    fn normalize_url(&self, url: &mut Url) -> bool {
+        let same_origin = url.origin() == *self.get_origin();
+        if same_origin {
+            self.normalize_url_ignore_origin(url);
+        }
+        same_origin
+    }
+
+    /// Rebuilds the query string of `url` without the parameters named by every
+    /// `Clean-param` entry whose path pattern applies to the URL path.
+    fn normalize_url_ignore_origin(&self, url: &mut Url) {
+        if url.query().is_none() {
+            return;
+        }
+        let path = Path::from_url(url);
+        let mut params_to_remove = Vec::new();
+        for clean_params in self.get_clean_params().iter() {
+            if clean_params.get_path_pattern().applies_to(&path) {
+                params_to_remove.extend_from_slice(clean_params.get_params());
+            }
+        }
+        // Take owned copies of the surviving pairs first: the query cannot be read
+        // while it is being rewritten through the mutable serializer below.
+        let retained_pairs: Vec<(String, String)> = url
+            .query_pairs()
+            .map(|(key, value)| (key.into_owned(), value.into_owned()))
+            .filter(|(key, _)| !params_to_remove.contains(key))
+            .collect();
+        {
+            let mut serializer = url.query_pairs_mut();
+            serializer.clear();
+            for (key, value) in &retained_pairs {
+                serializer.append_pair(key, value);
+            }
+        }
+        // Drop the dangling `?` when every parameter was removed.
+        if url.query() == Some("") {
+            url.set_query(None);
+        }
+    }
+
+    /// Returns the sitemap URLs stored in this robots.txt.
+    fn get_sitemaps(&self) -> &[Url] {
+        self.get_sitemaps_slice()
+    }
+
+    /// Looks up the request rate in the group selected for `user_agent`.
+    fn get_req_rate(&self, user_agent: &str) -> Option<RequestRate> {
+        self.find_in_group(user_agent, |group| group.get_req_rate())
+    }
+}
--- a/tests/test_lib.rs
+++ b/tests/test_lib.rs
@ -1,21 +1,22 @@
-extern crate robotparser;
-extern crate url;
-
-use robotparser::RobotFileParser;
+use robotparser::parser::parse_robots_txt;
+use robotparser::service::RobotsTxtService;
 use std::time::Duration;
 use url::Url;

 const AGENT: &'static str = "test_robotparser";

 fn robot_test(doc: &str, good_urls: Vec<&str>, bad_urls: Vec<&str>, agent: &str) {
-    let parser = RobotFileParser::new("http://www.baidu.com/robots.txt");
-    let lines: Vec<&str> = doc.split("\n").collect();
-    parser.parse(&lines);
+    let url = Url::parse("http://www.baidu.com/robots.txt").unwrap();
+    let parser = parse_robots_txt(url.origin(), doc).get_result();
    for url in &good_urls {
-        assert!(parser.can_fetch(agent, url));
+        let url = format!("http://www.baidu.com{}", url);
+        let url = Url::parse(&url).unwrap();
+        assert!(parser.can_fetch(agent, &url));
    }
    for url in &bad_urls {
-        assert!(!parser.can_fetch(agent, url));
+        let url = format!("http://www.baidu.com{}", url);
+        let url = Url::parse(&url).unwrap();
+        assert!(!parser.can_fetch(agent, &url));
    }
 }

@ -24,6 +25,19 @@ fn robot_test_simple(doc: &str, good_urls: Vec<&str>, bad_urls: Vec<&str>) {
    robot_test(doc, good_urls, bad_urls, AGENT);
 }

+/// Parses a document that starts with a byte-order mark and uses `\r\n` line endings.
+#[test]
+fn test_robots_txt_rn_bom() {
+    let allowed_paths = vec!["/", "/test.html"];
+    let denied_paths = vec!["/cyberworld/map/index.html", "/tmp/xxx", "/foo.html"];
+    let robots_txt = "\u{feff}\r\n\
+    User-agent: *\r\n\
+    Disallow: /cyberworld/map/ # This is an infinite virtual URL space\r\n\
+    Disallow: /tmp/ # these will soon disappear\r\n\
+    Disallow: /foo.html\r\n\
+    ";
+    robot_test_simple(robots_txt, allowed_paths, denied_paths);
+}
+

 #[test]
 fn test_robots_txt_1() {
@ -213,54 +227,72 @@ fn test_robots_txt_13() {
    robot_test_simple(doc, good, bad);
 }

+/// Rules whose paths contain the `*` and `$` pattern symbols.
+#[test]
+fn test_robots_txt_14() {
+    let allowed_paths = vec!["/rss/test", "/sdfvsdvs-sdfvsdv-video.html", "/rate"];
+    let denied_paths = vec!["/rss", "/rss/", "/rate/", "/rate/0/9"];
+    let robots_txt = "\n\
+    User-agent: *\n
+    Allow: /*video.html\n
+    Allow: */?amp*\n
+    Disallow: */rss$\n
+    Disallow: */rss/$\n
+    Disallow: /rate/\n
+    ";
+    robot_test_simple(robots_txt, allowed_paths, denied_paths);
+}
+
 #[cfg(feature = "http")]
 #[test]
 fn test_robots_txt_read() {
-    let parser = RobotFileParser::new("http://www.python.org/robots.txt");
-    parser.read();
-    assert!(parser.can_fetch("*", "http://www.python.org/robots.txt"));
+    use robotparser::http::{CreateRobotsTxtRequest, ParseRobotsTxtResponse};
+    use reqwest::{Client, Request};
+    let http_client = Client::new();
+    let url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let request = Request::create_robots_txt_request(url.origin());
+    let mut response = http_client.execute(request).unwrap();
+    let parser = response.parse_robots_txt_response().unwrap().get_result();
+    assert!(parser.can_fetch("*", &url));
 }

 #[test]
 fn test_robots_text_crawl_delay() {
-    let parser = RobotFileParser::new("http://www.python.org/robots.txt");
+    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
    let doc = "User-agent: Yandex\n\
    Crawl-delay: 2.35\n\
    Disallow: /search/\n";
-    let lines: Vec<&str> = doc.split("\n").collect();
-    parser.parse(&lines);
+    let parser = parse_robots_txt(robots_txt_url.origin(), doc).get_result();
    assert_eq!(Duration::new(2,350 * 1000 * 1000), parser.get_crawl_delay("Yandex").unwrap());
 }

 #[test]
 fn test_robots_text_sitemaps() {
-    let parser = RobotFileParser::new("http://www.python.org/robots.txt");
+    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
    let doc = "User-agent: Yandex\n\
-    Sitemap:  http://example.com/sitemap1.xml
-    Sitemap:  http://example.com/sitemap2.xml
-    Sitemap:  http://example.com/sitemap3.xml
+    Sitemap    \t  :  http://example.com/sitemap1.xml\n
+    Sitemap:  http://example.com/sitemap2.xml\n
+    Sitemap:  http://example.com/sitemap3.xml\n
    Disallow: /search/\n";
-    let lines: Vec<&str> = doc.split("\n").collect();
-    parser.parse(&lines);
+    let parser = parse_robots_txt(robots_txt_url.origin(), doc).get_result();
    assert_eq!(
-        vec![
+        &[
            Url::parse("http://example.com/sitemap1.xml").unwrap(),
            Url::parse("http://example.com/sitemap2.xml").unwrap(),
            Url::parse("http://example.com/sitemap3.xml").unwrap()
        ],
-        parser.get_sitemaps("Yandex")
+        parser.get_sitemaps()
    );
 }

 #[test]
 fn test_robots_text_request_rate() {
-    let parser = RobotFileParser::new("http://www.python.org/robots.txt");
+    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
    let doc =
        "User-agent: Yandex\n\
        Request-rate: 3/15\n\
        Disallow: /search/\n";
-    let lines: Vec<&str> = doc.split("\n").collect();
-    parser.parse(&lines);
+    let parser = parse_robots_txt(robots_txt_url.origin(), doc).get_result();
    let req_rate = parser.get_req_rate("Yandex").unwrap();
    assert_eq!(3, req_rate.requests);
    assert_eq!(15, req_rate.seconds);
@ -269,8 +301,27 @@ fn test_robots_text_request_rate() {
    assert!(req_rate.is_none());
 }

+
 #[test]
-fn test_robots_127_0_0_1() {
-    // Ensure it does not panic
-    RobotFileParser::new("http://127.0.0.1:4000/robots.txt");
-}
+fn test_robots_text_clean_params() {
+    let robots_txt_url = Url::parse("http://www.baidu.com/robots.txt").unwrap();
+    let doc = "\
+User-Agent: *\n\
+Clean-param: mode\n\
+Clean-param: from\n\
+Clean-param: pid\n\
+Clean-param: gid\n\
+Clean-param: tm\n\
+Clean-param: amp\n\
+    ";
+    let robots_txt = parse_robots_txt(robots_txt_url.origin(), doc).get_result();
+
+    // Same origin as robots.txt: every listed parameter is stripped from the query.
+    let mut same_origin_url = Url::parse("http://www.baidu.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&amp=1").unwrap();
+    assert!(robots_txt.normalize_url(&mut same_origin_url));
+    assert_eq!(same_origin_url.as_str(), "http://www.baidu.com/test?post_id=7777");
+
+    // Different origin: the call reports `false` and the URL stays untouched.
+    let mut foreign_url = Url::parse("http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&amp=1").unwrap();
+    assert!(!robots_txt.normalize_url(&mut foreign_url));
+    assert_eq!(foreign_url.as_str(), "http://www.google.com/test?post_id=7777&mode=99&from=google&pid=99&gid=88&tm=777&amp=1");
+}
--- a/tests/test_reqwest_async.rs
+++ b/tests/test_reqwest_async.rs
@ -0,0 +1,16 @@
+use robotparser::http::RobotsTxtClient;
+use robotparser::service::RobotsTxtService;
+use reqwest::Client;
+use url::Url;
+use tokio::runtime::Runtime;
+
+/// Fetches robots.txt through the asynchronous `reqwest` client, driven by a Tokio runtime.
+#[test]
+fn test_reqwest_async() {
+    let mut runtime = Runtime::new().unwrap();
+    let client = Client::new();
+    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let fetch_future = client.fetch_robots_txt(robots_txt_url.origin());
+    let robots_txt = runtime.block_on(fetch_future).unwrap().get_result();
+    let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    assert!(robots_txt.can_fetch("*", &fetch_url));
+}
--- a/tests/test_reqwest_blocking.rs
+++ b/tests/test_reqwest_blocking.rs
@ -0,0 +1,13 @@
+use robotparser::http::RobotsTxtClient;
+use robotparser::service::RobotsTxtService;
+use reqwest::blocking::Client;
+use url::Url;
+
+/// Fetches robots.txt through the blocking `reqwest` client.
+#[test]
+fn test_reqwest_blocking() {
+    let client = Client::new();
+    let robots_txt_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    let fetch_result = client.fetch_robots_txt(robots_txt_url.origin());
+    let robots_txt = fetch_result.unwrap().get_result();
+    let fetch_url = Url::parse("http://www.python.org/robots.txt").unwrap();
+    assert!(robots_txt.can_fetch("*", &fetch_url));
+}
--- a/tests/test_warnings.rs
+++ b/tests/test_warnings.rs
@ -0,0 +1,178 @@
+use robotparser::parser::{parse_robots_txt, WarningReason};
+use url::{Host, Origin};
+use std::convert::From;
+
+/// Payload-free counterpart of `WarningReason`: one variant per warning reason.
+/// It lets the tests compare warnings by kind only, ignoring any data
+/// attached to the original reason.
+#[derive(PartialEq, Eq, Debug, Clone)]
+enum WarningReasonKind {
+    InvalidDirectiveFormat,
+    DirectiveKeyIsEmpty,
+    UnsupportedDirectiveKey,
+    UserAgentCannotBeEmpty,
+    DirectiveWithoutUserAgent,
+    ParseCrawlDelayError,
+    WrongRequestRateFormat,
+    ParseRequestRate,
+    ParseUrl,
+    WrongCleanParamFormat,
+    IgnoredCleanParams,
+    WrongPathFormat,
+}
+
+/// Parses `input` as the robots.txt of `http://python.org:80` and asserts that the parser
+/// emits exactly the warnings in `expected_warnings`, in the same order.
+fn validate_warnings(input: &str, expected_warnings: &[WarningReasonKind]) {
+    let origin = Origin::Tuple("http".into(), Host::Domain("python.org".into()), 80);
+    let parse_result = parse_robots_txt(origin, &input);
+    let warnings = parse_result.get_warnings().to_vec();
+    assert_eq!(warnings.len(), expected_warnings.len());
+    let actual_kinds = warnings
+        .iter()
+        .map(|warning| WarningReasonKind::from(warning.get_reason()));
+    for (actual, expected) in actual_kinds.zip(expected_warnings) {
+        assert_eq!(expected.clone(), actual);
+    }
+}
+
+/// A lone backtick, with or without surrounding whitespace, is an invalid directive.
+#[test]
+fn test_warning_invalid_directive_format() {
+    for &input in &["`", " \t ` \t "] {
+        validate_warnings(input, &[WarningReasonKind::InvalidDirectiveFormat]);
+    }
+}
+
+/// A bare `:` has no directive key.
+#[test]
+fn test_warning_directive_key_is_empty() {
+    validate_warnings(":", &[WarningReasonKind::DirectiveKeyIsEmpty]);
+}
+
+/// `X-Directive` is reported as an unsupported key, with or without surrounding whitespace.
+#[test]
+fn test_warning_supported_directive_key() {
+    for &input in &["X-Directive:", "\t  X-Directive\t  :\t  "] {
+        validate_warnings(input, &[WarningReasonKind::UnsupportedDirectiveKey]);
+    }
+}
+
+
+/// A `User-Agent` directive without a value produces a warning; one with a value does not.
+#[test]
+fn test_warning_user_agent_cannot_be_empty() {
+    let cases: [(&str, &[WarningReasonKind]); 3] = [
+        ("User-Agent:", &[WarningReasonKind::UserAgentCannotBeEmpty]),
+        ("\t  User-Agent\t  :\t  ", &[WarningReasonKind::UserAgentCannotBeEmpty]),
+        ("\t  User-Agent\t  :\t  *", &[]),
+    ];
+    for &(input, expected) in cases.iter() {
+        validate_warnings(input, expected);
+    }
+}
+
+/// A directive that is not preceded by a `User-Agent` line produces a warning.
+#[test]
+fn test_warning_directive_without_user_agent() {
+    let cases: [(&str, &[WarningReasonKind]); 2] = [
+        ("Crawl-Delay: 5s", &[WarningReasonKind::DirectiveWithoutUserAgent]),
+        ("User-Agent: *\nCrawl-Delay: 5", &[]),
+    ];
+    for &(input, expected) in cases.iter() {
+        validate_warnings(input, expected);
+    }
+}
+
+/// Empty or malformed `Crawl-Delay` values produce a warning; a plain number does not.
+#[test]
+fn test_warning_parse_crawl_delay_error() {
+    let cases: [(&str, &[WarningReasonKind]); 4] = [
+        ("User-Agent: *\nCrawl-Delay: ", &[WarningReasonKind::ParseCrawlDelayError]),
+        ("User-Agent: *\nCrawl-Delay: -", &[WarningReasonKind::ParseCrawlDelayError]),
+        ("User-Agent: *\nCrawl-Delay: 5h9", &[WarningReasonKind::ParseCrawlDelayError]),
+        ("User-Agent: *\nCrawl-Delay: 5", &[]),
+    ];
+    for &(input, expected) in cases.iter() {
+        validate_warnings(input, expected);
+    }
+}
+
+/// `Request-rate` values that are not of the form `a/b` produce a format warning.
+#[test]
+fn test_warning_request_rate_format() {
+    let cases: [(&str, &[WarningReasonKind]); 3] = [
+        ("User-Agent: *\nRequest-rate: 1/5", &[]),
+        ("User-Agent: *\nRequest-rate: 1//5", &[WarningReasonKind::WrongRequestRateFormat]),
+        ("User-Agent: *\nRequest-rate: 1", &[WarningReasonKind::WrongRequestRateFormat]),
+    ];
+    for &(input, expected) in cases.iter() {
+        validate_warnings(input, expected);
+    }
+}
+
+/// `Request-rate` values of the form `a/b` whose parts are not integers produce a parse warning.
+#[test]
+fn test_warning_request_rate() {
+    let inputs = [
+        "User-Agent: *\nRequest-rate: a/b",
+        "User-Agent: *\nRequest-rate: a/5",
+        "User-Agent: *\nRequest-rate: 5/b",
+        "User-Agent: *\nRequest-rate: 1.0/5.0",
+    ];
+    for &input in inputs.iter() {
+        validate_warnings(input, &[WarningReasonKind::ParseRequestRate]);
+    }
+}
+
+/// A `Sitemap` value that is not a parsable URL produces a warning.
+#[test]
+fn test_warning_parsing_url() {
+    let cases: [(&str, &[WarningReasonKind]); 2] = [
+        ("User-Agent: *\nSitemap: http://python.org/sitemap.xml", &[]),
+        ("User-Agent: *\nSitemap: http$$$://python.org/sitemap.xml", &[WarningReasonKind::ParseUrl]),
+    ];
+    for &(input, expected) in cases.iter() {
+        validate_warnings(input, expected);
+    }
+}
+
+/// Warnings produced for empty and malformed `Clean-param` values.
+#[test]
+fn test_wrong_clean_param() {
+    let cases: [(&str, &[WarningReasonKind]); 5] = [
+        ("User-Agent: *\nClean-param: ref ", &[]),
+        ("User-Agent: *\nClean-param: ", &[WarningReasonKind::WrongCleanParamFormat]),
+        ("User-Agent: *\nClean-param: &", &[]),
+        ("User-Agent: *\nClean-param: ?", &[WarningReasonKind::IgnoredCleanParams]),
+        ("User-Agent: *\nClean-param: abc$", &[WarningReasonKind::IgnoredCleanParams]),
+    ];
+    for &(input, expected) in cases.iter() {
+        validate_warnings(input, expected);
+    }
+}
+
+/// A lone backslash is rejected as a path by both `Allow` and `Disallow`.
+#[test]
+fn test_warning_wrong_path_format() {
+    for &input in &["User-Agent: *\nAllow: \\", "User-Agent: *\nDisallow: \\"] {
+        validate_warnings(input, &[WarningReasonKind::WrongPathFormat]);
+    }
+}
+
+/// Reduces a parser warning reason to its kind, discarding any data the variant carries.
+impl From<&WarningReason> for WarningReasonKind {
+    fn from(reason: &WarningReason) -> Self {
+        match reason {
+            WarningReason::InvalidDirectiveFormat => Self::InvalidDirectiveFormat,
+            WarningReason::DirectiveKeyIsEmpty => Self::DirectiveKeyIsEmpty,
+            WarningReason::UnsupportedDirectiveKey { .. } => Self::UnsupportedDirectiveKey,
+            WarningReason::UserAgentCannotBeEmpty => Self::UserAgentCannotBeEmpty,
+            WarningReason::DirectiveWithoutUserAgent => Self::DirectiveWithoutUserAgent,
+            WarningReason::ParseCrawlDelayError { .. } => Self::ParseCrawlDelayError,
+            WarningReason::WrongRequestRateFormat => Self::WrongRequestRateFormat,
+            WarningReason::ParseRequestRate { .. } => Self::ParseRequestRate,
+            WarningReason::ParseUrl { .. } => Self::ParseUrl,
+            WarningReason::WrongCleanParamFormat => Self::WrongCleanParamFormat,
+            WarningReason::IgnoredCleanParams { .. } => Self::IgnoredCleanParams,
+            WarningReason::WrongPathFormat => Self::WrongPathFormat,
+        }
+    }
+}