Error handling (#24)

* Add test for the panic that occurred when the URL is invalid

* Initial error handling

ref https://github.com/messense/robotparser-rs/issues/22

* Rename ErrorKind::HttpClient => ErrorKind::Http

* Implement std::error::Error and rename to Error
This commit is contained in:
Laurent Arnoud 2020-03-08 12:33:34 +00:00 committed by GitHub
parent 1474a8cce9
commit df49f6bcf0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 73 additions and 17 deletions

View file

@ -1,24 +1,26 @@
use reqwest::{Client, Request};
use reqwest::{Method, Error};
use reqwest::Method;
use reqwest::Error as ReqwestError;
use reqwest::header::HeaderValue;
use url::{Origin, Url};
use reqwest::header::USER_AGENT;
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
use crate::parser::{ParseResult, parse_fetched_robots_txt};
use crate::model::FetchedRobotsTxt;
use crate::model::{Error, ErrorKind};
use std::pin::Pin;
use futures::task::{Context, Poll};
use futures::Future;
use futures::future::TryFutureExt;
use futures::future::ok as future_ok;
type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>;
type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>;
impl RobotsTxtClient for Client {
type Result = RobotsTxtResponse;
type Result = Result<RobotsTxtResponse, Error>;
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
let url = format!("{}/robots.txt", origin.unicode_serialization());
let url = Url::parse(&url).expect("Unable to parse robots.txt url");
let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
let mut request = Request::new(Method::GET, url);
let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
let response = self
@ -29,11 +31,11 @@ impl RobotsTxtClient for Client {
return future_ok((response_info, response_text));
});
});
let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>> = Box::pin(response);
return RobotsTxtResponse {
let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>> = Box::pin(response);
Ok(RobotsTxtResponse {
origin,
response,
}
})
}
}
@ -55,7 +57,7 @@ impl RobotsTxtResponse {
}
impl Future for RobotsTxtResponse {
type Output = Result<ParseResult<FetchedRobotsTxt>, Error>;
type Output = Result<ParseResult<FetchedRobotsTxt>, ReqwestError>;
fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
let self_mut = self.get_mut();
@ -73,4 +75,4 @@ impl Future for RobotsTxtResponse {
},
}
}
}
}

View file

@ -1,23 +1,24 @@
use reqwest::blocking::{Client, Request};
use reqwest::{Method, Error};
use reqwest::Method;
use reqwest::header::HeaderValue;
use url::{Origin, Url};
use reqwest::header::USER_AGENT;
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
use crate::parser::{ParseResult, parse_fetched_robots_txt};
use crate::model::FetchedRobotsTxt;
use crate::model::{Error, ErrorKind};
impl RobotsTxtClient for Client {
type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
let url = format!("{}/robots.txt", origin.unicode_serialization());
let url = Url::parse(&url).expect("Unable to parse robots.txt url");
let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
let mut request = Request::new(Method::GET, url);
let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
let response = self.execute(request)?;
let response = self.execute(request).map_err(|err| Error {kind: ErrorKind::Http(err)})?;
let status_code = response.status().as_u16();
let text = response.text()?;
let text = response.text().map_err(|err| Error {kind: ErrorKind::Http(err)})?;
let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
return Ok(robots_txt);
}
}
}

View file

@ -14,4 +14,6 @@ pub (crate) use self::fetched_robots_txt::FetchedRobotsTxtContainer;
mod fetched_robots_txt;
pub use self::robots_txt::RobotsTxt;
mod path;
pub (crate) use self::path::Path;
pub (crate) use self::path::Path;
mod errors;
pub use self::errors::{Error, ErrorKind};

23
src/model/errors.rs Normal file
View file

@ -0,0 +1,23 @@
use std::fmt;
/// Error type produced when fetching `robots.txt` fails.
///
/// The concrete cause is carried in the public `kind` field.
#[derive(Debug)]
pub struct Error {
/// What went wrong; see `ErrorKind` for the possible cases.
pub kind: ErrorKind,
}
/// The underlying cause of an `Error`.
#[derive(Debug)]
pub enum ErrorKind {
/// The `robots.txt` URL could not be parsed; wraps the `url` crate's parse error.
Url(url::ParseError),
/// An error reported by `reqwest`, e.g. while executing the request or
/// reading the response body.
Http(reqwest::Error),
}
/// Formats an `Error` by delegating to the `Display` implementation of the
/// wrapped `url` or `reqwest` error; no extra text is added.
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Borrow the kind so the wrapped error is only referenced, never moved.
        match &self.kind {
            ErrorKind::Url(inner) => fmt::Display::fmt(inner, f),
            ErrorKind::Http(inner) => fmt::Display::fmt(inner, f),
        }
    }
}
impl std::error::Error for Error {}

View file

@ -3,14 +3,28 @@ use robotparser::service::RobotsTxtService;
use reqwest::Client;
use url::Url;
use tokio::runtime::Runtime;
use url::{Host, Origin};
#[test]
fn test_reqwest_async() {
let mut runtime = Runtime::new().unwrap();
let client = Client::new();
let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()));
let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()).unwrap());
let robots_txt = robots_txt_response.unwrap().get_result();
let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
assert!(robots_txt.can_fetch("*", &fetch_url));
}
let fetch_url = Url::parse("http://www.python.org/webstats/").unwrap();
assert!(!robots_txt.can_fetch("*", &fetch_url));
}
/// An origin whose host cannot be serialized into a valid URL must be
/// reported as an `Err` by `fetch_robots_txt` instead of panicking.
///
/// NOTE(review): the name says "blocking", but this test file imports the
/// async `reqwest::Client` — confirm whether the name should say "async".
#[test]
fn test_reqwest_blocking_panic_url() {
    let client = Client::new();
    // "python.org::" is deliberately malformed so the robots.txt URL built
    // from this origin fails to parse.
    let host = Host::Domain("python.org::".into());
    let origin = Origin::Tuple("https".into(), host, 80);
    let result = client.fetch_robots_txt(origin);
    assert!(
        result.is_err(),
        "fetch_robots_txt should return an error for an invalid origin"
    );
}

View file

@ -2,6 +2,7 @@ use robotparser::http::RobotsTxtClient;
use robotparser::service::RobotsTxtService;
use reqwest::blocking::Client;
use url::Url;
use url::{Host, Origin};
#[test]
fn test_reqwest_blocking() {
@ -10,4 +11,17 @@ fn test_reqwest_blocking() {
let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
assert!(robots_txt.can_fetch("*", &fetch_url));
let fetch_url = Url::parse("https://www.python.org/webstats/").unwrap();
assert!(!robots_txt.can_fetch("*", &fetch_url));
}
/// An origin whose host cannot be serialized into a valid URL must be
/// reported as an `Err` by the blocking client's `fetch_robots_txt`
/// instead of panicking.
#[test]
fn test_reqwest_blocking_panic_url() {
    let client = Client::new();
    // "python.org::" is deliberately malformed so the robots.txt URL built
    // from this origin fails to parse.
    let host = Host::Domain("python.org::".into());
    let origin = Origin::Tuple("https".into(), host, 80);
    let result = client.fetch_robots_txt(origin);
    assert!(
        result.is_err(),
        "fetch_robots_txt should return an error for an invalid origin"
    );
}