robotparser-rs/src/http/reqwest/sync_reqwest.rs
Laurent Arnoud 6ba403aab9
Initial github-actions (#25)
* Initial github-actions

most taken from starship project
ref #21

* rustfmt config remove unknown configuration options

* Run rustfmt

* clippy: use any instead of find..is_some

* clippy: Remove the `clone` call: `self.crawl_delay`

* Clippy fixes

* Rustfmt fixes

* clippy: fix don't need to add `&` to all patterns

* clippy: fix needless `fn main` in doctest

* clippy: fix if-then-else expression returns a bool literal

* clippy: fix very complex type BoxFuture response

* clippy: fix variable `line_no` is used as a loop counter

* clippy: dereference the expression on tests

* clippy: fix assert(true) will be optimized out by the compiler

* github: name workflow
2020-03-30 09:32:32 +08:00

32 lines
1.2 KiB
Rust

use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
use crate::model::FetchedRobotsTxt;
use crate::model::{Error, ErrorKind};
use crate::parser::{parse_fetched_robots_txt, ParseResult};
use reqwest::blocking::{Client, Request};
use reqwest::header::HeaderValue;
use reqwest::header::USER_AGENT;
use reqwest::Method;
use url::{Origin, Url};
impl RobotsTxtClient for Client {
type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
let url = format!("{}/robots.txt", origin.unicode_serialization());
let url = Url::parse(&url).map_err(|err| Error {
kind: ErrorKind::Url(err),
})?;
let mut request = Request::new(Method::GET, url);
let _ = request
.headers_mut()
.insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
let response = self.execute(request).map_err(|err| Error {
kind: ErrorKind::Http(err),
})?;
let status_code = response.status().as_u16();
let text = response.text().map_err(|err| Error {
kind: ErrorKind::Http(err),
})?;
let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
Ok(robots_txt)
}
}