mirror of
https://github.com/danbulant/robotparser-rs
synced 2026-06-15 20:51:34 +00:00
* Initial github-actions, mostly taken from starship project ref #21 * rustfmt config remove unknown configuration options * Run rustfmt * clippy: use any instead of find..is_some * clippy: Remove the `clone` call: `self.crawl_delay` * Clippy fixes * Rustfmt fixes * clippy: fix dont need to add `&` to all patterns * clippy: fix needless `fn main` in doctest * clippy: fix if-then-else expression returns a bool literal * clippy: fix very complex type BoxFuture response * clippy: fix variable `line_no` is used as a loop counter * clippy: dereference the expression on tests * clippy: fix assert(true) will be optimized out by the compiler * github: name workflow
32 lines
1.2 KiB
Rust
32 lines
1.2 KiB
Rust
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
|
|
use crate::model::FetchedRobotsTxt;
|
|
use crate::model::{Error, ErrorKind};
|
|
use crate::parser::{parse_fetched_robots_txt, ParseResult};
|
|
use reqwest::blocking::{Client, Request};
|
|
use reqwest::header::HeaderValue;
|
|
use reqwest::header::USER_AGENT;
|
|
use reqwest::Method;
|
|
use url::{Origin, Url};
|
|
|
|
impl RobotsTxtClient for Client {
|
|
type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
|
|
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
|
|
let url = format!("{}/robots.txt", origin.unicode_serialization());
|
|
let url = Url::parse(&url).map_err(|err| Error {
|
|
kind: ErrorKind::Url(err),
|
|
})?;
|
|
let mut request = Request::new(Method::GET, url);
|
|
let _ = request
|
|
.headers_mut()
|
|
.insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
|
|
let response = self.execute(request).map_err(|err| Error {
|
|
kind: ErrorKind::Http(err),
|
|
})?;
|
|
let status_code = response.status().as_u16();
|
|
let text = response.text().map_err(|err| Error {
|
|
kind: ErrorKind::Http(err),
|
|
})?;
|
|
let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
|
|
Ok(robots_txt)
|
|
}
|
|
}
|