mirror of
https://github.com/danbulant/robotparser-rs
synced 2026-06-15 20:51:34 +00:00
* Initial github-actions mostly taken from starship project ref #21 * rustfmt config remove unknown configuration options * Run rustfmt * clippy: use any instead of find..is_some * clippy: Remove the `clone` call: `self.crawl_delay` * Clippy fixes * Rustfmt fixes * clippy: fix don't need to add `&` to all patterns * clippy: fix needless `fn main` in doctest * clippy: fix if-then-else expression returns a bool literal * clippy: fix very complex type BoxFuture response * clippy: fix variable `line_no` is used as a loop counter * clippy: dereference the expression on tests * clippy: fix assert(true) will be optimized out by the compiler * github: name workflow
76 lines
2.5 KiB
Rust
76 lines
2.5 KiB
Rust
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
|
|
use crate::model::FetchedRobotsTxt;
|
|
use crate::model::{Error, ErrorKind};
|
|
use crate::parser::{parse_fetched_robots_txt, ParseResult};
|
|
use futures::future::ok as future_ok;
|
|
use futures::future::TryFutureExt;
|
|
use futures::task::{Context, Poll};
|
|
use futures::Future;
|
|
use reqwest::header::HeaderValue;
|
|
use reqwest::header::USER_AGENT;
|
|
use reqwest::Error as ReqwestError;
|
|
use reqwest::Method;
|
|
use reqwest::{Client, Request};
|
|
use std::pin::Pin;
|
|
use url::{Origin, Url};
|
|
|
|
type FetchFuture = Box<dyn Future<Output = Result<(ResponseInfo, String), ReqwestError>>>;
|
|
type BoxFuture = Pin<FetchFuture>;
|
|
|
|
impl RobotsTxtClient for Client {
|
|
type Result = Result<RobotsTxtResponse, Error>;
|
|
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
|
|
let url = format!("{}/robots.txt", origin.unicode_serialization());
|
|
let url = Url::parse(&url).map_err(|err| Error {
|
|
kind: ErrorKind::Url(err),
|
|
})?;
|
|
let mut request = Request::new(Method::GET, url);
|
|
let _ = request
|
|
.headers_mut()
|
|
.insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
|
|
let response = self.execute(request).and_then(|response| {
|
|
let response_info = ResponseInfo {
|
|
status_code: response.status().as_u16(),
|
|
};
|
|
response
|
|
.text()
|
|
.and_then(|response_text| future_ok((response_info, response_text)))
|
|
});
|
|
let response: BoxFuture = Box::pin(response);
|
|
Ok(RobotsTxtResponse { origin, response })
|
|
}
|
|
}
|
|
|
|
/// Metadata captured from the HTTP response before its body is read.
struct ResponseInfo {
    /// Numeric HTTP status code of the response.
    status_code: u16,
}
|
|
|
|
/// Future for fetching robots.txt result.
|
|
pub struct RobotsTxtResponse {
|
|
origin: Origin,
|
|
response: Pin<FetchFuture>,
|
|
}
|
|
|
|
impl RobotsTxtResponse {
|
|
/// Returns origin of robots.txt
|
|
pub fn get_origin(&self) -> &Origin {
|
|
&self.origin
|
|
}
|
|
}
|
|
|
|
impl Future for RobotsTxtResponse {
|
|
type Output = Result<ParseResult<FetchedRobotsTxt>, ReqwestError>;
|
|
|
|
fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
|
|
let self_mut = self.get_mut();
|
|
let response_pin = self_mut.response.as_mut();
|
|
match response_pin.poll(cx) {
|
|
Poll::Ready(Ok((response_info, text))) => {
|
|
let robots_txt = parse_fetched_robots_txt(self_mut.origin.clone(), response_info.status_code, &text);
|
|
Poll::Ready(Ok(robots_txt))
|
|
}
|
|
Poll::Ready(Err(error)) => Poll::Ready(Err(error)),
|
|
Poll::Pending => Poll::Pending,
|
|
}
|
|
}
|
|
}
|