mirror of
https://github.com/danbulant/robotparser-rs
synced 2026-06-15 20:51:34 +00:00
* Initial github-actions, mostly taken from the starship project, ref #21 * rustfmt config: remove unknown configuration options * Run rustfmt * clippy: use any instead of find..is_some * clippy: Remove the `clone` call: `self.crawl_delay` * Clippy fixes * Rustfmt fixes * clippy: fix don't need to add `&` to all patterns * clippy: fix needless `fn main` in doctest * clippy: fix if-then-else expression returns a bool literal * clippy: fix very complex type BoxFuture response * clippy: fix variable `line_no` is used as a loop counter * clippy: dereference the expression on tests * clippy: fix assert(true) will be optimized out by the compiler * github: name workflow
30 lines
1.4 KiB
Rust
30 lines
1.4 KiB
Rust
mod fetched_robots_txt;
|
|
mod robots_txt;
|
|
use crate::model::RequestRate;
|
|
use std::time::Duration;
|
|
use url::Url;
|
|
|
|
/// Trait that implements robots txt service.
|
|
pub trait RobotsTxtService {
|
|
/// Using the parsed robots.txt decide if useragent can fetch url.
|
|
fn can_fetch(&self, user_agent: &str, url: &Url) -> bool;
|
|
|
|
/// Returns the crawl delay for this user agent as a Duration, or None if no crawl delay is defined.
|
|
fn get_crawl_delay(&self, user_agent: &str) -> Option<Duration>;
|
|
|
|
/// Removes the request parameters from the url that were listed in the `Clean-param` directive.
|
|
/// This method CHECKS that the origin of the transmitted url matches the origin of robots.txt.
|
|
/// Returns true if the operation was applied to the passed url.
|
|
/// In other cases it returns false.
|
|
fn normalize_url(&self, url: &mut Url) -> bool;
|
|
|
|
/// Removes the request parameters from the url that were listed in the `Clean-param` directive.
|
|
/// This method DOES NOT CHECK that the origin of the transmitted url coincides with the origin of robots.txt.
|
|
fn normalize_url_ignore_origin(&self, url: &mut Url);
|
|
|
|
/// Returns the list of URL sitemaps that have been listed in the robots.txt file.
|
|
fn get_sitemaps(&self) -> &[Url];
|
|
|
|
/// Returns information about the restrictions set for sending HTTP requests to the server.
|
|
fn get_req_rate(&self, user_agent: &str) -> Option<RequestRate>;
|
|
}
|