mirror of
https://github.com/danbulant/robotparser-rs
synced 2026-06-16 13:11:04 +00:00
* Initial github-actions mostly taken from starship project ref #21 * rustfmt config remove unknown configuration options * Run rustfmt * clippy: use any instead of find..is_some * clippy: Remove the `clone` call: `self.crawl_delay` * Clippy fixes * Rustfmt fixes * clippy: fix don't need to add `&` to all patterns * clippy: fix needless `fn main` in doctest * clippy: fix if-then-else expression returns a bool literal * clippy: fix very complex type BoxFuture response * clippy: fix variable `line_no` is used as a loop counter * clippy: dereference the expression on tests * clippy: fix assert(true) will be optimized out by the compiler * github: name workflow
20 lines
948 B
Rust
20 lines
948 B
Rust
use crate::model::{FetchedRobotsTxt, FetchedRobotsTxtContainer};
|
|
use crate::parser::parse_robots_txt;
|
|
use crate::parser::ParseResult;
|
|
use url::Origin;
|
|
|
|
/// HTTP status code `200 OK`.
const OK: u16 = 200;
/// HTTP status code `401 Unauthorized`.
const UNAUTHORIZED: u16 = 401;
/// HTTP status code `403 Forbidden`.
const FORBIDDEN: u16 = 403;
|
|
|
|
/// Parses the text of the robots.txt file located in the specified place of origin,
|
|
/// taking into account the response status code of the HTTP-request.
|
|
/// **IMPORTANT NOTE**: origin must point to robots.txt url **before redirects**.
|
|
pub fn parse(origin: Origin, status_code: u16, input: &str) -> ParseResult<FetchedRobotsTxt> {
|
|
match status_code {
|
|
UNAUTHORIZED | FORBIDDEN => ParseResult::new(FetchedRobotsTxt::new(FetchedRobotsTxtContainer::FetchDenied)),
|
|
OK => parse_robots_txt(origin, input)
|
|
.map(|result| FetchedRobotsTxt::new(FetchedRobotsTxtContainer::Fetched(result))),
|
|
_ => ParseResult::new(FetchedRobotsTxt::new(FetchedRobotsTxtContainer::FetchFailed)),
|
|
}
|
|
}
|