mirror of
https://github.com/danbulant/robotparser-rs
synced 2026-05-27 22:12:02 +00:00
style: rustfmt source codes
This commit is contained in:
parent
e501f4c3b9
commit
a5254dbc52
2 changed files with 28 additions and 25 deletions
3
rustfmt.toml
Normal file
3
rustfmt.toml
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
max_width = 120
|
||||||
|
ideal_width = 100
|
||||||
|
write_mode = "Overwrite"
|
||||||
34
src/lib.rs
34
src/lib.rs
|
|
@ -1,4 +1,3 @@
|
||||||
//!
|
|
||||||
//! robots.txt parser for Rust
|
//! robots.txt parser for Rust
|
||||||
//!
|
//!
|
||||||
//! The robots.txt Exclusion Protocol is implemented as specified in
|
//! The robots.txt Exclusion Protocol is implemented as specified in
|
||||||
|
|
@ -40,8 +39,8 @@ use std::io::Read;
|
||||||
use std::cell::{Cell, RefCell};
|
use std::cell::{Cell, RefCell};
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use hyper::{Client};
|
use hyper::Client;
|
||||||
use hyper::header::{UserAgent};
|
use hyper::header::UserAgent;
|
||||||
use hyper::status::StatusCode;
|
use hyper::status::StatusCode;
|
||||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
|
|
@ -133,7 +132,7 @@ impl<'a> Entry<'a> {
|
||||||
let rulelines = self.rulelines.borrow();
|
let rulelines = self.rulelines.borrow();
|
||||||
for line in &*rulelines {
|
for line in &*rulelines {
|
||||||
if line.applies_to(filename) {
|
if line.applies_to(filename) {
|
||||||
return line.allowance
|
return line.allowance;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
true
|
true
|
||||||
|
|
@ -240,17 +239,17 @@ impl<'a> RobotFileParser<'a> {
|
||||||
match res.status {
|
match res.status {
|
||||||
StatusCode::Unauthorized | StatusCode::Forbidden => {
|
StatusCode::Unauthorized | StatusCode::Forbidden => {
|
||||||
self.disallow_all.set(true);
|
self.disallow_all.set(true);
|
||||||
},
|
}
|
||||||
status if status >= StatusCode::BadRequest && status < StatusCode::InternalServerError => {
|
status if status >= StatusCode::BadRequest && status < StatusCode::InternalServerError => {
|
||||||
self.allow_all.set(true);
|
self.allow_all.set(true);
|
||||||
},
|
}
|
||||||
StatusCode::Ok => {
|
StatusCode::Ok => {
|
||||||
let mut buf = String::new();
|
let mut buf = String::new();
|
||||||
res.read_to_string(&mut buf).unwrap();
|
res.read_to_string(&mut buf).unwrap();
|
||||||
let lines: Vec<&str> = buf.split('\n').collect();
|
let lines: Vec<&str> = buf.split('\n').collect();
|
||||||
self.parse(&lines);
|
self.parse(&lines);
|
||||||
},
|
}
|
||||||
_ => {},
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -292,13 +291,13 @@ impl<'a> RobotFileParser<'a> {
|
||||||
1 => {
|
1 => {
|
||||||
entry = Entry::new();
|
entry = Entry::new();
|
||||||
state = 0;
|
state = 0;
|
||||||
},
|
}
|
||||||
2 => {
|
2 => {
|
||||||
self._add_entry(entry);
|
self._add_entry(entry);
|
||||||
entry = Entry::new();
|
entry = Entry::new();
|
||||||
state = 0;
|
state = 0;
|
||||||
},
|
}
|
||||||
_ => {},
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// remove optional comment and strip line
|
// remove optional comment and strip line
|
||||||
|
|
@ -312,7 +311,8 @@ impl<'a> RobotFileParser<'a> {
|
||||||
let parts: Vec<&str> = ln.splitn(2, ':').collect();
|
let parts: Vec<&str> = ln.splitn(2, ':').collect();
|
||||||
if parts.len() == 2 {
|
if parts.len() == 2 {
|
||||||
let part0 = parts[0].trim().to_lowercase();
|
let part0 = parts[0].trim().to_lowercase();
|
||||||
let part1 = String::from_utf8(percent_decode(parts[1].trim().as_bytes()).collect()).unwrap_or("".to_owned());
|
let part1 = String::from_utf8(percent_decode(parts[1].trim().as_bytes()).collect())
|
||||||
|
.unwrap_or("".to_owned());
|
||||||
match part0 {
|
match part0 {
|
||||||
ref x if x == "user-agent" => {
|
ref x if x == "user-agent" => {
|
||||||
if state == 2 {
|
if state == 2 {
|
||||||
|
|
@ -321,19 +321,19 @@ impl<'a> RobotFileParser<'a> {
|
||||||
}
|
}
|
||||||
entry.push_useragent(&part1);
|
entry.push_useragent(&part1);
|
||||||
state = 1;
|
state = 1;
|
||||||
},
|
}
|
||||||
ref x if x == "disallow" => {
|
ref x if x == "disallow" => {
|
||||||
if state != 0 {
|
if state != 0 {
|
||||||
entry.push_ruleline(RuleLine::new(part1, false));
|
entry.push_ruleline(RuleLine::new(part1, false));
|
||||||
state = 2;
|
state = 2;
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
ref x if x == "allow" => {
|
ref x if x == "allow" => {
|
||||||
if state != 0 {
|
if state != 0 {
|
||||||
entry.push_ruleline(RuleLine::new(part1, true));
|
entry.push_ruleline(RuleLine::new(part1, true));
|
||||||
state = 2;
|
state = 2;
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
ref x if x == "crawl-delay" => {
|
ref x if x == "crawl-delay" => {
|
||||||
if state != 0 {
|
if state != 0 {
|
||||||
if let Ok(delay) = part1.parse::<f64>() {
|
if let Ok(delay) = part1.parse::<f64>() {
|
||||||
|
|
@ -344,14 +344,14 @@ impl<'a> RobotFileParser<'a> {
|
||||||
}
|
}
|
||||||
state = 2;
|
state = 2;
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
ref x if x == "sitemap" => {
|
ref x if x == "sitemap" => {
|
||||||
if state != 0 {
|
if state != 0 {
|
||||||
entry.add_sitemap(&part1);
|
entry.add_sitemap(&part1);
|
||||||
state = 2;
|
state = 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {},
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue