mirror of
https://github.com/danbulant/robotparser-rs
synced 2026-05-20 04:48:37 +00:00
Refactor codes like clippy suggested
This commit is contained in:
parent
7bbe96d096
commit
2d9c028dfa
3 changed files with 48 additions and 33 deletions
25
.travis.yml
25
.travis.yml
|
|
@ -1,5 +1,12 @@
|
|||
sudo: required
|
||||
sudo: false
|
||||
language: rust
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- libcurl4-openssl-dev
|
||||
- libelf-dev
|
||||
- libdw-dev
|
||||
- binutils-dev
|
||||
rust:
|
||||
- stable
|
||||
- beta
|
||||
|
|
@ -7,9 +14,17 @@ rust:
|
|||
before_script:
|
||||
- pip install 'travis-cargo<0.2' --user && export PATH=$HOME/.local/bin:$PATH
|
||||
script:
|
||||
- travis-cargo build
|
||||
- |
|
||||
if [ "$TRAVIS_RUST_VERSION" = nightly ]; then
|
||||
travis-cargo build -- --features clippy
|
||||
else
|
||||
travis-cargo build
|
||||
fi
|
||||
- travis-cargo test
|
||||
# - travis-cargo --only stable doc
|
||||
- travis-cargo --only stable doc
|
||||
after_success:
|
||||
# - travis-cargo --only stable doc-upload
|
||||
- travis-cargo coveralls
|
||||
- travis-cargo --only stable doc-upload
|
||||
- travis-cargo coveralls --no-sudo --verify
|
||||
env:
|
||||
global:
|
||||
secure: NaYk9xXQKTTRESUrQ1XK6UjSE1T8lv9Xphtoux9b2vZvziuECUXv/aelpobSmI8nheO9V+ZVgMbskParMCuRiAjUPEA2wh0w66nn9wVeR8ZD7bVltXEkcNCtRO3NoOQmSyyClbEAQdn04YazfdDqZ1Sor260J1u04D1c4LD+dD/2YKS+VnrGtHPa2NBx5ebFu0YTFxMAQez6KhPy3V5zPCCSWMIu7d/s5K9NNg6JlHqz06KQ1ZVKpV6ZRV5OA/16B5elLMpH+8PDO70LyWnSBYtdAn68epydjXEmZh3ra94bQz7U5K4XdnwhBp0PljIlY9LE6YbDxvU+ZcwX0VIpLCMStTV7xpbr7wI45OmNKaNLXZiBGHezsaVUSVRFCJZ7EB/xYDDk+9CIev6LlGR9wSFD1VnjHN1lo8X9tTN2rH7o0csnr1AkjJM8U46M8Mh/7+luGeAlwxkH5UQW/GnogZbF+UIHpSLayM04qECSz7C3fBhAVXW9H/20QxMCzNWDbKDpi8KIXbEq4AGSYCDrMwJRdQ182QtZzjBdnSvRENE2jsy33lpUxBGd/sYB4dy2FjkUGf+cjiS9F/CDRHDo4vNlYpXJvZ9aaqgjaEd5nIlw4aBwVWN4Qaf/iXbxiL9wg+RIGRhCsaKkxEgWaFRb1Y8s9zJ9B51wQsGBLHyorOs=
|
||||
|
|
|
|||
|
|
@ -1,18 +1,23 @@
|
|||
[package]
|
||||
authors = ["messense <messense@icloud.com>"]
|
||||
description = "robots.txt parser for Rust"
|
||||
documentation = "https://messense.github.io/robotparser-rs"
|
||||
homepage = "https://github.com/messense/robotparser-rs"
|
||||
keywords = ["robot", "robots", "parser", "spider", "crawler"]
|
||||
license = "MIT"
|
||||
name = "robotparser"
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/messense/robotparser-rs"
|
||||
version = "0.4.0"
|
||||
version = "0.4.1"
|
||||
|
||||
[dependencies]
|
||||
hyper = "0.7"
|
||||
time = "0.1"
|
||||
url = "0.5"
|
||||
|
||||
[dependencies.clippy]
|
||||
optional = true
|
||||
version = "^0.*"
|
||||
|
||||
[features]
|
||||
unstable = []
|
||||
|
|
|
|||
49
src/lib.rs
49
src/lib.rs
|
|
@ -4,6 +4,11 @@
|
|||
//! The robots.txt Exclusion Protocol is implemented as specified in
|
||||
//! http://www.robotstxt.org/norobots-rfc.txt
|
||||
//!
|
||||
#![cfg_attr(feature="clippy", feature(plugin))]
|
||||
#![cfg_attr(feature="clippy", plugin(clippy))]
|
||||
#![cfg_attr(feature="clippy", deny(clippy))]
|
||||
#![cfg_attr(feature="clippy", warn(cyclomatic_complexity))]
|
||||
|
||||
extern crate url;
|
||||
extern crate time;
|
||||
extern crate hyper;
|
||||
|
|
@ -76,7 +81,7 @@ impl Entry {
|
|||
|
||||
/// check if this entry applies to the specified agent
|
||||
fn applies_to(&self, useragent: &str) -> bool {
|
||||
let ua = useragent.split("/").nth(0).unwrap_or("").to_lowercase();
|
||||
let ua = useragent.split('/').nth(0).unwrap_or("").to_lowercase();
|
||||
let useragents = self.useragents.borrow();
|
||||
for agent in &*useragents {
|
||||
if agent == "*" {
|
||||
|
|
@ -129,20 +134,17 @@ impl Entry {
|
|||
}
|
||||
|
||||
fn get_crawl_delay(&self) -> Option<Duration> {
|
||||
return self.crawl_delay.clone();
|
||||
self.crawl_delay
|
||||
}
|
||||
|
||||
fn add_sitemap(&mut self,url:&str) {
|
||||
match Url::parse(url) {
|
||||
Ok(url) => {
|
||||
self.sitemaps.push(url);
|
||||
},
|
||||
Err(_) => {},
|
||||
if let Ok(url) = Url::parse(url) {
|
||||
self.sitemaps.push(url);
|
||||
}
|
||||
}
|
||||
|
||||
fn get_sitemaps(&self) -> Vec<Url> {
|
||||
return self.sitemaps.clone();
|
||||
self.sitemaps.clone()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -206,7 +208,7 @@ impl RobotFileParser {
|
|||
StatusCode::Ok => {
|
||||
let mut buf = String::new();
|
||||
res.read_to_string(&mut buf).unwrap();
|
||||
let lines: Vec<&str> = buf.split("\n").collect();
|
||||
let lines: Vec<&str> = buf.split('\n').collect();
|
||||
self.parse(&lines);
|
||||
},
|
||||
_ => {},
|
||||
|
|
@ -245,7 +247,7 @@ impl RobotFileParser {
|
|||
|
||||
self.modified();
|
||||
for line in lines {
|
||||
let mut ln = line.as_ref().clone();
|
||||
let mut ln = line.as_ref();
|
||||
if ln.is_empty() {
|
||||
match state {
|
||||
1 => {
|
||||
|
|
@ -261,17 +263,14 @@ impl RobotFileParser {
|
|||
}
|
||||
}
|
||||
// remove optional comment and strip line
|
||||
match ln.find("#") {
|
||||
Some(i) => {
|
||||
ln = &ln[0..i];
|
||||
},
|
||||
None => {},
|
||||
if let Some(i) = ln.find('#') {
|
||||
ln = &ln[0..i];
|
||||
}
|
||||
ln = ln.trim();
|
||||
if ln.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let parts: Vec<&str> = ln.splitn(2, ":").collect();
|
||||
let parts: Vec<&str> = ln.splitn(2, ':').collect();
|
||||
if parts.len() == 2 {
|
||||
let part0 = parts[0].trim().to_lowercase();
|
||||
let part1 = String::from_utf8(percent_decode(parts[1].trim().as_bytes())).unwrap_or("".to_owned());
|
||||
|
|
@ -298,15 +297,11 @@ impl RobotFileParser {
|
|||
},
|
||||
ref x if x == "crawl-delay" => {
|
||||
if state != 0 {
|
||||
let delay = part1.parse::<f64>();
|
||||
match delay {
|
||||
Ok(delay) => {
|
||||
let delay_seconds = delay.trunc();
|
||||
let delay_nanoseconds = delay.fract()* 10f64.powi(9);
|
||||
let delay = Duration::new(delay_seconds as u64,delay_nanoseconds as u32);
|
||||
entry.set_crawl_delay(delay);
|
||||
},
|
||||
Err(_) => {}
|
||||
if let Ok(delay) = part1.parse::<f64>() {
|
||||
let delay_seconds = delay.trunc();
|
||||
let delay_nanoseconds = delay.fract() * 10f64.powi(9);
|
||||
let delay = Duration::new(delay_seconds as u64,delay_nanoseconds as u32);
|
||||
entry.set_crawl_delay(delay);
|
||||
}
|
||||
state = 2;
|
||||
}
|
||||
|
|
@ -380,7 +375,7 @@ impl RobotFileParser {
|
|||
return entry.get_crawl_delay();
|
||||
}
|
||||
}
|
||||
return None;
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the sitemaps for this user agent as a `Vec<Url>`.
|
||||
|
|
@ -395,6 +390,6 @@ impl RobotFileParser {
|
|||
return entry.get_sitemaps();
|
||||
}
|
||||
}
|
||||
return Vec::new();
|
||||
vec![]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue