Use Cow instead of owned String

This commit is contained in:
messense 2016-04-27 11:26:28 +08:00
parent 4862c704ad
commit caca158589

View file

@ -39,6 +39,7 @@ extern crate hyper;
use std::io::Read; use std::io::Read;
use std::cell::{Cell, RefCell}; use std::cell::{Cell, RefCell};
use std::borrow::Cow;
use url::Url; use url::Url;
use hyper::{Client}; use hyper::{Client};
use hyper::status::StatusCode; use hyper::status::StatusCode;
@ -47,25 +48,25 @@ use std::time::Duration;
/// A rule line is a single "Allow:" (allowance==True) or "Disallow:" /// A rule line is a single "Allow:" (allowance==True) or "Disallow:"
/// (allowance==False) followed by a path.""" /// (allowance==False) followed by a path."""
#[derive(Debug, Eq, PartialEq, Clone)] #[derive(Debug, Eq, PartialEq, Clone)]
struct RuleLine { struct RuleLine<'a> {
path: String, path: Cow<'a, str>,
allowance: bool, allowance: bool,
} }
/// An entry has one or more user-agents and zero or more rulelines /// An entry has one or more user-agents and zero or more rulelines
#[derive(Debug, Eq, PartialEq, Clone)] #[derive(Debug, Eq, PartialEq, Clone)]
struct Entry { struct Entry<'a> {
useragents: RefCell<Vec<String>>, useragents: RefCell<Vec<String>>,
rulelines: RefCell<Vec<RuleLine>>, rulelines: RefCell<Vec<RuleLine<'a>>>,
crawl_delay: Option<Duration>, crawl_delay: Option<Duration>,
sitemaps: Vec<Url>, sitemaps: Vec<Url>,
} }
/// robots.txt file parser /// robots.txt file parser
#[derive(Debug, Eq, PartialEq, Clone)] #[derive(Debug, Eq, PartialEq, Clone)]
pub struct RobotFileParser { pub struct RobotFileParser<'a> {
entries: RefCell<Vec<Entry>>, entries: RefCell<Vec<Entry<'a>>>,
default_entry: RefCell<Entry>, default_entry: RefCell<Entry<'a>>,
disallow_all: Cell<bool>, disallow_all: Cell<bool>,
allow_all: Cell<bool>, allow_all: Cell<bool>,
url: Url, url: Url,
@ -75,27 +76,30 @@ pub struct RobotFileParser {
} }
impl RuleLine { impl<'a> RuleLine<'a> {
fn new(path: &str, allowance: bool) -> RuleLine { fn new<S>(path: S, allowance: bool) -> RuleLine<'a>
where S: Into<Cow<'a, str>>
{
let path = path.into();
let mut allow = allowance; let mut allow = allowance;
if path == "" && !allowance { if path == "" && !allowance {
// an empty value means allow all // an empty value means allow all
allow = true; allow = true;
} }
RuleLine { RuleLine {
path: path.to_owned(), path: path,
allowance: allow, allowance: allow,
} }
} }
fn applies_to(&self, filename: &str) -> bool { fn applies_to(&self, filename: &str) -> bool {
&self.path == "*" || filename.starts_with(&self.path) self.path == "*" || filename.starts_with(&self.path[..])
} }
} }
impl Entry { impl<'a> Entry<'a> {
fn new() -> Entry { fn new() -> Entry<'a> {
Entry { Entry {
useragents: RefCell::new(vec![]), useragents: RefCell::new(vec![]),
rulelines: RefCell::new(vec![]), rulelines: RefCell::new(vec![]),
@ -138,7 +142,7 @@ impl Entry {
useragents.push(useragent.to_lowercase().to_owned()); useragents.push(useragent.to_lowercase().to_owned());
} }
fn push_ruleline(&self, ruleline: RuleLine) { fn push_ruleline(&self, ruleline: RuleLine<'a>) {
let mut rulelines = self.rulelines.borrow_mut(); let mut rulelines = self.rulelines.borrow_mut();
rulelines.push(ruleline); rulelines.push(ruleline);
} }
@ -154,7 +158,7 @@ impl Entry {
useragents.is_empty() && rulelines.is_empty() useragents.is_empty() && rulelines.is_empty()
} }
fn set_crawl_delay(&mut self,delay: Duration) { fn set_crawl_delay(&mut self, delay: Duration) {
self.crawl_delay = Some(delay); self.crawl_delay = Some(delay);
} }
@ -174,15 +178,15 @@ impl Entry {
} }
impl Default for Entry { impl<'a> Default for Entry<'a> {
fn default() -> Entry { fn default() -> Entry<'a> {
Entry::new() Entry::new()
} }
} }
impl RobotFileParser { impl<'a> RobotFileParser<'a> {
pub fn new<T: AsRef<str>>(url: T) -> RobotFileParser { pub fn new<T: AsRef<str>>(url: T) -> RobotFileParser<'a> {
let parsed_url = Url::parse(url.as_ref()).unwrap(); let parsed_url = Url::parse(url.as_ref()).unwrap();
RobotFileParser { RobotFileParser {
entries: RefCell::new(vec![]), entries: RefCell::new(vec![]),
@ -247,7 +251,7 @@ impl RobotFileParser {
} }
} }
fn _add_entry(&self, entry: Entry) { fn _add_entry(&self, entry: Entry<'a>) {
if entry.has_useragent("*") { if entry.has_useragent("*") {
// the default entry is considered last // the default entry is considered last
let mut default_entry = self.default_entry.borrow_mut(); let mut default_entry = self.default_entry.borrow_mut();
@ -317,13 +321,13 @@ impl RobotFileParser {
}, },
ref x if x == "disallow" => { ref x if x == "disallow" => {
if state != 0 { if state != 0 {
entry.push_ruleline(RuleLine::new(&part1, false)); entry.push_ruleline(RuleLine::new(part1, false));
state = 2; state = 2;
} }
}, },
ref x if x == "allow" => { ref x if x == "allow" => {
if state != 0 { if state != 0 {
entry.push_ruleline(RuleLine::new(&part1, true)); entry.push_ruleline(RuleLine::new(part1, true));
state = 2; state = 2;
} }
}, },