Skip to content

Commit e4f54de

Browse files
committed
Fix #168 and using Arc for named groups
1 parent 37d0e48 commit e4f54de

File tree

2 files changed

+89
-59
lines changed

2 files changed

+89
-59
lines changed

src/program.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
11+
use std::{char, cmp};
12+
use std::collections::HashMap;
13+
use std::sync::Arc;
1214

1315
use syntax;
1416

@@ -43,7 +45,7 @@ pub struct Program {
4345
pub cap_names: Vec<Option<String>>,
4446
/// The map of named capture groups. The keys are group names and
4547
/// the values are group indices.
46-
pub named_groups: ::std::collections::HashMap<String, usize>,
48+
pub named_groups: Arc<HashMap<String, usize>>,
4749
/// If the regular expression requires a literal prefix in order to have a
4850
/// match, that prefix is stored here as a DFA.
4951
pub prefixes: Literals,

src/re.rs

+85-57
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ use std::ops::Index;
1414
#[cfg(feature = "pattern")]
1515
use std::str::pattern::{Pattern, Searcher, SearchStep};
1616
use std::str::FromStr;
17+
use std::collections::HashMap;
18+
use std::sync::Arc;
1719

1820
use exec::{Exec, ExecBuilder};
1921
use syntax;
@@ -394,13 +396,13 @@ impl Regex {
394396
///
395397
/// The `0`th capture group is always unnamed, so it must always be
396398
/// accessed with `at(0)` or `[0]`.
397-
pub fn captures<'r, 't>(&'r self, text: &'t str) -> Option<Captures<'r, 't>> {
399+
pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
398400
let mut locs = self.alloc_captures();
399401
if exec(self, &mut locs, text, 0) {
400402
Some(Captures {
401-
regex: self,
402403
text: text,
403404
locs: locs,
405+
named_groups: NamedGroups::from_regex(self)
404406
})
405407
} else {
406408
None
@@ -808,6 +810,47 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
808810
}
809811
}
810812

813+
enum NamedGroups {
814+
Native(&'static [(&'static str, usize)]),
815+
Dynamic(Arc<HashMap<String, usize>>),
816+
}
817+
818+
impl NamedGroups {
819+
fn from_regex(regex: &Regex) -> NamedGroups {
820+
match *regex {
821+
Regex::Native(ExNative { ref groups, .. }) =>
822+
NamedGroups::Native(groups),
823+
Regex::Dynamic(Program { ref named_groups, .. }) =>
824+
NamedGroups::Dynamic(named_groups.clone())
825+
}
826+
}
827+
828+
fn pos(&self, name: &str) -> Option<usize> {
829+
match *self {
830+
NamedGroups::Native(groups) => {
831+
groups.binary_search_by(|&(n, _)| n.cmp(name))
832+
.ok().map(|i| groups[i].1)
833+
},
834+
NamedGroups::Dynamic(ref groups) => {
835+
groups.get(name).map(|i| *i)
836+
},
837+
}
838+
}
839+
840+
fn iter<'n>(&'n self) -> Box<Iterator<Item=(&'n str, usize)> + 'n> {
841+
match *self {
842+
NamedGroups::Native(groups) => {
843+
Box::new(groups.iter().map(|&v| v))
844+
as Box<Iterator<Item=(&'n str, usize)> + 'n>
845+
},
846+
NamedGroups::Dynamic(ref groups) => {
847+
Box::new(groups.iter().map(|(s, i)| (&s[..], *i)))
848+
as Box<Iterator<Item=(&'n str, usize)> + 'n>
849+
},
850+
}
851+
}
852+
}
853+
811854
/// Captures represents a group of captured strings for a single match.
812855
///
813856
/// The 0th capture always corresponds to the entire match. Each subsequent
@@ -819,13 +862,13 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
819862
/// Positions returned from a capture group are always byte indices.
820863
///
821864
/// `'t` is the lifetime of the matched text.
822-
pub struct Captures<'r, 't> {
823-
regex: &'r Regex,
865+
pub struct Captures<'t> {
824866
text: &'t str,
825867
locs: Vec<Option<usize>>,
868+
named_groups: NamedGroups,
826869
}
827870

828-
impl<'r, 't> Captures<'r, 't> {
871+
impl<'t> Captures<'t> {
829872
/// Returns the start and end positions of the Nth capture group.
830873
/// Returns `None` if `i` is not a valid capture group or if the capture
831874
/// group did not match anything.
@@ -854,49 +897,29 @@ impl<'r, 't> Captures<'r, 't> {
854897
/// `name` isn't a valid capture group or didn't match anything, then
855898
/// `None` is returned.
856899
pub fn name(&self, name: &str) -> Option<&'t str> {
857-
match *self.regex {
858-
Regex::Native(ExNative { ref groups, .. }) => {
859-
match groups.binary_search_by(|&(n, _)| n.cmp(name)) {
860-
Ok(i) => self.at(groups[i].1),
861-
Err(_) => None
862-
}
863-
},
864-
Regex::Dynamic(Program { ref named_groups, .. }) => {
865-
named_groups.get(name).and_then(|i| self.at(*i))
866-
},
867-
}
900+
self.named_groups.pos(name).and_then(|i| self.at(i))
868901
}
869902

870903
/// Creates an iterator of all the capture groups in order of appearance
871904
/// in the regular expression.
872-
pub fn iter<'c>(&'c self) -> SubCaptures<'c, 'r, 't> {
905+
pub fn iter<'c>(&'c self) -> SubCaptures<'c, 't> {
873906
SubCaptures { idx: 0, caps: self, }
874907
}
875908

876909
/// Creates an iterator of all the capture group positions in order of
877910
/// appearance in the regular expression. Positions are byte indices
878911
/// in terms of the original string matched.
879-
pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c, 'r, 't> {
880-
SubCapturesPos { idx: 0, caps: self, }
912+
pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c> {
913+
SubCapturesPos { idx: 0, locs: &self.locs }
881914
}
882915

883916
/// Creates an iterator of all named groups as a tuple with the group
884917
/// name and the value. The iterator returns these values in arbitrary
885918
/// order.
886-
pub fn iter_named<'c>(&'c self) -> SubCapturesNamed<'c, 'r, 't> {
887-
let iter = match *self.regex {
888-
Regex::Native(ExNative { ref groups, .. }) => {
889-
Box::new(groups.iter().map(|&v| v))
890-
as Box<Iterator<Item=(&'r str, usize)> + 'r>
891-
},
892-
Regex::Dynamic(Program { ref named_groups, .. }) => {
893-
Box::new(named_groups.iter().map(|(s, i)| (&s[..], *i)))
894-
as Box<Iterator<Item=(&'r str, usize)> + 'r>
895-
},
896-
};
919+
pub fn iter_named<'c: 't>(&'c self) -> SubCapturesNamed<'c, 't> {
897920
SubCapturesNamed {
898921
caps: self,
899-
inner: iter
922+
names: self.named_groups.iter()
900923
}
901924
}
902925

@@ -940,7 +963,7 @@ impl<'r, 't> Captures<'r, 't> {
940963
///
941964
/// # Panics
942965
/// If there is no group at the given index.
943-
impl<'r, 't> Index<usize> for Captures<'r, 't> {
966+
impl<'t> Index<usize> for Captures<'t> {
944967

945968
type Output = str;
946969

@@ -954,7 +977,7 @@ impl<'r, 't> Index<usize> for Captures<'r, 't> {
954977
///
955978
/// # Panics
956979
/// If there is no group named by the given value.
957-
impl<'r, 't> Index<&'t str> for Captures<'r, 't> {
980+
impl<'t> Index<&'t str> for Captures<'t> {
958981

959982
type Output = str;
960983

@@ -971,12 +994,12 @@ impl<'r, 't> Index<&'t str> for Captures<'r, 't> {
971994
/// expression.
972995
///
973996
/// `'t` is the lifetime of the matched text.
974-
pub struct SubCaptures<'c, 'r: 'c, 't: 'c> {
997+
pub struct SubCaptures<'c, 't: 'c> {
975998
idx: usize,
976-
caps: &'c Captures<'r, 't>,
999+
caps: &'c Captures<'t>,
9771000
}
9781001

979-
impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> {
1002+
impl<'c, 't> Iterator for SubCaptures<'c, 't> {
9801003
type Item = Option<&'t str>;
9811004

9821005
fn next(&mut self) -> Option<Option<&'t str>> {
@@ -995,41 +1018,42 @@ impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> {
9951018
/// Positions are byte indices in terms of the original string matched.
9961019
///
9971020
/// `'c` is the lifetime of the borrowed capture positions.
pub struct SubCapturesPos<'c> {
    // Index into `locs` of the start slot of the next group to yield.
    idx: usize,
    // Flat slot list: each group occupies two consecutive entries, its
    // start position followed by its end position.
    locs: &'c [Option<usize>]
}

impl<'c> Iterator for SubCapturesPos<'c> {
    type Item = Option<(usize, usize)>;

    /// Yields the `(start, end)` pair of the next capture group, or
    /// `Some(None)` when that group did not take part in the match.
    ///
    /// Iteration ends once fewer than two slots remain, so a slot list of
    /// odd length ends the iteration instead of indexing out of bounds.
    ///
    /// # Panics
    ///
    /// Panics if exactly one of a group's two slots is set.
    fn next(&mut self) -> Option<Option<(usize, usize)>> {
        // Checked access to both slots of the pair.
        let (start, end) = match (self.locs.get(self.idx),
                                  self.locs.get(self.idx + 1)) {
            (Some(&s), Some(&e)) => (s, e),
            _ => return None,
        };
        self.idx += 2;
        Some(match (start, end) {
            (Some(s), Some(e)) => Some((s, e)),
            (None, None) => None,
            // A group either has both positions or neither.
            _ => unreachable!()
        })
    }
}
10151042

10161043
/// An Iterator over named capture groups as a tuple with the group
10171044
/// name and the value.
10181045
///
10191046
/// `'t` is the lifetime of the matched text.
1020-
pub struct SubCapturesNamed<'c, 'r: 'c, 't: 'c> {
1021-
caps: &'c Captures<'r, 't>,
1022-
inner: Box<Iterator<Item=(&'r str, usize)> + 'r>,
1047+
pub struct SubCapturesNamed<'c, 't: 'c> {
1048+
caps: &'c Captures<'t>,
1049+
names: Box<Iterator<Item=(&'c str, usize)> + 'c>,
10231050
}
10241051

1025-
impl<'c, 'r, 't> Iterator for SubCapturesNamed<'c, 'r, 't> {
1026-
type Item = (&'r str, Option<&'t str>);
1052+
impl<'c, 't: 'c> Iterator for SubCapturesNamed<'c, 't> {
1053+
type Item = (&'c str, Option<&'t str>);
10271054

1028-
fn next(&mut self) -> Option<(&'r str, Option<&'t str>)> {
1029-
match self.inner.next() {
1030-
Some((name, pos)) => Some((name, self.caps.at(pos))),
1031-
None => None
1032-
}
1055+
fn next(&mut self) -> Option<(&'c str, Option<&'t str>)> {
1056+
self.names.next().map(|(name, pos)| (name, self.caps.at(pos)))
10331057
}
10341058
}
10351059

@@ -1048,9 +1072,9 @@ pub struct FindCaptures<'r, 't> {
10481072
}
10491073

10501074
impl<'r, 't> Iterator for FindCaptures<'r, 't> {
1051-
type Item = Captures<'r, 't>;
1075+
type Item = Captures<'t>;
10521076

1053-
fn next(&mut self) -> Option<Captures<'r, 't>> {
1077+
fn next(&mut self) -> Option<Captures<'t>> {
10541078
if self.last_end > self.search.len() {
10551079
return None
10561080
}
@@ -1073,7 +1097,11 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> {
10731097
}
10741098
self.last_end = e;
10751099
self.last_match = Some(self.last_end);
1076-
Some(Captures::new(self.re, self.search, caps))
1100+
Some(Captures {
1101+
text: self.search,
1102+
locs: caps,
1103+
named_groups: NamedGroups::from_regex(self.re),
1104+
})
10771105
}
10781106
}
10791107

0 commit comments

Comments
 (0)