Skip to content

Commit 422a4b4

Browse files
committed
Fix #168 and using Arc for named groups
1 parent 4ad644f commit 422a4b4

File tree

2 files changed

+90
-64
lines changed

2 files changed

+90
-64
lines changed

src/program.rs

+8-6
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
11+
use std::{char, cmp};
12+
use std::collections::HashMap;
13+
use std::sync::Arc;
1214

1315
use syntax;
1416

@@ -57,7 +59,7 @@ pub struct Program {
5759
pub cap_names: Vec<Option<String>>,
5860
/// The map of named capture groups. The keys are group names and
5961
/// the values are group indices.
60-
pub named_groups: ::std::collections::HashMap<String, usize>,
62+
pub named_groups: Arc<HashMap<String, usize>>,
6163
/// If the regular expression requires a literal prefix in order to have a
6264
/// match, that prefix is stored here as a DFA.
6365
pub prefixes: Prefix,
@@ -89,7 +91,7 @@ impl Program {
8991
let (insts_len, ncaps) = (insts.len(), num_captures(&insts));
9092
let create_threads = move || NfaThreads::new(insts_len, ncaps);
9193
let create_backtrack = move || BackMachine::new();
92-
let mut named_groups = ::std::collections::HashMap::new();
94+
let mut named_groups = HashMap::new();
9395
for (i, name) in cap_names.iter().enumerate() {
9496
if let Some(ref name) = *name {
9597
named_groups.insert(name.to_owned(), i);
@@ -99,7 +101,7 @@ impl Program {
99101
original: re.into(),
100102
insts: insts,
101103
cap_names: cap_names,
102-
named_groups: named_groups,
104+
named_groups: Arc::new(named_groups),
103105
prefixes: Prefix::Empty,
104106
prefixes_complete: false,
105107
anchored_begin: false,
@@ -284,7 +286,7 @@ impl Program {
284286
for c in (s as u32)..(e as u32 + 1){
285287
for alt in &orig {
286288
let mut alt = alt.clone();
287-
alt.push(::std::char::from_u32(c).unwrap());
289+
alt.push(char::from_u32(c).unwrap());
288290
alts.push(alt);
289291
}
290292
}
@@ -346,7 +348,7 @@ fn num_captures(insts: &[Inst]) -> usize {
346348
let mut n = 0;
347349
for inst in insts {
348350
if let Inst::Save(ref inst) = *inst {
349-
n = ::std::cmp::max(n, inst.slot + 1)
351+
n = cmp::max(n, inst.slot + 1)
350352
}
351353
}
352354
// There are exactly 2 Save slots for every capture.

src/re.rs

+82-58
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ use std::ops::Index;
1414
#[cfg(feature = "pattern")]
1515
use std::str::pattern::{Pattern, Searcher, SearchStep};
1616
use std::str::FromStr;
17+
use std::collections::HashMap;
18+
use std::sync::Arc;
1719

1820
use program::{Program, MatchEngine};
1921
use syntax;
@@ -416,13 +418,13 @@ impl Regex {
416418
///
417419
/// The `0`th capture group is always unnamed, so it must always be
418420
/// accessed with `at(0)` or `[0]`.
419-
pub fn captures<'r, 't>(&'r self, text: &'t str) -> Option<Captures<'r, 't>> {
421+
pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
420422
let mut locs = self.alloc_captures();
421423
if exec(self, &mut locs, text, 0) {
422424
Some(Captures {
423-
regex: self,
424425
text: text,
425426
locs: locs,
427+
named_groups: NamedGroups::from_regex(self)
426428
})
427429
} else {
428430
None
@@ -816,6 +818,47 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
816818
}
817819
}
818820

821+
enum NamedGroups {
822+
Native(&'static [(&'static str, usize)]),
823+
Dynamic(Arc<HashMap<String, usize>>),
824+
}
825+
826+
impl NamedGroups {
827+
fn from_regex(regex: &Regex) -> NamedGroups {
828+
match *regex {
829+
Regex::Native(ExNative { ref groups, .. }) =>
830+
NamedGroups::Native(groups),
831+
Regex::Dynamic(Program { ref named_groups, .. }) =>
832+
NamedGroups::Dynamic(named_groups.clone())
833+
}
834+
}
835+
836+
fn pos(&self, name: &str) -> Option<usize> {
837+
match *self {
838+
NamedGroups::Native(groups) => {
839+
groups.binary_search_by(|&(n, _)| n.cmp(name))
840+
.ok().map(|i| groups[i].1)
841+
},
842+
NamedGroups::Dynamic(ref groups) => {
843+
groups.get(name).map(|i| *i)
844+
},
845+
}
846+
}
847+
848+
fn iter<'n>(&'n self) -> Box<Iterator<Item=(&'n str, usize)> + 'n> {
849+
match *self {
850+
NamedGroups::Native(groups) => {
851+
Box::new(groups.iter().map(|&v| v))
852+
as Box<Iterator<Item=(&'n str, usize)> + 'n>
853+
},
854+
NamedGroups::Dynamic(ref groups) => {
855+
Box::new(groups.iter().map(|(s, i)| (&s[..], *i)))
856+
as Box<Iterator<Item=(&'n str, usize)> + 'n>
857+
},
858+
}
859+
}
860+
}
861+
819862
/// Captures represents a group of captured strings for a single match.
820863
///
821864
/// The 0th capture always corresponds to the entire match. Each subsequent
@@ -827,13 +870,13 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
827870
/// Positions returned from a capture group are always byte indices.
828871
///
829872
/// `'t` is the lifetime of the matched text.
830-
pub struct Captures<'r, 't> {
831-
regex: &'r Regex,
873+
pub struct Captures<'t> {
832874
text: &'t str,
833875
locs: Vec<Option<usize>>,
876+
named_groups: NamedGroups,
834877
}
835878

836-
impl<'r, 't> Captures<'r, 't> {
879+
impl<'t> Captures<'t> {
837880
/// Returns the start and end positions of the Nth capture group.
838881
/// Returns `None` if `i` is not a valid capture group or if the capture
839882
/// group did not match anything.
@@ -862,49 +905,29 @@ impl<'r, 't> Captures<'r, 't> {
862905
/// `name` isn't a valid capture group or didn't match anything, then
863906
/// `None` is returned.
864907
pub fn name(&self, name: &str) -> Option<&'t str> {
865-
match *self.regex {
866-
Regex::Native(ExNative { ref groups, .. }) => {
867-
match groups.binary_search_by(|&(n, _)| n.cmp(name)) {
868-
Ok(i) => self.at(groups[i].1),
869-
Err(_) => None
870-
}
871-
},
872-
Regex::Dynamic(Program { ref named_groups, .. }) => {
873-
named_groups.get(name).and_then(|i| self.at(*i))
874-
},
875-
}
908+
self.named_groups.pos(name).and_then(|i| self.at(i))
876909
}
877910

878911
/// Creates an iterator of all the capture groups in order of appearance
879912
/// in the regular expression.
880-
pub fn iter<'c>(&'c self) -> SubCaptures<'c, 'r, 't> {
913+
pub fn iter<'c>(&'c self) -> SubCaptures<'c, 't> {
881914
SubCaptures { idx: 0, caps: self, }
882915
}
883916

884917
/// Creates an iterator of all the capture group positions in order of
885918
/// appearance in the regular expression. Positions are byte indices
886919
/// in terms of the original string matched.
887-
pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c, 'r, 't> {
888-
SubCapturesPos { idx: 0, caps: self, }
920+
pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c> {
921+
SubCapturesPos { idx: 0, locs: &self.locs }
889922
}
890923

891924
/// Creates an iterator of all named groups as a tuple with the group
892925
/// name and the value. The iterator returns these values in arbitrary
893926
/// order.
894-
pub fn iter_named<'c>(&'c self) -> SubCapturesNamed<'c, 'r, 't> {
895-
let iter = match *self.regex {
896-
Regex::Native(ExNative { ref groups, .. }) => {
897-
Box::new(groups.iter().map(|&v| v))
898-
as Box<Iterator<Item=(&'r str, usize)> + 'r>
899-
},
900-
Regex::Dynamic(Program { ref named_groups, .. }) => {
901-
Box::new(named_groups.iter().map(|(s, i)| (&s[..], *i)))
902-
as Box<Iterator<Item=(&'r str, usize)> + 'r>
903-
},
904-
};
927+
pub fn iter_named<'c: 't>(&'c self) -> SubCapturesNamed<'c, 't> {
905928
SubCapturesNamed {
906929
caps: self,
907-
inner: iter
930+
names: self.named_groups.iter()
908931
}
909932
}
910933

@@ -948,7 +971,7 @@ impl<'r, 't> Captures<'r, 't> {
948971
///
949972
/// # Panics
950973
/// If there is no group at the given index.
951-
impl<'r, 't> Index<usize> for Captures<'r, 't> {
974+
impl<'t> Index<usize> for Captures<'t> {
952975

953976
type Output = str;
954977

@@ -962,7 +985,7 @@ impl<'r, 't> Index<usize> for Captures<'r, 't> {
962985
///
963986
/// # Panics
964987
/// If there is no group named by the given value.
965-
impl<'r, 't> Index<&'t str> for Captures<'r, 't> {
988+
impl<'t> Index<&'t str> for Captures<'t> {
966989

967990
type Output = str;
968991

@@ -979,12 +1002,12 @@ impl<'r, 't> Index<&'t str> for Captures<'r, 't> {
9791002
/// expression.
9801003
///
9811004
/// `'t` is the lifetime of the matched text.
982-
pub struct SubCaptures<'c, 'r: 'c, 't: 'c> {
1005+
pub struct SubCaptures<'c, 't: 'c> {
9831006
idx: usize,
984-
caps: &'c Captures<'r, 't>,
1007+
caps: &'c Captures<'t>,
9851008
}
9861009

987-
impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> {
1010+
impl<'c, 't> Iterator for SubCaptures<'c, 't> {
9881011
type Item = Option<&'t str>;
9891012

9901013
fn next(&mut self) -> Option<Option<&'t str>> {
@@ -1003,41 +1026,42 @@ impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> {
10031026
/// Positions are byte indices in terms of the original string matched.
10041027
///
10051028
/// `'c` is the lifetime of the `Captures` whose positions are iterated.
1006-
pub struct SubCapturesPos<'c, 'r: 'c, 't: 'c> {
1029+
pub struct SubCapturesPos<'c> {
    // Index into `locs` of the next group's start slot; advances by two.
    idx: usize,
    // Flat slot list: every group owns two consecutive entries (start, end).
    locs: &'c [Option<usize>],
}

impl<'c> Iterator for SubCapturesPos<'c> {
    type Item = Option<(usize, usize)>;

    /// Yields the `(start, end)` byte positions of the next capture group,
    /// or an inner `None` when that group did not take part in the match.
    /// Iteration ends once every slot pair has been consumed.
    fn next(&mut self) -> Option<Option<(usize, usize)>> {
        let cursor = self.idx;
        if cursor >= self.locs.len() {
            return None;
        }
        let span = match (self.locs[cursor], self.locs[cursor + 1]) {
            (Some(start), Some(end)) => Some((start, end)),
            (None, None) => None,
            // A group's two slots are either both set or both unset.
            _ => unreachable!(),
        };
        self.idx = cursor + 2;
        Some(span)
    }
}
10231050

10241051
/// An Iterator over named capture groups as a tuple with the group
10251052
/// name and the value.
10261053
///
10271054
/// `'t` is the lifetime of the matched text.
1028-
pub struct SubCapturesNamed<'c, 'r: 'c, 't: 'c> {
1029-
caps: &'c Captures<'r, 't>,
1030-
inner: Box<Iterator<Item=(&'r str, usize)> + 'r>,
1055+
pub struct SubCapturesNamed<'c, 't: 'c> {
1056+
caps: &'c Captures<'t>,
1057+
names: Box<Iterator<Item=(&'c str, usize)> + 'c>,
10311058
}
10321059

1033-
impl<'c, 'r, 't> Iterator for SubCapturesNamed<'c, 'r, 't> {
1034-
type Item = (&'r str, Option<&'t str>);
1060+
impl<'c, 't: 'c> Iterator for SubCapturesNamed<'c, 't> {
1061+
type Item = (&'c str, Option<&'t str>);
10351062

1036-
fn next(&mut self) -> Option<(&'r str, Option<&'t str>)> {
1037-
match self.inner.next() {
1038-
Some((name, pos)) => Some((name, self.caps.at(pos))),
1039-
None => None
1040-
}
1063+
fn next(&mut self) -> Option<(&'c str, Option<&'t str>)> {
1064+
self.names.next().map(|(name, pos)| (name, self.caps.at(pos)))
10411065
}
10421066
}
10431067

@@ -1056,9 +1080,9 @@ pub struct FindCaptures<'r, 't> {
10561080
}
10571081

10581082
impl<'r, 't> Iterator for FindCaptures<'r, 't> {
1059-
type Item = Captures<'r, 't>;
1083+
type Item = Captures<'t>;
10601084

1061-
fn next(&mut self) -> Option<Captures<'r, 't>> {
1085+
fn next(&mut self) -> Option<Captures<'t>> {
10621086
if self.last_end > self.search.len() {
10631087
return None
10641088
}
@@ -1083,9 +1107,9 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> {
10831107
self.skip_next_empty = true;
10841108
}
10851109
Some(Captures {
1086-
regex: self.re,
10871110
text: self.search,
1088-
locs: caps
1111+
locs: caps,
1112+
named_groups: NamedGroups::from_regex(self.re),
10891113
})
10901114
}
10911115
}

0 commit comments

Comments
 (0)