Skip to content

Commit 4ad644f

Browse files
committed
Storing mapping from names to group indices into Regex
1 parent aae73b0 commit 4ad644f

File tree

3 files changed

+91
-63
lines changed

3 files changed

+91
-63
lines changed

regex_macros/src/lib.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,19 @@ impl<'a> NfaGen<'a> {
111111
None => cx.expr_none(self.sp),
112112
}
113113
);
114+
let named_groups = {
115+
let mut named_groups = ::std::collections::BTreeMap::new();
116+
for (i, name) in self.names.iter().enumerate() {
117+
if let Some(ref name) = *name {
118+
named_groups.insert(name.to_owned(), i);
119+
}
120+
}
121+
self.vec_expr(named_groups.iter(),
122+
&mut |cx, (name, group_idx)|
123+
quote_expr!(cx, ($name, $group_idx))
124+
)
125+
};
126+
114127
let prefix_anchor = self.prog.anchored_begin;
115128

116129
let step_insts = self.step_insts();
@@ -125,6 +138,8 @@ impl<'a> NfaGen<'a> {
125138
// unused code generated by regex!. See #14185 for an example.
126139
#[allow(dead_code)]
127140
static CAP_NAMES: &'static [Option<&'static str>] = &$cap_names;
141+
#[allow(dead_code)]
142+
static NAMED_GROUPS: &'static [(&'static str, usize)] = &$named_groups;
128143

129144
#[allow(dead_code)]
130145
fn exec<'t>(
@@ -310,6 +325,7 @@ fn exec<'t>(
310325
::regex::internal::Native(::regex::internal::ExNative {
311326
original: $regex,
312327
names: &CAP_NAMES,
328+
groups: &NAMED_GROUPS,
313329
prog: exec,
314330
})
315331
})

src/program.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
12+
1113
use syntax;
1214

1315
use Error;
@@ -53,6 +55,9 @@ pub struct Program {
5355
/// The sequence of capture group names. There is an entry for each capture
5456
/// group index and a name exists only if the capture group is named.
5557
pub cap_names: Vec<Option<String>>,
58+
/// The map of named capture groups. The keys are group names and
59+
/// the values are group indices.
60+
pub named_groups: ::std::collections::HashMap<String, usize>,
5661
/// If the regular expression requires a literal prefix in order to have a
5762
/// match, that prefix is stored here as a DFA.
5863
pub prefixes: Prefix,
@@ -84,10 +89,17 @@ impl Program {
8489
let (insts_len, ncaps) = (insts.len(), num_captures(&insts));
8590
let create_threads = move || NfaThreads::new(insts_len, ncaps);
8691
let create_backtrack = move || BackMachine::new();
92+
let mut named_groups = ::std::collections::HashMap::new();
93+
for (i, name) in cap_names.iter().enumerate() {
94+
if let Some(ref name) = *name {
95+
named_groups.insert(name.to_owned(), i);
96+
}
97+
}
8798
let mut prog = Program {
8899
original: re.into(),
89100
insts: insts,
90101
cap_names: cap_names,
102+
named_groups: named_groups,
91103
prefixes: Prefix::Empty,
92104
prefixes_complete: false,
93105
anchored_begin: false,
@@ -317,6 +329,7 @@ impl Clone for Program {
317329
original: self.original.clone(),
318330
insts: self.insts.clone(),
319331
cap_names: self.cap_names.clone(),
332+
named_groups: self.named_groups.clone(),
320333
prefixes: self.prefixes.clone(),
321334
prefixes_complete: self.prefixes_complete,
322335
anchored_begin: self.anchored_begin,

src/re.rs

Lines changed: 62 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
// except according to those terms.
1010

1111
use std::borrow::Cow;
12-
use std::collections::HashMap;
13-
use std::collections::hash_map::Iter;
1412
use std::fmt;
1513
use std::ops::Index;
1614
#[cfg(feature = "pattern")]
@@ -186,6 +184,8 @@ pub struct ExNative {
186184
#[doc(hidden)]
187185
pub names: &'static &'static [Option<&'static str>],
188186
#[doc(hidden)]
187+
pub groups: &'static &'static [(&'static str, usize)],
188+
#[doc(hidden)]
189189
pub prog: fn(&mut CaptureIdxs, &str, usize) -> bool,
190190
}
191191

@@ -416,10 +416,14 @@ impl Regex {
416416
///
417417
/// The `0`th capture group is always unnamed, so it must always be
418418
/// accessed with `at(0)` or `[0]`.
419-
pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
420-
let mut caps = self.alloc_captures();
421-
if exec(self, &mut caps, text, 0) {
422-
Some(Captures::new(self, text, caps))
419+
pub fn captures<'r, 't>(&'r self, text: &'t str) -> Option<Captures<'r, 't>> {
420+
let mut locs = self.alloc_captures();
421+
if exec(self, &mut locs, text, 0) {
422+
Some(Captures {
423+
regex: self,
424+
text: text,
425+
locs: locs,
426+
})
423427
} else {
424428
None
425429
}
@@ -823,37 +827,13 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
823827
/// Positions returned from a capture group are always byte indices.
824828
///
825829
/// `'t` is the lifetime of the matched text.
826-
pub struct Captures<'t> {
830+
pub struct Captures<'r, 't> {
831+
regex: &'r Regex,
827832
text: &'t str,
828833
locs: Vec<Option<usize>>,
829-
named: Option<HashMap<String, usize>>,
830834
}
831835

832-
impl<'t> Captures<'t> {
833-
fn new(
834-
re: &Regex,
835-
search: &'t str,
836-
locs: Vec<Option<usize>>,
837-
) -> Captures<'t> {
838-
let named =
839-
if re.captures_len() == 0 {
840-
None
841-
} else {
842-
let mut named = HashMap::new();
843-
for (i, name) in re.capture_names().enumerate() {
844-
if let Some(name) = name {
845-
named.insert(name.to_owned(), i);
846-
}
847-
}
848-
Some(named)
849-
};
850-
Captures {
851-
text: search,
852-
locs: locs,
853-
named: named,
854-
}
855-
}
856-
836+
impl<'r, 't> Captures<'r, 't> {
857837
/// Returns the start and end positions of the Nth capture group.
858838
/// Returns `None` if `i` is not a valid capture group or if the capture
859839
/// group did not match anything.
@@ -882,35 +862,50 @@ impl<'t> Captures<'t> {
882862
/// `name` isn't a valid capture group or didn't match anything, then
883863
/// `None` is returned.
884864
pub fn name(&self, name: &str) -> Option<&'t str> {
885-
match self.named {
886-
None => None,
887-
Some(ref h) => {
888-
match h.get(name) {
889-
None => None,
890-
Some(i) => self.at(*i),
865+
match *self.regex {
866+
Regex::Native(ExNative { ref groups, .. }) => {
867+
match groups.binary_search_by(|&(n, _)| n.cmp(name)) {
868+
Ok(i) => self.at(groups[i].1),
869+
Err(_) => None
891870
}
892-
}
871+
},
872+
Regex::Dynamic(Program { ref named_groups, .. }) => {
873+
named_groups.get(name).and_then(|i| self.at(*i))
874+
},
893875
}
894876
}
895877

896878
/// Creates an iterator of all the capture groups in order of appearance
897879
/// in the regular expression.
898-
pub fn iter(&'t self) -> SubCaptures<'t> {
880+
pub fn iter<'c>(&'c self) -> SubCaptures<'c, 'r, 't> {
899881
SubCaptures { idx: 0, caps: self, }
900882
}
901883

902884
/// Creates an iterator of all the capture group positions in order of
903885
/// appearance in the regular expression. Positions are byte indices
904886
/// in terms of the original string matched.
905-
pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
887+
pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c, 'r, 't> {
906888
SubCapturesPos { idx: 0, caps: self, }
907889
}
908890

909891
/// Creates an iterator of all named groups as a tuple with the group
910892
/// name and the value. The iterator returns these values in arbitrary
911893
/// order.
912-
pub fn iter_named(&'t self) -> SubCapturesNamed<'t> {
913-
SubCapturesNamed { caps: self, inner: self.named.as_ref().map(|n| n.iter()) }
894+
pub fn iter_named<'c>(&'c self) -> SubCapturesNamed<'c, 'r, 't> {
895+
let iter = match *self.regex {
896+
Regex::Native(ExNative { ref groups, .. }) => {
897+
Box::new(groups.iter().map(|&v| v))
898+
as Box<Iterator<Item=(&'r str, usize)> + 'r>
899+
},
900+
Regex::Dynamic(Program { ref named_groups, .. }) => {
901+
Box::new(named_groups.iter().map(|(s, i)| (&s[..], *i)))
902+
as Box<Iterator<Item=(&'r str, usize)> + 'r>
903+
},
904+
};
905+
SubCapturesNamed {
906+
caps: self,
907+
inner: iter
908+
}
914909
}
915910

916911
/// Expands all instances of `$name` in `text` to the corresponding capture
@@ -953,7 +948,7 @@ impl<'t> Captures<'t> {
953948
///
954949
/// # Panics
955950
/// If there is no group at the given index.
956-
impl<'t> Index<usize> for Captures<'t> {
951+
impl<'r, 't> Index<usize> for Captures<'r, 't> {
957952

958953
type Output = str;
959954

@@ -967,7 +962,7 @@ impl<'t> Index<usize> for Captures<'t> {
967962
///
968963
/// # Panics
969964
/// If there is no group named by the given value.
970-
impl<'t> Index<&'t str> for Captures<'t> {
965+
impl<'r, 't> Index<&'t str> for Captures<'r, 't> {
971966

972967
type Output = str;
973968

@@ -984,12 +979,12 @@ impl<'t> Index<&'t str> for Captures<'t> {
984979
/// expression.
985980
///
986981
/// `'t` is the lifetime of the matched text.
987-
pub struct SubCaptures<'t> {
982+
pub struct SubCaptures<'c, 'r: 'c, 't: 'c> {
988983
idx: usize,
989-
caps: &'t Captures<'t>,
984+
caps: &'c Captures<'r, 't>,
990985
}
991986

992-
impl<'t> Iterator for SubCaptures<'t> {
987+
impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> {
993988
type Item = Option<&'t str>;
994989

995990
fn next(&mut self) -> Option<Option<&'t str>> {
@@ -1008,12 +1003,12 @@ impl<'t> Iterator for SubCaptures<'t> {
10081003
/// Positions are byte indices in terms of the original string matched.
10091004
///
10101005
/// `'t` is the lifetime of the matched text.
1011-
pub struct SubCapturesPos<'t> {
1006+
pub struct SubCapturesPos<'c, 'r: 'c, 't: 'c> {
10121007
idx: usize,
1013-
caps: &'t Captures<'t>,
1008+
caps: &'c Captures<'r, 't>,
10141009
}
10151010

1016-
impl<'t> Iterator for SubCapturesPos<'t> {
1011+
impl<'c, 'r, 't> Iterator for SubCapturesPos<'c, 'r, 't> {
10171012
type Item = Option<(usize, usize)>;
10181013

10191014
fn next(&mut self) -> Option<Option<(usize, usize)>> {
@@ -1030,17 +1025,17 @@ impl<'t> Iterator for SubCapturesPos<'t> {
10301025
/// name and the value.
10311026
///
10321027
/// `'t` is the lifetime of the matched text.
1033-
pub struct SubCapturesNamed<'t>{
1034-
caps: &'t Captures<'t>,
1035-
inner: Option<Iter<'t, String, usize>>,
1028+
pub struct SubCapturesNamed<'c, 'r: 'c, 't: 'c> {
1029+
caps: &'c Captures<'r, 't>,
1030+
inner: Box<Iterator<Item=(&'r str, usize)> + 'r>,
10361031
}
10371032

1038-
impl<'t> Iterator for SubCapturesNamed<'t> {
1039-
type Item = (&'t str, Option<&'t str>);
1033+
impl<'c, 'r, 't> Iterator for SubCapturesNamed<'c, 'r, 't> {
1034+
type Item = (&'r str, Option<&'t str>);
10401035

1041-
fn next(&mut self) -> Option<(&'t str, Option<&'t str>)> {
1042-
match self.inner.as_mut().map_or(None, |it| it.next()) {
1043-
Some((name, pos)) => Some((name, self.caps.at(*pos))),
1036+
fn next(&mut self) -> Option<(&'r str, Option<&'t str>)> {
1037+
match self.inner.next() {
1038+
Some((name, pos)) => Some((name, self.caps.at(pos))),
10441039
None => None
10451040
}
10461041
}
@@ -1061,9 +1056,9 @@ pub struct FindCaptures<'r, 't> {
10611056
}
10621057

10631058
impl<'r, 't> Iterator for FindCaptures<'r, 't> {
1064-
type Item = Captures<'t>;
1059+
type Item = Captures<'r, 't>;
10651060

1066-
fn next(&mut self) -> Option<Captures<'t>> {
1061+
fn next(&mut self) -> Option<Captures<'r, 't>> {
10671062
if self.last_end > self.search.len() {
10681063
return None
10691064
}
@@ -1087,7 +1082,11 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> {
10871082
self.last_end = e;
10881083
self.skip_next_empty = true;
10891084
}
1090-
Some(Captures::new(self.re, self.search, caps))
1085+
Some(Captures {
1086+
regex: self.re,
1087+
text: self.search,
1088+
locs: caps
1089+
})
10911090
}
10921091
}
10931092

0 commit comments

Comments
 (0)