Skip to content

Commit 37d0e48

Browse files
committed
Storing mapping from names to group indices into Regex
1 parent 277926f commit 37d0e48

File tree

3 files changed

+75
-62
lines changed

3 files changed

+75
-62
lines changed

regex_macros/src/lib.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,19 @@ impl<'a> NfaGen<'a> {
109109
None => cx.expr_none(self.sp),
110110
}
111111
);
112+
let named_groups = {
113+
let mut named_groups = ::std::collections::BTreeMap::new();
114+
for (i, name) in self.names.iter().enumerate() {
115+
if let Some(ref name) = *name {
116+
named_groups.insert(name.to_owned(), i);
117+
}
118+
}
119+
self.vec_expr(named_groups.iter(),
120+
&mut |cx, (name, group_idx)|
121+
quote_expr!(cx, ($name, $group_idx))
122+
)
123+
};
124+
112125
let prefix_anchor = self.prog.anchored_begin;
113126

114127
let step_insts = self.step_insts();
@@ -123,6 +136,8 @@ impl<'a> NfaGen<'a> {
123136
// unused code generated by regex!. See #14185 for an example.
124137
#[allow(dead_code)]
125138
static CAP_NAMES: &'static [Option<&'static str>] = &$cap_names;
139+
#[allow(dead_code)]
140+
static NAMED_GROUPS: &'static [(&'static str, usize)] = &$named_groups;
126141

127142
#[allow(dead_code)]
128143
fn exec<'t>(
@@ -308,6 +323,7 @@ fn exec<'t>(
308323
::regex::Regex::Native(::regex::internal::ExNative {
309324
original: $regex,
310325
names: &CAP_NAMES,
326+
groups: &NAMED_GROUPS,
311327
prog: exec,
312328
})
313329
})

src/program.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
12+
1113
use syntax;
1214

1315
use backtrack::BacktrackCache;
@@ -39,6 +41,9 @@ pub struct Program {
3941
/// The sequence of capture group names. There is an entry for each capture
4042
/// group index and a name exists only if the capture group is named.
4143
pub cap_names: Vec<Option<String>>,
44+
/// The map of named capture groups. The keys are group names and
45+
/// the values are group indices.
46+
pub named_groups: ::std::collections::HashMap<String, usize>,
4247
/// If the regular expression requires a literal prefix in order to have a
4348
/// match, that prefix is stored here as a DFA.
4449
pub prefixes: Literals,

src/re.rs

Lines changed: 54 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
// except according to those terms.
1010

1111
use std::borrow::Cow;
12-
use std::collections::HashMap;
13-
use std::collections::hash_map::Iter;
1412
use std::fmt;
1513
use std::ops::Index;
1614
#[cfg(feature = "pattern")]
@@ -186,6 +184,8 @@ pub struct ExNative {
186184
#[doc(hidden)]
187185
pub names: &'static &'static [Option<&'static str>],
188186
#[doc(hidden)]
187+
pub groups: &'static &'static [(&'static str, usize)],
188+
#[doc(hidden)]
189189
pub prog: fn(&mut CaptureIdxs, &str, usize) -> bool,
190190
}
191191

@@ -394,10 +394,14 @@ impl Regex {
394394
///
395395
/// The `0`th capture group is always unnamed, so it must always be
396396
/// accessed with `at(0)` or `[0]`.
397-
pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
398-
let mut caps = self.alloc_captures();
399-
if exec(self, &mut caps, text, 0) {
400-
Some(Captures::new(self, text, caps))
397+
pub fn captures<'r, 't>(&'r self, text: &'t str) -> Option<Captures<'r, 't>> {
398+
let mut locs = self.alloc_captures();
399+
if exec(self, &mut locs, text, 0) {
400+
Some(Captures {
401+
regex: self,
402+
text: text,
403+
locs: locs,
404+
})
401405
} else {
402406
None
403407
}
@@ -815,37 +819,13 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
815819
/// Positions returned from a capture group are always byte indices.
816820
///
817821
/// `'r` is the lifetime of the compiled regular expression and `'t` is the lifetime of the matched text.
818-
pub struct Captures<'t> {
822+
pub struct Captures<'r, 't> {
823+
regex: &'r Regex,
819824
text: &'t str,
820825
locs: Vec<Option<usize>>,
821-
named: Option<HashMap<String, usize>>,
822826
}
823827

824-
impl<'t> Captures<'t> {
825-
fn new(
826-
re: &Regex,
827-
search: &'t str,
828-
locs: Vec<Option<usize>>,
829-
) -> Captures<'t> {
830-
let named =
831-
if re.captures_len() == 0 {
832-
None
833-
} else {
834-
let mut named = HashMap::new();
835-
for (i, name) in re.capture_names().enumerate() {
836-
if let Some(name) = name {
837-
named.insert(name.to_owned(), i);
838-
}
839-
}
840-
Some(named)
841-
};
842-
Captures {
843-
text: search,
844-
locs: locs,
845-
named: named,
846-
}
847-
}
848-
828+
impl<'r, 't> Captures<'r, 't> {
849829
/// Returns the start and end positions of the Nth capture group.
850830
/// Returns `None` if `i` is not a valid capture group or if the capture
851831
/// group did not match anything.
@@ -874,37 +854,49 @@ impl<'t> Captures<'t> {
874854
/// `name` isn't a valid capture group or didn't match anything, then
875855
/// `None` is returned.
876856
pub fn name(&self, name: &str) -> Option<&'t str> {
877-
match self.named {
878-
None => None,
879-
Some(ref h) => {
880-
match h.get(name) {
881-
None => None,
882-
Some(i) => self.at(*i),
857+
match *self.regex {
858+
Regex::Native(ExNative { ref groups, .. }) => {
859+
match groups.binary_search_by(|&(n, _)| n.cmp(name)) {
860+
Ok(i) => self.at(groups[i].1),
861+
Err(_) => None
883862
}
884-
}
863+
},
864+
Regex::Dynamic(Program { ref named_groups, .. }) => {
865+
named_groups.get(name).and_then(|i| self.at(*i))
866+
},
885867
}
886868
}
887869

888870
/// Creates an iterator of all the capture groups in order of appearance
889871
/// in the regular expression.
890-
pub fn iter(&'t self) -> SubCaptures<'t> {
872+
pub fn iter<'c>(&'c self) -> SubCaptures<'c, 'r, 't> {
891873
SubCaptures { idx: 0, caps: self, }
892874
}
893875

894876
/// Creates an iterator of all the capture group positions in order of
895877
/// appearance in the regular expression. Positions are byte indices
896878
/// in terms of the original string matched.
897-
pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
879+
pub fn iter_pos<'c>(&'c self) -> SubCapturesPos<'c, 'r, 't> {
898880
SubCapturesPos { idx: 0, caps: self, }
899881
}
900882

901883
/// Creates an iterator of all named groups as an tuple with the group
902884
/// name and the value. The iterator returns these values in arbitrary
903885
/// order.
904-
pub fn iter_named(&'t self) -> SubCapturesNamed<'t> {
886+
pub fn iter_named<'c>(&'c self) -> SubCapturesNamed<'c, 'r, 't> {
887+
let iter = match *self.regex {
888+
Regex::Native(ExNative { ref groups, .. }) => {
889+
Box::new(groups.iter().map(|&v| v))
890+
as Box<Iterator<Item=(&'r str, usize)> + 'r>
891+
},
892+
Regex::Dynamic(Program { ref named_groups, .. }) => {
893+
Box::new(named_groups.iter().map(|(s, i)| (&s[..], *i)))
894+
as Box<Iterator<Item=(&'r str, usize)> + 'r>
895+
},
896+
};
905897
SubCapturesNamed {
906898
caps: self,
907-
inner: self.named.as_ref().map(|n| n.iter()),
899+
inner: iter
908900
}
909901
}
910902

@@ -948,7 +940,7 @@ impl<'t> Captures<'t> {
948940
///
949941
/// # Panics
950942
/// If there is no group at the given index.
951-
impl<'t> Index<usize> for Captures<'t> {
943+
impl<'r, 't> Index<usize> for Captures<'r, 't> {
952944

953945
type Output = str;
954946

@@ -962,7 +954,7 @@ impl<'t> Index<usize> for Captures<'t> {
962954
///
963955
/// # Panics
964956
/// If there is no group named by the given value.
965-
impl<'t> Index<&'t str> for Captures<'t> {
957+
impl<'r, 't> Index<&'t str> for Captures<'r, 't> {
966958

967959
type Output = str;
968960

@@ -979,12 +971,12 @@ impl<'t> Index<&'t str> for Captures<'t> {
979971
/// expression.
980972
///
981973
/// `'t` is the lifetime of the matched text.
982-
pub struct SubCaptures<'t> {
974+
pub struct SubCaptures<'c, 'r: 'c, 't: 'c> {
983975
idx: usize,
984-
caps: &'t Captures<'t>,
976+
caps: &'c Captures<'r, 't>,
985977
}
986978

987-
impl<'t> Iterator for SubCaptures<'t> {
979+
impl<'c, 'r, 't> Iterator for SubCaptures<'c, 'r, 't> {
988980
type Item = Option<&'t str>;
989981

990982
fn next(&mut self) -> Option<Option<&'t str>> {
@@ -1003,12 +995,12 @@ impl<'t> Iterator for SubCaptures<'t> {
1003995
/// Positions are byte indices in terms of the original string matched.
1004996
///
1005997
/// `'t` is the lifetime of the matched text.
1006-
pub struct SubCapturesPos<'t> {
998+
pub struct SubCapturesPos<'c, 'r: 'c, 't: 'c> {
1007999
idx: usize,
1008-
caps: &'t Captures<'t>,
1000+
caps: &'c Captures<'r, 't>,
10091001
}
10101002

1011-
impl<'t> Iterator for SubCapturesPos<'t> {
1003+
impl<'c, 'r, 't> Iterator for SubCapturesPos<'c, 'r, 't> {
10121004
type Item = Option<(usize, usize)>;
10131005

10141006
fn next(&mut self) -> Option<Option<(usize, usize)>> {
@@ -1025,17 +1017,17 @@ impl<'t> Iterator for SubCapturesPos<'t> {
10251017
/// name and the value.
10261018
///
10271019
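/// `'c` is the lifetime of the borrowed `Captures`, `'r` is the lifetime of the compiled regular expression (and of the yielded group names), and `'t` is the lifetime of the matched text.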
1028-
pub struct SubCapturesNamed<'t>{
1029-
caps: &'t Captures<'t>,
1030-
inner: Option<Iter<'t, String, usize>>,
1020+
pub struct SubCapturesNamed<'c, 'r: 'c, 't: 'c> {
1021+
caps: &'c Captures<'r, 't>,
1022+
inner: Box<Iterator<Item=(&'r str, usize)> + 'r>,
10311023
}
10321024

1033-
impl<'t> Iterator for SubCapturesNamed<'t> {
1034-
type Item = (&'t str, Option<&'t str>);
1025+
impl<'c, 'r, 't> Iterator for SubCapturesNamed<'c, 'r, 't> {
1026+
type Item = (&'r str, Option<&'t str>);
10351027

1036-
fn next(&mut self) -> Option<(&'t str, Option<&'t str>)> {
1037-
match self.inner.as_mut().map_or(None, |it| it.next()) {
1038-
Some((name, pos)) => Some((name, self.caps.at(*pos))),
1028+
fn next(&mut self) -> Option<(&'r str, Option<&'t str>)> {
1029+
match self.inner.next() {
1030+
Some((name, pos)) => Some((name, self.caps.at(pos))),
10391031
None => None
10401032
}
10411033
}
@@ -1056,9 +1048,9 @@ pub struct FindCaptures<'r, 't> {
10561048
}
10571049

10581050
impl<'r, 't> Iterator for FindCaptures<'r, 't> {
1059-
type Item = Captures<'t>;
1051+
type Item = Captures<'r, 't>;
10601052

1061-
fn next(&mut self) -> Option<Captures<'t>> {
1053+
fn next(&mut self) -> Option<Captures<'r, 't>> {
10621054
if self.last_end > self.search.len() {
10631055
return None
10641056
}

0 commit comments

Comments
 (0)