@@ -2,9 +2,50 @@ use std::str;
2
2
3
3
use memchr:: memchr;
4
4
5
- use bytes:: Captures ;
5
+ use re_bytes;
6
+ use re_unicode;
6
7
7
- pub fn expand ( caps : & Captures , mut replacement : & [ u8 ] , dst : & mut Vec < u8 > ) {
8
+ pub fn expand_str (
9
+ caps : & re_unicode:: Captures ,
10
+ mut replacement : & str ,
11
+ dst : & mut String ,
12
+ ) {
13
+ while !replacement. is_empty ( ) {
14
+ match memchr ( b'$' , replacement. as_bytes ( ) ) {
15
+ None => break ,
16
+ Some ( i) => {
17
+ dst. push_str ( & replacement[ ..i] ) ;
18
+ replacement = & replacement[ i..] ;
19
+ }
20
+ }
21
+ if replacement. as_bytes ( ) . get ( 1 ) . map_or ( false , |& b| b == b'$' ) {
22
+ dst. push_str ( "$" ) ;
23
+ replacement = & replacement[ 2 ..] ;
24
+ continue ;
25
+ }
26
+ debug_assert ! ( !replacement. is_empty( ) ) ;
27
+ let cap_ref = match find_cap_ref ( replacement) {
28
+ Some ( cap_ref) => cap_ref,
29
+ None => {
30
+ dst. push_str ( "$" ) ;
31
+ replacement = & replacement[ 1 ..] ;
32
+ continue ;
33
+ }
34
+ } ;
35
+ replacement = & replacement[ cap_ref. end ..] ;
36
+ match cap_ref. cap {
37
+ Ref :: Number ( i) => dst. push_str ( caps. at ( i) . unwrap_or ( "" ) ) ,
38
+ Ref :: Named ( name) => dst. push_str ( caps. name ( name) . unwrap_or ( "" ) ) ,
39
+ }
40
+ }
41
+ dst. push_str ( replacement) ;
42
+ }
43
+
44
+ pub fn expand_bytes (
45
+ caps : & re_bytes:: Captures ,
46
+ mut replacement : & [ u8 ] ,
47
+ dst : & mut Vec < u8 > ,
48
+ ) {
8
49
while !replacement. is_empty ( ) {
9
50
match memchr ( b'$' , replacement) {
10
51
None => break ,
@@ -27,7 +68,7 @@ pub fn expand(caps: &Captures, mut replacement: &[u8], dst: &mut Vec<u8>) {
27
68
continue ;
28
69
}
29
70
} ;
30
- replacement = cap_ref. rest ;
71
+ replacement = & replacement [ cap_ref. end .. ] ;
31
72
match cap_ref. cap {
32
73
Ref :: Number ( i) => dst. extend ( caps. at ( i) . unwrap_or ( b"" ) ) ,
33
74
Ref :: Named ( name) => dst. extend ( caps. name ( name) . unwrap_or ( b"" ) ) ,
@@ -36,56 +77,127 @@ pub fn expand(caps: &Captures, mut replacement: &[u8], dst: &mut Vec<u8>) {
36
77
dst. extend ( replacement) ;
37
78
}
38
79
80
+ /// CaptureRef represents a reference to a capture group inside some text. The
81
+ /// reference is either a capture group name or a number.
82
+ ///
83
+ /// It is also tagged with the position in the text immediately proceding the
84
+ /// capture reference.
85
+ #[ derive( Clone , Copy , Debug , Eq , PartialEq ) ]
39
86
struct CaptureRef < ' a > {
40
- rest : & ' a [ u8 ] ,
41
87
cap : Ref < ' a > ,
88
+ end : usize ,
42
89
}
43
90
91
/// Identifies a capture group mentioned in replacement text, either by name
/// or by numeric index.
///
/// Examples of the syntax this models: `$2`, `$foo`, `${foo}`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Ref<'a> {
    /// A group referred to by its name, borrowed from the parsed text.
    Named(&'a str),
    /// A group referred to by its index.
    Number(usize),
}
48
99
49
- fn find_cap_ref ( mut replacement : & [ u8 ] ) -> Option < CaptureRef > {
50
- if replacement. len ( ) <= 1 || replacement[ 0 ] != b'$' {
100
+ impl < ' a > From < & ' a str > for Ref < ' a > {
101
+ fn from ( x : & ' a str ) -> Ref < ' a > {
102
+ Ref :: Named ( x)
103
+ }
104
+ }
105
+
106
+ impl From < usize > for Ref < ' static > {
107
+ fn from ( x : usize ) -> Ref < ' static > {
108
+ Ref :: Number ( x)
109
+ }
110
+ }
111
+
112
+ /// Parses a possible reference to a capture group name in the given text,
113
+ /// starting at the beginning of `replacement`.
114
+ ///
115
+ /// If no such valid reference could be found, None is returned.
116
+ fn find_cap_ref < T : ?Sized + AsRef < [ u8 ] > > (
117
+ replacement : & T ,
118
+ ) -> Option < CaptureRef > {
119
+ let mut i = 0 ;
120
+ let rep: & [ u8 ] = replacement. as_ref ( ) ;
121
+ if rep. len ( ) <= 1 || rep[ 0 ] != b'$' {
51
122
return None ;
52
123
}
53
124
let mut brace = false ;
54
- replacement = & replacement [ 1 .. ] ;
55
- if replacement [ 0 ] == b'{' {
125
+ i += 1 ;
126
+ if rep [ i ] == b'{' {
56
127
brace = true ;
57
- replacement = & replacement [ 1 .. ] ;
128
+ i += 1 ;
58
129
}
59
- let mut cap_end = 0 ;
60
- while replacement . get ( cap_end) . map_or ( false , is_valid_cap_letter) {
130
+ let mut cap_end = i ;
131
+ while rep . get ( cap_end) . map_or ( false , is_valid_cap_letter) {
61
132
cap_end += 1 ;
62
133
}
63
- if cap_end == 0 {
134
+ if cap_end == i {
64
135
return None ;
65
136
}
66
137
// We just verified that the range 0..cap_end is valid ASCII, so it must
67
138
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
68
139
// check with either unsafe or by parsing the number straight from &[u8].
69
- let cap = str:: from_utf8 ( & replacement [ ..cap_end] )
140
+ let cap = str:: from_utf8 ( & rep [ i ..cap_end] )
70
141
. ok ( ) . expect ( "valid UTF-8 capture name" ) ;
71
142
if brace {
72
- if !replacement . get ( cap_end) . map_or ( false , |& b| b == b'}' ) {
143
+ if !rep . get ( cap_end) . map_or ( false , |& b| b == b'}' ) {
73
144
return None ;
74
145
}
75
146
cap_end += 1 ;
76
147
}
77
148
Some ( CaptureRef {
78
- rest : & replacement[ cap_end..] ,
79
149
cap : match cap. parse :: < u32 > ( ) {
80
150
Ok ( i) => Ref :: Number ( i as usize ) ,
81
151
Err ( _) => Ref :: Named ( cap) ,
82
152
} ,
153
+ end : cap_end,
83
154
} )
84
155
}
85
156
157
/// Reports whether `b` may appear in a capture group name: an ASCII digit, an
/// ASCII letter of either case, or an underscore.
fn is_valid_cap_letter(b: &u8) -> bool {
    let byte = *b;
    let is_digit = b'0' <= byte && byte <= b'9';
    let is_lower = b'a' <= byte && byte <= b'z';
    let is_upper = b'A' <= byte && byte <= b'Z';
    is_digit || is_lower || is_upper || byte == b'_'
}
164
+
165
#[cfg(test)]
mod tests {
    use super::{CaptureRef, find_cap_ref};

    // Builds the `CaptureRef` a successful parse is expected to produce. The
    // first argument is a group name or number, the second is the expected
    // `end` offset.
    macro_rules! cap_ref {
        ($group:expr, $end:expr) => {
            CaptureRef { cap: $group.into(), end: $end }
        };
    }

    // Generates one `#[test]` per invocation. With an expected value, the
    // parse must succeed and match it; without one, the parse must fail.
    macro_rules! check {
        ($test:ident, $input:expr, $expected:expr) => {
            #[test]
            fn $test() {
                assert_eq!(Some($expected), find_cap_ref($input));
            }
        };
        ($test:ident, $input:expr) => {
            #[test]
            fn $test() {
                assert_eq!(None, find_cap_ref($input));
            }
        };
    }

    check!(find_cap_ref1, "$foo", cap_ref!("foo", 4));
    check!(find_cap_ref2, "${foo}", cap_ref!("foo", 6));
    check!(find_cap_ref3, "$0", cap_ref!(0, 2));
    check!(find_cap_ref4, "$5", cap_ref!(5, 2));
    check!(find_cap_ref5, "$10", cap_ref!(10, 3));
    check!(find_cap_ref6, "$42a", cap_ref!("42a", 4));
    check!(find_cap_ref7, "${42}a", cap_ref!(42, 5));
    check!(find_cap_ref8, "${42");
    check!(find_cap_ref9, "${42 ");
    check!(find_cap_ref10, " $0 ");
    check!(find_cap_ref11, "$");
    check!(find_cap_ref12, " ");
    check!(find_cap_ref13, "");
}
0 commit comments