Skip to content

Commit 727df1b

Browse files
committed
refactor: Improve suppress_github_mentions function with unicode text support.
1 parent ebf5466 commit 727df1b

File tree

3 files changed

+192
-98
lines changed

3 files changed

+192
-98
lines changed

src/bors/handlers/trybuild.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use crate::github::{
2020
CommitSha, GithubUser, LabelTrigger, MergeError, PullRequest, PullRequestNumber,
2121
};
2222
use crate::permissions::PermissionType;
23-
use crate::utils::suppress_github_mentions;
23+
use crate::utils::text::suppress_github_mentions;
2424

2525
use super::deny_request;
2626
use super::has_permission;

src/utils/mod.rs

Lines changed: 1 addition & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,3 @@
11
pub mod logging;
2+
pub mod text;
23
pub mod timing;
3-
4-
/// Converts GitHub @mentions to markdown-backticked text to prevent notifications.
5-
/// For example, "@user" becomes "`user`".
6-
///
7-
/// Handles GitHub mention formats:
8-
/// - Usernames (@username)
9-
/// - Teams (@org/team)
10-
/// - Nested teams (@org/team/subteam)
11-
///
12-
/// GitHub's nested team documentation:
13-
/// https://docs.github.com/en/organizations/organizing-members-into-teams/about-teams#nested-teams
14-
///
15-
/// Ignores email addresses and other @ symbols that don't match GitHub mention patterns.
16-
pub fn suppress_github_mentions(text: &str) -> String {
17-
if text.is_empty() || !text.contains('@') {
18-
return text.to_string();
19-
}
20-
21-
let segment = r"[A-Za-z0-9][A-Za-z0-9\-]{0,38}";
22-
let pattern = format!(r"@{0}(?:/{0})*", segment);
23-
24-
let re = regex::Regex::new(&pattern).unwrap();
25-
re.replace_all(text, |caps: &regex::Captures| {
26-
let mention = &caps[0];
27-
let position = caps.get(0).unwrap().start();
28-
29-
if !is_github_mention(text, mention, position) {
30-
return mention.to_string();
31-
}
32-
33-
let name = &mention[1..]; // Drop the @ symbol
34-
format!("`{}`", name)
35-
})
36-
.to_string()
37-
}
38-
39-
// Determines if a potential mention would actually trigger a notification
40-
fn is_github_mention(text: &str, mention: &str, pos: usize) -> bool {
41-
// Not a valid mention if preceded by alphanumeric or underscore (email)
42-
if pos > 0 {
43-
let c = text.chars().nth(pos - 1).unwrap();
44-
if c.is_alphanumeric() || c == '_' {
45-
return false;
46-
}
47-
}
48-
49-
// Check if followed by invalid character
50-
let end = pos + mention.len();
51-
if end < text.len() {
52-
let next_char = text.chars().nth(end).unwrap();
53-
if next_char.is_alphanumeric() || next_char == '_' || next_char == '-' {
54-
return false;
55-
}
56-
}
57-
58-
true
59-
}
60-
61-
#[cfg(test)]
62-
mod tests {
63-
use super::*;
64-
65-
#[test]
66-
fn test_suppress_github_mentions() {
67-
// User mentions
68-
assert_eq!(suppress_github_mentions("Hello @user"), "Hello `user`");
69-
70-
// Org team mentions
71-
assert_eq!(suppress_github_mentions("@org/team"), "`org/team`");
72-
assert_eq!(
73-
suppress_github_mentions("@org/team/subteam"),
74-
"`org/team/subteam`"
75-
);
76-
assert_eq!(
77-
suppress_github_mentions("@big/team/sub/group"),
78-
"`big/team/sub/group`"
79-
);
80-
assert_eq!(
81-
suppress_github_mentions("Thanks @user, @rust-lang/libs and @github/docs/content!"),
82-
"Thanks `user`, `rust-lang/libs` and `github/docs/content`!"
83-
);
84-
85-
// Non mentions
86-
assert_eq!(suppress_github_mentions("@"), "@");
87-
assert_eq!(suppress_github_mentions(""), "");
88-
assert_eq!(
89-
suppress_github_mentions("No mentions here"),
90-
"No mentions here"
91-
);
92-
assert_eq!(
93-
suppress_github_mentions("user@example.com"),
94-
"user@example.com"
95-
);
96-
97-
assert_eq!(suppress_github_mentions("@user_test"), "@user_test");
98-
}
99-
}

src/utils/text.rs

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
use std::sync::LazyLock;
2+
3+
use regex::Regex;
4+
5+
/// Pattern for matching an auto-linked GitHub username which can be a potential github @mention.
6+
///
7+
/// Matches GitHub's username/team patterns:
8+
/// - Usernames: 1-39 chars, starting with alphanumeric, followed by alphanumeric or hyphens
9+
/// - Teams: Same as username but allows underscores and slashes for org/team hierarchy
10+
///
11+
/// ref: https://github.com/rust-lang/homu/pull/230
12+
static MENTION_REGEX: LazyLock<Regex> = LazyLock::new(|| {
13+
let username_segment = r"(?x)
14+
[\p{L}\p{N}] # Must start with letter/number
15+
(?:[\p{L}\p{N}-] # Followed by allowed base chars
16+
| \p{M} # Or combining marks (for accented chars)
17+
){0,38} # Up to 38 more characters
18+
";
19+
20+
let team_segment = r"(?x)
21+
[\p{L}\p{N}_] # Start with letter/number/underscore
22+
(?:[\p{L}\p{N}_-] # Team name components
23+
| \p{M} # Allow combining marks
24+
){0,38} # Up to 38 more characters
25+
";
26+
27+
let pattern = format!(r"@({username_segment})(?:/({team_segment}))*");
28+
29+
Regex::new(&pattern).unwrap()
30+
});
31+
32+
/// Wraps valid GitHub @mentions in backticks to prevent accidental pings.
33+
///
34+
/// For example:
35+
/// "@user" -> "`@user`".
36+
/// "@org/team" -> "`@org/team`".
37+
/// "@org/team/subteam" -> "`@org/team/subteam`".
38+
pub fn suppress_github_mentions(text: &str) -> String {
39+
if text.is_empty() || !text.contains('@') {
40+
return text.to_string();
41+
}
42+
43+
let mut buffer = String::with_capacity(text.len());
44+
let mut last_end = 0;
45+
46+
for m in MENTION_REGEX.find_iter(text) {
47+
let (start, end) = (m.start(), m.end());
48+
let mention = &text[start..end];
49+
50+
// Append text between matches
51+
buffer.push_str(&text[last_end..start]);
52+
53+
if is_valid_mention_context(text, start, end) {
54+
buffer.push_str(&format!("`{mention}`"));
55+
} else {
56+
buffer.push_str(mention);
57+
}
58+
59+
last_end = end;
60+
}
61+
62+
buffer.push_str(&text[last_end..]);
63+
buffer
64+
}
65+
66+
/// Extension trait for GitHub mention boundary checks
67+
trait WordChar {
68+
fn is_word_char(&self) -> bool;
69+
}
70+
71+
impl WordChar for char {
72+
fn is_word_char(&self) -> bool {
73+
self.is_alphanumeric() || *self == '_'
74+
}
75+
}
76+
77+
/// Validates mention boundaries according to GitHub's autolinking rules
78+
///
79+
/// A mention is considered valid if:
80+
/// 1. Preceded by non-word character (or start of string)
81+
/// 2. Followed by non-word character (or end of string)
82+
///
83+
/// ref: https://github.com/rust-lang/homu/pull/230
84+
fn is_valid_mention_context(text: &str, start: usize, end: usize) -> bool {
85+
// Check preceding boundary
86+
if start > 0 {
87+
if let Some(prev) = text[..start].chars().last() {
88+
if prev.is_word_char() {
89+
return false;
90+
}
91+
}
92+
}
93+
94+
// Check following boundary
95+
if end < text.len() {
96+
if let Some(next) = text[end..].chars().next() {
97+
if next.is_word_char() || "-_".contains(next) {
98+
return false;
99+
}
100+
}
101+
}
102+
103+
true
104+
}
105+
106+
#[cfg(test)]
107+
mod tests {
108+
use super::*;
109+
110+
#[test]
111+
fn basic_mentions() {
112+
assert_eq!(suppress_github_mentions("Hello @user"), "Hello `@user`");
113+
assert_eq!(
114+
suppress_github_mentions("Ping @developer"),
115+
"Ping `@developer`"
116+
);
117+
assert_eq!(
118+
suppress_github_mentions("Multiple @user1 and @user2"),
119+
"Multiple `@user1` and `@user2`"
120+
);
121+
}
122+
123+
#[test]
124+
fn team_mentions() {
125+
assert_eq!(suppress_github_mentions("@org/team"), "`@org/team`");
126+
assert_eq!(
127+
suppress_github_mentions("@rust-lang/libs"),
128+
"`@rust-lang/libs`"
129+
);
130+
assert_eq!(
131+
suppress_github_mentions("@org/team/subteam"),
132+
"`@org/team/subteam`"
133+
);
134+
}
135+
136+
#[test]
137+
fn mention_boundaries() {
138+
// Adjacent punctuation
139+
assert_eq!(
140+
suppress_github_mentions("Hello,@user! How are you?"),
141+
"Hello,`@user`! How are you?"
142+
);
143+
144+
// Email addresses
145+
assert_eq!(
146+
suppress_github_mentions("user@example.com"),
146+
"user@example.com"
148+
);
149+
150+
// Invalid mentions
151+
assert_eq!(suppress_github_mentions("@-user"), "@-user");
152+
}
153+
154+
#[test]
155+
fn unicode_support() {
156+
// Precomposed characters
157+
assert_eq!(
158+
suppress_github_mentions("Hello @üsèrñàmé"),
159+
"Hello `@üsèrñàmé`"
160+
);
161+
162+
// Combining marks (decomposed characters) stay inside the mention
163+
assert_eq!(
164+
suppress_github_mentions("Hello @us\u{0301}er"),
165+
"Hello `@us\u{0301}er`"
166+
);
167+
168+
// Bidirectional text
169+
assert_eq!(
170+
suppress_github_mentions("Hello @user في العالم"),
171+
"Hello `@user` في العالم"
172+
);
173+
}
174+
175+
#[test]
176+
fn edge_cases() {
177+
// Empty input
178+
assert_eq!(suppress_github_mentions(""), "");
179+
180+
// Minimum valid mention
181+
assert_eq!(suppress_github_mentions("@a"), "`@a`");
182+
183+
// Maximum length mention
184+
let long_mention = "@".to_string() + &"a".repeat(39);
185+
assert_eq!(
186+
suppress_github_mentions(&long_mention),
187+
format!("`{long_mention}`")
188+
);
189+
}
190+
}

0 commit comments

Comments
 (0)