Skip to content

Commit 123cf65

Browse files
committed
Auto merge of #1806 - sgrif:sg-block-arbitrary-traffic, r=sgrif
Allow blocking arbitrary traffic without redeploying This generalizes the `block_ips` module to allow checking against any header instead of just the `X-Real-Ip` header. This does not enable more complex matching such as regexes, but it does let us apply the simple string matching we have today to any header. Additionally, which headers we check against is now configured by an environment variable, which is a comma separated list in the form `Header=VALUE_ENV_VAR`. So to keep the behavior we had today, one would set `BLOCKED_TRAFFIC="X-Real-Ip=BLOCKED_IPS"`. This means that when we see traffic patterns that we need to block which can be handled by the existing logic, we can now begin applying it to patterns we didn't previosuly anticipate without requiring a redeploy
2 parents a248372 + 8b7611b commit 123cf65

File tree

6 files changed

+132
-22
lines changed

6 files changed

+132
-22
lines changed

src/config.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ pub struct Config {
1818
pub mirror: Replica,
1919
pub api_protocol: String,
2020
pub publish_rate_limit: PublishRateLimit,
21+
pub blocked_traffic: Vec<(String, Vec<String>)>,
2122
}
2223

2324
impl Default for Config {
@@ -42,6 +43,8 @@ impl Default for Config {
4243
/// - `GH_CLIENT_ID`: The client ID of the associated GitHub application.
4344
/// - `GH_CLIENT_SECRET`: The client secret of the associated GitHub application.
4445
/// - `DATABASE_URL`: The URL of the postgres database to use.
46+
/// - `BLOCKED_TRAFFIC`: A list of headers and environment variables to use for blocking
47+
///. traffic. See the `block_traffic` module for more documentation.
4548
fn default() -> Config {
4649
let checkout = PathBuf::from(env("GIT_REPO_CHECKOUT"));
4750
let api_protocol = String::from("https");
@@ -135,6 +138,48 @@ impl Default for Config {
135138
mirror,
136139
api_protocol,
137140
publish_rate_limit: Default::default(),
141+
blocked_traffic: blocked_traffic(),
138142
}
139143
}
140144
}
145+
146+
fn blocked_traffic() -> Vec<(String, Vec<String>)> {
147+
let pattern_list = dotenv::var("BLOCKED_TRAFFIC").unwrap_or_default();
148+
parse_traffic_patterns(&pattern_list)
149+
.map(|(header, value_env_var)| {
150+
let value_list = dotenv::var(value_env_var).unwrap_or_default();
151+
let values = value_list.split(',').map(String::from).collect();
152+
(header.into(), values)
153+
})
154+
.collect()
155+
}
156+
157+
fn parse_traffic_patterns(patterns: &str) -> impl Iterator<Item = (&str, &str)> {
158+
patterns.split_terminator(',').map(|pattern| {
159+
if let Some(idx) = pattern.find('=') {
160+
(&pattern[..idx], &pattern[(idx + 1)..])
161+
} else {
162+
panic!(
163+
"BLOCKED_TRAFFIC must be in the form HEADER=VALUE_ENV_VAR, \
164+
got invalid pattern {}",
165+
pattern
166+
)
167+
}
168+
})
169+
}
170+
171+
#[test]
172+
fn parse_traffic_patterns_splits_on_comma_and_looks_for_equal_sign() {
173+
let pattern_string_1 = "Foo=BAR,Bar=BAZ";
174+
let pattern_string_2 = "Baz=QUX";
175+
let pattern_string_3 = "";
176+
177+
let patterns_1 = parse_traffic_patterns(pattern_string_1).collect::<Vec<_>>();
178+
assert_eq!(vec![("Foo", "BAR"), ("Bar", "BAZ")], patterns_1);
179+
180+
let patterns_2 = parse_traffic_patterns(pattern_string_2).collect::<Vec<_>>();
181+
assert_eq!(vec![("Baz", "QUX")], patterns_2);
182+
183+
let patterns_3 = parse_traffic_patterns(pattern_string_3).collect::<Vec<_>>();
184+
assert!(patterns_3.is_empty());
185+
}

src/middleware.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ pub use self::security_headers::SecurityHeaders;
1414
pub use self::static_or_continue::StaticOrContinue;
1515

1616
pub mod app;
17-
mod block_ips;
17+
mod block_traffic;
1818
pub mod current_user;
1919
mod debug;
2020
mod ember_index_rewrite;
@@ -38,7 +38,8 @@ use crate::{App, Env};
3838

3939
pub fn build_middleware(app: Arc<App>, endpoints: R404) -> MiddlewareBuilder {
4040
let mut m = MiddlewareBuilder::new(endpoints);
41-
let env = app.config.env;
41+
let config = app.config.clone();
42+
let env = config.env;
4243

4344
if env != Env::Test {
4445
m.add(ensure_well_formed_500::EnsureWellFormed500);
@@ -69,7 +70,7 @@ pub fn build_middleware(app: Arc<App>, endpoints: R404) -> MiddlewareBuilder {
6970
));
7071

7172
if env == Env::Production {
72-
m.add(SecurityHeaders::new(&app.config.uploader));
73+
m.add(SecurityHeaders::new(&config.uploader));
7374
}
7475
m.add(AppMiddleware::new(app));
7576

@@ -87,9 +88,8 @@ pub fn build_middleware(app: Arc<App>, endpoints: R404) -> MiddlewareBuilder {
8788

8889
m.around(Head::default());
8990

90-
if let Ok(ip_list) = env::var("BLOCKED_IPS") {
91-
let ips = ip_list.split(',').map(String::from).collect();
92-
m.around(block_ips::BlockIps::new(ips));
91+
for (header, blocked_values) in config.blocked_traffic {
92+
m.around(block_traffic::BlockTraffic::new(header, blocked_values));
9393
}
9494

9595
m.around(require_user_agent::RequireUserAgent::default());

src/middleware/block_ips.rs renamed to src/middleware/block_traffic.rs

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1-
//! Middleware that blocks requests from a list of given IPs
1+
//! Middleware that blocks requests if a header matches the given list
2+
//!
3+
//! To use, set the `BLOCKED_TRAFFIC` environment variable to a comma-separated list of pairs
4+
//! containing a header name, an equals sign, and the name of another environment variable that
5+
//! contains the values of that header that should be blocked. For example, set `BLOCKED_TRAFFIC`
6+
//! to `User-Agent=BLOCKED_UAS,X-Real-Ip=BLOCKED_IPS`, `BLOCKED_UAS` to `curl/7.54.0,cargo 1.36.0
7+
//! (c4fcfb725 2019-05-15)`, and `BLOCKED_IPS` to `192.168.0.1,127.0.0.1` to block requests from
8+
//! the versions of curl or Cargo specified or from either of the IPs (values are nonsensical
9+
//! examples). Values of the headers must match exactly.
210
311
use super::prelude::*;
412

@@ -8,32 +16,37 @@ use std::io::Cursor;
816
// Can't derive debug because of Handler.
917
#[allow(missing_debug_implementations)]
1018
#[derive(Default)]
11-
pub struct BlockIps {
12-
ips: Vec<String>,
19+
pub struct BlockTraffic {
20+
header_name: String,
21+
blocked_values: Vec<String>,
1322
handler: Option<Box<dyn Handler>>,
1423
}
1524

16-
impl BlockIps {
17-
pub fn new(ips: Vec<String>) -> Self {
18-
Self { ips, handler: None }
25+
impl BlockTraffic {
26+
pub fn new(header_name: String, blocked_values: Vec<String>) -> Self {
27+
Self {
28+
header_name,
29+
blocked_values,
30+
handler: None,
31+
}
1932
}
2033
}
2134

22-
impl AroundMiddleware for BlockIps {
35+
impl AroundMiddleware for BlockTraffic {
2336
fn with_handler(&mut self, handler: Box<dyn Handler>) {
2437
self.handler = Some(handler);
2538
}
2639
}
2740

28-
impl Handler for BlockIps {
41+
impl Handler for BlockTraffic {
2942
fn call(&self, req: &mut dyn Request) -> Result<Response, Box<dyn Error + Send>> {
30-
let has_blocked_ip = req
43+
let has_blocked_value = req
3144
.headers()
32-
.find("X-Real-Ip")
33-
.unwrap()
45+
.find(&self.header_name)
46+
.unwrap_or_default()
3447
.iter()
35-
.any(|ip| self.ips.iter().any(|v| v == ip));
36-
if has_blocked_ip {
48+
.any(|value| self.blocked_values.iter().any(|v| v == value));
49+
if has_blocked_value {
3750
let body = format!(
3851
"We are unable to process your request at this time. \
3952
This usually means that you are in violation of our crawler \

src/tests/all.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ fn simple_config() -> Config {
148148
// sniff/record it, but everywhere else we use https
149149
api_protocol: String::from("http"),
150150
publish_rate_limit: Default::default(),
151+
blocked_traffic: Default::default(),
151152
}
152153
}
153154

src/tests/server.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,48 @@ fn user_agent_is_not_required_for_download() {
2626
let resp = anon.run::<()>(req);
2727
resp.assert_status(302);
2828
}
29+
30+
#[test]
31+
fn blocked_traffic_doesnt_panic_if_checked_header_is_not_present() {
32+
let (app, anon, user) = TestApp::init()
33+
.with_config(|config| {
34+
config.blocked_traffic = vec![("Never-Given".into(), vec!["1".into()])];
35+
})
36+
.with_user();
37+
38+
app.db(|conn| {
39+
CrateBuilder::new("dl_no_ua", user.as_model().id).expect_build(conn);
40+
});
41+
42+
let mut req = anon.request_builder(Method::Get, "/api/v1/crates/dl_no_ua/0.99.0/download");
43+
req.header("User-Agent", "");
44+
let resp = anon.run::<()>(req);
45+
resp.assert_status(302);
46+
}
47+
48+
#[test]
49+
fn block_traffic_via_arbitrary_header_and_value() {
50+
let (app, anon, user) = TestApp::init()
51+
.with_config(|config| {
52+
config.blocked_traffic = vec![("User-Agent".into(), vec!["1".into(), "2".into()])];
53+
})
54+
.with_user();
55+
56+
app.db(|conn| {
57+
CrateBuilder::new("dl_no_ua", user.as_model().id).expect_build(conn);
58+
});
59+
60+
let mut req = anon.request_builder(Method::Get, "/api/v1/crates/dl_no_ua/0.99.0/download");
61+
// A request with a header value we want to block isn't allowed
62+
req.header("User-Agent", "1");
63+
req.header("X-Request-Id", "abcd"); // Needed for the error message we generate
64+
let resp = anon.run::<()>(req);
65+
resp.assert_status(403);
66+
67+
let mut req = anon.request_builder(Method::Get, "/api/v1/crates/dl_no_ua/0.99.0/download");
68+
// A request with a header value we don't want to block is allowed, even though there might
69+
// be a substring match
70+
req.header("User-Agent", "1value-must-match-exactly-this-is-allowed");
71+
let resp = anon.run::<()>(req);
72+
resp.assert_status(302);
73+
}

src/tests/util.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,12 +268,18 @@ impl TestAppBuilder {
268268
(app, anon, user, token)
269269
}
270270

271-
pub fn with_publish_rate_limit(mut self, rate: Duration, burst: i32) -> Self {
272-
self.config.publish_rate_limit.rate = rate;
273-
self.config.publish_rate_limit.burst = burst;
271+
pub fn with_config(mut self, f: impl FnOnce(&mut Config)) -> Self {
272+
f(&mut self.config);
274273
self
275274
}
276275

276+
pub fn with_publish_rate_limit(self, rate: Duration, burst: i32) -> Self {
277+
self.with_config(|config| {
278+
config.publish_rate_limit.rate = rate;
279+
config.publish_rate_limit.burst = burst;
280+
})
281+
}
282+
277283
pub fn with_git_index(mut self) -> Self {
278284
use crate::git;
279285

0 commit comments

Comments
 (0)