Skip to content

Commit 5bb4a12

Browse files
committed
[1/4 for #2365, #2671] Fix create/kill race with schedulers and tasks during rust_kernel::fail
1 parent f55999f commit 5bb4a12

10 files changed

+69
-32
lines changed

src/rt/rust_kernel.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ rust_kernel::rust_kernel(rust_env *env) :
1919
max_port_id(1),
2020
rval(0),
2121
max_sched_id(1),
22+
killed(false),
2223
sched_reaper(this),
2324
osmain_driver(NULL),
2425
non_weak_tasks(0),
@@ -103,7 +104,8 @@ rust_kernel::create_scheduler(rust_sched_launcher_factory *launchfac,
103104
id = max_sched_id++;
104105
assert(id != INTPTR_MAX && "Hit the maximum scheduler id");
105106
sched = new (this, "rust_scheduler")
106-
rust_scheduler(this, num_threads, id, allow_exit, launchfac);
107+
rust_scheduler(this, num_threads, id, allow_exit, killed,
108+
launchfac);
107109
bool is_new = sched_table
108110
.insert(std::pair<rust_sched_id,
109111
rust_scheduler*>(id, sched)).second;
@@ -197,6 +199,10 @@ rust_kernel::fail() {
197199
#endif
198200
// Copy the list of schedulers so that we don't hold the lock while
199201
// running kill_all_tasks.
202+
// I think this only needs to be done by one task ever; as it is,
203+
// multiple tasks invoking kill_all might get here. Currently libcore
204+
// ensures only one task will ever invoke it, but this would really be
205+
// fine either way, so I'm leaving it as it is. -- bblum
200206
// FIXME (#2671): There's a lot that happens under kill_all_tasks,
201207
// and I don't know that holding sched_lock here is ok, but we need
202208
// to hold the sched lock to prevent the scheduler from being
@@ -205,15 +211,13 @@ rust_kernel::fail() {
205211
std::vector<rust_scheduler*> scheds;
206212
{
207213
scoped_lock with(sched_lock);
214+
killed = true;
208215
for (sched_map::iterator iter = sched_table.begin();
209216
iter != sched_table.end(); iter++) {
210217
scheds.push_back(iter->second);
211218
}
212219
}
213220

214-
// FIXME (#2671): This is not a foolproof way to kill all tasks
215-
// while ensuring that no new tasks or schedulers are created in the
216-
// meantime that keep the scheduler alive.
217221
for (std::vector<rust_scheduler*>::iterator iter = scheds.begin();
218222
iter != scheds.end(); iter++) {
219223
(*iter)->kill_all_tasks();

src/rt/rust_kernel.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ class rust_kernel {
7272
lock_and_signal rval_lock;
7373
int rval;
7474

75-
// Protects max_sched_id and sched_table, join_list
75+
// Protects max_sched_id and sched_table, join_list, killed
7676
lock_and_signal sched_lock;
7777
// The next scheduler id
7878
rust_sched_id max_sched_id;
@@ -81,6 +81,10 @@ class rust_kernel {
8181
sched_map sched_table;
8282
// A list of scheduler ids that are ready to exit
8383
std::vector<rust_sched_id> join_list;
84+
// Whether or not the runtime has to die (triggered when the root/main
85+
// task group fails). This propagates to all new schedulers and tasks
86+
// created after it is set.
87+
bool killed;
8488

8589
rust_sched_reaper sched_reaper;
8690
// The single-threaded scheduler that uses the main thread

src/rt/rust_sched_launcher.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,36 @@
44

55
const size_t SCHED_STACK_SIZE = 1024*100;
66

7-
rust_sched_launcher::rust_sched_launcher(rust_scheduler *sched, int id)
7+
rust_sched_launcher::rust_sched_launcher(rust_scheduler *sched, int id,
8+
bool killed)
89
: kernel(sched->kernel),
9-
sched_loop(sched, id),
10+
sched_loop(sched, id, killed),
1011
driver(&sched_loop) {
1112
}
1213

1314
rust_thread_sched_launcher::rust_thread_sched_launcher(rust_scheduler *sched,
14-
int id)
15-
: rust_sched_launcher(sched, id),
15+
int id, bool killed)
16+
: rust_sched_launcher(sched, id, killed),
1617
rust_thread(SCHED_STACK_SIZE) {
1718
}
1819

1920
rust_manual_sched_launcher::rust_manual_sched_launcher(rust_scheduler *sched,
20-
int id)
21-
: rust_sched_launcher(sched, id) {
21+
int id, bool killed)
22+
: rust_sched_launcher(sched, id, killed) {
2223
}
2324

2425
rust_sched_launcher *
25-
rust_thread_sched_launcher_factory::create(rust_scheduler *sched, int id) {
26+
rust_thread_sched_launcher_factory::create(rust_scheduler *sched, int id,
27+
bool killed) {
2628
return new(sched->kernel, "rust_thread_sched_launcher")
27-
rust_thread_sched_launcher(sched, id);
29+
rust_thread_sched_launcher(sched, id, killed);
2830
}
2931

3032
rust_sched_launcher *
31-
rust_manual_sched_launcher_factory::create(rust_scheduler *sched, int id) {
33+
rust_manual_sched_launcher_factory::create(rust_scheduler *sched, int id,
34+
bool killed) {
3235
assert(launcher == NULL && "I can only track one sched_launcher");
3336
launcher = new(sched->kernel, "rust_manual_sched_launcher")
34-
rust_manual_sched_launcher(sched, id);
37+
rust_manual_sched_launcher(sched, id, killed);
3538
return launcher;
3639
}

src/rt/rust_sched_launcher.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class rust_sched_launcher : public kernel_owned<rust_sched_launcher> {
1717
rust_sched_driver driver;
1818

1919
public:
20-
rust_sched_launcher(rust_scheduler *sched, int id);
20+
rust_sched_launcher(rust_scheduler *sched, int id, bool killed);
2121
virtual ~rust_sched_launcher() { }
2222

2323
virtual void start() = 0;
@@ -29,15 +29,15 @@ class rust_thread_sched_launcher
2929
:public rust_sched_launcher,
3030
private rust_thread {
3131
public:
32-
rust_thread_sched_launcher(rust_scheduler *sched, int id);
32+
rust_thread_sched_launcher(rust_scheduler *sched, int id, bool killed);
3333
virtual void start() { rust_thread::start(); }
3434
virtual void join() { rust_thread::join(); }
3535
virtual void run() { driver.start_main_loop(); }
3636
};
3737

3838
class rust_manual_sched_launcher : public rust_sched_launcher {
3939
public:
40-
rust_manual_sched_launcher(rust_scheduler *sched, int id);
40+
rust_manual_sched_launcher(rust_scheduler *sched, int id, bool killed);
4141
virtual void start() { }
4242
virtual void join() { }
4343
rust_sched_driver *get_driver() { return &driver; };
@@ -47,13 +47,14 @@ class rust_sched_launcher_factory {
4747
public:
4848
virtual ~rust_sched_launcher_factory() { }
4949
virtual rust_sched_launcher *
50-
create(rust_scheduler *sched, int id) = 0;
50+
create(rust_scheduler *sched, int id, bool killed) = 0;
5151
};
5252

5353
class rust_thread_sched_launcher_factory
5454
: public rust_sched_launcher_factory {
5555
public:
56-
virtual rust_sched_launcher *create(rust_scheduler *sched, int id);
56+
virtual rust_sched_launcher *create(rust_scheduler *sched, int id,
57+
bool killed);
5758
};
5859

5960
class rust_manual_sched_launcher_factory
@@ -62,7 +63,8 @@ class rust_manual_sched_launcher_factory
6263
rust_manual_sched_launcher *launcher;
6364
public:
6465
rust_manual_sched_launcher_factory() : launcher(NULL) { }
65-
virtual rust_sched_launcher *create(rust_scheduler *sched, int id);
66+
virtual rust_sched_launcher *create(rust_scheduler *sched, int id,
67+
bool killed);
6668
rust_sched_driver *get_driver() {
6769
assert(launcher != NULL);
6870
return launcher->get_driver();

src/rt/rust_sched_loop.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@ const size_t C_STACK_SIZE = 1024*1024;
1313

1414
bool rust_sched_loop::tls_initialized = false;
1515

16-
rust_sched_loop::rust_sched_loop(rust_scheduler *sched,int id) :
16+
rust_sched_loop::rust_sched_loop(rust_scheduler *sched, int id, bool killed) :
1717
_log(this),
1818
id(id),
1919
should_exit(false),
2020
cached_c_stack(NULL),
2121
dead_task(NULL),
22+
killed(killed),
2223
pump_signal(NULL),
2324
kernel(sched->kernel),
2425
sched(sched),
@@ -63,6 +64,8 @@ rust_sched_loop::kill_all_tasks() {
6364

6465
{
6566
scoped_lock with(lock);
67+
// Any task created after this will be killed. See transition, below.
68+
killed = true;
6669

6770
for (size_t i = 0; i < running_tasks.length(); i++) {
6871
all_tasks.push_back(running_tasks[i]);
@@ -319,6 +322,11 @@ rust_sched_loop::transition(rust_task *task,
319322
}
320323
task->set_state(dst, cond, cond_name);
321324

325+
// If the entire runtime is failing, newborn tasks must be doomed.
326+
if (src == task_state_newborn && killed) {
327+
task->kill_inner();
328+
}
329+
322330
pump_loop();
323331
}
324332

src/rt/rust_sched_loop.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct rust_sched_loop
6060
rust_task_list running_tasks;
6161
rust_task_list blocked_tasks;
6262
rust_task *dead_task;
63+
bool killed;
6364

6465
rust_signal *pump_signal;
6566

@@ -91,7 +92,7 @@ struct rust_sched_loop
9192

9293
// Only a pointer to 'name' is kept, so it must live as long as this
9394
// domain.
94-
rust_sched_loop(rust_scheduler *sched, int id);
95+
rust_sched_loop(rust_scheduler *sched, int id, bool killed);
9596
void activate(rust_task *task);
9697
rust_log & get_log();
9798
void fail();
@@ -107,6 +108,7 @@ struct rust_sched_loop
107108
void log_state();
108109

109110
void kill_all_tasks();
111+
bool doomed();
110112

111113
rust_task *create_task(rust_task *spawner, const char *name);
112114

src/rt/rust_scheduler.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ rust_scheduler::rust_scheduler(rust_kernel *kernel,
99
size_t num_threads,
1010
rust_sched_id id,
1111
bool allow_exit,
12+
bool killed,
1213
rust_sched_launcher_factory *launchfac) :
1314
kernel(kernel),
1415
live_threads(num_threads),
@@ -18,7 +19,7 @@ rust_scheduler::rust_scheduler(rust_kernel *kernel,
1819
num_threads(num_threads),
1920
id(id)
2021
{
21-
create_task_threads(launchfac);
22+
create_task_threads(launchfac, killed);
2223
}
2324

2425
rust_scheduler::~rust_scheduler() {
@@ -27,8 +28,8 @@ rust_scheduler::~rust_scheduler() {
2728

2829
rust_sched_launcher *
2930
rust_scheduler::create_task_thread(rust_sched_launcher_factory *launchfac,
30-
int id) {
31-
rust_sched_launcher *thread = launchfac->create(this, id);
31+
int id, bool killed) {
32+
rust_sched_launcher *thread = launchfac->create(this, id, killed);
3233
KLOG(kernel, kern, "created task thread: " PTR ", id: %d",
3334
thread, id);
3435
return thread;
@@ -41,11 +42,12 @@ rust_scheduler::destroy_task_thread(rust_sched_launcher *thread) {
4142
}
4243

4344
void
44-
rust_scheduler::create_task_threads(rust_sched_launcher_factory *launchfac) {
45+
rust_scheduler::create_task_threads(rust_sched_launcher_factory *launchfac,
46+
bool killed) {
4547
KLOG(kernel, kern, "Using %d scheduler threads.", num_threads);
4648

4749
for(size_t i = 0; i < num_threads; ++i) {
48-
threads.push(create_task_thread(launchfac, i));
50+
threads.push(create_task_thread(launchfac, i, killed));
4951
}
5052
}
5153

src/rt/rust_scheduler.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,20 @@ class rust_scheduler : public kernel_owned<rust_scheduler> {
3434

3535
rust_sched_id id;
3636

37-
void create_task_threads(rust_sched_launcher_factory *launchfac);
37+
void create_task_threads(rust_sched_launcher_factory *launchfac,
38+
bool killed);
3839
void destroy_task_threads();
3940

4041
rust_sched_launcher *
41-
create_task_thread(rust_sched_launcher_factory *launchfac, int id);
42+
create_task_thread(rust_sched_launcher_factory *launchfac, int id,
43+
bool killed);
4244
void destroy_task_thread(rust_sched_launcher *thread);
4345

4446
void exit();
4547

4648
public:
4749
rust_scheduler(rust_kernel *kernel, size_t num_threads,
48-
rust_sched_id id, bool allow_exit,
50+
rust_sched_id id, bool allow_exit, bool killed,
4951
rust_sched_launcher_factory *launchfac);
5052
~rust_scheduler();
5153

src/rt/rust_task.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,8 +257,17 @@ rust_task::yield(bool *killed) {
257257
void
258258
rust_task::kill() {
259259
scoped_lock with(lifecycle_lock);
260+
kill_inner();
261+
}
262+
263+
void rust_task::kill_inner() {
264+
lifecycle_lock.must_have_lock();
260265

261-
// XXX: bblum: kill/kill race
266+
// Multiple kills should be able to safely race, but check anyway.
267+
if (killed) {
268+
LOG(this, task, "task %s @0x%" PRIxPTR " already killed", name, this);
269+
return;
270+
}
262271

263272
// Note the distinction here: kill() is when you're in an upcall
264273
// from task A and want to force-fail task B, you do B->kill().

src/rt/rust_task.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ rust_task : public kernel_owned<rust_task>
264264

265265
// Fail this task (assuming caller-on-stack is different task).
266266
void kill();
267+
void kill_inner();
267268

268269
// Indicates that we've been killed and now is an apropriate
269270
// time to fail as a result

0 commit comments

Comments
 (0)