Skip to content

Commit 0ac87c1

Browse files
committed
libroach: avoid recycling encrypted WALs
RocksDB fails recovery if there are any non-empty WALs that have zero readable entries. So we need to make sure `EncryptedEnv::ReuseWritableFile()` cannot produce such files, even if an inopportune crash happens. We do not know how to achieve this while still changing the encryption key for the recycled WAL, so for now this PR works around the problem in `EncryptedEnv::ReuseWritableFile()` by faking recycling. Release note: None
1 parent 7c80217 commit 0ac87c1

File tree

2 files changed

+128
-21
lines changed

2 files changed

+128
-21
lines changed

c-deps/libroach/ccl/encrypted_env_test.cc

Lines changed: 110 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ rocksdb::Status checkFileContents(rocksdb::Env* env, const std::string& filename
127127

128128
}; // anonymous namespace
129129

130-
TEST(EncryptedEnv, FileOps) {
130+
TEST(EncryptedEnv, BasicOps) {
131131
// Check various file operations against an encrypted env.
132132
// This exercises the env/file registry interaction.
133133
// We need to use a real underlying env as the MemEnv takes a **lot**
@@ -222,7 +222,113 @@ TEST(EncryptedEnv, FileOps) {
222222
EXPECT_OK(checkFileContents(encrypted_env.get(), file1, contents3));
223223
// Check with the plain env.
224224
EXPECT_OK(checkFileContents(env, file1, contents3));
225-
// Try reading the still-encrypted file. We dropped the key on the floor.
226-
EXPECT_ERR(checkFileContents(encrypted_env.get(), file2, contents),
227-
"key_manager does not have a key with ID .*");
225+
EXPECT_OK(checkFileContents(env, file2, contents3));
226+
}
227+
228+
TEST(EncryptedEnv, ReuseWritableFileUnderlyingFailure) {
229+
// Ensure all WALs are either empty or decryptable no matter when/whether
230+
// `EncryptedEnv::ReuseWritableFile` fails. See comment above
231+
// `EncryptedEnv::ReuseWritableFile`'s definition for details on why this
232+
// guarantee is necessary for crash-safety.
233+
234+
// `ReuseWritableFileInjectionEnv` is used as the env underlying an
235+
// `EncryptedEnv`. It injects faults in the operations used by
236+
// `EncryptedEnv::ReuseWritableFile()`.
237+
class ReuseWritableFileInjectionEnv : public rocksdb::EnvWrapper {
238+
public:
239+
enum class Mode {
240+
kNone,
241+
kFailCreation,
242+
kFailDeletion,
243+
kEnd,
244+
};
245+
246+
explicit ReuseWritableFileInjectionEnv(Env* target) : EnvWrapper(target), mode_(Mode::kNone) {}
247+
248+
void set_mode(Mode mode) { mode_ = mode; }
249+
250+
rocksdb::Status NewWritableFile(const std::string& fname,
251+
std::unique_ptr<rocksdb::WritableFile>* result,
252+
const rocksdb::EnvOptions& options) override {
253+
if (mode_ == Mode::kFailCreation) {
254+
return rocksdb::Status::IOError("injected error");
255+
}
256+
return rocksdb::EnvWrapper::NewWritableFile(fname, result, options);
257+
}
258+
259+
rocksdb::Status DeleteFile(const std::string& fname) override {
260+
if (mode_ == Mode::kFailDeletion) {
261+
return rocksdb::Status::IOError("injected error");
262+
}
263+
return rocksdb::EnvWrapper::DeleteFile(fname);
264+
}
265+
266+
private:
267+
Mode mode_;
268+
};
269+
270+
for (int i = 0; i < static_cast<int>(ReuseWritableFileInjectionEnv::Mode::kEnd); ++i) {
271+
auto mode = static_cast<ReuseWritableFileInjectionEnv::Mode>(i);
272+
ReuseWritableFileInjectionEnv env(rocksdb::Env::Default());
273+
auto key_manager = new MemKeyManager(MakeAES128Key(&env));
274+
auto stream = new CTRCipherStreamCreator(key_manager, enginepb::Data);
275+
276+
auto tmpdir = std::unique_ptr<TempDirHandler>(new TempDirHandler());
277+
278+
auto file_registry = std::unique_ptr<FileRegistry>(
279+
new FileRegistry(&env, tmpdir->Path(""), false /* read-only */));
280+
EXPECT_OK(file_registry->Load());
281+
282+
std::unique_ptr<rocksdb::Env> encrypted_env(
283+
rocksdb_utils::NewEncryptedEnv(&env, file_registry.get(), stream));
284+
285+
auto old_file = tmpdir->Path("foo1");
286+
std::string contents("this is a file!");
287+
ASSERT_OK(rocksdb::WriteStringToFile(encrypted_env.get(), contents, old_file, false));
288+
289+
auto new_file = tmpdir->Path("foo2");
290+
env.set_mode(mode);
291+
{
292+
std::unique_ptr<rocksdb::WritableFile> res;
293+
if (i == static_cast<int>(ReuseWritableFileInjectionEnv::Mode::kNone)) {
294+
EXPECT_OK(
295+
encrypted_env->ReuseWritableFile(new_file, old_file, &res, rocksdb::EnvOptions()));
296+
} else {
297+
EXPECT_TRUE(
298+
encrypted_env->ReuseWritableFile(new_file, old_file, &res, rocksdb::EnvOptions())
299+
.IsIOError());
300+
}
301+
}
302+
303+
switch (mode) {
304+
case ReuseWritableFileInjectionEnv::Mode::kNone: {
305+
// In success scenario, the new file should be empty and the old file should be deleted.
306+
std::string new_file_contents;
307+
EXPECT_OK(rocksdb::ReadFileToString(encrypted_env.get(), new_file, &new_file_contents));
308+
EXPECT_STREQ("", new_file_contents.c_str());
309+
EXPECT_TRUE(encrypted_env->FileExists(old_file).IsNotFound());
310+
break;
311+
}
312+
case ReuseWritableFileInjectionEnv::Mode::kFailCreation:
313+
case ReuseWritableFileInjectionEnv::Mode::kFailDeletion: {
314+
// In failure scenarios, all existing files must be empty or readable.
315+
for (const auto& file : {old_file, new_file}) {
316+
auto exists_status = encrypted_env->FileExists(file);
317+
if (exists_status.ok()) {
318+
std::string file_contents;
319+
rocksdb::ReadFileToString(encrypted_env.get(), file, &file_contents);
320+
if (!file_contents.empty()) {
321+
EXPECT_STREQ(contents.c_str(), file_contents.c_str());
322+
}
323+
} else {
324+
EXPECT_TRUE(exists_status.IsNotFound());
325+
}
326+
}
327+
break;
328+
}
329+
case ReuseWritableFileInjectionEnv::Mode::kEnd:
330+
assert(false);
331+
break;
332+
}
333+
}
228334
}

c-deps/libroach/rocksdbutils/env_encryption.cc

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -431,31 +431,32 @@ class EncryptedEnv : public rocksdb::EnvWrapper {
431431
return rocksdb::Status::OK();
432432
}
433433

434-
// Reuse an existing file by renaming it and opening it as writable.
434+
// `ReuseWritableFile` is used for recycling WAL files. With `EncryptedEnv`, we
435+
// currently do not know a crash-safe way to recycle WALs. The workaround is to
436+
// pretend to recycle in this function by producing a new file and throwing away
437+
// the old one. This method of "recycling" sacrifices performance for crash-safety.
438+
//
439+
// True recycling is not crash-safe because RocksDB recovery fails if it encounters
440+
// a non-empty WAL with zero readable entries. With true recycling, we need to change
441+
// the encryption key. Imagine a crash happened after we renamed the file and assigned
442+
// a new key, but before we wrote any data to the recycled file. Then, that file would
443+
// appear to recovery as containing all garbage data, causing it to fail.
444+
//
445+
// TODO(ajkr): Try to change how RocksDB handles WALs with zero readable entries:
446+
// https://www.facebook.com/groups/rocksdb.dev/permalink/2405909486174218/
447+
// Then we can have both performance and crash-safety.
435448
virtual rocksdb::Status ReuseWritableFile(const std::string& fname, const std::string& old_fname,
436449
std::unique_ptr<rocksdb::WritableFile>* result,
437450
const rocksdb::EnvOptions& options) override {
438451
result->reset();
439452
if (options.use_mmap_writes) {
440453
return rocksdb::Status::InvalidArgument();
441454
}
442-
// Open file using underlying Env implementation
443-
std::unique_ptr<rocksdb::WritableFile> underlying;
444-
rocksdb::Status status =
445-
rocksdb::EnvWrapper::ReuseWritableFile(fname, old_fname, &underlying, options);
446-
if (!status.ok()) {
447-
return status;
455+
auto status = NewWritableFile(fname, result, options);
456+
if (status.ok()) {
457+
status = DeleteFile(old_fname);
448458
}
449-
450-
// Create cipher stream
451-
std::unique_ptr<BlockAccessCipherStream> stream;
452-
status = CreateCipherStream(fname, true /* new_file */, &stream);
453-
if (!status.ok()) {
454-
return status;
455-
}
456-
(*result) = std::unique_ptr<rocksdb::WritableFile>(
457-
new EncryptedWritableFile(underlying.release(), stream.release()));
458-
return rocksdb::Status::OK();
459+
return status;
459460
}
460461

461462
// Open `fname` for random read and write, if file dont exist the file

0 commit comments

Comments
 (0)