Skip to content

Commit acc4eb1

Browse files
[llvm][cas] Implement a CAS stress tester
This is a basic stress tester for the CAS ObjectStore. It inserts random data into the CAS and validates the result under several randomly generated configurations. It exercises: * multi-threaded insertion * multi-process insertion * randomly killing the subprocesses that are inserting data. In every case it makes sure the CAS is not left in an invalid state. Suggested usage: ``` LLVM_CAS_LOG=2 llvm-cas-test --cas=... --print-config ```
1 parent 577b96c commit acc4eb1

File tree

7 files changed

+328
-53
lines changed

7 files changed

+328
-53
lines changed

llvm/test/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,9 @@ set(LLVM_TEST_DEPENDS
7979
llvm-bitcode-strip
8080
llvm-c-test
8181
llvm-cas
82-
llvm-cas-object-format
8382
llvm-cas-dump
83+
llvm-cas-object-format
84+
llvm-cas-test
8485
llvm-cat
8586
llvm-cfi-verify
8687
llvm-cgdata
Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
# REQUIRES: ondisk_cas
2-
31
# Multi-threaded test that CAS lock files protecting the shared data are working.
42

53
# RUN: rm -rf %t/cas
6-
# RUN: llvm-cas -cas %t/cas -check-lock-files
4+
# RUN: llvm-cas-test -cas %t/cas -check-lock-files
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
if not config.have_ondisk_cas:
2+
config.unsupported = True
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
REQUIRES: expensive_checks
2+
3+
RUN: rm -rf %t.cas
4+
RUN: llvm-cas-test --cas %t.cas --timeout 60
5+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
set(LLVM_LINK_COMPONENTS
2+
CAS
3+
Support
4+
)
5+
add_llvm_tool(llvm-cas-test
6+
llvm-cas-test.cpp
7+
)
Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
//===- llvm-cas-test.cpp - CAS stress tester ------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
#include "llvm/ADT/BitmaskEnum.h"
9+
#include "llvm/CAS/ActionCache.h"
10+
#include "llvm/CAS/BuiltinUnifiedCASDatabases.h"
11+
#include "llvm/CAS/ObjectStore.h"
12+
#include "llvm/Support/CommandLine.h"
13+
#include "llvm/Support/Path.h"
14+
#include "llvm/Support/Program.h"
15+
#include "llvm/Support/RandomNumberGenerator.h"
16+
#include "llvm/Support/ThreadPool.h"
17+
18+
using namespace llvm;
19+
using namespace llvm::cas;
20+
21+
/// Top-level action performed by the tool; exactly one is selected per run.
enum CommandKind {
  StressTest = 0,     ///< Run randomized test rounds against the CAS.
  GenerateData = 1,   ///< Fill the CAS with random data.
  CheckLockFiles = 2, ///< Check the CAS lock files.
};
26+
27+
// Action selector. The option itself is unnamed: the action is chosen by
// passing one of the enumerator spellings listed below, and stress testing is
// used when none is given.
static cl::opt<CommandKind> Command(
    cl::desc("choose command action:"),
    cl::values(
        clEnumValN(StressTest, "stress-test", "stress test CAS"),
        clEnumValN(GenerateData, "gen-data", "fill CAS with random data"),
        clEnumValN(CheckLockFiles, "check-lock-files", "check lock files")),
    cl::init(StressTest));
35+
36+
// CAS configuration.
37+
static cl::opt<std::string>
38+
CASPath("cas", cl::desc("CAS path on disk for testing"), cl::Required);
39+
static cl::opt<bool>
40+
PrintConfig("print-config",
41+
cl::desc("print randomly generated configuration"));
42+
static cl::opt<bool>
43+
ForceKill("force-kill",
44+
cl::desc("force kill subprocess to test termination"));
45+
static cl::opt<bool> KeepLog("keep-log",
46+
cl::desc("keep log and do not rotate the log"));
47+
48+
// CAS stress test parameters.
49+
static cl::opt<unsigned>
50+
OptNumShards("num-shards", cl::desc("number of shards"), cl::init(0));
51+
static cl::opt<unsigned> OptTreeDepth("tree-depth", cl::desc("tree depth"),
52+
cl::init(0));
53+
static cl::opt<unsigned> OptNumChildren("num-children",
54+
cl::desc("number of child nodes"),
55+
cl::init(0));
56+
static cl::opt<unsigned> OptDataLength("data-length", cl::desc("data length"),
57+
cl::init(0));
58+
static cl::opt<unsigned> OptPrecentFile(
59+
"precent-file",
60+
cl::desc("percentage of nodes that is long enough to be file based"),
61+
cl::init(0));
62+
// Default size to be 100MB.
63+
static cl::opt<uint64_t>
64+
SizeLimit("size-limit", cl::desc("CAS size limit (in MB)"), cl::init(100));
65+
// Default timeout 180s.
66+
static cl::opt<uint64_t>
67+
Timeout("timeout", cl::desc("test timeout (in seconds)"), cl::init(180));
68+
69+
enum CASFuzzingSettings : uint8_t {
70+
Default = 0,
71+
Fork = 1, // CAS Data filling happens in subprocesses.
72+
CheckTermination = 1 << 1, // Try kill the subprocess when it fills the data.
73+
74+
Last = UINT8_MAX, // Enum is randomly generated, use MAX to cover all inputs.
75+
LLVM_MARK_AS_BITMASK_ENUM(Last)
76+
};
77+
78+
struct Config {
79+
CASFuzzingSettings Settings = Default;
80+
uint8_t NumShards;
81+
uint8_t NumChildren;
82+
uint8_t TreeDepth;
83+
uint16_t DataLength;
84+
uint16_t PrecentFile;
85+
86+
static constexpr unsigned MaxShards = 20;
87+
static constexpr unsigned MaxChildren = 32;
88+
static constexpr unsigned MaxDepth = 8;
89+
static constexpr unsigned MaxDataLength = 1024 * 4;
90+
91+
void constrainParameters() {
92+
// reduce the size of parameter if they are too big.
93+
NumShards = NumShards % MaxShards;
94+
NumChildren = NumChildren % MaxChildren;
95+
TreeDepth = TreeDepth % MaxDepth;
96+
DataLength = DataLength % MaxDataLength;
97+
PrecentFile = PrecentFile % 100;
98+
99+
if (ForceKill) {
100+
Settings |= Fork;
101+
Settings |= CheckTermination;
102+
}
103+
}
104+
105+
bool extendToFile(uint8_t Seed) const {
106+
return ((float)Seed / (float)UINT8_MAX) > ((float)PrecentFile / 100.0f);
107+
}
108+
109+
void init() {
110+
NumShards = OptNumShards ? OptNumShards : MaxShards;
111+
NumChildren = OptNumChildren ? OptNumChildren : MaxChildren;
112+
TreeDepth = OptTreeDepth ? OptTreeDepth : MaxDepth;
113+
DataLength = OptDataLength ? OptDataLength : MaxDataLength;
114+
PrecentFile = OptPrecentFile;
115+
}
116+
117+
void appendCommandLineOpts(std::vector<std::string> &Cmd) {
118+
Cmd.push_back("--num-shards=" + utostr(NumShards));
119+
Cmd.push_back("--num-children=" + utostr(NumChildren));
120+
Cmd.push_back("--tree-depth=" + utostr(TreeDepth));
121+
Cmd.push_back("--data-length=" + utostr(DataLength));
122+
Cmd.push_back("--precent-file=" + utostr(PrecentFile));
123+
}
124+
125+
void dump() {
126+
llvm::errs() << "## Configuration:"
127+
<< " Fork: " << (bool)(Settings & Fork)
128+
<< " Kill: " << (bool)(Settings & CheckTermination)
129+
<< " NumShards: " << (unsigned)NumShards
130+
<< " TreeDepth: " << (unsigned)TreeDepth
131+
<< " NumChildren: " << (unsigned)NumChildren
132+
<< " DataLength: " << (unsigned)DataLength
133+
<< " PrecentFile: " << (unsigned)PrecentFile << "\n";
134+
}
135+
};
136+
137+
// fill the CAS with random data of specified tree depth and children numbers.
138+
static void fillData(ObjectStore &CAS, const Config &Conf) {
139+
ExitOnError ExitOnErr("llvm-cas-test fill data: ");
140+
DefaultThreadPool ThreadPool(hardware_concurrency());
141+
std::atomic<uint64_t> NumCreated = 0;
142+
for (size_t I = 0; I != Conf.NumShards; ++I) {
143+
ThreadPool.async([&] {
144+
std::vector<ObjectRef> Refs;
145+
for (unsigned Depth = 0; Depth < Conf.TreeDepth; ++Depth) {
146+
unsigned NumNodes = (Conf.TreeDepth - Depth + 1) * Conf.NumChildren + 1;
147+
std::vector<ObjectRef> Created;
148+
Created.reserve(NumNodes);
149+
ArrayRef<ObjectRef> PreviouslyCreated(Refs);
150+
for (unsigned I = 0; I < NumNodes; ++I) {
151+
std::vector<char> Data(Conf.DataLength);
152+
getRandomBytes(Data.data(), Data.size());
153+
// Use the first byte that generated to decide if we should make it
154+
// 64KB bigger and force that into a file based storage.
155+
if (Conf.extendToFile(Data[0]))
156+
Data.resize(64LL * 1024LL + Conf.DataLength);
157+
158+
if (Depth == 0) {
159+
auto Ref = ExitOnErr(CAS.store({}, Data));
160+
Created.push_back(Ref);
161+
} else {
162+
auto Parent = PreviouslyCreated.slice(I, Conf.NumChildren);
163+
auto Ref = ExitOnErr(CAS.store(Parent, Data));
164+
Created.push_back(Ref);
165+
}
166+
++NumCreated;
167+
}
168+
Refs.swap(Created);
169+
}
170+
});
171+
}
172+
ThreadPool.wait();
173+
}
174+
175+
static int genData() {
176+
ExitOnError ExitOnErr("llvm-cas-test --gen-data: ");
177+
178+
Config Conf;
179+
Conf.init();
180+
181+
auto DB = ExitOnErr(cas::createOnDiskUnifiedCASDatabases(CASPath));
182+
fillData(*DB.first, Conf);
183+
184+
return 0;
185+
}
186+
187+
static int runOneTest(const char *Argv0) {
188+
ExitOnError ExitOnErr("llvm-cas-test: ");
189+
190+
Config Conf;
191+
getRandomBytes(&Conf, sizeof(Conf));
192+
Conf.constrainParameters();
193+
194+
if (PrintConfig)
195+
Conf.dump();
196+
197+
// Start with fresh log if --keep-log is not used.
198+
if (!KeepLog) {
199+
static constexpr StringLiteral LogFile = "v1.log";
200+
SmallString<256> LogPath(CASPath);
201+
llvm::sys::path::append(LogPath, LogFile);
202+
llvm::sys::fs::remove(LogPath);
203+
}
204+
205+
auto DB = ExitOnErr(cas::createOnDiskUnifiedCASDatabases(CASPath));
206+
auto &CAS = DB.first;
207+
208+
// Size limit in MB.
209+
ExitOnErr(CAS->setSizeLimit(SizeLimit * 1024 * 1024));
210+
if (Conf.Settings & Fork) {
211+
// fill data using sub processes.
212+
std::string MainExe = sys::fs::getMainExecutable(Argv0, &CASPath);
213+
std::vector<std::string> Args = {MainExe, "--gen-data", "--cas=" + CASPath};
214+
Conf.appendCommandLineOpts(Args);
215+
216+
std::vector<StringRef> Cmd;
217+
for_each(Args, [&Cmd](const std::string &Arg) { Cmd.push_back(Arg); });
218+
219+
std::vector<sys::ProcessInfo> Subprocesses;
220+
for (int I = 0; I < Conf.NumShards; ++I) {
221+
auto SP = sys::ExecuteNoWait(MainExe, Cmd, std::nullopt);
222+
if (SP.Pid != 0)
223+
Subprocesses.push_back(SP);
224+
}
225+
226+
if (Conf.Settings & CheckTermination) {
227+
for_each(Subprocesses, [](auto &P) {
228+
// Wait 1 second and killed the process.
229+
auto WP = sys::Wait(P, 1);
230+
if (WP.ReturnCode)
231+
llvm::errs() << "subprocess killed successfully\n";
232+
});
233+
} else {
234+
for_each(Subprocesses, [](auto &P) { sys::Wait(P, std::nullopt); });
235+
}
236+
237+
} else {
238+
// in-process fill data.
239+
fillData(*CAS, Conf);
240+
}
241+
242+
// validate and prune in the end.
243+
ExitOnErr(CAS->validate(true));
244+
ExitOnErr(CAS->pruneStorageData());
245+
246+
return 0;
247+
}
248+
249+
static int stressTest(const char *Argv0) {
250+
auto Start = std::chrono::steady_clock::now();
251+
std::chrono::seconds Duration(Timeout);
252+
253+
while (std::chrono::steady_clock::now() - Start < Duration) {
254+
if (int Res = runOneTest(Argv0))
255+
return Res;
256+
}
257+
258+
return 0;
259+
}
260+
261+
static int checkLockFiles() {
262+
ExitOnError ExitOnErr("llvm-cas-test: check-lock-files: ");
263+
264+
SmallString<128> DataPoolPath(CASPath);
265+
sys::path::append(DataPoolPath, "v1.1/v8.data");
266+
267+
auto OpenCASAndGetDataPoolSize = [&]() -> Expected<uint64_t> {
268+
auto Result = createOnDiskUnifiedCASDatabases(CASPath);
269+
if (!Result)
270+
return Result.takeError();
271+
272+
sys::fs::file_status DataStat;
273+
if (std::error_code EC = sys::fs::status(DataPoolPath, DataStat))
274+
ExitOnErr(createFileError(DataPoolPath, EC));
275+
return DataStat.getSize();
276+
};
277+
278+
// Get the normal size of an open CAS data pool to compare against later.
279+
uint64_t OpenSize = ExitOnErr(OpenCASAndGetDataPoolSize());
280+
281+
DefaultThreadPool Pool;
282+
for (int I = 0; I < 1000; ++I) {
283+
Pool.async([&, I] {
284+
uint64_t DataPoolSize = ExitOnErr(OpenCASAndGetDataPoolSize());
285+
if (DataPoolSize < OpenSize)
286+
ExitOnErr(createStringError(
287+
inconvertibleErrorCode(),
288+
StringRef("CAS data file size (" + std::to_string(DataPoolSize) +
289+
") is smaller than expected (" +
290+
std::to_string(OpenSize) + ") in iteration " +
291+
std::to_string(I))));
292+
});
293+
}
294+
295+
Pool.wait();
296+
return 0;
297+
}
298+
299+
int main(int argc, char **argv) {
300+
cl::ParseCommandLineOptions(argc, argv, "llvm-cas-test CAS testing tool\n");
301+
302+
switch (Command) {
303+
case GenerateData:
304+
return genData();
305+
case StressTest:
306+
return stressTest(argv[0]);
307+
case CheckLockFiles:
308+
return checkLockFiles();
309+
}
310+
}

0 commit comments

Comments
 (0)