Skip to content

Commit c1ccd6b

Browse files
anand76facebook-github-bot
anand76
authored and committed
Implement deadline support for MultiGet (facebook#6710)
Summary: Initial implementation of ReadOptions.deadline for MultiGet. If the request takes longer than the deadline, the keys not yet found will be returned with Status::TimedOut(). This implementation enforces the deadline in DBImpl, which is fairly high level. It's best effort and may not check the deadline after every key lookup, but may do so after a batch of keys. In subsequent stages, we will extend this to passing a timeout down to the FileSystem. Pull Request resolved: facebook#6710 Test Plan: Add new unit tests Reviewed By: riversand963 Differential Revision: D21149158 Pulled By: anand1976 fbshipit-source-id: 9f44eecffeb40873f5034ed59a66d21f9f88879e
1 parent 6ee66cf commit c1ccd6b

File tree

5 files changed

+411
-44
lines changed

5 files changed

+411
-44
lines changed

db/db_basic_test.cc

+305-24
Original file line numberDiff line numberDiff line change
@@ -1890,17 +1890,14 @@ TEST_F(DBBasicTest, SkipWALIfMissingTableFiles) {
18901890
}
18911891
#endif // !ROCKSDB_LITE
18921892

1893-
class DBBasicTestWithParallelIO
1894-
: public DBTestBase,
1895-
public testing::WithParamInterface<
1896-
std::tuple<bool, bool, bool, bool, uint32_t>> {
1893+
class DBBasicTestMultiGet : public DBTestBase {
18971894
public:
1898-
DBBasicTestWithParallelIO() : DBTestBase("/db_basic_test_with_parallel_io") {
1899-
bool compressed_cache = std::get<0>(GetParam());
1900-
bool uncompressed_cache = std::get<1>(GetParam());
1901-
compression_enabled_ = std::get<2>(GetParam());
1902-
fill_cache_ = std::get<3>(GetParam());
1903-
uint32_t compression_parallel_threads = std::get<4>(GetParam());
1895+
DBBasicTestMultiGet(std::string test_dir, int num_cfs, bool compressed_cache,
1896+
bool uncompressed_cache, bool compression_enabled,
1897+
bool fill_cache, uint32_t compression_parallel_threads)
1898+
: DBTestBase(test_dir) {
1899+
compression_enabled_ = compression_enabled;
1900+
fill_cache_ = fill_cache;
19041901

19051902
if (compressed_cache) {
19061903
std::shared_ptr<Cache> cache = NewLRUCache(1048576);
@@ -1960,22 +1957,43 @@ class DBBasicTestWithParallelIO
19601957
}
19611958
Reopen(options);
19621959

1963-
std::string zero_str(128, '\0');
1964-
for (int i = 0; i < 100; ++i) {
1965-
// Make the value compressible. A purely random string doesn't compress
1966-
// and the resultant data block will not be compressed
1967-
values_.emplace_back(RandomString(&rnd, 128) + zero_str);
1968-
assert(Put(Key(i), values_[i]) == Status::OK());
1960+
if (num_cfs > 1) {
1961+
for (int cf = 0; cf < num_cfs; ++cf) {
1962+
cf_names_.emplace_back("cf" + std::to_string(cf));
1963+
}
1964+
CreateColumnFamilies(cf_names_, options);
1965+
cf_names_.emplace_back("default");
19691966
}
1970-
Flush();
19711967

1972-
for (int i = 0; i < 100; ++i) {
1973-
// block cannot gain space by compression
1974-
uncompressable_values_.emplace_back(RandomString(&rnd, 256) + '\0');
1975-
std::string tmp_key = "a" + Key(i);
1976-
assert(Put(tmp_key, uncompressable_values_[i]) == Status::OK());
1968+
std::string zero_str(128, '\0');
1969+
for (int cf = 0; cf < num_cfs; ++cf) {
1970+
for (int i = 0; i < 100; ++i) {
1971+
// Make the value compressible. A purely random string doesn't compress
1972+
// and the resultant data block will not be compressed
1973+
values_.emplace_back(RandomString(&rnd, 128) + zero_str);
1974+
assert(((num_cfs == 1) ? Put(Key(i), values_[i])
1975+
: Put(cf, Key(i), values_[i])) == Status::OK());
1976+
}
1977+
if (num_cfs == 1) {
1978+
Flush();
1979+
} else {
1980+
dbfull()->Flush(FlushOptions(), handles_[cf]);
1981+
}
1982+
1983+
for (int i = 0; i < 100; ++i) {
1984+
// block cannot gain space by compression
1985+
uncompressable_values_.emplace_back(RandomString(&rnd, 256) + '\0');
1986+
std::string tmp_key = "a" + Key(i);
1987+
assert(((num_cfs == 1) ? Put(tmp_key, uncompressable_values_[i])
1988+
: Put(cf, tmp_key, uncompressable_values_[i])) ==
1989+
Status::OK());
1990+
}
1991+
if (num_cfs == 1) {
1992+
Flush();
1993+
} else {
1994+
dbfull()->Flush(FlushOptions(), handles_[cf]);
1995+
}
19771996
}
1978-
Flush();
19791997
}
19801998

19811999
bool CheckValue(int i, const std::string& value) {
@@ -1992,6 +2010,8 @@ class DBBasicTestWithParallelIO
19922010
return false;
19932011
}
19942012

2013+
const std::vector<std::string>& GetCFNames() const { return cf_names_; }
2014+
19952015
int num_lookups() { return uncompressed_cache_->num_lookups(); }
19962016
int num_found() { return uncompressed_cache_->num_found(); }
19972017
int num_inserts() { return uncompressed_cache_->num_inserts(); }
@@ -2008,7 +2028,7 @@ class DBBasicTestWithParallelIO
20082028
static void SetUpTestCase() {}
20092029
static void TearDownTestCase() {}
20102030

2011-
private:
2031+
protected:
20122032
class MyFlushBlockPolicyFactory : public FlushBlockPolicyFactory {
20132033
public:
20142034
MyFlushBlockPolicyFactory() {}
@@ -2143,6 +2163,19 @@ class DBBasicTestWithParallelIO
21432163
std::vector<std::string> values_;
21442164
std::vector<std::string> uncompressable_values_;
21452165
bool fill_cache_;
2166+
std::vector<std::string> cf_names_;
2167+
};
2168+
2169+
class DBBasicTestWithParallelIO
2170+
: public DBBasicTestMultiGet,
2171+
public testing::WithParamInterface<
2172+
std::tuple<bool, bool, bool, bool, uint32_t>> {
2173+
public:
2174+
DBBasicTestWithParallelIO()
2175+
: DBBasicTestMultiGet("/db_basic_test_with_parallel_io", 1,
2176+
std::get<0>(GetParam()), std::get<1>(GetParam()),
2177+
std::get<2>(GetParam()), std::get<3>(GetParam()),
2178+
std::get<4>(GetParam())) {}
21462179
};
21472180

21482181
TEST_P(DBBasicTestWithParallelIO, MultiGet) {
@@ -2363,6 +2396,254 @@ INSTANTIATE_TEST_CASE_P(ParallelIO, DBBasicTestWithParallelIO,
23632396
::testing::Bool(), ::testing::Bool(),
23642397
::testing::Values(1, 4)));
23652398

2399+
// A test class for intercepting random reads and injecting artificial
2400+
// delays. Used for testing the deadline/timeout feature
2401+
class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
2402+
public:
2403+
DBBasicTestMultiGetDeadline()
2404+
: DBBasicTestMultiGet("db_basic_test_multiget_deadline" /*Test dir*/,
2405+
10 /*# of column families*/,
2406+
false /*compressed cache enabled*/,
2407+
true /*uncompressed cache enabled*/,
2408+
true /*compression enabled*/,
2409+
true /*ReadOptions.fill_cache*/,
2410+
1 /*# of parallel compression threads*/) {}
2411+
2412+
// Forward declaration
2413+
class DeadlineFS;
2414+
2415+
class DeadlineRandomAccessFile : public FSRandomAccessFileWrapper {
2416+
public:
2417+
DeadlineRandomAccessFile(DeadlineFS& fs,
2418+
std::unique_ptr<FSRandomAccessFile>& file)
2419+
: FSRandomAccessFileWrapper(file.get()),
2420+
fs_(fs),
2421+
file_(std::move(file)) {}
2422+
2423+
IOStatus Read(uint64_t offset, size_t len, const IOOptions& opts,
2424+
Slice* result, char* scratch, IODebugContext* dbg) const override {
2425+
int delay;
2426+
if (fs_.ShouldDelay(&delay)) {
2427+
Env::Default()->SleepForMicroseconds(delay);
2428+
}
2429+
return FSRandomAccessFileWrapper::Read(offset, len, opts, result, scratch,
2430+
dbg);
2431+
}
2432+
2433+
IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
2434+
const IOOptions& options, IODebugContext* dbg) override {
2435+
int delay;
2436+
if (fs_.ShouldDelay(&delay)) {
2437+
Env::Default()->SleepForMicroseconds(delay);
2438+
}
2439+
return FSRandomAccessFileWrapper::MultiRead(reqs, num_reqs, options, dbg);
2440+
}
2441+
2442+
private:
2443+
DeadlineFS& fs_;
2444+
std::unique_ptr<FSRandomAccessFile> file_;
2445+
};
2446+
2447+
class DeadlineFS : public FileSystemWrapper {
2448+
public:
2449+
DeadlineFS() : FileSystemWrapper(FileSystem::Default()) {}
2450+
~DeadlineFS() = default;
2451+
2452+
IOStatus NewRandomAccessFile(const std::string& fname,
2453+
const FileOptions& opts,
2454+
std::unique_ptr<FSRandomAccessFile>* result,
2455+
IODebugContext* dbg) override {
2456+
std::unique_ptr<FSRandomAccessFile> file;
2457+
IOStatus s;
2458+
2459+
s = target()->NewRandomAccessFile(fname, opts, &file, dbg);
2460+
result->reset(new DeadlineRandomAccessFile(*this, file));
2461+
return s;
2462+
}
2463+
2464+
// Set a vector of {IO counter, delay in microseconds} pairs that control
2465+
// when to inject a delay and duration of the delay
2466+
void SetDelaySequence(const std::vector<std::pair<int, int>>&& seq) {
2467+
int total_delay = 0;
2468+
for (auto& seq_iter : seq) {
2469+
// Ensure no individual delay is > 500ms
2470+
ASSERT_LT(seq_iter.second, 500000);
2471+
total_delay += seq_iter.second;
2472+
}
2473+
// ASSERT total delay is < 1s. This is mainly to keep the test from
2474+
// timing out in CI test frameworks
2475+
ASSERT_LT(total_delay, 1000000);
2476+
delay_seq_ = seq;
2477+
delay_idx_ = 0;
2478+
io_count_ = 0;
2479+
}
2480+
2481+
// Increment the IO counter and return a delay in microseconds
2482+
bool ShouldDelay(int* delay) {
2483+
if (delay_idx_ < delay_seq_.size() &&
2484+
delay_seq_[delay_idx_].first == io_count_++) {
2485+
*delay = delay_seq_[delay_idx_].second;
2486+
delay_idx_++;
2487+
return true;
2488+
}
2489+
return false;
2490+
}
2491+
2492+
private:
2493+
std::vector<std::pair<int, int>> delay_seq_;
2494+
size_t delay_idx_;
2495+
int io_count_;
2496+
};
2497+
2498+
inline void CheckStatus(std::vector<Status>& statuses, size_t num_ok) {
2499+
for (size_t i = 0; i < statuses.size(); ++i) {
2500+
if (i < num_ok) {
2501+
EXPECT_OK(statuses[i]);
2502+
} else {
2503+
EXPECT_EQ(statuses[i], Status::TimedOut());
2504+
}
2505+
}
2506+
}
2507+
};
2508+
2509+
// Exercises ReadOptions.deadline for both the non-batched and the batched
// MultiGet APIs. Each phase empties the block cache, arms a single 200ms IO
// delay at a chosen IO number, sets a deadline 10ms in the future and then
// checks how many leading keys succeed before the rest report
// Status::TimedOut(). Each check runs under a SCOPED_TRACE so that a failure
// inside CheckStatus names the phase it belongs to.
TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
  std::shared_ptr<DBBasicTestMultiGetDeadline::DeadlineFS> fs(
      new DBBasicTestMultiGetDeadline::DeadlineFS());
  std::unique_ptr<Env> env = NewCompositeEnv(fs);
  Options options = CurrentOptions();

  std::shared_ptr<Cache> cache = NewLRUCache(1048576);
  BlockBasedTableOptions table_options;
  table_options.block_cache = cache;
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  options.env = env.get();
  ReopenWithColumnFamilies(GetCFNames(), options);

  // Test the non-batched version of MultiGet with multiple column
  // families
  std::vector<std::string> key_str;
  for (size_t i = 0; i < 5; ++i) {
    key_str.emplace_back(Key(static_cast<int>(i)));
  }
  std::vector<ColumnFamilyHandle*> cfs(key_str.size());
  std::vector<Slice> keys(key_str.size());
  std::vector<std::string> values(key_str.size());
  for (size_t i = 0; i < key_str.size(); ++i) {
    // One key per CF
    cfs[i] = handles_[i];
    keys[i] = Slice(key_str[i].data(), key_str[i].size());
  }
  // Delay the first IO by 200ms
  fs->SetDelaySequence({{0, 200000}});

  ReadOptions ro;
  ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
  std::vector<Status> statuses = dbfull()->MultiGet(ro, cfs, keys, &values);
  {
    SCOPED_TRACE("Non-batched MultiGet");
    // The first key is successful because we check after the lookup, but
    // subsequent keys fail due to deadline exceeded
    CheckStatus(statuses, 1);
  }

  // Clear the cache
  cache->SetCapacity(0);
  cache->SetCapacity(1048576);
  // Test non-batched Multiget with multiple column families and
  // introducing an IO delay in one of the middle CFs
  key_str.clear();
  for (size_t i = 0; i < 10; ++i) {
    key_str.emplace_back(Key(static_cast<int>(i)));
  }
  cfs.resize(key_str.size());
  keys.resize(key_str.size());
  values.resize(key_str.size());
  for (size_t i = 0; i < key_str.size(); ++i) {
    // 2 keys per CF
    cfs[i] = handles_[i / 2];
    keys[i] = Slice(key_str[i].data(), key_str[i].size());
  }
  fs->SetDelaySequence({{1, 200000}});
  ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
  statuses = dbfull()->MultiGet(ro, cfs, keys, &values);
  {
    SCOPED_TRACE("Non-batched 2");
    CheckStatus(statuses, 3);
  }

  // Test batched MultiGet with an IO delay in the first data block read.
  // Both keys in the first CF should succeed as they're in the same data
  // block and would form one batch, and we check for deadline between
  // batches.
  std::vector<PinnableSlice> pin_values(keys.size());
  cache->SetCapacity(0);
  cache->SetCapacity(1048576);
  statuses.clear();
  statuses.resize(keys.size());
  fs->SetDelaySequence({{0, 200000}});
  ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
  dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
                     pin_values.data(), statuses.data());
  {
    SCOPED_TRACE("Batched 1");
    CheckStatus(statuses, 2);
  }

  // Similar to the previous one, but an IO delay in the third CF data block
  // read
  for (PinnableSlice& value : pin_values) {
    value.Reset();
  }
  cache->SetCapacity(0);
  cache->SetCapacity(1048576);
  statuses.clear();
  statuses.resize(keys.size());
  fs->SetDelaySequence({{2, 200000}});
  ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
  dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
                     pin_values.data(), statuses.data());
  {
    SCOPED_TRACE("Batched 2");
    CheckStatus(statuses, 6);
  }

  // Similar to the previous one, but an IO delay in the last but one CF
  for (PinnableSlice& value : pin_values) {
    value.Reset();
  }
  cache->SetCapacity(0);
  cache->SetCapacity(1048576);
  statuses.clear();
  statuses.resize(keys.size());
  fs->SetDelaySequence({{3, 200000}});
  ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
  dbfull()->MultiGet(ro, keys.size(), cfs.data(), keys.data(),
                     pin_values.data(), statuses.data());
  {
    SCOPED_TRACE("Batched 3");
    CheckStatus(statuses, 8);
  }

  // Test batched MultiGet with single CF and lots of keys. Inject delay
  // into the second batch of keys. As each batch is 32, the first 64 keys,
  // i.e first two batches, should succeed and the rest should time out
  for (PinnableSlice& value : pin_values) {
    value.Reset();
  }
  cache->SetCapacity(0);
  cache->SetCapacity(1048576);
  key_str.clear();
  for (size_t i = 0; i < 100; ++i) {
    key_str.emplace_back(Key(static_cast<int>(i)));
  }
  keys.resize(key_str.size());
  pin_values.clear();
  pin_values.resize(key_str.size());
  for (size_t i = 0; i < key_str.size(); ++i) {
    keys[i] = Slice(key_str[i].data(), key_str[i].size());
  }
  statuses.clear();
  statuses.resize(keys.size());
  fs->SetDelaySequence({{1, 200000}});
  ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
  dbfull()->MultiGet(ro, handles_[0], keys.size(), keys.data(),
                     pin_values.data(), statuses.data());
  {
    SCOPED_TRACE("Batched single CF");
    CheckStatus(statuses, 64);
  }
  // Close the DB while |env| (a local) is still alive
  Close();
}
2646+
23662647
} // namespace ROCKSDB_NAMESPACE
23672648

23682649
#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS

0 commit comments

Comments
 (0)