This repository was archived by the owner on Nov 4, 2024. It is now read-only.

Commit 2f4120f

reduce memory consumption of default benchmark configurations; update doc
1 parent 7522e98

2 files changed: +36, -150 lines

volatile/benchmark/bench.cpp

Lines changed: 18 additions & 19 deletions
@@ -22,9 +22,9 @@ using namespace KVDK_NAMESPACE;
 // Benchmark configs
 DEFINE_string(path, "/mnt/pmem0/kvdk", "Instance path");
 
-DEFINE_uint64(num_kv, (1 << 30), "Number of KVs to place");
+DEFINE_uint64(num_kv, (1 << 23), "Number of KVs to place");
 
-DEFINE_uint64(num_operations, (1 << 30),
+DEFINE_uint64(num_operations, (1 << 20),
               "Number of total operations. "
               "num_kv will override this when benchmarking fill/insert");
 
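The new defaults shrink both knobs by two to three orders of magnitude, and they expand to exactly the values used in the updated doc examples below. A quick standalone check (plain C++, independent of bench.cpp):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Old defaults: both flags were (1 << 30), i.e. 1073741824.
  // New defaults match -num_kv=8388608 and -num_operations=1048576
  // in the updated benchmark.md examples.
  std::uint64_t num_kv = 1ULL << 23;
  std::uint64_t num_operations = 1ULL << 20;
  std::printf("num_kv = %llu, num_operations = %llu\n",
              (unsigned long long)num_kv, (unsigned long long)num_operations);
  return 0;
}
```
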
@@ -63,14 +63,10 @@ DEFINE_string(key_distribution, "random",
               "be ignored and only uniform distribution will be used");
 
 // Engine configs
-DEFINE_bool(
-    populate, false,
-    "Populate pmem space while creating a new instance. This can improve write "
-    "performance in runtime, but will take long time to init the instance");
-
 DEFINE_uint64(max_access_threads, 64, "Max access threads of the instance");
 
-DEFINE_uint64(space, (256ULL << 30), "Max usable PMem space of the instance");
+DEFINE_uint64(hash_bucket_num, (1 << 20),
+              "The number of initial buckets in hash table");
 
 DEFINE_bool(opt_large_sorted_collection_restore, true,
             " Optional optimization strategy which Multi-thread recovery a "
@@ -129,6 +125,7 @@ double existing_keys_ratio = 0;
 std::uint64_t batch_size = 0;
 bool scan = false;
 std::uint64_t num_operations = 0;
+std::uint64_t benchmark_threads = 0;
 
 std::uint64_t max_key = UINT64_MAX;
 extd::zipfian_distribution<std::uint64_t>* zipf = nullptr;
@@ -422,6 +419,7 @@ void InitializeBenchmark() {
   if (bench_data_type != DataType::Blackhole) {
     Configs configs;
     configs.max_access_threads = FLAGS_max_access_threads;
+    configs.hash_bucket_num = FLAGS_hash_bucket_num;
     configs.opt_large_sorted_collection_recovery =
         FLAGS_opt_large_sorted_collection_restore;
     configs.dest_memory_nodes = FLAGS_dest_memory_nodes;
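The new flag is threaded straight into the engine configs above. As a minimal sketch, this is roughly how a standalone program would pass the same settings when opening an engine; the exact field names and `Open` signature are assumptions to be checked against include/kvdk/configs.hpp and include/kvdk/engine.hpp:

```cpp
#include <cstdio>

#include "kvdk/engine.hpp"

int main() {
  kvdk::Configs configs;
  configs.max_access_threads = 64;    // default of -max_access_threads
  configs.hash_bucket_num = 1 << 20;  // new default of -hash_bucket_num

  kvdk::Engine* engine = nullptr;
  kvdk::Status s =
      kvdk::Engine::Open("./kvdk_bench_dir", &engine, configs, stdout);
  if (s != kvdk::Status::Ok) {
    return 1;
  }
  delete engine;  // closes the instance
  return 0;
}
```
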
@@ -483,18 +481,19 @@ void ProcessBenchmarkConfigs() {
     throw std::invalid_argument{"value size too large"};
   }
 
-  random_engines.resize(FLAGS_threads);
+  benchmark_threads = fill ? FLAGS_max_access_threads : FLAGS_threads;
+  random_engines.resize(benchmark_threads);
   if (fill) {
     assert(read_ratio == 0);
     key_dist = KeyDistribution::Range;
-    operations_per_thread = FLAGS_num_kv / FLAGS_max_access_threads + 1;
+    operations_per_thread = FLAGS_num_kv / benchmark_threads + 1;
     ranges.clear();
-    for (size_t i = 0; i < FLAGS_max_access_threads; i++) {
+    for (size_t i = 0; i < benchmark_threads; i++) {
       ranges.emplace_back(i * operations_per_thread,
                           (i + 1) * operations_per_thread);
     }
   } else {
-    operations_per_thread = num_operations / FLAGS_threads;
+    operations_per_thread = num_operations / benchmark_threads;
     if (FLAGS_key_distribution == "random") {
       key_dist = KeyDistribution::Uniform;
     } else if (FLAGS_key_distribution == "zipf") {
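When filling, benchmark_threads equals FLAGS_max_access_threads, so the partitioning behavior is unchanged: each thread still fills one contiguous key range, rounded up so the ranges cover all of num_kv. A small standalone illustration of that arithmetic (local names only, not bench.cpp globals):

```cpp
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  const std::uint64_t num_kv = 1ULL << 23;     // new -num_kv default
  const std::uint64_t benchmark_threads = 64;  // == max_access_threads on fill
  // "+ 1" rounds up so the union of the ranges covers all num_kv keys.
  const std::uint64_t per_thread = num_kv / benchmark_threads + 1;

  std::vector<std::pair<std::uint64_t, std::uint64_t>> ranges;
  for (std::uint64_t i = 0; i < benchmark_threads; i++) {
    ranges.emplace_back(i * per_thread, (i + 1) * per_thread);
  }
  std::printf("thread 0 fills keys [%llu, %llu)\n",
              (unsigned long long)ranges[0].first,
              (unsigned long long)ranges[0].second);
  return 0;
}
```
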
@@ -527,12 +526,12 @@ void ResetBenchmarkData() {
   read_not_found = 0;
   has_timed_out = false;
   has_finished.clear();
-  has_finished.resize(FLAGS_threads, 0);
+  has_finished.resize(benchmark_threads, 0);
 
   if (FLAGS_latency) {
     printf("calculate latencies\n");
     latencies.clear();
-    latencies.resize(FLAGS_threads, std::vector<std::uint64_t>(MAX_LAT, 0));
+    latencies.resize(benchmark_threads, std::vector<std::uint64_t>(MAX_LAT, 0));
   }
 }
 

@@ -541,9 +540,9 @@ void RunBenchmark() {
   ResetBenchmarkData();
 
   size_t write_threads =
-      fill ? FLAGS_max_access_threads
-           : FLAGS_threads - read_ratio * 100 * FLAGS_threads / 100;
-  int read_threads = FLAGS_threads - write_threads;
+      fill ? benchmark_threads
+           : benchmark_threads - read_ratio * 100 * benchmark_threads / 100;
+  int read_threads = fill ? 0 : benchmark_threads - write_threads;
   std::vector<std::thread> ts;
 
   switch (bench_data_type) {
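Besides switching to benchmark_threads, this rewrite forces read_threads to zero on fill; previously it was FLAGS_threads - write_threads even when filling, which is nonzero whenever -threads differs from -max_access_threads. A standalone sketch of the non-fill split (mirroring the expression above, not calling into bench.cpp):

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  // Non-fill case, e.g. -threads=10 -read_ratio=0.5:
  const std::size_t benchmark_threads = 10;
  const double read_ratio = 0.5;

  // read_ratio decides how many benchmark threads issue reads/scans;
  // the remainder issue writes.
  std::size_t write_threads =
      benchmark_threads - read_ratio * 100 * benchmark_threads / 100;
  int read_threads = (int)(benchmark_threads - write_threads);

  // Prints: write_threads=5 read_threads=5
  std::printf("write_threads=%zu read_threads=%d\n", write_threads,
              read_threads);
  return 0;
}
```
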
@@ -587,7 +586,7 @@
   for (size_t i = 0; i < write_threads; i++) {
     ts.emplace_back(DBWrite, i);
   }
-  for (size_t i = write_threads; i < FLAGS_threads; i++) {
+  for (size_t i = write_threads; i < benchmark_threads; i++) {
     ts.emplace_back(scan ? DBScan : DBRead, i);
   }
 
@@ -628,7 +627,7 @@
     if (num_finished == 0 || idx < 2) {
       last_effective_idx = idx;
     }
-    if (num_finished == FLAGS_threads) {
+    if (num_finished == benchmark_threads) {
       break;
     }
     if (!fill && (duration.count() >= FLAGS_timeout * 1000)) {

volatile/doc/benchmark.md

Lines changed: 18 additions & 131 deletions
@@ -2,151 +2,38 @@
 
 To test performance of KVDK, you can run our benchmark tool "bench", the tool is auto-built along with KVDK library in the build dir.
 
-You can manually run individual benchmark follow the examples as shown bellow, or simply run our basic benchmark script "scripts/run_benchmark.py" to test all the basic read/write performance.
-
-To run the script, you shoulf first build kvdk, then run:
-
+Here is an example to run benchmarks on `string` type:
+```bash
+./bench -path=./kvdk_bench_dir -type=string -num_kv=8388608 -num_operations=1048576 -threads=10 -max_access_threads=64 -value_size=120 -latency=1
 ```
-scripts/run_benchmark.py [data_type] [key distribution]
-```
-
-data_type: Which data type to benchmark, it can be string/sorted/hash/list/blackhole/all
 
-key distribution: Distribution of key of the benchmark workloads, it can be random/zipf/all
-## Fill data to new instance
-
-To test performance, we need to first fill key-value pairs to the KVDK instance. Since KVDK did not support cross-socket access yet, we need to bind bench program to a numa node:
-
-numactl --cpunodebind=0 --membind=0 ./bench -fill=1 -value_size=120 -threads=64 -path=/mnt/pmem0/kvdk -space=274877906944 -num=838860800 -max_write_threads=64 -type=string -populate=1
+To benchmark performance when KVs are stored on separated memory nodes, we can use `numactl`:
+```bash
+numactl --cpunodebind=0 --membind=0 ./bench -path=./kvdk_bench_dir -type=string -num_kv=8388608 -num_operations=1048576 -threads=10 -max_access_threads=64 -value_size=120 -latency=1 -dest_memory_nodes=1
+```
 
-This command will fill 83886088 uniform distributed string-type key-value pairs to the KVDK instance that located at /mnt/pmem0/kvdk.
+The above configurations will consume ~7 GB of memory.
 
 Explanation of arguments:
 
--fill: Indicates filling data to a new instance.
-
--threads: Number of threads of benchmark.
+-path: Path at which the KVDK instance is initialized.
 
--space: PMem space that allocate to the KVDK instance.
+-type: Type of key-value pairs to benchmark, it can be string/sorted/hash/list/blackhole.
 
--max_access_threads: Max concurrent access threads in the KVDK instance, set it to the number of the hyper-threads for performance consideration. You can call KVDK API with any number of threads, but if your parallel threads more than max_access_threads, the performance will be degraded due to synchronization cost
+-num_kv: Number of KVs to fill when benchmarking fill/insert.
 
--type: Type of key-value pairs to benchmark, it can be "string", "hash" or "sorted".
+-num_operations: Number of operations when running benchmarks other than fill/insert.
 
--populate: Populate pmem space while creating new KVDK instance for best write performance in runtime, see "include/kvdk/configs.hpp" for explanation.
+-threads: Number of benchmark threads. `max_access_threads` will override this when benchmarking `fill`.
 
-## Test read/write performance
+-max_access_threads: Max number of concurrent access threads in the KVDK instance; set it to the number of hyper-threads for best performance. You can call KVDK APIs from any number of threads, but if more threads run in parallel than max_access_threads, performance will degrade due to synchronization cost.
 
-### Read performance
+-value_size: Length of values in bytes.
 
-After fill the instance, we can test read performance with the command below:
+-latency: Whether to print operation latencies.
 
-numactl --cpunodebind=0 --membind=0 ./bench -fill=0 -time=10 -value_size=120 -threads=64 -read_ratio=1 -existing_keys_ratio=1 -path=/mnt/pmem0/kvdk -space=274877906944 -num=838860800 -max_write_threads=64 -type=string
-
-This will read key-value pairs from the KVDK instance with 48 threads in 10 seconds.
-
-Explanation of arguments:
-
--read_ratio: Ratio of read threads among benchmark threads, for example, if set it to 0.5, then there will be 24 write threads and 24 read threads.
-
--existing_keys_ratio: Ratio of keys among key-value pairs to read that already filled in the instance. For example, if set it to 0.5, then 50% read operations will return NotFound.
-
-Benchmark tool will print performance stats to stdout, include throughput in each second and average ops:
-
-$numactl --cpunodebind=0 --membind=0 ./bench -fill=0 -time=10 -value_size=120 -threads=64 -read_ratio=1 -existing_keys_ratio=1 -path=/mnt/pmem0/kvdk -space=274877906944 -num=838860800 -max_write_threads=64 -type=string
-
-[LOG] time 0 ms: Initializing PMem size 274877906944 in file /mnt/pmem0/kvdk/data
-[LOG] time 1864 ms: Map pmem space done
-[LOG] time 9033 ms: In restoring: iterated 840882543 records
-init 0 write threads
-init 64 read threads
-------- ops in seconds -----------
-time (ms), read ops, not found, write ops, total read, total write
-1000 73691000 0 0 73691000 0
-2001 73613000 0 0 147304000 0
-3002 73643000 0 0 220947000 0
-4003 73656000 0 0 294603000 0
-5004 73675000 0 0 368278000 0
-6005 73667000 0 0 441945000 0
-7006 73699000 0 0 515644000 0
-8007 73647000 0 0 589291000 0
-9008 73634000 0 0 662925000 0
-10009 73677000 0 0 736602000 0
-finish bench
------------- statistics ------------
-read ops 73660400, write ops 0
-[LOG] time 19051 ms: instance closed
-
-
-
-### Write performance
-
-Similarily, to test write performance, we can simply modify "read_ratio":
-
-numactl --cpunodebind=0 --membind=0 ./bench -fill=0 -time=10 -value_size=120 -threads=64 -read_ratio=0 -existing_keys_ratio=0 -path=/mnt/pmem0/kvdk -space=274877906944 -num=838860800 -max_write_threads=64 -type=string
-
-This command will insert new key-value pairs to the KVDK instance in 10 seconds. Likely wise, by modify "existing_keys_ratio", we can control how many write operations are updates.
-
-$numactl --cpunodebind=0 --membind=0 ./bench -fill=0 -time=10 -value_size=120 -threads=64 -read_ratio=0 -existing_keys_ratio=0 -path=/mnt/pmem0/kvdk -space=274877906944 -num=838860800 -max_write_threads=64 -type=string
-
-[LOG] time 0 ms: Initializing PMem size 274877906944 in file /mnt/pmem0/kvdk/data
-[LOG] time 1865 ms: Map pmem space done
-[LOG] time 9015 ms: In restoring: iterated 840882543 records
-init 64 write threads
-init 0 read threads
-------- ops in seconds -----------
-time (ms), read ops, not found, write ops, total read, total write
-1000 0 0 50610000 0 50610000
-2007 0 0 50053000 0 100663000
-3016 0 0 49669000 0 150332000
-4017 0 0 49048000 0 199380000
-5018 0 0 48540000 0 247920000
-6022 0 0 48210000 0 296130000
-7023 0 0 47725000 0 343855000
-8024 0 0 47354000 0 391209000
-9027 0 0 47080000 0 438289000
-10028 0 0 46544000 0 484833000
-finish bench
------------- statistics ------------
-read ops 0, write ops 48483400
-[LOG] time 19055 ms: instance closed
-
-
-### Stat latencies
-
-We can also stat latency information by add "-latency=1" to the benchmark command.
-
-$ numactl --cpunodebind=0 --membind=0 ./bench -fill=0 -time=10 -value_size=120 -threads=64 -read_ratio=0.5 -existing_keys_ratio=1 -path=/mnt/pmem0/kvdk -space=274877906944 -num=838860800 -max_write_threads=64 -type=string -latency=1
-
-[LOG] time 0 ms: Initializing PMem size 274877906944 in file /mnt/pmem0/kvdk/data
-[LOG] time 1869 ms: Map pmem space done
-[LOG] time 14963 ms: In restoring: iterated 1323729106 records
-calculate latencies
-init 6 write threads
-init 58 read threads
-------- ops in seconds -----------
-time (ms), read ops, not found, write ops, total read, total write
-1000 62763000 0 3933000 62763000 3933000
-2001 62297000 0 4303000 125060000 8236000
-3002 62190000 0 4530000 187250000 12766000
-4003 62194000 0 4530000 249444000 17296000
-5004 62206000 0 4531000 311650000 21827000
-6005 62172000 0 4527000 373822000 26354000
-7006 62194000 0 4530000 436016000 30884000
-8007 62227000 0 4535000 498243000 35419000
-9008 62196000 0 4529000 560439000 39948000
-10009 62190000 0 4527000 622629000 44475000
-finish bench
------------- statistics ------------
-read ops 62263100, write ops 4447500
-read lantencies (us): Avg: 0.89, P50: 0.83, P99: 1.54, P99.5: 1.67, P99.9: 2.77, P99.99: 4.20
-write lantencies (us): Avg: 0.09, P50: 1.22, P99: 2.64, P99.5: 3.25, P99.9: 4.22, P99.99: 5.35
-[LOG] time 28382 ms: instance closed
+-dest_memory_nodes: The memory nodes to store KV data.
 
 ## More configurations
 
-For more configurations of the benchmark tool, please reference to "benchmark/bench.cpp" and "scripts/basic_benchmarks.py".
-
-
-
-
+For more configurations of the benchmark tool, please refer to "benchmark/bench.cpp".
