test(memory): Unaccounted memory test + add DEBUG POPULATE TYPE <type> (#2561)

* test(memory): Test memory accounting for all types

* slightly faster

* WIP

* working

* Document

* Update test to use DEBUG POPULATE

* Nothing much

* Working

* fix

* yaml

* explicit capture

* fix ci?

* stub tx
This commit is contained in:
Shahar Mike 2024-02-12 08:09:48 +02:00 committed by GitHub
parent 963023f07c
commit 8ead569b2f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 154 additions and 27 deletions

View File

@ -153,7 +153,7 @@ jobs:
dfly-executable: dragonfly
run-only-on-ubuntu-latest: true
build-folder-name: build
filter: "not slow"
filter: ${{ matrix.build-type == 'Release' && 'not slow' || '(not slow) and (not opt_only)' }}
- name: Upload logs on failure
if: failure()
uses: actions/upload-artifact@v3

View File

@ -18,6 +18,7 @@
#include "server/engine_shard_set.h"
#include "server/error.h"
#include "server/main_service.h"
#include "server/multi_command_squasher.h"
#include "server/rdb_load.h"
#include "server/server_state.h"
#include "server/string_family.h"
@ -68,32 +69,99 @@ struct ValueCompressInfo {
size_t compressed_size = 0;
};
void DoPopulateBatch(std::string_view prefix, size_t val_size, bool random_value_str,
const SetCmd::SetParams& params, const PopulateBatch& batch) {
DbContext db_cntx{batch.dbid, 0};
OpArgs op_args(EngineShard::tlocal(), 0, db_cntx);
SetCmd sg(op_args, false);
// Builds a value of exactly `val_size` characters: random hex digits when
// `random_value` is set, otherwise a constant run of 'x'.
std::string GenerateValue(size_t val_size, bool random_value, absl::InsecureBitGen* gen) {
  return random_value ? GetRandomHex(*gen, val_size) : string(val_size, 'x');
}
// Builds the command (and its argument vector, key first) used to populate a
// single key of the requested `type`:
//   STRING -> SET, LIST -> LPUSH, SET -> SADD, HSET -> HSET, ZSET -> ZADD,
//   JSON -> JSON.SET (a flat object with `elements` string fields).
// Returns {nullptr, args} when `type` is unknown or the command was renamed
// out of the registry; the caller must check `cid` before use.
tuple<const CommandId*, absl::InlinedVector<string, 5>> GeneratePopulateCommand(
    string_view type, std::string key, size_t val_size, bool random_value, uint32_t elements,
    const CommandRegistry& registry, absl::InsecureBitGen* gen) {
  absl::InlinedVector<string, 5> args;
  args.push_back(std::move(key));

  const CommandId* cid = nullptr;
  if (type == "STRING") {
    cid = registry.Find("SET");
    args.push_back(GenerateValue(val_size, random_value, gen));
  } else if (type == "LIST") {
    cid = registry.Find("LPUSH");
    for (uint32_t i = 0; i < elements; ++i) {
      args.push_back(GenerateValue(val_size, random_value, gen));
    }
  } else if (type == "SET") {
    cid = registry.Find("SADD");
    for (size_t i = 0; i < elements; ++i) {
      args.push_back(GenerateValue(val_size, random_value, gen));
    }
  } else if (type == "HSET") {
    cid = registry.Find("HSET");
    // Split the value budget between field name and field value.
    for (size_t i = 0; i < elements; ++i) {
      args.push_back(GenerateValue(val_size / 2, random_value, gen));
      args.push_back(GenerateValue(val_size / 2, random_value, gen));
    }
  } else if (type == "ZSET") {
    cid = registry.Find("ZADD");
    for (size_t i = 0; i < elements; ++i) {
      // Score in [0, val_size); note val_size must be > 0 here, as % 0 is UB.
      args.push_back(absl::StrCat((*gen)() % val_size));
      args.push_back(GenerateValue(val_size, random_value, gen));
    }
  } else if (type == "JSON") {
    cid = registry.Find("JSON.SET");
    args.push_back("$");

    string json = "{";
    for (size_t i = 0; i < elements; ++i) {
      absl::StrAppend(&json, "\"", i, "\":\"", GenerateValue(val_size, random_value, gen), "\",");
    }
    if (json.size() > 1) {
      json.back() = '}';  // Replace the trailing ',' with '}'.
    } else {
      json.push_back('}');  // elements == 0: emit an empty object "{}".
    }
    args.push_back(json);
  }
  return {cid, std::move(args)};
}
// Populates all keys in `batch` on the current shard by generating the
// type-appropriate write command for each key and invoking it through a
// non-atomic multi transaction (local EXEC shell + per-shard stub).
// Replies are suppressed via a capturing reply builder in ReplyMode::NONE.
// NOTE: the original diff rendering interleaved removed lines of the old
// SetCmd-based implementation here (unbalanced braces, references to
// `random_value_str`/`sg`/`params`); this is the coherent new body.
void DoPopulateBatch(string_view type, string_view prefix, size_t val_size, bool random_value,
                     int32_t elements, const PopulateBatch& batch, ServerFamily* sf,
                     ConnectionContext* cntx) {
  boost::intrusive_ptr<Transaction> local_tx =
      new Transaction{sf->service().mutable_registry()->Find("EXEC")};
  local_tx->StartMultiNonAtomic();
  boost::intrusive_ptr<Transaction> stub_tx =
      new Transaction{local_tx.get(), EngineShard::tlocal()->shard_id(), nullopt};

  absl::InlinedVector<MutableSlice, 5> args_view;
  facade::CapturingReplyBuilder crb;
  ConnectionContext local_cntx{cntx, stub_tx.get(), &crb};

  absl::InsecureBitGen gen;
  for (unsigned i = 0; i < batch.sz; ++i) {
    string key = absl::StrCat(prefix, ":", batch.index[i]);
    auto [cid, args] = GeneratePopulateCommand(type, std::move(key), val_size, random_value,
                                               elements, *sf->service().mutable_registry(), &gen);
    if (!cid) {
      LOG_EVERY_N(WARNING, 10'000) << "Unable to find command, was it renamed?";
      break;
    }

    // InvokeCmd expects mutable slices; build views over the owned strings.
    args_view.clear();
    for (auto& arg : args) {
      args_view.push_back(absl::MakeSpan(arg));
    }
    auto args_span = absl::MakeSpan(args_view);

    stub_tx->MultiSwitchCmd(cid);
    local_cntx.cid = cid;
    crb.SetReplyMode(ReplyMode::NONE);  // Discard replies: this is bulk fill.
    stub_tx->InitByArgs(local_cntx.conn_state.db_index, args_span);

    sf->service().InvokeCmd(cid, args_span, &local_cntx);
  }

  local_cntx.Inject(nullptr);
  local_tx->UnlockMulti();
}
struct ObjHist {
@ -315,13 +383,15 @@ void DebugCmd::Run(CmdArgList args) {
" Return sync id and array of number of journal commands executed for each replica flow",
"WATCHED",
" Shows the watched keys as a result of BLPOP and similar operations.",
"POPULATE <count> [<prefix>] [<size>] [RAND] [SLOTS start end]",
"POPULATE <count> [prefix] [size] [RAND] [SLOTS start end] [TYPE type] [ELEMENTS elements]",
" Create <count> string keys named key:<num> with value value:<num>.",
" If <prefix> is specified then it is used instead of the 'key' prefix.",
" If <size> is specified then X character is concatenated multiple times to value:<num>",
" to meet value size.",
" If RAND is specified then value will be set to random hex string in specified size.",
" If SLOTS is specified then create keys only in given slots range.",
" TYPE specifies data type (must be STRING/LIST/SET/HSET/ZSET/JSON), default STRING.",
" ELEMENTS specifies how many sub elements if relevant (like entries in a list / set).",
"OBJHIST",
" Prints histogram of object sizes.",
"STACKTRACE",
@ -498,7 +568,7 @@ void DebugCmd::Load(string_view filename) {
}
optional<DebugCmd::PopulateOptions> DebugCmd::ParsePopulateArgs(CmdArgList args) {
if (args.size() < 2 || args.size() > 8) {
if (args.size() < 2) {
cntx_->SendError(UnknownSubCmd("populate", "DEBUG"));
return nullopt;
}
@ -525,6 +595,22 @@ optional<DebugCmd::PopulateOptions> DebugCmd::ParsePopulateArgs(CmdArgList args)
std::string_view str = ArgS(args, index);
if (str == "RAND") {
options.populate_random_values = true;
} else if (str == "TYPE") {
if (args.size() < index + 2) {
cntx_->SendError(kSyntaxErr);
return nullopt;
}
ToUpper(&args[++index]);
options.type = ArgS(args, index);
} else if (str == "ELEMENTS") {
if (args.size() < index + 2) {
cntx_->SendError(kSyntaxErr);
return nullopt;
}
if (!absl::SimpleAtoi(ArgS(args, ++index), &options.elements)) {
cntx_->SendError(kSyntaxErr);
return nullopt;
}
} else if (str == "SLOTS") {
if (args.size() < index + 3) {
cntx_->SendError(kSyntaxErr);
@ -605,7 +691,6 @@ void DebugCmd::PopulateRangeFiber(uint64_t from, uint64_t num_of_keys,
DbIndex db_indx = cntx_->db_index();
EngineShardSet& ess = *shard_set;
std::vector<PopulateBatch> ps(ess.size(), PopulateBatch{db_indx});
SetCmd::SetParams params;
uint64_t index = from;
uint64_t to = from + num_of_keys;
@ -640,9 +725,9 @@ void DebugCmd::PopulateRangeFiber(uint64_t from, uint64_t num_of_keys,
++index;
if (shard_batch.sz == 32) {
ess.Add(sid, [=] {
DoPopulateBatch(options.prefix, options.val_size, options.populate_random_values, params,
shard_batch);
ess.Add(sid, [this, index, options, shard_batch] {
DoPopulateBatch(options.type, options.prefix, options.val_size,
options.populate_random_values, options.elements, shard_batch, &sf_, cntx_);
if (index % 50 == 0) {
ThisFiber::Yield();
}
@ -654,8 +739,8 @@ void DebugCmd::PopulateRangeFiber(uint64_t from, uint64_t num_of_keys,
}
ess.AwaitRunningOnShardQueue([&](EngineShard* shard) {
DoPopulateBatch(options.prefix, options.val_size, options.populate_random_values, params,
ps[shard->shard_id()]);
DoPopulateBatch(options.type, options.prefix, options.val_size, options.populate_random_values,
options.elements, ps[shard->shard_id()], &sf_, cntx_);
// Debug populate does not use transaction framework therefore we call OnCbFinish manually
// after running the callback
// Note that running debug populate while running flushall/db can cause dcheck fail because the

View File

@ -19,6 +19,8 @@ class DebugCmd {
std::string_view prefix{"key"};
uint32_t val_size = 0;
bool populate_random_values = false;
std::string_view type{"STRING"};
uint32_t elements = 1;
std::optional<ClusterConfig::SlotRange> slot_range;
};

View File

@ -168,7 +168,7 @@ class DflyInstance:
proc.kill()
else:
proc.terminate()
proc.communicate(timeout=15)
proc.communicate(timeout=30)
except subprocess.TimeoutExpired:
# We need to send SIGUSR1 to DF such that it prints the stacktrace
proc.send_signal(signal.SIGUSR1)

View File

@ -0,0 +1,40 @@
import pytest
from redis import asyncio as aioredis
from .utility import *
@pytest.mark.opt_only
@pytest.mark.parametrize(
    "type, keys, val_size, elements",
    [
        ("JSON", 300_000, 100, 100),
        ("SET", 500_000, 100, 100),
        ("HSET", 500_000, 100, 100),
        ("ZSET", 400_000, 100, 100),
        ("LIST", 500_000, 100, 100),
        ("STRING", 10_000_000, 1000, 1),
    ],
)
async def test_rss_used_mem_gap(df_local_factory, type, keys, val_size, elements):
    """Fill a Dragonfly instance with `type` entries until used memory passes a
    floor, then verify that RSS exceeds used_memory by no more than a fixed
    unaccounted-memory budget."""
    min_used_memory = 5 * 1024 * 1024 * 1024  # 5gb
    max_unaccounted = 200 * 1024 * 1024  # 200mb

    df_server = df_local_factory.create()
    df_local_factory.start_all([df_server])
    client = df_server.client()
    await asyncio.sleep(1)  # Wait for another RSS heartbeat update in Dragonfly

    cmd = f"DEBUG POPULATE {keys} {type} {val_size} RAND TYPE {type} ELEMENTS {elements}"
    print(f"Running {cmd}")
    await client.execute_command(cmd)

    await asyncio.sleep(1)  # Wait for another RSS heartbeat update in Dragonfly

    info = await client.info("memory")
    print(f'Used memory {info["used_memory"]}, rss {info["used_memory_rss"]}')
    assert info["used_memory"] > min_used_memory, "Weak testcase: too little used memory"
    assert info["used_memory_rss"] - info["used_memory"] < max_unaccounted

    await disconnect_clients(client)