fix: Use MOVED error type for moved replies (#4125)

**The problem:**

When in cluster mode, `MOVED` replies (which are arguably not even errors) are aggregated per slot-id + remote host, and displayed in `# Errorstats` as such. For example, in a server that does _not_ own 8k slots, we will aggregate 8k different errors, and their counts (in memory).

This slows down all `INFO` replies, takes a lot of memory, and also makes `INFO` replies very long.

**The fix:**

Use `type` `MOVED` for moved replies, making them all the same under `# Errorstats`

Fixes #4118
This commit is contained in:
Shahar Mike 2024-11-14 13:12:38 +02:00 committed by GitHub
parent f04e5497bc
commit 1513134247
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 17 additions and 13 deletions

View File

@ -116,16 +116,17 @@ bool IsClusterShardedByTag() {
return IsClusterEnabledOrEmulated() || LockTagOptions::instance().enabled;
}
std::optional<std::string> SlotOwnershipErrorStr(SlotId slot_id) {
facade::ErrorReply SlotOwnershipError(SlotId slot_id) {
const cluster::ClusterConfig* cluster_config = ClusterFamily::cluster_config();
if (!cluster_config)
return facade::kClusterNotConfigured;
return facade::ErrorReply{facade::kClusterNotConfigured};
if (!cluster_config->IsMySlot(slot_id)) {
// See more details here: https://redis.io/docs/reference/cluster-spec/#moved-redirection
cluster::ClusterNodeInfo master = cluster_config->GetMasterNodeForSlot(slot_id);
return absl::StrCat("-MOVED ", slot_id, " ", master.ip, ":", master.port);
return facade::ErrorReply{absl::StrCat("-MOVED ", slot_id, " ", master.ip, ":", master.port),
"MOVED"};
}
return std::nullopt;
return facade::ErrorReply{facade::OpStatus::OK};
}
} // namespace dfly::cluster

View File

@ -10,6 +10,8 @@
#include <string_view>
#include <vector>
#include "facade/facade_types.h"
namespace dfly::cluster {
using SlotId = uint16_t;
@ -170,7 +172,7 @@ enum class MigrationState : uint8_t {
SlotId KeySlot(std::string_view key);
// return error message if slot doesn't belong to this node
std::optional<std::string> SlotOwnershipErrorStr(SlotId slot_id);
facade::ErrorReply SlotOwnershipError(SlotId slot_id);
void InitializeCluster();
bool IsClusterEnabled();

View File

@ -1118,9 +1118,9 @@ void BPopGeneric(ListDir dir, CmdArgList args, Transaction* tx, SinkReplyBuilder
case OpStatus::TIMED_OUT:
return rb->SendNullArray();
case OpStatus::KEY_MOVED: {
auto error = cluster::SlotOwnershipErrorStr(*tx->GetUniqueSlotId());
CHECK(error.has_value());
return builder->SendError(std::move(*error));
auto error = cluster::SlotOwnershipError(*tx->GetUniqueSlotId());
CHECK(!error.status.has_value() || error.status.value() != facade::OpStatus::OK);
return builder->SendError(std::move(error));
}
default:
LOG(ERROR) << "Unexpected error " << popped_key.status();

View File

@ -940,8 +940,9 @@ optional<ErrorReply> Service::CheckKeysOwnership(const CommandId* cid, CmdArgLis
}
if (keys_slot.has_value()) {
if (auto error_str = cluster::SlotOwnershipErrorStr(*keys_slot); error_str) {
return ErrorReply{std::move(*error_str)};
if (auto error = cluster::SlotOwnershipError(*keys_slot);
!error.status.has_value() || error.status.value() != facade::OpStatus::OK) {
return ErrorReply{std::move(error)};
}
}

View File

@ -1279,9 +1279,9 @@ void BZPopMinMax(CmdArgList args, Transaction* tx, SinkReplyBuilder* builder,
case OpStatus::TIMED_OUT:
return rb->SendNullArray();
case OpStatus::KEY_MOVED: {
auto error = cluster::SlotOwnershipErrorStr(*tx->GetUniqueSlotId());
CHECK(error.has_value());
return builder->SendError(std::move(*error));
auto error = cluster::SlotOwnershipError(*tx->GetUniqueSlotId());
CHECK(!error.status.has_value() || error.status.value() != facade::OpStatus::OK);
return builder->SendError(std::move(error));
}
default:
LOG(ERROR) << "Unexpected error " << popped_key.status();