From 7997874f4d2be9da1e26c77804569b0057c1e0a2 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 13 Mar 2023 22:12:29 +0800 Subject: [PATCH] Fix tail->repl_offset update in feedReplicationBuffer (#11905) In #11666, we added a while loop and will split a big reply node to multiple nodes. The update of tail->repl_offset may be wrong. Like before #11666, we would have created at most one new reply node, and now we will create multiple nodes if it is a big reply node. Now we are creating more than one node, and the tail->repl_offset of all the nodes except the last one are incorrect. Because we update master_repl_offset at the beginning, and then use it to update the tail->repl_offset. This would have lead to an assertion during PSYNC, a test was added to validate that case. Besides that, the calculation of size was adjusted to fix tests that failed due to a combination of a very low backlog size, and some thresholds of that get violated because of the relatively high overhead of replBufBlock. So now if the backlog size / 16 is too small, we'll take PROTO_REPLY_CHUNK_BYTES instead. Co-authored-by: Oran Agra --- src/replication.c | 11 +++++++---- src/server.h | 2 +- tests/integration/replication-buffer.tcl | 12 +++++++++++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/replication.c b/src/replication.c index 854688f2c..5aef23698 100644 --- a/src/replication.c +++ b/src/replication.c @@ -332,8 +332,6 @@ void feedReplicationBuffer(char *s, size_t len) { static long long repl_block_id = 0; if (server.repl_backlog == NULL) return; - server.master_repl_offset += len; - server.repl_backlog->histlen += len; while(len > 0) { size_t start_pos = 0; /* The position of referenced block to start sending. */ @@ -354,6 +352,8 @@ void feedReplicationBuffer(char *s, size_t len) { tail->used += copy; s += copy; len -= copy; + server.master_repl_offset += copy; + server.repl_backlog->histlen += copy; } if (len) { /* Create a new node, make sure it is allocated to at @@ -362,14 +362,15 @@ void feedReplicationBuffer(char *s, size_t len) { /* Avoid creating nodes smaller than PROTO_REPLY_CHUNK_BYTES, so that we can append more data into them, * and also avoid creating nodes bigger than repl_backlog_size / 16, so that we won't have huge nodes that can't * trim when we only still need to hold a small portion from them. */ - size_t size = min(max(len, (size_t)PROTO_REPLY_CHUNK_BYTES), (size_t)server.repl_backlog_size / 16); + size_t limit = max((size_t)server.repl_backlog_size / 16, (size_t)PROTO_REPLY_CHUNK_BYTES); + size_t size = min(max(len, (size_t)PROTO_REPLY_CHUNK_BYTES), limit); tail = zmalloc_usable(size + sizeof(replBufBlock), &usable_size); /* Take over the allocation's internal fragmentation */ tail->size = usable_size - sizeof(replBufBlock); size_t copy = (tail->size >= len) ? len : tail->size; tail->used = copy; tail->refcount = 0; - tail->repl_offset = server.master_repl_offset - tail->used + 1; + tail->repl_offset = server.master_repl_offset + 1; tail->id = repl_block_id++; memcpy(tail->buf, s, copy); listAddNodeTail(server.repl_buffer_blocks, tail); @@ -382,6 +383,8 @@ void feedReplicationBuffer(char *s, size_t len) { } s += copy; len -= copy; + server.master_repl_offset += copy; + server.repl_backlog->histlen += copy; } /* For output buffer of replicas. */ diff --git a/src/server.h b/src/server.h index 04371e959..a3d9aa088 100644 --- a/src/server.h +++ b/src/server.h @@ -921,7 +921,7 @@ typedef struct clientReplyBlock { * | / \ * | / \ * | / \ - * Repl Backlog Replia_A Replia_B + * Repl Backlog Replica_A Replica_B * * Each replica or replication backlog increments only the refcount of the * 'ref_repl_buf_node' which it points to. So when replica walks to the next diff --git a/tests/integration/replication-buffer.tcl b/tests/integration/replication-buffer.tcl index fe85632ae..2e402480d 100644 --- a/tests/integration/replication-buffer.tcl +++ b/tests/integration/replication-buffer.tcl @@ -267,7 +267,7 @@ test {Replica client-output-buffer size is limited to backlog_limit/16 when no r wait_for_ofs_sync $master $replica # Write another key to force the test to wait for another event loop iteration - # to give the serverCron a chance to disconnect replicas with COB size exeeeding the limits + # to give the serverCron a chance to disconnect replicas with COB size exceeding the limits $master set key1 "1" wait_for_ofs_sync $master $replica @@ -280,6 +280,16 @@ test {Replica client-output-buffer size is limited to backlog_limit/16 when no r } assert {[status $master sync_partial_ok] == 0} + + # Before this fix (#11905), the test would trigger an assertion in 'o->used >= c->ref_block_pos' + test {The update of replBufBlock's repl_offset is ok - Regression test for #11666} { + set rd [redis_deferring_client] + set replid [status $master master_replid] + set offset [status $master repl_backlog_first_byte_offset] + $rd psync $replid $offset + assert_equal {PONG} [$master ping] ;# Make sure the master doesn't crash. + $rd close + } } } }