From da9c2804a5ce774099bc3aabc894a8fd8c7e1440 Mon Sep 17 00:00:00 2001 From: sundb Date: Sat, 5 Aug 2023 12:57:06 +0800 Subject: [PATCH] Avoid mostly harmless integer overflow in cjson (#12456) This PR mainly fixes a possible integer overflow in `json_append_string()`. When we use `cjson.encode()` to encode a string larger than 2GB, with specific compilation flags, an integer overflow may occur, leading to truncation: the part of the string beyond 2GB is not encoded. On the other hand, this overflow doesn't cause any out-of-range read or write, or a segmentation fault. 1) using -O0 for lua_cjson (`make LUA_DEBUG=yes`) In this case, `i` will overflow, leading to truncation. When `i` reaches `INT_MAX+1` it overflows to `INT_MIN`; when it is then compared to `len`, `i` (1000000..00) is sign-extended to a 64-bit integer (1111111.....000000). Because `len` is a `size_t`, the comparison is unsigned, so at this point `i` compares greater than `len` and the loop exits. As a result `for (i = 0; i < len; i++)` will loop at most 2^31 times, and the part of the string beyond 2GB will be truncated. ```asm `i` => -0x24(%rbp) <+253>: addl $0x1,-0x24(%rbp) ; overflows once i reaches 2^31 <+257>: mov -0x24(%rbp),%eax <+260>: movslq %eax,%rdx ; move a 32-bit value with sign extension into a 64-bit register <+263>: mov -0x20(%rbp),%rax <+267>: cmp %rax,%rdx ; check `i < len` <+270>: jb 0x212600 ``` 2) using -O2/-O3 for lua_cjson (`make LUA_DEBUG=no`, **the default**) In this case, because signed integer overflow is undefined behavior, `i` will not overflow: the compiler optimizes `i` to use 64-bit registers for all subsequent instructions. ```asm <+180>: add $0x1,%rbx ; Using 64-bit register `rbx` for i++ <+184>: lea 0x1(%rdx),%rsi <+188>: mov %rsi,0x10(%rbp) <+192>: mov %al,(%rcx,%rdx,1) <+195>: cmp %rbx,(%rsp) ; check `i < len` <+199>: ja 0x20b63a ``` 3) using a 32-bit build Because `strbuf_ensure_empty_length()` preallocates memory of length (len * 6 + 2), on 32-bit `cjson.encode()` can only handle strings smaller than ((2 ^ 32) - 3) / 6. 
So 32-bit builds are not affected. Also change `i` in `strbuf_append_string()` to `size_t`. Its second argument `str` is taken from the `char2escape` string array, whose entries are never longer than 6 characters, so `strbuf_append_string()` is not at risk of overflow (the bug was unreachable). --- deps/lua/src/lua_cjson.c | 3 +-- deps/lua/src/strbuf.c | 3 +-- tests/unit/scripting.tcl | 7 +++++++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/deps/lua/src/lua_cjson.c b/deps/lua/src/lua_cjson.c index 991f5d31d..b86d73e97 100644 --- a/deps/lua/src/lua_cjson.c +++ b/deps/lua/src/lua_cjson.c @@ -464,9 +464,8 @@ static void json_encode_exception(lua_State *l, json_config_t *cfg, strbuf_t *js static void json_append_string(lua_State *l, strbuf_t *json, int lindex) { const char *escstr; - int i; const char *str; - size_t len; + size_t i, len; str = lua_tolstring(l, lindex, &len); diff --git a/deps/lua/src/strbuf.c b/deps/lua/src/strbuf.c index 775e8baf1..97ee940c9 100644 --- a/deps/lua/src/strbuf.c +++ b/deps/lua/src/strbuf.c @@ -176,8 +176,7 @@ void strbuf_resize(strbuf_t *s, size_t len) void strbuf_append_string(strbuf_t *s, const char *str) { - int i; - size_t space; + size_t i, space; space = strbuf_empty_length(s); diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl index a8195fb91..cedf2b4f7 100644 --- a/tests/unit/scripting.tcl +++ b/tests/unit/scripting.tcl @@ -307,6 +307,13 @@ start_server {tags {"scripting"}} { set e } {*against a key*} + test {EVAL - JSON string encoding a string larger than 2GB} { + run_script { + local s = string.rep("a", 1024 * 1024 * 1024) + return #cjson.encode(s..s..s) + } 0 + } {3221225474} {large-memory} ;# length includes two double quotes at both ends + test {EVAL - JSON numeric decoding} { # We must return the table as a string because otherwise # Redis converts floats to ints and we get 0 and 1023 instead