diff --git a/src/core/compact_object.cc b/src/core/compact_object.cc index 0de870fdf..d4261228e 100644 --- a/src/core/compact_object.cc +++ b/src/core/compact_object.cc @@ -500,7 +500,9 @@ size_t CompactObj::Size() const { raw_size = an.size(); break; } - + case EXTERNAL_TAG: + raw_size = u_.ext_ptr.size; + break; case ROBJ_TAG: raw_size = u_.r_obj.Size(); break; @@ -540,6 +542,7 @@ uint64_t CompactObj::HashCode() const { return XXH3_64bits_withSeed(an.data(), an.size(), kHashSeed); } } + // We need hash only for keys. LOG(DFATAL) << "Should not reach " << int(taglen_); return 0; @@ -550,7 +553,7 @@ uint64_t CompactObj::HashCode(string_view str) { } unsigned CompactObj::ObjType() const { - if (IsInline() || taglen_ == INT_TAG || taglen_ == SMALL_TAG) + if (IsInline() || taglen_ == INT_TAG || taglen_ == SMALL_TAG || taglen_ == EXTERNAL_TAG) return OBJ_STRING; if (taglen_ == ROBJ_TAG) @@ -626,17 +629,9 @@ void CompactObj::SyncRObj() { DCHECK_EQ(ROBJ_TAG, taglen_); DCHECK_EQ(u_.r_obj.type(), obj->type); + CHECK_NE(OBJ_SET, obj->type) << "sets should be handled without robj"; unsigned enc = obj->encoding; - if (obj->type == OBJ_SET) { - LOG(FATAL) << "Should not reach"; - /*if (OBJ_ENCODING_INTSET == enc) { - enc = kEncodingIntSet; - } else { - DCHECK_EQ(OBJ_ENCODING_HT, enc); - enc = kEncodingStrMap; - }*/ - } u_.r_obj.Init(obj->type, enc, obj->ptr); } @@ -754,17 +749,17 @@ string_view CompactObj::GetSlice(string* scratch) const { detail::ascii_unpack(to_byte(u_.r_obj.inner_obj()), decoded_len, scratch->data()); } else if (taglen_ == SMALL_TAG) { size_t decoded_len = DecodedLen(u_.small_str.size()); - size_t pref_len = decoded_len - u_.small_str.size(); + size_t space_left = decoded_len - u_.small_str.size(); scratch->resize(decoded_len); string_view slices[2]; unsigned num = u_.small_str.GetV(slices); DCHECK_EQ(2u, num); - char* next = scratch->data() + pref_len; + char* next = scratch->data() + space_left; memcpy(next, slices[0].data(), slices[0].size()); next += slices[0].size(); memcpy(next, slices[1].data(), slices[1].size()); - detail::ascii_unpack(reinterpret_cast(scratch->data() + pref_len), decoded_len, + detail::ascii_unpack(reinterpret_cast(scratch->data() + space_left), decoded_len, scratch->data()); } else { LOG(FATAL) << "Unsupported tag " << int(taglen_); @@ -790,7 +785,7 @@ string_view CompactObj::GetSlice(string* scratch) const { } bool CompactObj::HasAllocated() const { - if (IsRef() || taglen_ == INT_TAG || IsInline() || + if (IsRef() || taglen_ == INT_TAG || IsInline() || taglen_ == EXTERNAL_TAG || (taglen_ == ROBJ_TAG && u_.r_obj.inner_obj() == nullptr)) return false; @@ -805,6 +800,90 @@ void __attribute__((noinline)) CompactObj::GetString(string* res) const { } } +void CompactObj::GetString(char* dest) const { + uint8_t is_encoded = mask_ & kEncMask; + + if (IsInline()) { + if (is_encoded) { + size_t decoded_len = taglen_ + 2; + + // must be this because we either shortened 17 or 18. + DCHECK_EQ(is_encoded, ASCII2_ENC_BIT); + DCHECK_EQ(decoded_len, ascii_len(taglen_)); + + detail::ascii_unpack(to_byte(u_.inline_str), decoded_len, dest); + } else { + memcpy(dest, u_.inline_str, taglen_); + } + + return; + } + + if (taglen_ == INT_TAG) { + absl::AlphaNum an(u_.ival); + memcpy(dest, an.data(), an.size()); + return; + } + + if (is_encoded) { + if (taglen_ == ROBJ_TAG) { + CHECK_EQ(OBJ_STRING, u_.r_obj.type()); + DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding()); + size_t decoded_len = DecodedLen(u_.r_obj.Size()); + detail::ascii_unpack(to_byte(u_.r_obj.inner_obj()), decoded_len, dest); + } else if (taglen_ == SMALL_TAG) { + size_t decoded_len = DecodedLen(u_.small_str.size()); + + // we left some space on the left to allow inplace ascii unpacking. + size_t space_left = decoded_len - u_.small_str.size(); + string_view slices[2]; + + unsigned num = u_.small_str.GetV(slices); + DCHECK_EQ(2u, num); + char* next = dest + space_left; + memcpy(next, slices[0].data(), slices[0].size()); + next += slices[0].size(); + memcpy(next, slices[1].data(), slices[1].size()); + detail::ascii_unpack(reinterpret_cast(dest + space_left), decoded_len, dest); + } else { + LOG(FATAL) << "Unsupported tag " << int(taglen_); + } + return; + } + + // no encoding. + if (taglen_ == ROBJ_TAG) { + CHECK_EQ(OBJ_STRING, u_.r_obj.type()); + DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding()); + memcpy(dest, u_.r_obj.inner_obj(), u_.r_obj.Size()); + return; + } + + if (taglen_ == SMALL_TAG) { + string_view slices[2]; + unsigned num = u_.small_str.GetV(slices); + DCHECK_EQ(2u, num); + memcpy(dest, slices[0].data(), slices[0].size()); + dest += slices[0].size(); + memcpy(dest, slices[1].data(), slices[1].size()); + return; + } + + LOG(FATAL) << "Bad tag " << int(taglen_); +} + +void CompactObj::SetExternal(size_t offset, size_t sz) { + SetMeta(EXTERNAL_TAG, mask_ & ~kEncMask); + + u_.ext_ptr.offset = offset; + u_.ext_ptr.size = sz; +} + +std::pair CompactObj::GetExternalPtr() { + DCHECK_EQ(EXTERNAL_TAG, taglen_); + return pair(size_t(u_.ext_ptr.offset), size_t(u_.ext_ptr.size)); +} + void CompactObj::Reset() { if (HasAllocated()) { Free(); diff --git a/src/core/compact_object.h b/src/core/compact_object.h index cf2e8ebe4..a49678ed5 100644 --- a/src/core/compact_object.h +++ b/src/core/compact_object.h @@ -36,9 +36,15 @@ class RobjWrapper { void SetString(std::string_view s, std::pmr::memory_resource* mr); void Init(unsigned type, unsigned encoding, void* inner); - unsigned type() const { return type_; } - unsigned encoding() const { return encoding_; } - void* inner_obj() const { return inner_obj_;} + unsigned type() const { + return type_; + } + unsigned encoding() const { + return encoding_; + } + void* inner_obj() const { + return inner_obj_; + } std::string_view AsView() const { return std::string_view{reinterpret_cast(inner_obj_), sz_}; @@ -87,6 +93,7 @@ class CompactObj { INT_TAG = 17, SMALL_TAG = 18, ROBJ_TAG = 19, + EXTERNAL_TAG = 20, }; enum MaskBit { @@ -100,6 +107,7 @@ class CompactObj { // while ASCII2_ENC_BIT rounds it up. See DecodedLen implementation for more info. ASCII1_ENC_BIT = 8, ASCII2_ENC_BIT = 0x10, + IO_PENDING = 0x20, }; static constexpr uint8_t kEncMask = ASCII1_ENC_BIT | ASCII2_ENC_BIT; @@ -193,6 +201,18 @@ class CompactObj { } } + bool HasIoPending() { + return mask_ & IO_PENDING; + } + + void SetIoPending(bool b) { + if (b) { + mask_ |= IO_PENDING; + } else { + mask_ &= ~IO_PENDING; + } + } + unsigned Encoding() const; unsigned ObjType() const; @@ -225,6 +245,15 @@ class CompactObj { void SetString(std::string_view str); void GetString(std::string* res) const; + // dest must have at least Size() bytes available + void GetString(char* dest) const; + + bool IsExternal() const { + return taglen_ == EXTERNAL_TAG; + } + void SetExternal(size_t offset, size_t sz); + std::pair GetExternalPtr(); + // In case this object a single blob, returns number of bytes allocated on heap // for that blob. Otherwise returns 0. size_t MallocUsed() const; @@ -271,6 +300,12 @@ class CompactObj { mask_ = mask; } + struct ExternalPtr { + size_t offset; + uint32_t size; + uint32_t unneeded; + } __attribute__((packed)); + // My main data structure. Union of representations. // RobjWrapper is kInlineLen=16 bytes, so we employ SSO of that size via inline_str. // In case of int values, we waste 8 bytes. I am assuming it's ok and it's not the data type @@ -281,6 +316,7 @@ class CompactObj { SmallString small_str; detail::RobjWrapper r_obj; int64_t ival __attribute__((packed)); + ExternalPtr ext_ptr; U() : r_obj() { }