CompactObject can now represent a pointer to external storage

This commit is contained in:
Roman Gershman 2022-04-18 17:55:32 +03:00
parent f113d29918
commit e163747023
2 changed files with 133 additions and 18 deletions

View File

@ -500,7 +500,9 @@ size_t CompactObj::Size() const {
raw_size = an.size();
break;
}
case EXTERNAL_TAG:
raw_size = u_.ext_ptr.size;
break;
case ROBJ_TAG:
raw_size = u_.r_obj.Size();
break;
@ -540,6 +542,7 @@ uint64_t CompactObj::HashCode() const {
return XXH3_64bits_withSeed(an.data(), an.size(), kHashSeed);
}
}
// We need hash only for keys.
LOG(DFATAL) << "Should not reach " << int(taglen_);
return 0;
@ -550,7 +553,7 @@ uint64_t CompactObj::HashCode(string_view str) {
}
unsigned CompactObj::ObjType() const {
if (IsInline() || taglen_ == INT_TAG || taglen_ == SMALL_TAG)
if (IsInline() || taglen_ == INT_TAG || taglen_ == SMALL_TAG || taglen_ == EXTERNAL_TAG)
return OBJ_STRING;
if (taglen_ == ROBJ_TAG)
@ -626,17 +629,9 @@ void CompactObj::SyncRObj() {
DCHECK_EQ(ROBJ_TAG, taglen_);
DCHECK_EQ(u_.r_obj.type(), obj->type);
CHECK_NE(OBJ_SET, obj->type) << "sets should be handled without robj";
unsigned enc = obj->encoding;
if (obj->type == OBJ_SET) {
LOG(FATAL) << "Should not reach";
/*if (OBJ_ENCODING_INTSET == enc) {
enc = kEncodingIntSet;
} else {
DCHECK_EQ(OBJ_ENCODING_HT, enc);
enc = kEncodingStrMap;
}*/
}
u_.r_obj.Init(obj->type, enc, obj->ptr);
}
@ -754,17 +749,17 @@ string_view CompactObj::GetSlice(string* scratch) const {
detail::ascii_unpack(to_byte(u_.r_obj.inner_obj()), decoded_len, scratch->data());
} else if (taglen_ == SMALL_TAG) {
size_t decoded_len = DecodedLen(u_.small_str.size());
size_t pref_len = decoded_len - u_.small_str.size();
size_t space_left = decoded_len - u_.small_str.size();
scratch->resize(decoded_len);
string_view slices[2];
unsigned num = u_.small_str.GetV(slices);
DCHECK_EQ(2u, num);
char* next = scratch->data() + pref_len;
char* next = scratch->data() + space_left;
memcpy(next, slices[0].data(), slices[0].size());
next += slices[0].size();
memcpy(next, slices[1].data(), slices[1].size());
detail::ascii_unpack(reinterpret_cast<uint8_t*>(scratch->data() + pref_len), decoded_len,
detail::ascii_unpack(reinterpret_cast<uint8_t*>(scratch->data() + space_left), decoded_len,
scratch->data());
} else {
LOG(FATAL) << "Unsupported tag " << int(taglen_);
@ -790,7 +785,7 @@ string_view CompactObj::GetSlice(string* scratch) const {
}
bool CompactObj::HasAllocated() const {
if (IsRef() || taglen_ == INT_TAG || IsInline() ||
if (IsRef() || taglen_ == INT_TAG || IsInline() || taglen_ == EXTERNAL_TAG ||
(taglen_ == ROBJ_TAG && u_.r_obj.inner_obj() == nullptr))
return false;
@ -805,6 +800,90 @@ void __attribute__((noinline)) CompactObj::GetString(string* res) const {
}
}
// Materializes the string held by this object into the caller-provided buffer.
// Per the declaration, dest must have at least Size() bytes available.
// Handles inline, integer, ROBJ and small-string representations, unpacking
// ascii-packed payloads on the fly; any other tag is a fatal error.
void CompactObj::GetString(char* dest) const {
  uint8_t is_encoded = mask_ & kEncMask;

  // Inline (SSO) representation.
  if (IsInline()) {
    if (!is_encoded) {
      memcpy(dest, u_.inline_str, taglen_);
      return;
    }

    size_t decoded_len = taglen_ + 2;
    // Only ASCII2 is possible here: a packed inline string was shortened
    // from either 17 or 18 characters.
    DCHECK_EQ(is_encoded, ASCII2_ENC_BIT);
    DCHECK_EQ(decoded_len, ascii_len(taglen_));
    detail::ascii_unpack(to_byte(u_.inline_str), decoded_len, dest);
    return;
  }

  // Integer representation: emit the decimal text of the value.
  if (taglen_ == INT_TAG) {
    absl::AlphaNum an(u_.ival);
    memcpy(dest, an.data(), an.size());
    return;
  }

  // Single blob owned by the robj wrapper.
  if (taglen_ == ROBJ_TAG) {
    CHECK_EQ(OBJ_STRING, u_.r_obj.type());
    DCHECK_EQ(OBJ_ENCODING_RAW, u_.r_obj.encoding());

    if (is_encoded) {
      size_t decoded_len = DecodedLen(u_.r_obj.Size());
      detail::ascii_unpack(to_byte(u_.r_obj.inner_obj()), decoded_len, dest);
    } else {
      memcpy(dest, u_.r_obj.inner_obj(), u_.r_obj.Size());
    }
    return;
  }

  // Small string: the payload is exposed as two slices.
  if (taglen_ == SMALL_TAG) {
    string_view slices[2];
    unsigned num = u_.small_str.GetV(slices);
    DCHECK_EQ(2u, num);

    if (!is_encoded) {
      memcpy(dest, slices[0].data(), slices[0].size());
      memcpy(dest + slices[0].size(), slices[1].data(), slices[1].size());
      return;
    }

    size_t decoded_len = DecodedLen(u_.small_str.size());
    // Stage the packed bytes at the tail of dest, leaving a gap on the left,
    // so that the unpacking can be done in place within the same buffer.
    char* packed = dest + (decoded_len - u_.small_str.size());
    memcpy(packed, slices[0].data(), slices[0].size());
    memcpy(packed + slices[0].size(), slices[1].data(), slices[1].size());
    detail::ascii_unpack(reinterpret_cast<uint8_t*>(packed), decoded_len, dest);
    return;
  }

  // No representation matched.
  if (is_encoded) {
    LOG(FATAL) << "Unsupported tag " << int(taglen_);
  } else {
    LOG(FATAL) << "Bad tag " << int(taglen_);
  }
}
void CompactObj::SetExternal(size_t offset, size_t sz) {
SetMeta(EXTERNAL_TAG, mask_ & ~kEncMask);
u_.ext_ptr.offset = offset;
u_.ext_ptr.size = sz;
}
// Returns the external-storage location of this object as {offset, size}.
// Must only be called on objects tagged EXTERNAL_TAG (checked in debug builds).
std::pair<size_t, size_t> CompactObj::GetExternalPtr() {
  DCHECK_EQ(EXTERNAL_TAG, taglen_);

  // Read the packed fields by value into locals before building the pair.
  const size_t offset = u_.ext_ptr.offset;
  const size_t len = u_.ext_ptr.size;
  return std::pair<size_t, size_t>(offset, len);
}
void CompactObj::Reset() {
if (HasAllocated()) {
Free();

View File

@ -36,9 +36,15 @@ class RobjWrapper {
void SetString(std::string_view s, std::pmr::memory_resource* mr);
void Init(unsigned type, unsigned encoding, void* inner);
unsigned type() const { return type_; }
unsigned encoding() const { return encoding_; }
void* inner_obj() const { return inner_obj_;}
// Returns the value of the type_ member (presumably the object type passed
// to Init() — confirm against Init's definition).
unsigned type() const {
return type_;
}
// Returns the value of the encoding_ member (presumably the encoding passed
// to Init() — confirm against Init's definition).
unsigned encoding() const {
return encoding_;
}
// Returns the raw inner_obj_ pointer without transferring ownership semantics
// of any kind in this accessor; may be nullptr (callers compare it against
// nullptr).
void* inner_obj() const {
return inner_obj_;
}
std::string_view AsView() const {
return std::string_view{reinterpret_cast<char*>(inner_obj_), sz_};
@ -87,6 +93,7 @@ class CompactObj {
INT_TAG = 17,
SMALL_TAG = 18,
ROBJ_TAG = 19,
EXTERNAL_TAG = 20,
};
enum MaskBit {
@ -100,6 +107,7 @@ class CompactObj {
// while ASCII2_ENC_BIT rounds it up. See DecodedLen implementation for more info.
ASCII1_ENC_BIT = 8,
ASCII2_ENC_BIT = 0x10,
IO_PENDING = 0x20,
};
static constexpr uint8_t kEncMask = ASCII1_ENC_BIT | ASCII2_ENC_BIT;
@ -193,6 +201,18 @@ class CompactObj {
}
}
bool HasIoPending() {
return mask_ & IO_PENDING;
}
// Sets (b == true) or clears (b == false) the IO_PENDING bit in mask_,
// leaving all other mask bits untouched.
void SetIoPending(bool b) {
  if (!b) {
    mask_ &= ~IO_PENDING;
    return;
  }
  mask_ |= IO_PENDING;
}
unsigned Encoding() const;
unsigned ObjType() const;
@ -225,6 +245,15 @@ class CompactObj {
void SetString(std::string_view str);
void GetString(std::string* res) const;
// dest must have at least Size() bytes available
void GetString(char* dest) const;
// True when this object is tagged EXTERNAL_TAG, i.e. it holds an
// offset/size reference rather than the value itself.
bool IsExternal() const {
  return EXTERNAL_TAG == taglen_;
}
void SetExternal(size_t offset, size_t sz);
std::pair<size_t, size_t> GetExternalPtr();
// In case this object is a single blob, returns the number of bytes allocated on the heap
// for that blob. Otherwise returns 0.
size_t MallocUsed() const;
@ -271,6 +300,12 @@ class CompactObj {
mask_ = mask;
}
// Reference to a value kept in external storage. Written by SetExternal()
// and read back by GetExternalPtr(). Declared packed.
struct ExternalPtr {
// Location of the value in external storage (units are not visible here;
// presumably a byte offset — confirm with the storage layer).
size_t offset;
// Length of the externally stored value; 32 bits wide, so sizes passed to
// SetExternal() must fit in uint32_t.
uint32_t size;
// Not referenced by the visible code; presumably filler that rounds the
// struct up to the size of the other union members — TODO confirm.
uint32_t unneeded;
} __attribute__((packed));
// My main data structure. Union of representations.
// RobjWrapper is kInlineLen=16 bytes, so we employ SSO of that size via inline_str.
// In case of int values, we waste 8 bytes. I am assuming it's ok and it's not the data type
@ -281,6 +316,7 @@ class CompactObj {
SmallString small_str;
detail::RobjWrapper r_obj;
int64_t ival __attribute__((packed));
ExternalPtr ext_ptr;
U() : r_obj() {
}