Redis 对象系统
1. 介绍
redis中基于双端链表、简单动态字符串(sds)、字典、跳跃表、整数集合、压缩列表、快速列表等等数据结构实现了一个对象系统,并且实现了5种不同的对象,每种对象都使用了至少一种前面的数据结构,优化对象在不同场合下的使用效率。
2. 对象的系统的实现
redis 3.2版本。所有注释在github中:对象系统的注释
2.1 对象的结构
对象结构robj功能:
- 为5种不同的对象类型提供同一的表示形式。
- 为不同的对象适用于不同的场景,支持同一种对象类型采用多种的数据结构方式。
- 支持引用计数,实现对象共享机制。
- 记录对象的访问时间,便于删除对象。
对象结构定义在redis 3.2版本的server.h
#define LRU_BITS 24
#define LRU_CLOCK_MAX ((1<<LRU_BITS)-1) /* Max value of obj->lru */
#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
typedef struct redisObject {
//对象的数据类型,占4bits,共5种类型
unsigned type:4;
//对象的编码类型,占4bits,共10种类型
unsigned encoding:4;
//least recently used
//实用LRU算法计算相对server.lruclock的LRU时间
unsigned lru:LRU_BITS; /* lru time (relative to server.lruclock) */
//引用计数
int refcount;
//指向底层数据实现的指针
void *ptr;
} robj;
//type的占5种类型:
/* Object types */
#define OBJ_STRING 0 //字符串对象
#define OBJ_LIST 1 //列表对象
#define OBJ_SET 2 //集合对象
#define OBJ_ZSET 3 //有序集合对象
#define OBJ_HASH 4 //哈希对象
/* Objects encoding. Some kind of objects like Strings and Hashes can be
* internally represented in multiple ways. The 'encoding' field of the object
* is set to one of this fields for this object. */
// encoding 的10种类型
#define OBJ_ENCODING_RAW 0 /* Raw representation */ //原始表示方式,字符串对象是简单动态字符串
#define OBJ_ENCODING_INT 1 /* Encoded as integer */ //long类型的整数
#define OBJ_ENCODING_HT 2 /* Encoded as hash table */ //字典
#define OBJ_ENCODING_ZIPMAP 3 /* Encoded as zipmap */ //不在使用
#define OBJ_ENCODING_LINKEDLIST 4 /* Encoded as regular linked list */ //双端链表,不在使用
#define OBJ_ENCODING_ZIPLIST 5 /* Encoded as ziplist */ //压缩列表
#define OBJ_ENCODING_INTSET 6 /* Encoded as intset */ //整数集合
#define OBJ_ENCODING_SKIPLIST 7 /* Encoded as skiplist */ //跳跃表和字典
#define OBJ_ENCODING_EMBSTR 8 /* Embedded sds string encoding */ //embstr编码的简单动态字符串
#define OBJ_ENCODING_QUICKLIST 9 /* Encoded as linked list of ziplists */ //由压缩列表组成的双向列表-->快速列表
2.2 字符串对象的底层实现类型
编码—encoding | 对象—ptr |
---|---|
OBJ_ENCODING_RAW | 简单动态字符串实现的字符串对象 |
OBJ_ENCODING_INT | 整数值实现的字符串对象 |
OBJ_ENCODING_EMBSTR | embstr编码的简单动态字符串实现的字符串对象 |
2.3 列表对象的底层实现类型
编码—encoding | 对象—ptr |
---|---|
OBJ_ENCODING_QUICKLIST | 快速列表实现的列表对象 |
OBJ_ENCODING_ZIPLIST | 压缩列表实现的列表对象 |
2.4 集合对象的底层实现类型
编码—encoding | 对象—ptr |
---|---|
OBJ_ENCODING_HT | 字典实现的集合对象 |
OBJ_ENCODING_INTSET | 整数集合实现的集合对象 |
2.5 哈希对象的底层实现类型
编码—encoding | 对象—ptr |
---|---|
OBJ_ENCODING_ZIPLIST | 压缩列表实现的哈希对象 |
OBJ_ENCODING_HT | 字典实现的哈希对象 |
2.6 有序集合对象的底层实现类型
编码—encoding | 对象—ptr |
---|---|
OBJ_ENCODING_SKIPLIST | 跳跃表和字典 实现的有序集合对象 |
OBJ_ENCODING_ZIPLIST | 压缩列表实现的有序集合对象 |
3. 对象系统的重要操作
3.1创建一个字符串对象
- 编码为OBJ_ENCODING_RAW
robj *createObject(int type, void *ptr) { //创建一个对象
robj *o = zmalloc(sizeof(*o)); //分配空间
o->type = type; //设置对象类型
o->encoding = OBJ_ENCODING_RAW; //设置编码方式为OBJ_ENCODING_RAW
o->ptr = ptr; //设置
o->refcount = 1; //引用计数为1
/* Set the LRU to the current lruclock (minutes resolution). */
o->lru = LRU_CLOCK(); //计算设置当前LRU时间
return o;
}
- 编码为OBJ_ENCODING_EMBSTR
/* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
* an object where the sds string is actually an unmodifiable string
* allocated in the same chunk as the object itself. */
//创建一个embstr编码的字符串对象
robj *createEmbeddedStringObject(const char *ptr, size_t len) {
robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1); //分配空间
struct sdshdr8 *sh = (void*)(o+1); //o+1刚好就是struct sdshdr8的地址
o->type = OBJ_STRING; //类型为字符串对象
o->encoding = OBJ_ENCODING_EMBSTR; //设置编码类型OBJ_ENCODING_EMBSTR
o->ptr = sh+1; //指向分配的sds对象,分配的len+1的空间首地址
o->refcount = 1; //设置引用计数
o->lru = LRU_CLOCK(); //计算设置当前LRU时间
sh->len = len; //设置字符串长度
sh->alloc = len; //设置最大容量
sh->flags = SDS_TYPE_8; //设置sds的类型
if (ptr) { //如果传了字符串参数
memcpy(sh->buf,ptr,len); //将传进来的ptr保存到对象中
sh->buf[len] = '\0'; //结束符标志
} else {
memset(sh->buf,0,len+1); //否则将对象的空间初始化为0
}
return o;
}
- 两种字符串对象编码方式的区别
/* Create a string object with EMBSTR encoding if it is smaller than
* REIDS_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
* used.
*
* The current limit of 39 is chosen so that the biggest string object
* we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
//sdshdr8的大小为3个字节,加上1个结束符共4个字节
//redisObject的大小为16个字节
//redis使用jemalloc内存分配器,且jemalloc会分配8,16,32,64等字节的内存
//一个embstr固定的大小为16+3+1 = 20个字节,因此一个最大的embstr字符串为64-20 = 44字节
#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
// 创建字符串对象,根据长度使用不同的编码类型
// createRawStringObject和createEmbeddedStringObject的区别是:
// createRawStringObject是当字符串长度大于44字节时,robj结构和sdshdr结构在内存上是分开的
// createEmbeddedStringObject是当字符串长度小于等于44字节时,robj结构和sdshdr结构在内存上是连续的
robj *createStringObject(const char *ptr, size_t len) {
if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
return createEmbeddedStringObject(ptr,len);
else
return createRawStringObject(ptr,len);
}
3.2 字符串对象编码的优化
/* Try to encode a string object in order to save space */
//尝试优化字符串对象的编码方式以节约空间
robj *tryObjectEncoding(robj *o) {
long value;
sds s = o->ptr;
size_t len;
/* Make sure this is a string object, the only type we encode
* in this function. Other types use encoded memory efficient
* representations but are handled by the commands implementing
* the type. */
serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
/* We try some specialized encoding only for objects that are
* RAW or EMBSTR encoded, in other words objects that are still
* in represented by an actually array of chars. */
//如果字符串对象的编码类型为RAW或EMBSTR时,才对其重新编码
if (!sdsEncodedObject(o)) return o;
/* It's not safe to encode shared objects: shared objects can be shared
* everywhere in the "object space" of Redis and may end in places where
* they are not handled. We handle them only as values in the keyspace. */
//如果refcount大于1,则说明对象的ptr指向的值是共享的,不对共享对象进行编码
if (o->refcount > 1) return o;
/* Check if we can represent this string as a long integer.
* Note that we are sure that a string larger than 20 chars is not
* representable as a 32 nor 64 bit integer. */
len = sdslen(s); //获得字符串s的长度
//如果len小于等于20,表示符合long long可以表示的范围,且可以转换为long类型的字符串进行编码
if (len <= 20 && string2l(s,len,&value)) {
/* This object is encodable as a long. Try to use a shared object.
* Note that we avoid using shared integers when maxmemory is used
* because every object needs to have a private LRU field for the LRU
* algorithm to work well. */
if ((server.maxmemory == 0 ||
(server.maxmemory_policy != MAXMEMORY_VOLATILE_LRU &&
server.maxmemory_policy != MAXMEMORY_ALLKEYS_LRU)) &&
value >= 0 &&
value < OBJ_SHARED_INTEGERS) //如果value处于共享整数的范围内
{
decrRefCount(o); //原对象的引用计数减1,释放对象
incrRefCount(shared.integers[value]); //增加共享对象的引用计数
return shared.integers[value]; //返回一个编码为整数的字符串对象
} else { //如果不处于共享整数的范围
if (o->encoding == OBJ_ENCODING_RAW) sdsfree(o->ptr); //释放编码为OBJ_ENCODING_RAW的对象
o->encoding = OBJ_ENCODING_INT; //转换为OBJ_ENCODING_INT编码
o->ptr = (void*) value; //指针ptr指向value对象
return o;
}
}
/* If the string is small and is still RAW encoded,
* try the EMBSTR encoding which is more efficient.
* In this representation the object and the SDS string are allocated
* in the same chunk of memory to save space and cache misses. */
//如果len小于44,44是最大的编码为EMBSTR类型的字符串对象长度
if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
robj *emb;
if (o->encoding == OBJ_ENCODING_EMBSTR) return o; //将RAW对象转换为OBJ_ENCODING_EMBSTR编码类型
emb = createEmbeddedStringObject(s,sdslen(s)); //创建一个编码类型为OBJ_ENCODING_EMBSTR的字符串对象
decrRefCount(o); //释放之前的对象
return emb;
}
/* We can't encode the object...
*
* Do the last try, and at least optimize the SDS string inside
* the string object to require little space, in case there
* is more than 10% of free space at the end of the SDS string.
*
* We do that only for relatively large strings as this branch
* is only entered if the length of the string is greater than
* OBJ_ENCODING_EMBSTR_SIZE_LIMIT. */
//无法进行编码,但是如果s的未使用的空间大于使用空间的10分之1
if (o->encoding == OBJ_ENCODING_RAW &&
sdsavail(s) > len/10)
{
o->ptr = sdsRemoveFreeSpace(o->ptr); //释放所有的未使用空间
}
/* Return the original object. */
return o;
}
3.3 引用计数管理对象
//引用计数加1
void incrRefCount(robj *o) {
o->refcount++;
}
//引用计数减1
void decrRefCount(robj *o) {
if (o->refcount <= 0) serverPanic("decrRefCount against refcount <= 0");
//当引用对象等于1时,在操作引用计数减1,直接释放对象的ptr和对象空间
if (o->refcount == 1) {
switch(o->type) {
case OBJ_STRING: freeStringObject(o); break;
case OBJ_LIST: freeListObject(o); break;
case OBJ_SET: freeSetObject(o); break;
case OBJ_ZSET: freeZsetObject(o); break;
case OBJ_HASH: freeHashObject(o); break;
default: serverPanic("Unknown object type"); break;
}
zfree(o);
} else {
o->refcount--; //否则减1
}
}
3.4 对象的复制,创建的对象非共享
//返回 复制的o对象的副本的地址,且创建的对象非共享
robj *dupStringObject(robj *o) {
robj *d;
serverAssert(o->type == OBJ_STRING); //一定是OBJ_STRING类型
switch(o->encoding) { //根据不同的编码类型
case OBJ_ENCODING_RAW:
return createRawStringObject(o->ptr,sdslen(o->ptr)); //创建的对象非共享
case OBJ_ENCODING_EMBSTR:
return createEmbeddedStringObject(o->ptr,sdslen(o->ptr)); //创建的对象非共享
case OBJ_ENCODING_INT: //整数编码类型
d = createObject(OBJ_STRING, NULL); //即使是共享整数范围内的整数,创建的对象也是非共享的
d->encoding = OBJ_ENCODING_INT;
d->ptr = o->ptr;
return d;
default:
serverPanic("Wrong encoding.");
break;
}
}
3.5 对象的解码操作
将保存的整数值解码成字符串对象返回回来。
/* Get a decoded version of an encoded object (returned as a new object).
* If the object is already raw-encoded just increment the ref count. */
//将对象是整型的解码为字符串并返回,如果是字符串编码则直接返回输入对象,只需增加引用计数
robj *getDecodedObject(robj *o) {
robj *dec;
if (sdsEncodedObject(o)) { //如果是OBJ_ENCODING_RAW或OBJ_ENCODING_EMBSTR类型的对象
incrRefCount(o); //增加引用计数,返回一个共享的对象
return o;
}
if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) { //如果是整数对象
char buf[32];
ll2string(buf,32,(long)o->ptr); //将整数转换为字符串
dec = createStringObject(buf,strlen(buf)); //创建一个字符串对象
return dec;
} else {
serverPanic("Unknown encoding type");
}
}
3.6 其他操作
所有注释在github中:对象系统的注释