Added the SRANDMEMBER key <count> variant.

SRANDMEMBER called with just the key argument can just return a single random element from a Redis Set. However many users need to return multiple unique elements from a Set, this is not a trivial problem to handle in the client side, and for truly good performance a C implementation was required. After many requests for this feature it was finally implemented. The problem implementing this command is the strategy to follow when the number of elements the user asks for is near to the number of elements that are already inside the set. In this case asking random elements to the dictionary API, and trying to add it to a temporary set, may result into an extremely poor performance, as most add operations will be wasted on duplicated elements. For this reason this implementation uses a different strategy in this case: the Set is copied, and random elements are returned to reach the specified count. The code actually uses 4 different algorithms optimized for the different cases. If the count is negative, the command changes behavior and allows for duplicated elements in the returned subset.
2024-11-22 18:54:58 +00:00 · 2012-09-19 21:29:40 +02:00 · 2012-09-19 21:29:40 +02:00 · be90c803e3
commit be90c803e3
parent d310fbedab
2 changed files with 155 additions and 1 deletions
--- a/src/redis.c
+++ b/src/redis.c
@ -152,7 +152,7 @@ struct redisCommand redisCommandTable[] = {
    {"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0},
    {"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0},
    {"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0},
-    {"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0},
+    {"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
    {"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
    {"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
    {"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
--- a/src/t_set.c
+++ b/src/t_set.c
@ -4,6 +4,8 @@
 * Set Commands
 *----------------------------------------------------------------------------*/

+void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op);
+
 /* Factory method to return a set that *can* hold "value". When the object has
 * an integer-encodable value, an intset will be returned. Otherwise a regular
 * hash table. */
@ -360,11 +362,163 @@ void spopCommand(redisClient *c) {
    server.dirty++;
 }

+/* handle the "SRANDMEMBER key <count>" variant. The normal version of the
+ * command is handled by the srandmemberCommand() function itself. */
+
+/* How many times bigger should be the set compared to the requested size
+ * for us to don't use the "remove elements" strategy? Read later in the
+ * implementation for more info. */
+#define SRANDMEMBER_SUB_STRATEGY_MUL 3
+
+void srandmemberWithCountCommand(redisClient *c) {
+    long l;
+    unsigned long count, size;
+    int uniq = 1;
+    robj *set, *ele;
+    int64_t llele;
+    int encoding;
+
+    dict *d;
+
+    if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != REDIS_OK) return;
+    if (l >= 0) {
+        count = (unsigned) l;
+    } else {
+        /* A negative count means: return the same elements multiple times
+         * (i.e. don't remove the extracted element after every extraction). */
+        count = -l;
+        uniq = 0;
+    }
+
+    if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
+        == NULL || checkType(c,set,REDIS_SET)) return;
+    size = setTypeSize(set);
+
+    /* If count is zero, serve it ASAP to avoid special cases later. */
+    if (count == 0) {
+        addReply(c,shared.emptymultibulk);
+        return;
+    }
+
+    /* CASE 1: The count was negative, so the extraction method is just:
+     * "return N random elements" sampling the whole set every time.
+     * This case is trivial and can be served without auxiliary data
+     * structures. */
+    if (!uniq) {
+        addReplyMultiBulkLen(c,count);
+        while(count--) {
+            encoding = setTypeRandomElement(set,&ele,&llele);
+            if (encoding == REDIS_ENCODING_INTSET) {
+                addReplyBulkLongLong(c,llele);
+            } else {
+                addReplyBulk(c,ele);
+            }
+        }
+        return;
+    }
+
+    /* CASE 2:
+     * The number of requested elements is greater than the number of
+     * elements inside the set: simply return the whole set. */
+    if (count >= size) {
+        sunionDiffGenericCommand(c,c->argv,c->argc-1,NULL,REDIS_OP_UNION);
+        return;
+    }
+
+    /* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
+    d = dictCreate(&setDictType,NULL);
+
+    /* CASE 3:
+     * The number of elements inside the set is not greater than
+     * SRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements.
+     * In this case we create a set from scratch with all the elements, and
+     * subtract random elements to reach the requested number of elements.
+     *
+     * This is done because if the number of requsted elements is just
+     * a bit less than the number of elements in the set, the natural approach
+     * used into CASE 3 is highly inefficient. */
+    if (count*SRANDMEMBER_SUB_STRATEGY_MUL > size) {
+        setTypeIterator *si;
+
+        /* Add all the elements into the temporary dictionary. */
+        si = setTypeInitIterator(set);
+        while((encoding = setTypeNext(si,&ele,&llele)) != -1) {
+            int retval;
+
+            if (encoding == REDIS_ENCODING_INTSET) {
+                retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
+            } else if (ele->encoding == REDIS_ENCODING_RAW) {
+                retval = dictAdd(d,dupStringObject(ele),NULL);
+            } else if (ele->encoding == REDIS_ENCODING_INT) {
+                retval = dictAdd(d,
+                    createStringObjectFromLongLong((long)ele->ptr),NULL);
+            }
+            redisAssert(retval == DICT_OK);
+        }
+        setTypeReleaseIterator(si);
+        redisAssert(dictSize(d) == size);
+
+        /* Remove random elements to reach the right count. */
+        while(size > count) {
+            dictEntry *de;
+
+            de = dictGetRandomKey(d);
+            dictDelete(d,dictGetKey(de));
+            size--;
+        }
+    }
+    
+    /* CASE 4: We have a big set compared to the requested number of elements.
+     * In this case we can simply get random elements from the set and add
+     * to the temporary set, trying to eventually get enough unique elements
+     * to reach the specified count. */
+    else {
+        unsigned long added = 0;
+
+        while(added < count) {
+            int retval;
+
+            encoding = setTypeRandomElement(set,&ele,&llele);
+            if (encoding == REDIS_ENCODING_INTSET) {
+                retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
+            } else if (ele->encoding == REDIS_ENCODING_RAW) {
+                retval = dictAdd(d,dupStringObject(ele),NULL);
+            } else if (ele->encoding == REDIS_ENCODING_INT) {
+                retval = dictAdd(d,
+                    createStringObjectFromLongLong((long)ele->ptr),NULL);
+            }
+
+            if (retval == DICT_OK) added++;
+        }
+    }
+
+    /* CASE 3 & 4: send the result to the user. */
+    {
+        dictIterator *di;
+        dictEntry *de;
+
+        addReplyMultiBulkLen(c,count);
+        di = dictGetIterator(d);
+        while((de = dictNext(di)) != NULL)
+            addReplyBulk(c,dictGetKey(de));
+        dictReleaseIterator(di);
+        dictRelease(d);
+    }
+}
+
 void srandmemberCommand(redisClient *c) {
    robj *set, *ele;
    int64_t llele;
    int encoding;

+    if (c->argc == 3) {
+        srandmemberWithCountCommand(c);
+        return;
+    } else if (c->argc > 3) {
+        addReply(c,shared.syntaxerr);
+        return;
+    }
+
    if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
        checkType(c,set,REDIS_SET)) return;