mirror of http://github.com/valkey-io/valkey, synced 2024-11-22 09:17:20 +00:00
hll-gnuplot-graph.rb added to plot HyperLogLog error graphs.
parent 307a189900
commit 7f9d289e10

utils/hyperloglog/hll-gnuplot-graph.rb (new file, 68 lines added)

@@ -0,0 +1,68 @@
# hll-gnuplot-graph.rb - Copyright (C) 2014 Salvatore Sanfilippo
# BSD license, See the COPYING file for more information.
#
# This program is suited to output average and maximum errors of
# the Redis HyperLogLog implementation in a format suitable to print
# graphs using gnuplot.
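#
# The two-column output of this script is meant to be redirected to a file
# and plotted; assuming it was saved as "errors.dat" (an arbitrary name,
# nothing in this script depends on it), a gnuplot command along the lines
# of
#
#   plot "errors.dat" using 1:2 with lines
#
# should draw the error curve.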

require 'rubygems'
require 'redis'
require 'digest/sha1'

# Generate an array of [cardinality,relative_error] pairs
# in the 0 - max range with step of 1000*step.
#
# 'r' is the Redis object used to perform the queries.
# 'seed' must be different every time you want a test performed
# with a different set. The function guarantees that if 'seed' is the
# same, exactly the same dataset is used, and when it is different,
# a totally unrelated data set is used (without any common element
# in practice).
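#
# For instance (the error figures below are only indicative, since the
# exact values depend on the hashed dataset):
#
#   run_experiment(r,0,5000,1)
#   # => [[1000, 0.31], [2000, -0.12], [3000, 0.05], [4000, -0.20], [5000, 0.11]]
#
# i.e. a list of [exact_cardinality, signed_percentage_error] pairs.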
def run_experiment(r,seed,max,step)
    r.del('hll')
    i = 0
    samples = []
    while i < max do
        # Add 1000*step elements, generated deterministically from the seed,
        # before taking the next estimate.
        step.times {
            elements = []
            1000.times {
                ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s)
                elements << ele
                i += 1
            }
            r.hlladd('hll',*elements)
        }
        # Compare the HyperLogLog estimate with the exact cardinality 'i' and
        # store the signed relative error as a percentage.
        approx = r.hllcount('hll')
        err = approx-i
        rel_err = 100.to_f*err/i
        samples << [i,rel_err]
    end
    samples
end

def filter_samples(numsets,filter)
    r = Redis.new
    dataset = {}
    # Run the same experiment 'numsets' times, each time with a different
    # seed and therefore with a completely different dataset.
    (0...numsets).each{|i|
        dataset[i] = run_experiment(r,i,100000,1)
    }
    # For every sampled cardinality, reduce the errors observed across the
    # runs to a single value: the largest one (:max) or the mean (:avg).
    dataset[0].each_with_index{|ele,index|
        card,err=ele
        if filter == :max
            (1...numsets).each{|i|
                err = dataset[i][index][1] if err < dataset[i][index][1]
            }
        elsif filter == :avg
            (1...numsets).each{|i|
                err += dataset[i][index][1]
            }
            err /= numsets
        else
            raise "Unknown filter #{filter}"
        end
        # Emit 'cardinality error' pairs, one per line, ready for gnuplot.
        puts "#{card} #{err}"
    }
end
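
# Print, for each sampled cardinality (1000 to 100000, in steps of 1000), the
# largest relative error observed across 100 independent runs. Swapping the
# comment marker between the two calls below prints the average error instead.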
filter_samples(100,:max)
#filter_samples(100,:avg)
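
A reference point when reading the resulting graphs: the theoretical standard
error of a HyperLogLog with m registers is about 1.04/sqrt(m), and the Redis
implementation uses m = 16384 registers, so 1.04/sqrt(16384) is about 0.81%,
which gives a rough scale for the error curves this script plots. A minimal
Ruby sketch of that back-of-the-envelope figure (the 1.04 constant and the
register count are the usual published values, not something read from the
server):

    # Expected standard error of a HyperLogLog estimator with m registers,
    # per the Flajolet et al. analysis: roughly 1.04 / sqrt(m).
    m = 16384                      # registers used by the Redis implementation
    std_err = 1.04 / Math.sqrt(m)  # ~0.0081
    puts "expected standard error: %.2f%%" % (std_err * 100)   # => 0.81%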