mirror of http://github.com/valkey-io/valkey, synced 2024-11-22 09:17:20 +00:00
hll-gnuplot-graph.rb added to plot HyperLogLog error graphs.
parent 307a189900
commit 7f9d289e10

utils/hyperloglog/hll-gnuplot-graph.rb (new file, 68 lines added)

@@ -0,0 +1,68 @@
# hll-gnuplot-graph.rb - Copyright (C) 2014 Salvatore Sanfilippo
# BSD license, See the COPYING file for more information.
#
# This program is suited to output average and maximum errors of
# the Redis HyperLogLog implementation in a format suitable to print
# graphs using gnuplot.
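#
# The two-column output of this script is meant to be redirected to a file
# and plotted; assuming it was saved as "errors.dat" (an arbitrary name,
# nothing in this script depends on it), a gnuplot command along the lines
# of
#
#   plot "errors.dat" using 1:2 with lines
#
# should draw the error curve.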

require 'rubygems'
require 'redis'
require 'digest/sha1'

# Generate an array of [cardinality,relative_error] pairs
# in the 0 - max range with step of 1000*step.
#
# 'r' is the Redis object used to perform the queries.
# 'seed' must be different every time you want a test performed
# with a different set. The function guarantees that if 'seed' is the
# same, exactly the same dataset is used, and when it is different,
# a totally unrelated data set is used (without any common element
# in practice).
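#
# For instance (the error figures below are only indicative, since the
# exact values depend on the hashed dataset):
#
#   run_experiment(r,0,5000,1)
#   # => [[1000, 0.31], [2000, -0.12], [3000, 0.05], [4000, -0.20], [5000, 0.11]]
#
# i.e. a list of [exact_cardinality, signed_percentage_error] pairs.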
def run_experiment(r,seed,max,step)
    r.del('hll')
    i = 0
    samples = []
    while i < max do
        # Add 1000*step elements, generated deterministically from the seed,
        # before taking the next estimate.
        step.times {
            elements = []
            1000.times {
                ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s)
                elements << ele
                i += 1
            }
            r.hlladd('hll',*elements)
        }
        # Compare the HyperLogLog estimate with the exact cardinality 'i' and
        # store the signed relative error as a percentage.
        approx = r.hllcount('hll')
        err = approx-i
        rel_err = 100.to_f*err/i
        samples << [i,rel_err]
    end
    samples
end

def filter_samples(numsets,filter)
    r = Redis.new
    dataset = {}
    # Run the same experiment 'numsets' times, each time with a different
    # seed and therefore with a completely different dataset.
    (0...numsets).each{|i|
        dataset[i] = run_experiment(r,i,100000,1)
    }
    # For every sampled cardinality, reduce the errors observed across the
    # runs to a single value: the largest one (:max) or the mean (:avg).
    dataset[0].each_with_index{|ele,index|
        card,err=ele
        if filter == :max
            (1...numsets).each{|i|
                err = dataset[i][index][1] if err < dataset[i][index][1]
            }
        elsif filter == :avg
            (1...numsets).each{|i|
                err += dataset[i][index][1]
            }
            err /= numsets
        else
            raise "Unknown filter #{filter}"
        end
        # Emit 'cardinality error' pairs, one per line, ready for gnuplot.
        puts "#{card} #{err}"
    }
end
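
# Print, for each sampled cardinality (1000 to 100000, in steps of 1000), the
# largest relative error observed across 100 independent runs. Swapping the
# comment marker between the two calls below prints the average error instead.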
filter_samples(100,:max)
#filter_samples(100,:avg)
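
A reference point when reading the resulting graphs: the theoretical standard
error of a HyperLogLog with m registers is about 1.04/sqrt(m), and the Redis
implementation uses m = 16384 registers, so 1.04/sqrt(16384) is about 0.81%,
which gives a rough scale for the error curves this script plots. A minimal
Ruby sketch of that back-of-the-envelope figure (the 1.04 constant and the
register count are the usual published values, not something read from the
server):

    # Expected standard error of a HyperLogLog estimator with m registers,
    # per the Flajolet et al. analysis: roughly 1.04 / sqrt(m).
    m = 16384                      # registers used by the Redis implementation
    std_err = 1.04 / Math.sqrt(m)  # ~0.0081
    puts "expected standard error: %.2f%%" % (std_err * 100)   # => 0.81%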