mirror of
http://github.com/valkey-io/valkey
synced 2024-11-21 16:46:15 +00:00
hll-gnuplot-graph.rb added to plot HyperLogLog error graphs.
This commit is contained in:
parent
307a189900
commit
7f9d289e10
68
utils/hyperloglog/hll-gnuplot-graph.rb
Normal file
68
utils/hyperloglog/hll-gnuplot-graph.rb
Normal file
@ -0,0 +1,68 @@
|
||||
# hll-gnuplot-graph.rb - Copyright (C) 2014 Salvatore Sanfilippo
|
||||
# BSD license, See the COPYING file for more information.
|
||||
#
|
||||
# This program is suited to output average and maximum errors of
|
||||
# the Redis HyperLogLog implementation in a format suitable to print
|
||||
# graphs using gnuplot.
|
||||
|
||||
require 'rubygems'
|
||||
require 'redis'
|
||||
require 'digest/sha1'
|
||||
|
||||
# Generate an array of [cardinality, relative_error] pairs, one pair every
# 1000*step elements added, until at least 'max' elements have been added.
# The relative error is a signed percentage: 100 * (estimate - real) / real.
#
# 'r' is the Redis object used to perform the queries. The 'hll' key is
# deleted first, then filled in batches of 1000 elements.
# 'seed' must be different every time you want a test performed
# with a different set. If 'seed' is the same, exactly the same dataset is
# used; when it is different, a different dataset is used.
# 'max' is the cardinality at which sampling stops.
# 'step' is the number of 1000-element batches added between two samples;
# it must be greater than zero, otherwise ArgumentError is raised.
#
# NOTE(review): released Redis versions are believed to expose these
# commands as PFADD / PFCOUNT rather than HLLADD / HLLCOUNT - confirm which
# names the target server supports.
def run_experiment(r,seed,max,step)
  # With a non positive step the counter never advances and the loop below
  # would never terminate.
  raise ArgumentError, "step must be greater than zero" unless step > 0

  r.del('hll')
  i = 0
  samples = []
  while i < max
    step.times do
      # The ':' delimiter keeps (counter, seed) pairs unambiguous. Plain
      # concatenation maps e.g. (11, 0) and (1, 10) to the same "110"
      # string, so different seeds would share elements.
      batch = (i...(i + 1000)).map { |n| Digest::SHA1.hexdigest("#{n}:#{seed}") }
      r.hlladd('hll', *batch)
      i += 1000
    end
    approx = r.hllcount('hll')
    samples << [i, 100.0 * (approx - i) / i]
  end
  samples
end
|
||||
|
||||
# Run 'numsets' experiments (seeds 0...numsets, each up to a cardinality of
# 100000 with a step of 1) against a Redis instance created with Redis.new,
# then print one "cardinality error" line per sample to standard output.
#
# 'filter' selects how the errors observed at the same sample index across
# all the sets are combined:
#   :max - the greatest relative error.
#   :avg - the sum of the relative errors divided by 'numsets'.
#
# Raises ArgumentError if 'numsets' is less than 1, and RuntimeError if
# 'filter' is unknown. Both checks run before any experiment is started, so
# a bad argument does not cost a full run against Redis.
#
# NOTE(review): the :max comparison is signed, so a large negative error
# never wins over a small positive one - confirm whether the absolute error
# was intended.
def filter_samples(numsets,filter)
  raise "Unknown filter #{filter}" unless [:max, :avg].include?(filter)
  raise ArgumentError, "numsets must be at least 1" unless numsets >= 1

  r = Redis.new
  dataset = (0...numsets).map { |seed| run_experiment(r, seed, 100000, 1) }

  # The cardinalities are taken from the first set; the other sets are
  # read at the same index.
  dataset[0].each_with_index do |(card, _), index|
    errors = dataset.map { |samples| samples[index][1] }
    err = if filter == :max
            errors.inject { |best, e| best < e ? e : best }
          else
            errors.inject(:+) / numsets
          end
    puts "#{card} #{err}"
  end
end
|
||||
|
||||
# Script entry point: combine 100 data sets with the :max filter and print
# the result. Use the commented call below for the :avg filter instead.
filter_samples(100, :max)
# filter_samples(100, :avg)
|
Loading…
Reference in New Issue
Block a user