#!/usr/bin/ruby
# With no command-line arguments, print the usage message on standard error
# and exit with status 1.
if ARGV.empty?
  # NOTE(review): the heredoc opener and the first sentences of the help text
  # (everything before "<p,m> pairs") were lost in this copy of the file and
  # have been reconstructed from what the script does -- confirm the wording
  # against the upstream version.
  $stderr.puts(__FILE__ + " " + <<eof
OFFSET [THRESHOLD]
Reads a list of values from standard input, one per line, groups them into
consecutive bins whose sizes are the Fibonacci numbers (F_0=1, F_1=1, F_2=2,
F_3=3, ...), and outputs a list of tab-separated <p,m> pairs, where p is the
midpoint of the bin and m is the mean of the values in the bin. For example,
the first output value is the first input value (F_0=1) at abscissa OFFSET, the
second output value is the second input value (F_1=1) at abscissa OFFSET +
1, the third output value is the mean of the following two input values
(F_2=2) at abscissa OFFSET + 2.5, the fourth output value is the mean of
the following three input values (F_3=3) at abscissa OFFSET + 5, and so on.
The OFFSET parameter specifies the abscissa intended to be associated with
the first input value (usually 0 or 1). All following input values are
assumed to be associated with the integers that follow. In particular,
the first two pairs output will be always the first two values associated
with abscissas OFFSET and OFFSET + 1, respectively, and you can use this
fact to check that you are overlapping correctly the original and the
binned data.
If specified, THRESHOLD is a lower bound to the smallest value output
(default: 0). It can be used to cut off a part of the tail in
monotonically decreasing data. The first output value strictly smaller
than THRESHOLD will interrupt the computation.
Note that zero output values are not output at all, and that the last
output value is rather unreliable as the bin might extend well beyond
the available data.
For more details: https://vigna.di.unimi.it/papers.php#VigFB
eof
  )
  exit 1
end
# First argument: abscissa associated with the first input value
# (String#to_i, so non-numeric text silently becomes 0).
offset = ARGV.first.to_i
# Optional second argument: ordinates strictly below it cut off the output.
# Without a second argument the threshold is 0.
threshold = if ARGV.length > 1
  ARGV[1].to_f
else
  0
end
# Formats the output line for one bin.
#
# g      - left extreme of the bin plus one (offset not yet applied)
# b      - bin size; locates the midpoint and is the divisor of the sum
# v      - sum of the values in the bin
# offset - shift added to the abscissa
#
# Returns the string "<midpoint>\t<mean>\n".
def formatbin(g, b, v, offset)
  midpoint = (g - 1) + (b - 1) / 2.0 + offset
  mean = v.to_f / b
  "#{midpoint}\t#{mean}\n"
end
# Main pass: read one value per line from standard input, accumulate the
# values into consecutive bins of Fibonacci size (1, 1, 2, 3, 5, ...), then
# print one line per bin up to (and excluding) the first bin whose mean is
# strictly below the threshold.
f = 1 # Current Fibonacci number (size of the bin being filled)
g = 1 # Next Fibonacci number (left extreme of the current bin plus one)
b = 0 # Number of values accumulated so far in the current bin
v = 0 # Sum of the values accumulated so far in the current bin

# One [mean, line] pair per bin, in input order. The line is nil for a bin
# that must not be printed (non-positive sum). Recording the pair anyway keeps
# every mean paired with its own line, so the cut-off below cannot slice the
# output at a position belonging to a different bin.
bins = []

$stdin.each_line do |l|
  # chomp rather than chop: chop always drops the last character, which
  # would eat a digit when the final line has no trailing newline.
  v += l.chomp.to_f
  b += 1
  next unless b == f # Bin [g-1+offset..g-1+f+offset) not filled yet

  bins << [v.to_f / f, v > 0 ? formatbin(g, f, v, offset) : nil]
  # Move to the next Fibonacci pair and start an empty bin.
  g, f = g + f, g
  v = b = 0
end

# Residual elements (if any) of a partially filled bin. The sum is still
# divided by the full bin size f, so this last point can be unreliable.
bins << [v.to_f / f, formatbin(g, f, v, offset)] if v > 0

# Cut off the tail at the first bin whose mean is strictly below the
# threshold. Taking the first `cut` pairs prints nothing when the very first
# bin is already below the threshold (a 0..-1 range would select everything).
cut = bins.index { |mean, _line| mean < threshold } || bins.size
# print, not printf: the text is data, not a format string.
print bins.first(cut).map(&:last).compact.join