#!/usr/bin/ruby
# frozen_string_literal: true

# Fibonacci binning.
#
# Reads one value per line from standard input, groups consecutive values into
# bins whose sizes follow the Fibonacci sequence (1, 1, 2, 3, 5, ...) and
# prints, for every bin with a positive sum, a tab-separated pair
# "<midpoint>\t<mean>".
#
# Usage: SCRIPT OFFSET [THRESHOLD]

# Formats the output line for one bin.
#
# g      - left extreme of the bin plus one (the bin starts at abscissa g - 1)
# b      - bin size, used both to locate the midpoint and as divisor of the mean
# v      - sum of the values that fell into the bin
# offset - abscissa associated with the first input value
#
# Returns the string "<midpoint>\t<mean>\n".
def formatbin(g, b, v, offset)
  midpoint = (g - 1) + (b - 1) / 2.0 + offset
  mean = v.to_f / b
  "#{midpoint}\t#{mean}\n"
end

if ARGV.empty?
  # NOTE(review): the heredoc opener and the lead-in sentence (everything up
  # to "<p,m> pairs") were missing from the copy under review and have been
  # reconstructed from what the code does -- confirm against the upstream text.
  $stderr.puts(__FILE__ + " " + <<eof)
OFFSET [THRESHOLD]

Reads a list of values from standard input, one per line, and performs
Fibonacci binning on them: consecutive values are grouped into bins whose
sizes are the Fibonacci numbers. The output is a list of tab-separated
<p,m> pairs, where p is the midpoint of the bin and m is the mean of the
values in the bin.

For example, the first output value is the first input value (F_0=1) at
abscissa OFFSET, the second output value is the second input value (F_1=1)
at abscissa OFFSET + 1, the third output value is the mean of the following
two input values (F_2=2) at abscissa OFFSET + 2.5, the fourth output value
is the mean of the following three input values (F_3=3) at abscissa
OFFSET + 5, and so on.

The OFFSET parameter specifies the abscissa intended to be associated with
the first input value (usually 0 or 1). All following input values are
assumed to be associated with the integers that follow. In particular, the
first two pairs output will be always the first two values associated with
abscissas OFFSET and OFFSET + 1, respectively, and you can use this fact to
check that you are overlapping correctly the original and the binned data.

If specified, THRESHOLD is a lower bound to the smallest value output
(default: 0). It can be used to cut off a part of the tail in monotonically
decreasing data. The first output value strictly smaller than THRESHOLD will
interrupt the computation.

Note that zero output values are not output at all, and that the last output
value is rather unreliable as the bin might extend well beyond the available
data.

For more details: https://vigna.di.unimi.it/papers.php#VigFB
eof
  exit 1
end

offset = ARGV[0].to_i
threshold = ARGV.size > 1 ? ARGV[1].to_f : 0

f = 1 # current Fibonacci number (size of the bin being filled)
g = 1 # next Fibonacci number (left extreme of the current bin plus one)
b = 0 # number of values accumulated so far in the current bin
v = 0 # sum of the values accumulated so far in the current bin

# One [mean, line] entry per bin, in input order. `line` is nil for bins whose
# sum is not positive: they produce no output but still take part in the
# threshold test. Keeping both in one entry keeps means and lines aligned.
bins = []

$stdin.each_line do |line|
  # String#to_f ignores the trailing newline, so nothing has to be stripped.
  # (Unconditionally chopping the last character would eat a digit of a final
  # line that has no newline.)
  v += line.to_f
  b += 1
  next unless b == f

  # Bin [g-1+offset..g-1+f+offset) filled
  bins << [v.to_f / f, v > 0 ? formatbin(g, f, v, offset) : nil]

  # Advance to the next Fibonacci bin and reset the accumulators.
  g, f = g + f, g
  v = b = 0
end

# Residual elements (if any). The partial sum is still divided by the full bin
# size f, which is why this last point can be unreliable.
bins << [v.to_f / f, formatbin(g, f, v, offset)] if v > 0

# Cut off the tail starting at the first bin whose mean is strictly below the
# threshold. Array#first(0) is empty, so a cut at the very first bin prints
# nothing.
cut = bins.index { |mean, _line| mean < threshold } || bins.size
print bins.first(cut).map(&:last).compact.join