#!/usr/bin/ruby
#
# Fibonacci binning: reads one value per line from standard input, groups the
# values into consecutive bins whose sizes are the Fibonacci numbers
# 1, 1, 2, 3, 5, ... and prints one "midpoint<TAB>mean" line per bin.
#
# Usage: <script> OFFSET [THRESHOLD]

if ARGV.empty?
  # NOTE(review): the opening of this usage text (synopsis and first sentence)
  # was reconstructed; compare with the upstream script if exact wording matters.
  $stderr.puts(__FILE__ + " " + <<eof
OFFSET [THRESHOLD]

Reads a list of values from standard input, one per line, and writes to
standard output their Fibonacci binning as a list of tab-separated
<p,m> pairs, where p is the midpoint of the bin and m is the mean of the
values in the bin.

For example, the first output value is the first input value (F_0=1) at
abscissa OFFSET, the second output value is the second input value (F_1=1)
at abscissa OFFSET + 1, the third output value is the mean of the following
two input values (F_2=2) at abscissa OFFSET + 2.5, the fourth output value
is the mean of the following three input values (F_3=3) at abscissa
OFFSET + 5, and so on.

The OFFSET parameter specifies the abscissa intended to be associated with
the first input value (usually 0 or 1). All following input values are
assumed to be associated with the integers that follow. In particular, the
first two pairs output will be always the first two values associated with
abscissas OFFSET and OFFSET + 1, respectively, and you can use this fact to
check that you are overlapping correctly the original and the binned data.

If specified, THRESHOLD is a lower bound to the smallest value output
(default: 0). It can be used to cut off a part of the tail in monotonically
decreasing data. The first output value strictly smaller than THRESHOLD will
interrupt the computation. Note that zero output values are not output at
all.

For more details: http://vigna.di.unimi.it/papers.php#VigFB
eof
  )
  exit 1
end

offset = ARGV[0].to_i
threshold = ARGV.size > 1 ? ARGV[1].to_f : 0

# Formats one output line for a bin.
#
# @param g [Integer] left extreme of the bin plus one (before applying offset)
# @param b [Integer] width of the bin
# @param v [Numeric] sum of the values in the bin
# @param p [Object] unused; kept so the signature stays backward-compatible
# @param offset [Integer] abscissa associated with the first input value
# @return [String] "midpoint<TAB>mean<NEWLINE>"
def formatbin(g, b, v, p, offset)
  midpoint = (g - 1) + (b - 1) / 2.0 + offset
  "#{midpoint}\t#{v.to_f / b}\n"
end

# Groups values into Fibonacci-sized bins.
#
# @param values [Enumerable] items responding to to_f (numbers or text lines)
# @param offset [Integer] abscissa associated with the first value
# @return [Array<Array>] one [line, mean] pair per bin, in input order; line is
#   nil for bins whose sum is not positive (such bins are never printed)
def fibonacci_bins(values, offset)
  f = 1 # current Fibonacci number (size of the bin being filled)
  g = 1 # next Fibonacci number (left extreme of the current bin plus one)
  b = 0 # number of values read into the current bin
  v = 0 # sum of the values in the current bin
  bins = []

  values.each do |value|
    # String#to_f ignores a trailing newline, so nothing has to be chopped off
    # (chopping would eat the last digit of a final line lacking a newline).
    v += value.to_f
    b += 1
    next unless b == f

    # Bin [g-1+offset..g-1+f+offset) filled. Keep the mean even when no line
    # is emitted, so that lines and means stay aligned bin by bin.
    bins << [v > 0 ? formatbin(g, f, v, nil, offset) : nil, v.to_f / f]

    # Advance to the next Fibonacci number.
    g, f = g + f, g
    v = b = 0
  end

  # Residual elements (if any).
  # NOTE(review): the partial bin is averaged over the full width f, not over
  # the b values actually read, exactly as before -- confirm this is intended.
  bins << [formatbin(g, f, v, nil, offset), v.to_f / f] if v > 0

  bins
end

# Cuts off the tail starting at the first bin whose mean is below threshold.
#
# @param bins [Array<Array>] [line, mean] pairs as built by fibonacci_bins
# @param threshold [Numeric] lower bound for the means that are output
# @return [Array<String>] the lines to print (empty if the first bin is
#   already below threshold)
def cut_tail(bins, threshold)
  cut = bins.index { |_line, mean| mean < threshold } || bins.size
  bins.first(cut).map(&:first).compact
end

# print, not printf: the output is data, not a format string.
print cut_tail(fibonacci_bins($stdin.each_line, offset), threshold).join