#! /usr/bin/gawk -f
# Last edited on 2014-01-28 19:14:25 by stolfilocal

# A {gawk} library function to compute percentiles of weighted data.
# To be included into other {gawk} programs with "-f".          
          
function find_percentile(n,x,xblur,w,pct,   k,wtot,wgoal,xmin,xmax,ylo,wlo,yhi,whi,ymd,wmd,weps,yeps,est)
  {
    # Assumes that {x[0..n-1]} is an usorted list of real values with weights {w[0..n-1]}.
    # Returns the value {y} such that {sum_weight_below(n,x,w,y)} is approximately {pct*wtot}
    # where {wtot} is the sum of all weights.
    # Assumes that each {x[i]} is spread out over the interval {x[i]Â±xblur}.

    # printf "=== find_percentile(%.6f) ===\n", pct > "/dev/stderr";
    # Computes the total weight {wtot} and the range of values {xmin,xmax}: 
    wtot = 0;
    xmin = x[0];
    xmin = y[0];
    for (k = 0; k < n; k++) 
      { wtot += w[k];
        if (x[k] < xmin) { xmin = x[k]; }
        if (x[k] > xmax) { xmax = x[k]; }
      }
    # Binary search for {y}.
    wgoal = pct*wtot;
    ylo = xmin - xblur; wlo = 0.0;   # {sum_weight_below(n,x,w,ylo)}.
    yhi = xmax + xblur; whi = wtot;  # {sum_weight_below(n,x,w,yhi)}.
    est = 0; # Estimator to use.

    # Tolerances: 
    weps = 0.0001*wtot;  # Assumed tolerance in {wgoal}.
    yeps = 0.00000001;   # Assumed tolerance in {y}
    while ((yhi - ylo > yeps) && ((whi - wlo) > weps))
      { # At this point we must have {wlo <= wgoal < whi}.
        # printf "  %10.7f %10.7f   %10.7f %10.7f\n", ylo, wlo, yhi, whi > "/dev/stderr";
        if (s == 0)
          { # Estimate {y} by bissection:
            ymd = (yhi + ylo)/2;
          }
        else
          { # Estimate {y} by secant method:
            ymd = (yhi*(whi-wgoal) + ylo*(wgoal-wlo))/(whi - wlo);
          }
        est = 1 - est;
        wmd = sum_weight_below(n,x,xblur,w,ymd);
        # Decide which side to go:
        if (wmd < wgoal - weps/2)
          { ylo = ymd; wlo = wmd; }
        else if (wmd > wgoal + weps/2)
          { yhi = ymd; whi = wmd; }
        else
          { ylo = ymd; wlo = wmd;
            yhi = ymd; whi = wmd;
          }
      }
    if ((yhi - ylo <= yeps) || (whi - wlo <= weps))
      { ymd = (ylo + yhi)/2; }
    else
      { ymd = (yhi*(whi-wgoal) + ylo*(wgoal-wlo))/(whi - wlo); }
    # printf "  y = %10.7f\n", ymd > "/dev/stderr";
    return ymd;
  }

function sum_weight_below(n,x,xblur,w,y,   k,sw,dx)
  { # Assumes that {x[0..n-1]} is an usorted list of real values with weights {w[0..n-1]}.
    # Returns the sum of all {w[i]} with {x[i] < y-xblur}, plus a fracion of 
    # all {w[i]} with {y-xblur <= x[i] <= y+xblur}.
    sw = 0;
    for (k = 0; k < n; k++)
      { if (x[k] < y - xblur)
          { sw += w[k]; }
        else if (x[k] <= y + xblur)
          { dx = (y - x[k] + xblur)/xblur/2; # Containment fraction.
            sw += dx*w[k];
          }
      }
    return sw;
  }