#! /usr/bin/gawk -f 
# Last edited on 1999-07-28 01:43:47 by stolfi

BEGIN{
  abort = -1;
  usage = "diff-dot-product ORG POS VEC";
  #
  # ORG, POS and VEC are names of files.  Each line in them has the
  # format COORD LABEL, where COORD is a real number and LABEL is any
  # word.  All three files must have the same number of lines, and the
  # LABEL on line i must be the same in all three.  The COORD field of
  # the ith line is interpreted as the ith coordinate of a vector in
  # some high-dimensional space.
  #
  # After reading all three files, prints to stdout a single number
  # (format "%+8.5f", no trailing newline): the sum of
  # (POS[i] - ORG[i])*VEC[i] over all coordinates of the three vectors.
  #
  # Any usage, format, length-mismatch or read problem is reported on
  # stderr through error(), which terminates with exit status 1.
  
  if (ARGC != 4) { error(("ARGC = " ARGC " - usage: " usage)); }
  org = ARGV[1]; if (org == "") { error(("usage: " usage)); }
  pos = ARGV[2]; if (pos == "") { error(("usage: " usage)); }
  vec = ARGV[3]; if (vec == "") { error(("usage: " usage)); }
 
  N = 0;
  prod = 0;
  # The files are read in lockstep, one line of each per iteration.
  # Success is judged from the return value of getline (1 = record
  # read, 0 = end of file, -1 = error) rather than from ERRNO, whose
  # initial value is not "0" in current gawk releases.
  while ((status = (getline < org)) > 0)
    { N++;
      if (NF != 2) { error((org ", line " N ": bad format")); }
      # Save the ORG fields now: the next getline overwrites $0.
      w = $2;
      oi = $1;
      pi = next_coord(pos, N, w);
      vi = next_coord(vec, N, w);
      prod += (pi-oi)*vi;
    }
  if (status < 0) { error((org ": " ERRNO)); }
  # ORG is exhausted; POS and VEC must be exhausted too, otherwise the
  # three vectors do not have the same dimension.
  if ((getline < pos) > 0) { error((pos ": has more lines than " org)); }
  if ((getline < vec) > 0) { error((vec ": has more lines than " org)); }
  close(org);
  close(pos);
  close(vec);
  printf "%+8.5f",  prod;
}

function next_coord(file, n, label,    status)
{
  # Reads the next line of FILE (expected to be line number N) and
  # returns its COORD field.  Calls error() if the read fails, if FILE
  # has no more lines, if the line does not have exactly two fields,
  # or if its LABEL field differs from LABEL.
  status = (getline < file);
  if (status < 0) { error((file ": " ERRNO)); }
  # On end of file getline leaves $0 untouched, so without this check
  # the previous file's line would be silently reused.
  if (status == 0) { error((file ", line " n ": unexpected end of file")); }
  if ((NF != 2) || (label != $2)) { error((file ", line " n ": bad format")); }
  return $1;
}

function error(msg)
{
  # Fatal-error exit: flags the failure in the global `abort`, writes
  # MSG followed by a newline to stderr, and terminates the program
  # with exit status 1.
  # NOTE(review): nothing in the visible code reads `abort`; presumably
  # it is kept for END rules that must skip their work - confirm.
  abort = 1;
  printf("%s\n", msg) > "/dev/stderr";
  exit(1);
}