#! /usr/bin/perl -w

use strict;

my($usage) = "make-basis NAME_0 NAME_1... NAME_m";

# Reads a set of m+1 (two or more) points of n-space, 
# "p[0..m]".  Outputs a set of m orthogonal unit vectors,
# obtained Gram-Schmidt orthogonalization of the vectors 
# "pt[1]-p[0], p[2]-p[0], ..., p[m] - p[0]". 
#
# The "i"th input vector is read from a file called "NAME_i.pos".
# The "i"th basis vector is written to a file called "NAME_i.dir".

my ($i,$j,$k);
my ($n) = 0;    # Dimension of attribute space
my (%dic) = (); # Maps keyword to coordinate index in [0..$n-1]
my (@key) = (); # Maps coordinate index in [0..$n-1] to keyword

sub main()
{
  my($m) = $#ARGV;
  if ($m < 1) { die("too few arguments"); }
  
  my(@org) = vread($ARGV[0] . ".pos"); # Origin point
  my (@bas) = (); # $bas[$i] is a reference to a point of $n-space.
  for ($i=1; $i<= $#ARGV; $i++)
    { my(@bi) = vread($ARGV[$i] . ".pos");
      for ($k=0; $k<$n; $k++) { $bi[$k] -= $org[$k]; }
      $bas[$i-1] = \@bi;
    }
  @bas = gsortho(\@bas);
  for ($i=1; $i<= $#ARGV; $i++)
    { my($bir) = $bas[$i-1];
      vwrite($ARGV[$i] . ".dir", \@$bir);
    }
}

sub vread($)
{
  my($name) = @_;
  # Reads an $n-vector from file "$name".
  # Each line should have the form COORD KEYWORD
  # where COORD is a real number and KEYWORD a word.
  # The "%dic" table is used to convert keywords to numbers.
  # Keywords that are not in "%dic" get assigned new numbers,
  # incrementing $n.
  
  printf STDERR "reading $name ...";
  open FILE, "<$name";
  my(@vec) = ();
  while (<FILE>)
    { chomp($_);
      my(@fld) = split(/ /, $_);
      if ($#fld != 1) { die("bad format"); }
      my($wd) = $fld[1];
      my($k) = $dic{$wd};
      if (! defined($k)) 
        { $dic{$wd} = $n; $key[$n] = $wd; 
          # printf STDERR " $wd";
          $k = $n; $n++;
        }
      $vec[$k] = $fld[0];
    }
  printf STDERR "\n";
  return(@vec);
}

main();
exit(0);

sub vwrite($\@)
{
  my($name,$vr) = @_;
  # Write the "$n"-vector "@$vr" to file "$name".
  # Each line will have the form COORD KEYWORD
  # where COORD is an element of "@$vr" and KEYWORD the 
  # corresponding element from "@keywd".
  
  printf STDERR "writing $name\n";
  open FILE, ">$name";
  my($k);
  for($k = 0; $k < $n; $k++)
    { my($vk) = $$vr[$k]; 
      printf FILE "%+7.5f %s\n", $vk, $key[$k];
    }
  close FILE;
}

sub vdot(\@\@)
{
  my($ur,$vr) = @_;
  # Returns the dot product of @$ur and @$vr.
  my($s) = 0;
  my($k);
  for ($k=0; $k<$n; $k++)
    { my($uk) = $$ur[$k];
      my($vk) = $$vr[$k];
      if (defined($uk) && defined($vk)) { $s += $uk*$vk; }
    }
  return($s);
}

sub vlensqr(\@)
{
  my($vr) = @_;
  # Returns the Euclidean length of @$vr, squared.
  my($s) = 0;
  my($k);
  for ($k=0; $k<$n; $k++)
    { my($vk) = $$vr[$k];
      if (defined($vk)) { $s += $vk*$vk; }
    }
  return($s);
}

sub vnew()
{
  # returns a zero $n-vector.
  my(@u) = ();
  my($k);
  for ($k=0; $k<$n; $k++) { $u[$k] = 0; }
  return(@u);
}
  
sub vlen(\@)
{
  my($vr) = @_;
  # Returns the Euclidean length of @$vr.
  return(sqrt(vlensqr(@$vr)));
}

sub vdir(\@)
{
  my($vr) = @_;
  # Returns a length-normalized copy of @$vr.
  my($vL) = vlen(@$vr);
  my(@u) = ();
  my($k);
  for ($k=0; $k<$n; $k++)
    { my($vk) = $$vr[$k];
      $u[$k] = (defined($vk) ? $vk/$vL : 0);
    }
  return(@u);
}

sub gsortho(\@)
{
  my($ar) = @_;
  # Expects $ar to be a ref to an array of refs to $n-vectors,
  # indexed [0..$m-1]. Performs Gram-Schmidt orthogonalization 
  # on those vectors, and returns the result in the same format.
  my(@b) = ();  # The new set of vectors.
  my($i,$j,$k);
  my($m) = $#$ar + 1;
  for ($i=0; $i<$m; $i++)
    { my($air) = $$ar[$i];
      my(@u) = @$air;
      for ($j=0; $j<$i; $j++)
        { my($bjr) = $b[$j];
          my($s) = vdot(@u,@$bjr);
          for ($k=0; $k<$n; $k++)
            { $u[$k] -= $s*$$bjr[$k]; }
        }
      @u = vdir(@u);
      $b[$i] = \@u;
    }
  return(@b);
}