#! /usr/bin/gawk -f
# Last edited on 1999-12-08 23:26:28 by stolfi

# Reads a stream of words, factors each non-blank input line into
# `Grove elements', and prints the elements separated by line breaks.

# Records that are empty or contain only spaces are skipped.
/^ *$/ { next }

# Every remaining record is factored and printed.
/./ {
  # Factor the record; each element comes back wrapped in braces,
  # with "_" standing in for empty factors.
  $0 = grove_factor_text($0)

  # Drop the dummy factors: brace groups holding only underscores
  # first, then any underscores that are left.
  gsub(/{[_]*}/, "")
  gsub(/[_][_]*/, "")

  # Replace every single brace by a "}\n{" pair, so that a line break
  # appears at each element boundary ...
  gsub(/[}{]/, "}\n{")

  # ... then strip all braces, leaving only the text and line breaks.
  gsub(/[{}]*/, "")

  # No special case for an empty result: it is printed as an empty
  # line (presumably to be discarded by a later stage -- confirm).
  print
}

function error(msg)
{
  # Reports "msg" on "/dev/stderr", prefixed with the current record
  # number NR, sets the global flag "abort", and terminates the
  # program with exit status 1.

  abort = 1;
  printf("line %d: %s\n", NR, msg) >> "/dev/stderr";
  exit 1;
}

function grove_factor_text(x,   y,e,pairs,npairs,i)
{
  # Splits the text "x" into Grove-style elements and returns it with
  # every element wrapped in braces.  Runs of the separator characters
  # "-", "=", "/", ",", "." and " " are copied through outside braces.
  # Each run of letters is preceded by a "{q}" slot when it starts with
  # "q", or by the dummy slot "{_}" otherwise.  Any "e"/"i" prefixes
  # and "h" suffixes stay attached to the letter they surround.
  #
  # Only "x" is meant to be passed by the caller; the remaining
  # parameters are local variables.  Calls error() -- which ends the
  # program -- when "x" contains a character outside the accepted set.

  # Strip "{...}" comments and "!" fillers before validating.
  gsub(/{[^{}]*}/, "", x);
  gsub(/[!]/, "", x);
  if (match(x, /[^-=\/,. *?%a-z]/))
    { error(("invalid char in word \"" x "\"")); }

  # Folding table: pairs[odd] is a letter group, pairs[odd+1] the
  # single capital that stands for it while parsing.  The order is
  # significant for folding: "ch"/"sh" first, then the three-letter
  # platformed groups, then their two-letter prefixes.
  npairs = split("ch C sh S" \
                 " ckh K cth T cfh F cph P" \
                 " ikh G ith H ifh M iph N" \
                 " ck U ct V cf X cp Y" \
                 " ik A it B if I ip J", pairs, " ");

  for (i = 1; i < npairs; i += 2)
    { gsub(pairs[i], pairs[i+1], x); }

  y = "";
  while (length(x) > 0)
    {
      # A run of separators is copied through as is.
      if (match(x, /^[-=\/,. ]+/))
        {
          y = ( y substr(x, 1, RLENGTH) );
          x = substr(x, RLENGTH+1);
          continue;
        }

      # Start of a letter run: emit the leading <q>, or a dummy slot.
      if (match(x, /^[q]/))
        { e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1); }
      else
        { e = "_"; }
      y = ( y "{" e "}" );

      # Peel off one letter at a time, together with its [ie] prefixes
      # and [h] suffixes, until a separator or the end of the text.
      while (match(x, /^[ie]*[^-=\/,. ][h]*/))
        {
          y = ( y "{" substr(x, 1, RLENGTH) "}" );
          x = substr(x, RLENGTH+1);
        }
    }

  # Undo the folding.  Each capital expands to lowercase letters only,
  # so the order of these substitutions does not affect the result.
  for (i = 1; i < npairs; i += 2)
    { gsub(pairs[i+1], pairs[i], y); }

  return y;
}