#! /n/gnu/bin/gawk -f
# Last edited on 2000-02-01 22:24:27 by stolfi

# Reads a stream of element-factored words, writes the words 
# prefixed by word type

# Ignore input lines that are empty or contain only spaces.
$0 ~ /^ *$/ { next }
# Main rule: label one element-factored word per input line.
#
# The input word is expected to have its factors wrapped in braces, with
# "_" marking dummy (empty) factors.  The output is "<type>-<word>", where
# <word> is the input with braces and dummies removed and <type> is:
#
#   "ps"            word with no mantle or core letters (crust only);
#   [p]mn[s]        word with mantle letters but no core letters;
#   [p][m]c[n][s]   word with at least one core letter.
#
# Letter classes, as used by the patterns below:
#   core   = k t p f
#   mantle = ch sh e c h   ("ch"/"sh" are handled as single letters C/S)
#   crust  = everything else
#
# "p"/"s" are emitted when the crust prefix/suffix is non-empty; "m"/"n"
# when there is mantle material between the crust prefix/suffix and the
# core.  Words that fail the format check abort the run via error().
/./{

  # Delete dummy (empty) factors, then any remaining "_" fillers.
  # Braces are written as bracket expressions because a bare "{" is the
  # interval operator in POSIX EREs (enabled by default in gawk >= 4.0);
  # this also matches the style of the other patterns in this rule.

  gsub(/[{][_]*[}]/, "", $0);
  gsub(/[_]+/, "", $0);

  # Map "ch" and "sh" to "C" and "S" so each is a single character:

  gsub(/ch/, "C", $0);
  gsub(/sh/, "S", $0);

  # Classify the word and build its type label.
  # Note: the measured lengths include brace characters; they are only
  # compared against zero or against each other.  A failed match() yields
  # RLENGTH = -1, i.e. a length of -2, which counts as "empty".

  if ($0 ~ /^[^CSktpfech]*$/)
    { # Crust-only word:
      label = "ps";
    }
  else if ($0 ~ /^[^ktpf]*$/)
    { # Empty core, nonempty mantle:

      # Must have at least one non-crust element inside braces,
      # and the word must end with a closing brace:
      if ($0 !~ /[{].*[CSktpfech].*[}]$/)  { error(("bad format = «" $0 "»")); }

      # Measure crust prefix and suffix:
      match($0, /^([^CSktpfech]*)[{]/); plen = RLENGTH-1;
      match($0, /[}]([^CSktpfech]*)$/); slen = RLENGTH-1;

      label = "";
      if (plen > 0) { label = (label "p"); }
      label = (label "mn");
      if (slen > 0) { label = (label "s"); }
    }
  else
    { # Non-empty core

      # Must have at least one core element inside braces,
      # and the word must end with a closing brace:
      if ($0 !~ /[{].*[ktpf].*[}]$/)  { error(("bad format = «" $0 "»")); }

      # Measure crust+mantle prefix, crust prefix,
      # mantle+crust suffix, and crust suffix:
      match($0, /^([^ktpf]*)[{]/); pmlen = RLENGTH-1;
      match($0, /^([^CSktpfech]*)[{]/); plen = RLENGTH-1;
      match($0, /[}]([^ktpf]*)$/); nslen = RLENGTH-1;
      match($0, /[}]([^CSktpfech]*)$/); slen = RLENGTH-1;

      label = "";
      if (plen > 0)     { label = (label "p"); }
      if (pmlen > plen) { label = (label "m"); }   # mantle between crust prefix and core
      label = (label "c");
      if (nslen > slen) { label = (label "n"); }   # mantle between core and crust suffix
      if (slen > 0)     { label = (label "s"); }
    }

  $0 = ( label "-" $0 );

  # Delete braces

  gsub(/[{}]/, "", $0);

  # Restore "ch" and "sh":

  gsub(/C/, "ch", $0);
  gsub(/S/, "sh", $0);

  print $0;
}

# error(msg): report a fatal problem with the current input record.
#
# Writes "line <NR>: <msg>" to standard error, sets the global flag
# `abort` to 1, and terminates the program with exit status 1.
# NOTE(review): nothing visible in this file reads `abort`; presumably it
# is meant for an END rule to detect an aborted run -- confirm.
function error(msg)
{
  abort = 1;
  printf("line %d: %s\n", NR, msg) >> "/dev/stderr";
  exit 1;
}