#! /usr/bin/gawk -f
# Last edited on 2002-01-03 22:59:49 by stolfi

BEGIN {
  # Pending exit status; a negative value means "no error so far".
  abort = -1;
  usage = ( \
    "elem-to-stroke-pair \\\n" \
    "  [ -v inField=NUM ] \\\n" \
    "  [ -v erase=BOOL ] \\\n" \
    "  [ -v outField=NUM ] \\\n" \
    "  < INFILE > OUTFILE" \
  );
  
  # Expects the `inField'th field of INFILE to be a 
  # string of elements, each enclosed in braces "{}", already
  # capitalized as in full EVA.  Replaces each element by three
  # pseudo-elements: a left-stroke code, an "X", and a right-stroke
  # code, each inside braces "{}".
  # 
  # If `erase' is set, the input field is erased, otherwise it is
  # preserved. Then inserts the stroke string as the `outField'th
  # field.
  #
  # Defaults: `inField' = 1, `erase' = 0, `outField' = `inField'.
  
  if (inField == "") inField = 1;
  if (erase == "") erase = 0;
  if (outField == "") outField = inField;

  # Both field numbers are used as "$(...)" indices, so they must be
  # positive integers; anything else would silently address "$0" or
  # produce a scrambled output line.
  if ((inField !~ /^[0-9]+$/) || (inField + 0 < 1))
    { arg_error(("bad inField = \"" inField "\"\nusage: " usage)); }
  if ((outField !~ /^[0-9]+$/) || (outField + 0 < 1))
    { arg_error(("bad outField = \"" outField "\"\nusage: " usage)); }
}

# Once an error status has been recorded in "abort", stop processing
# input and leave with that status.
abort >= 0 { exit abort }

# Lines starting with "#" are copied to the output unchanged.
$0 ~ /^#/ { print $0; next }

# Any other non-empty line is a data record: convert the element
# string found in field "inField" and print the record with the result
# inserted as field "outField".
$0 ~ /./ {
  if (inField > NF) { data_error("not enough input fields"); }
  printout(convert_elems_to_stroke_pairs($(inField)), outField, inField, erase);
  next;
}  

function convert_elems_to_stroke_pairs(x,   table, entry, count, k)
{
  # Returns the element string "x" with every "{ELEM}" replaced by
  # "{LEFT}{X}{RIGHT}", where LEFT and RIGHT are the stroke codes of
  # ELEM.  Calls "data_error" if any brace is left unconverted.
  #
  # Each table item is "ELEM:LEFT:RIGHT".
  count = split( \
    "e:E:E i:I:I o:E:D a:E:I y:E:Y q:Q:Q l:I:Y d:E:G " \
    "r:I:R s:E:R n:I:N m:I:M Ch:E:E Sh:E:E k:K:P t:T:P " \
    "CKh:E:E CTh:E:E f:K:J p:T:J CFh:E:E CPh:E:E", \
    table, " " \
  );

  # The strokes are written with "<>" at first, so that any brace
  # still present afterwards marks an unrecognized element.
  for (k = 1; k <= count; k++)
    { split(table[k], entry, ":");
      gsub(("[{]" entry[1] "[}]"), ("<" entry[2] "><X><" entry[3] ">"), x);
    }

  if (x ~ /[{}]/) { data_error(("bad element \"" x "\"")); }

  # Turn the temporary "<>" delimiters into the final "{}":
  gsub(/</, "{", x);
  gsub(/>/, "}", x);
  return x;
}

function printout(mw, ofn, ifn, del,   i)
{
  # Prints the current record with the string "mw" inserted as field
  # number "ofn".  If "del" is true, field "ifn" is deleted from the
  # record first, so "ofn" then refers to the shortened record.
  #
  # Calls "data_error" if the record has fewer than "ifn" fields when
  # deleting, or fewer than "ofn-1" fields when inserting.
  if (del) 
    { if (NF < ifn) 
        { data_error("not enough input fields"); }
      else 
        { # Shift fields "ifn+1..NF" down by one position.  The bound
          # must be "i < NF" so that the last field is copied too;
          # only then is it safe to drop the final slot.
          for (i = ifn; i < NF; i++) { $(i) = $(i+1); } 
          NF--; 
        }
    }
  if (NF < ofn-1) { data_error("not enough output fields"); }
  if (NF == 0)
    { # Nothing is left besides "mw": do not emit a dangling OFS.
      print mw;
    }
  else if (ofn == 1)
    { print mw, $0; }
  else if (ofn == NF+1)
    { print $0, mw; }
  else
    { # "mw" goes between fields "ofn-1" and "ofn".
      for (i = 1; i < ofn; i++) { printf "%s%s", $(i), OFS; }
      printf "%s", mw;
      for (i = ofn; i <= NF; i++) { printf "%s%s", OFS, $(i); }
      printf "\n";
    }
}  

function data_error(msg)
{ 
  # Reports "msg" on standard error, prefixed with the current input
  # line number, records the failure in "abort", and terminates the
  # program with exit status 1.
  abort = 1;
  printf("line %d: %s\n", NR, msg) >> "/dev/stderr";
  exit abort;
}

function arg_error(msg)
{ 
  # Reports "msg" on standard error (no line number), records the
  # failure in "abort", and terminates the program with exit status 1.
  abort = 1;
  printf("%s\n", msg) >> "/dev/stderr";
  exit abort;
}