# Last edited on 1999-12-10 08:04:13 by stolfi

# Usage: "gawk -f factor-elements.gawk ..."

function factor_text(x,   out,tok,n)
{
  # Splits the EVA text "x" into its QOKOKOKOF elements and returns
  # the marked-up string.
  #
  # Output layout, per word:
  #   "{q}" or "{_}"   the optional initial <q> slot;
  #   then, alternating,
  #     an "[aoy]" group copied bare ("_" when the slot is empty), and
  #     a main letter (with its [ic] prefix and [he] suffix) in braces.
  #   The word ends at the first position where no main letter follows.
  # Runs of the separators "-", "=", "/", ",", "." and " " are copied
  # through unchanged.  Example: "qokedy" yields "{q}o{ke}_{d}y".
  #
  # Before parsing, "{...}" comments and "!" fillers are deleted from
  # "x".  Any remaining character outside the separators, "*", "?",
  # "%" and "a"-"z" is reported through error() (defined elsewhere).
  #
  # "out", "tok" and "n" are local work variables, not arguments.

  # Strip inline comments and filler marks, then validate what is left.
  gsub(/{[^{}]*}/, "", x);
  gsub(/[!]/, "", x);
  if (match(x, /[^-=\/,. *?%a-z]/)) { error(("invalid char in word \"" x "\"")); }

  # Fold the digraphs "ch", "sh", "ee" into the single letters "C",
  # "S", "E" so that each behaves as one main letter while parsing.
  # Upper case cannot clash with the input, which was just validated
  # to contain no capitals.
  gsub(/ch/, "C", x);
  gsub(/sh/, "S", x);
  gsub(/ee/, "E", x);

  out = "";

  while (x != "")
    {
      # A run of separators is passed through verbatim.
      if (match(x, /^[-=\/,. ]+/))
        {
          out = ( out substr(x, 1, RLENGTH) );
          x = substr(x, RLENGTH + 1);
          continue;
        }

      # Start of a word: the <q> slot, "_" when there is no "q".
      if (match(x, /^[q]/))
        { tok = substr(x, 1, RLENGTH); x = substr(x, RLENGTH + 1); }
      else
        { tok = "_"; }
      out = ( out "{" tok "}" );

      for (;;)
        {
          # "[aoy]" slot.  A successful match is at least one character
          # long, so n == 0 means "no group here" and leaves "x" intact.
          n = ( match(x, /^[ic]*[aoy][aoyeh]*/) ? RLENGTH : 0 );
          tok = ( n > 0 ? substr(x, 1, n) : "_" );
          x = substr(x, n + 1);
          out = ( out tok );

          # Main-letter slot; its absence terminates the current word.
          if (! match(x, /^[ic]*[^-=\/,. aoy][he]*/)) { break; }
          out = ( out "{" substr(x, 1, RLENGTH) "}" );
          x = substr(x, RLENGTH + 1);
        }
    }

  # Expand the folded letters back into their digraphs.
  gsub(/E/, "ee", out);
  gsub(/C/, "ch", out);
  gsub(/S/, "sh", out);

  return out;
}