# Last edited on 2012-05-05 19:47:20 by stolfilocal

# To be included in factor-field-general etc.
# Factors a Chinese pinyin text with disambiguating suffixes 
# by placing "{}" around each letter, plus a single "{}"
# around the tone and disambiguation suffix.
# A tone of 4 is treated like an omitted tone. If the tone and
# suffix are both omitted then the corresponding element is omitted too.

# factor_text(x) -- factors one pinyin syllable into "{}"-wrapped elements.
#
# Argument:
#   x   pinyin letters (ASCII letters, "ü", "Ü", "ê", "Ê", each optionally
#       followed by ":" or "^"), then an optional tone digit 1-5, then an
#       optional "." and disambiguating suffix.
# The remaining parameters (y,e,ts,t,s) are local variables; e is unused.
#
# Returns the letters, each wrapped in "{}", followed by one "{}" element
# holding the tone and suffix.  Tone 4 is treated as an omitted tone, so
# "ma4.2" and "ma.2" both yield "{m}{a}{.2}"; when neither a tone nor a
# suffix remains, the trailing element is left out.
#
# Calls data_error() (defined elsewhere) on a bad tone code or bad letters.
# Requires gawk, because of gensub().
function factor_text(x,   y,e,ts,t,s)
{
  # Split off the trailing run of digits and dots (tone + suffix).
  # The match needs one preceding non-digit, non-dot character, so a string
  # made only of digits and dots is left whole and fails the letter check.
  if (match(x, /[^0-9.][0-9.]*$/))
    { ts = substr(x, RSTART+1); x = substr(x, 1, RSTART);
      # The first "." separates the tone from the suffix:
      if (match(ts, /[.]/))
        { t = substr(ts, 1,RSTART-1); s = substr(ts, RSTART+1); }
      else
        { t = ts; s = ""; }
    }
  else
    { ts = ""; t = ""; s = ""; }
  # Format checks (x ts reassembles the original text for the message):
  if (t !~ /^[1-5]?$/) { data_error(("bad tone code \"" x ts "\"")); }
  if (x !~ /^(([a-zA-Z]|ü|Ü|ê|Ê)[:\^]?)+$/) { data_error(("bad pinyin \"" x ts "\"")); }
  # Omit tone 4 (the most common one).  The tone is always the leading
  # part of ts, so drop it from there too; otherwise a syllable with
  # tone 4 and a suffix would still show the "4" in its last element.
  if (t == "4") { t = ""; ts = substr(ts, 2); }
  # Make each letter into one element,
  # but beware of the letters "u:" and "e^"
  y = gensub(/(([a-zA-Z]|ü|Ü|ê|Ê)[:\^]?)/, "{\\1}", "g", x);
  # Append the tone/suffix element only if something is left to show:
  if ((t != "") || (s != "")) { y = ( y "{" ts "}" ); }
  return y;
}