# Last edited on 2012-05-05 19:52:58 by stolfilocal

# To be included in factor-field-general etc.
# Factors a Chinese pinyin text by placing "{}" around 
# the initial and final consonants (if present), each 
# medial vowel separately, and the tone/disambiguation suffix.
# If there is no suffix, inserts an `{.}' dummy element.

function factor_text(x,   out,tok)
{
  # Splits the pinyin word "x" into elements, wraps each element in
  # "{}", and returns the concatenation.  The elements are, in order:
  # an optional initial consonant, zero or more vowels, an optional
  # final consonant, and a tone/disambiguation element that always
  # contains a `.'.  Calls data_error() if part of "x" is left over.
  # The parameters "out" and "tok" are local work variables.
  #
  # Every pattern used here is anchored at the start of "x", so a
  # successful match() always starts at position 1 and RLENGTH alone
  # says how many characters to peel off the front.

  out = "";

  # Initial consonant.  A word that begins with "ng", or that consists
  # of just "n", is skipped here so that the final-consonant step
  # below picks it up instead.  "y" and "w" are deliberately absent
  # from the consonant set; they are handled with the vowels.
  if (!match(x, /^[nN]([gG]|$)/) &&
      match(x, /^([cszCSZ][hH]|[b-df-hj-np-txzB-DF-HJ-NP-TXZ])/)) {
    tok = substr(x, 1, RLENGTH);
    x = substr(x, RLENGTH + 1);
    out = out "{" tok "}";
  }

  # Vowels, one element per iteration.  "yi" and "wu" each count as a
  # single vowel, and "u:" / "e^" are accepted as alternative
  # spellings of "ü" / "ê".
  while (match(x, /^([yY][iI]|[wW][uU]|[eE][\^]?|[uU][:]?|[aioywAIOYW]|ü|ê|Ü|Ê)/)) {
    tok = substr(x, 1, RLENGTH);
    x = substr(x, RLENGTH + 1);
    out = out "{" tok "}";
  }

  # Final consonant: "n", "ng" or "r".
  if (match(x, /^([nN][gG]?|[rR])/)) {
    tok = substr(x, 1, RLENGTH);
    x = substr(x, RLENGTH + 1);
    out = out "{" tok "}";
  }

  # Tone and disambiguation suffix, emitted as one element: an
  # optional tone digit, a `.', and any digits that follow it.  When
  # the remaining text has no `.', one is appended first, so a word
  # without a suffix still gets a `{.}' (or `{N.}') element.
  if (index(x, ".") == 0) {
    x = x ".";
  }
  if (match(x, /^[0-9]?[.][0-9]*/)) {
    tok = substr(x, 1, RLENGTH);
    x = substr(x, RLENGTH + 1);
    out = out "{" tok "}";
  }

  # Anything still left in "x" could not be parsed.
  if (length(x) > 0) {
    data_error(("bad pinyin word \"" out x "\""));
  }

  return out;
}