# Last edited on 2002-01-18 15:11:22 by stolfi

# Factors a text by placing "{}" around each basic glyph.
# Assumes that the field is encoded in  EVA. 
# To be included in factor-field-general etc.

function factor_text(x,   y,e)
{
  # Factors the EVA text {x} into basic glyphs and returns the
  # result, with each glyph wrapped in braces.  Runs of the
  # separator characters "-", "=", "/", ",", "." and " " are
  # copied to the output as they are, without braces.
  #
  # Font capitalization is ignored on input and added to the
  # output: the text is first uncapitalized and then recapitalized
  # by {uncapitalize_ligatures} and {capitalize_ligatures}.
  #
  # Calls {data_error} if the cleaned text contains a character
  # outside "-=/,. *?%a-zA-Z", or if the text at the current
  # position does not start with a separator or a recognizable
  # glyph.  Note that "*" and "%" pass the character check but are
  # matched by neither pattern, so they end up in the second error.
  #
  # {y} and {e} are local variables; callers pass only {x}.
  
  # Clean up any junk: brace-delimited strings and every "!".
  # The braces are written as bracket expressions because the
  # meaning of an unescaped "{" at the start of an ERE is not
  # defined by POSIX (it may be taken as an interval expression).
  gsub(/[{][^{}]*[}]/, "", x);
  gsub(/[!]/, "", x);
  
  # Make sure that the input has ligature capitalizations:
  x = uncapitalize_ligatures(x);
  x = capitalize_ligatures(x);
  
  if (match(x, /[^-=\/,. *?%a-zA-Z]/)) 
    { data_error(("invalid char in word \"" x "\"")); }
  
  # Split word into basic glyphs:
  y = "";
  while (x != "") 
    { if (match(x, /^[-=\/,. ]+/))
        { # A run of separators: copy it unchanged.
          e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
          y = ( y e );
        }
      else if (match(x, /^([A-Z]*[a-z]|[A-Z]+[?][A-Z?]*[h]|[?][KTPFH]*[h]|[A-Z]*[?])/))
        { # A basic glyph: capital prefix plus its final element.
          e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
          y = ( y "{" e "}");
        }
      else
        { data_error(("cannot factor text at \"" x "\""));
          # Reached only if {data_error} returns.  RLENGTH is -1 after
          # a failed match, so consume one character explicitly here;
          # otherwise {x} would never shrink and the loop would not end.
          e = substr(x, 1, 1); x = substr(x, 2);
          y = ( y "{" e "}");
        }
    }
  return y;
}

function capitalize_ligatures(w,   lows,ups,k,lo,up)
{
  # Returns a copy of {w} where the leading elements of each
  # ligature are capitalized, leaving only the final "h" (or "?")
  # in lowercase.  Expects {w} to be free of ligature capitals.
  # {lows}, {ups}, {k}, {lo}, {up} are local variables.

  # Two-element ligatures:
  gsub(/ch/, "Ch", w);
  gsub(/sh/, "Sh", w);

  # Ligatures with a middle letter "k", "t", "p" or "f", preceded
  # by "c", "i" or an unknown ("?") element:
  lows = "ktpf";
  ups  = "KTPF";
  for (k = 1; k <= length(lows); k++)
    { lo = substr(lows, k, 1);
      up = substr(ups, k, 1);
      gsub(("c" lo "h"),   ("C" up "h"), w);
      gsub(("i" lo "h"),   ("I" up "h"), w);
      gsub(("[?]" lo "h"), ("?" up "h"), w);
    }

  # Ligatures whose middle element is unknown:
  gsub(/c[?]h/, "C?h", w);
  gsub(/i[?]h/, "I?h", w);

  # A "c" followed by an unknown element:
  gsub(/c[?]/, "C?", w);

  # Doubled "h".  This must stay last: it has to see the "h"
  # left in lowercase by the substitutions above.
  gsub(/hh/, "Hh", w);

  return w;
}

function uncapitalize_ligatures(w,   caps,smalls,k)
{
  # Returns a copy of {w} with ligature capitalization removed:
  # each of the capitals C, S, I, H, K, T, P, F, Y, O, A is
  # replaced by its lowercase form.  Any other character,
  # including the remaining capital letters, is left untouched.
  # {caps}, {smalls}, {k} are local variables.

  caps   = "CSIHKTPFYOA";
  smalls = "csihktpfyoa";
  for (k = 1; k <= length(caps); k++)
    { gsub(substr(caps, k, 1), substr(smalls, k, 1), w); }
  return w;
}