# Last edited on 2000-09-21 16:51:10 by stolfi
# To be included in gawk scripts

function extract_gallows_attr(word,   w)
{
  # Counts the gallows letters ("k", "t", "p", "f") present in "word".
  # A word holding any character outside "a".."z" is rejected with -1.

  # Guard: bail out on invalid words before doing any work.
  if (word ~ /[^a-z]/) { return(-1); }

  # Delete every non-gallows character from a scratch copy; 
  # the length of what survives is the gallows count.
  w = word;
  gsub(/[^ktpf]/, "", w);
  return(length(w));
}

function extract_mantle_attr(word,ct2,ctplat,ctke,   w)
{ 
  # Returns the number of table characters in "word", or -1 if the word
  # contains bad characters (anything other than "a".."z").
  # If "ct2=1", counts "ch", "sh", "ee" as 2 tablechars each.
  # If "ctplat=1", counts gallows platforms as a "ch".
  # If "ctke=1", counts single "e" after gallows/tablechar as tablechar.
  #
  # The count is obtained by rewriting a private copy "w" of the word:
  # uppercase "C" (which cannot occur in a valid word) marks each
  # two-letter tablechar, lowercase "e" marks each single one, and
  # everything else is finally discarded. The input record and its
  # fields are not touched.
  
  if (word ~ /[^a-z]/)
    { # printf "%20s %20s %2d\n", word, "?", -1 > "/dev/stderr";
      return (-1);
    }
  else
    { w = word;
      # Collapse the four gallows letters to a single representative "k".
      gsub(/[ktpf]/,"k",w);
      # Treat "i"+gallows+"h" like "c"+gallows+"h", so that the platform
      # rule below applies to it. (This must act on "w", not on a field
      # of the current input record.)
      gsub(/[i]k[h]/,"ckh",w);
      # Platform gallows ("c", gallows, any number of "h"): either keep
      # the platform as one "C" before the gallows, or drop it.
      if (ctplat) 
        { gsub(/[c]k[h]*/,"Ck",w); }
      else
        { gsub(/[c]k[h]*/,"k",w); }
      # Runs of "e" right after a gallows, longest run first so that the
      # shorter patterns only see what the longer ones left behind.
      if (ctke) 
        { # Every "e" is counted: pairs become "C", an odd one stays "e".
          gsub(/k[e][e][e][e]/,"kCC",w);
          gsub(/k[e][e][e]/,"keC",w);
          gsub(/k[e][e]/,"kC",w);
          gsub(/k[e]/,"ke",w);
        }
      else
        { # Pairs become "C"; an odd "e" next to the gallows is dropped.
          gsub(/k[e][e][e][e]/,"kCC",w);
          gsub(/k[e][e][e]/,"kC",w);
          gsub(/k[e][e]/,"kC",w);
          gsub(/k[e]/,"k",w);
        }
      # Remaining two-letter groups become "C" ...
      gsub(/[cs]h/,"C",w); gsub(/ee/,"C",w);
      gsub(/se/,"C",w); gsub(/es/,"C",w);
      # ... and any leftover "c", "e" or "h" becomes a single "e".
      gsub(/[ceh]/,"e",w); 
      # Unless "ctke" is set, a single "e" right after a "C" is dropped.
      if (! ctke) { gsub(/Ce/,"C",w); }
      # Expand each "C" to its weight: two marks if "ct2", else one.
      if (ct2) { gsub(/C/,"ee",w); } else { gsub(/C/,"e",w); }
      # Only the "e" marks remain; their number is the result.
      gsub(/[^e]/,"",w);
      # printf "%20s %20s %2d\n", word, w, length(w) > "/dev/stderr";
      return(length(w));
    }
}

function extract_e_d_attr(word,   w)
{
  # Tells whether "word" contains the digraph "ed":
  #   -1  the word is invalid (has a character outside "a".."z");
  #    1  the word is valid and "e" is immediately followed by "d" somewhere;
  #    0  the word is valid and has no such pair.

  # Guard: invalid words are rejected first.
  if (word ~ /[^a-z]/) { return(-1); }

  return((word ~ /[e][d]/) ? 1 : 0);
}

function extract_random_attr(word,prob,   w)
{
  # Returns -1 if the word is invalid (has a character outside "a".."z"),
  # else returns a random bit which is 1 with probability "prob".
  # In particular the result is always 0 when "prob <= 0" and always 1
  # when "prob >= 1". 
  # Draws one number from "rand()" per valid word; this function does 
  # not call "srand()", so seeding is up to the including script.
  if (word ~ /[^a-z]/)
    { return(-1); }
  else 
    { # "rand()" may return exactly 0 but never 1, so the comparison has
      # to be strict: with "<=" a "prob" of 0 could still yield a 1.
      return (rand() < prob);
    } 
}