# Functions for textual equivalence
# To be included by gawk scripts

# ========================================================================
# Functions for finding similar words:

function weq_reduce(txt)
{
  # Returns "txt" (a word, or a phrase whose words are delimited by ".")
  # rewritten into a canonical pattern, so that strings yielding the
  # same pattern can be treated as equivalent.
  #
  # The caller must already have mapped every word separator to ".".
  #
  # Global variables consulted:
  #
  #   stripq:   if TRUE, a "q" at the start of a word is deleted
  #   equatekt: if TRUE, every "t" is replaced by "k"
  #   equatepf: if TRUE, every "f" is replaced by "p"

  # Pad with separators so the word-boundary rules below also fire
  # on the first and last word.
  txt = ("." txt ".");

  # Optional folding steps, selected by the global flags.
  if (stripq)
    { gsub(/[.]q/, ".", txt); }
  if (equatekt)
    { gsub(/t/, "k", txt); }
  if (equatepf)
    { gsub(/f/, "p", txt); }

  # "y" becomes "o" at the start of a word, right after "q",
  # and at the end of a word.
  gsub(/[.]y/, ".o", txt);
  gsub(/qy/, "qo", txt);
  gsub(/[y][.]/, "o.", txt);

  # Runs of "e" are rewritten pairwise as "ch", longest run first.
  gsub(/eeee/, "chch", txt);
  gsub(/eee/, "che", txt);
  gsub(/ee/, "ch", txt);

  # Tidy the separators: drop them at both ends (each anchored
  # pattern can match at most once) and squeeze inner runs to one ".".
  sub(/^[.][.]*/, "", txt);
  sub(/[.][.]*$/, "", txt);
  gsub(/[.][.][.]*/, ".", txt);

  return txt;
}

# ========================================================================
# Functions for erasing comments:

function weq_erase_comments(old,   new, i)
{
  # Returns a copy of "old" with every well-formed '{}' comment removed.
  # A well-formed comment starts with "{", ends at the next "}", and
  # contains no other "{" in between.  Text outside comments is copied
  # through unchanged.
  #
  # If a "{" does not start a well-formed comment (it is unclosed, or
  # another "{" appears before its "}"), a diagnostic carrying the
  # current record number NR is written to "/dev/stderr" and the rest
  # of the string, from that "{" onward, is kept verbatim.
  #
  # "new" and "i" are local variables; callers pass only "old".
  # Side effect: calls match(), so RSTART and RLENGTH are overwritten.
  new = "";
  while (length(old) != 0)
    { i = index(old, "{");
      if (i == 0)
        { # No comment opener left: keep the whole remainder.
          new = (new old); old = "";
        }
      else if (i > 1)
        { # Keep the text that precedes the opener; the next
          # iteration then sees "old" starting with "{".
          new = (new substr(old, 1, i-1));
          old = substr(old, i);
        }
      else
        { # "old" starts with "{".  The braces are written as bracket
          # expressions so they are always literal characters and can
          # never be parsed as an interval operator.
          if (match(old, /^[{][^{}]*[}]/))
            { old = substr(old, RLENGTH + 1); }
          else
            { printf "line %d, bad {}-comment\n", NR > "/dev/stderr";
              new = (new old); old = "";
            }
        }
    }
  return new;
}