#! /usr/bin/gawk -f
# Last edited on 2004-02-02 06:17:35 by stolfi

# A filter to be run on the main.raw to turn it into 
# the starting version of main.org

# Rule 1: rewrite every well-formed "@chinword{GB}{PINYIN}..." line as
# "@chinword{GB}{PINYIN'}", where PINYIN' is pyspread(PINYIN).  Anything
# after the second closing brace is discarded, as before.
#
# A line that begins with "@chinword" but lacks the two brace groups is
# copied through unchanged, with a warning on stderr.  Previously such a
# line was wrapped whole into a bogus "@chinword{...}{...}" record.
# Braces are written as [{] and [}] so they can never be read as
# interval-expression delimiters.
/^[@]chinword/ {
  if (match($0, /^[@]chinword[{](.*)[}][{](.*)[}].*$/, cw)) {
    gb = cw[1];
    py = pyspread(cw[2]);
    printf "@chinword{%s}{%s}\n", gb, py;
  } else {
    printf("%s:%d: malformed @chinword line left unchanged\n", \
           (FILENAME == "" ? "-" : FILENAME), FNR) > "/dev/stderr";
    print;
  }
  next;
}
# Rule 2: copy every other non-empty line verbatim (empty lines are dropped).
/./ { print; next; }

function pyspread(s) {
  # Normalizes the separators of a pinyin string S and returns the result.
  # The string is pushed through a fixed sequence of regex rewrites; the
  # order matters, because "=" and "_" serve as temporary stand-ins for
  # the original "-" and space characters while new "-" are inserted.

  # A run of characters other than "-", space, ASCII letters, ":" and the
  # digits 0-5, when followed by a space, gets a space on each side:
  s = gensub(/([^- a-zA-Z:0-5]+) /, " \\1 ", "g", s); 

  # Drop spaces that touch an original "-" (first before, then after):
  gsub(/[ ]+[-]/, "-", s);
  gsub(/[-][ ]+/, "-", s);

  # Park the original "-" as "=":
  gsub(/[-]/, "=", s);

  # Trim leading and trailing spaces:
  gsub(/^[ ]+/, "", s);
  gsub(/[ ]+$/, "", s);

  # Park each remaining run of spaces as a single "_":
  gsub(/[ ]+/, "_", s);

  # Wrap every syllable (letter, then lowercase letters or ":", then a
  # tone digit 0-5) in "-" on both sides:
  s = gensub(/([a-zA-Z][a-z:]*[0-5])/, "-\\1-", "g", s);

  # Drop the new "-" where it touches a string end, a parked space ("_")
  # or a parked hyphen ("="):
  s = gensub(/(^|[_=])[-]/, "\\1", "g", s);
  s = gensub(/[-]($|[_=])/, "\\1", "g", s);

  # Bring the parked hyphens back:
  gsub(/[=]/, "-", s);

  # Squeeze runs of "-" down to one:
  gsub(/[-][-]+/, "-", s);

  # Bring the parked spaces back:
  gsub(/[_]+/, " ", s);

  # Put "~" in front of a token (starting at the string start or after a
  # space/"_") in which a digit directly follows a character that is not
  # a letter, digit or ":"; this flags numbers as distinct from words:
  s = gensub(/(^|[ _])([^ _]*[^A-Za-z0-9:][0-9])/, "\\1~\\2", "g", s);

  # Rewrite "u:" and "ue:" as "ü" and "üe" when followed by a lowercase
  # letter or a digit 1-5:
  s = gensub(/[u][:]([a-z1-5])/, "ü\\1", "g", s);
  s = gensub(/[u][e][:]([a-z1-5])/, "üe\\1", "g", s);

  return s;
}