#! /usr/bin/gawk -f
# Last edited on 2004-02-02 06:17:35 by stolfi

# A filter to be run on the main.raw to turn it into
# the starting version of main.org
#
# Lines of the form "@chinword{GB}{PY}..." are rewritten as
# "@chinword{GB}{PY'}", where PY' = pyspread(PY); anything after the
# second brace group is dropped.  Every other non-empty line is copied
# unchanged; empty lines are dropped.

# Well-formed "@chinword" entry: normalize its second field.
# The pattern requires both brace groups, so that a malformed
# "@chinword" line falls through to the copy rule below instead of
# having the whole line substituted into both fields (gensub returns
# its target unchanged when the regexp does not match).
# Braces are bracket-quoted so they can never be read as interval
# expressions.
/^[@]chinword[{].*[}][{].*[}]/ {
    gb = gensub(/^[@]chinword[{](.*)[}][{].*[}].*$/, "\\1", "g", $0);
    py = pyspread(gensub(/^[@]chinword[{].*[}][{](.*)[}].*$/, "\\1", "g", $0));
    printf "@chinword{%s}{%s}\n", gb, py;
    next;
}

# Any other non-empty line: copy as is.
/./ {
    print;
    next;
}

# pyspread(s): returns a normalized copy of the string {s}.
#
# Runs of the form letter + [a-z:]* + digit 0-5 are treated as
# syllables.  In the result, adjacent syllables are separated by "-",
# hyphens already present in {s} are kept (without surrounding
# spaces), each run of spaces is reduced to a single space, leading
# and trailing spaces are removed, "u:" / "ue:" are rewritten with
# "ü", and certain digit-bearing tokens get a "~" prefix (see below).
#
# The characters "=" and "_" are used internally as temporary
# stand-ins for "-" and " ", so any "=" or "_" already present in {s}
# comes out as "-" or " " respectively.
function pyspread(s)
{
    # Insert spaces around numbers, punct, etc: a run of characters
    # other than "-", space, letters, ":" and digits 0-5 that is
    # followed by a space gets a space on each side.
    # NOTE(review): the regexp requires a space after the run, so a
    # run not followed by a space is left alone -- confirm intended.
    s = gensub(/([^- a-zA-Z:0-5]+) /, " \\1 ", "g", s);

    # Remove any spaces adjacent to existing "-":
    s = gensub(/[ ]+[-]/, "-", "g", s);
    s = gensub(/[-][ ]+/, "-", "g", s);

    # Turn existing "-" into "=", freeing "-" for use as the
    # syllable separator inserted below:
    s = gensub(/[-]/, "=", "g", s);

    # Remove superfluous end-spaces:
    s = gensub(/^[ ]+/, "", "g", s);
    s = gensub(/[ ]+$/, "", "g", s);

    # Replace any existing spaces by "_":
    s = gensub(/[ ]+/, "_", "g", s);

    # Insert "-" on both sides of every syllable:
    s = gensub(/([a-zA-Z][a-z:]*[0-5])/, "-\\1-", "g", s);

    # Remove "-" adjacent to old spaces, old "-" or string ends:
    s = gensub(/(^|[_=])[-]/, "\\1", "g", s);
    s = gensub(/[-]($|[_=])/, "\\1", "g", s);

    # Restore old "=" to "-":
    s = gensub(/[=]/, "-", "g", s);

    # Remove redundant "-"s:
    s = gensub(/[-][-]+/, "-", "g", s);

    # Restore old spaces:
    s = gensub(/[_]+/, " ", "g", s);

    # Mark numbers with "~" to distinguish them from words: a token
    # (starting at the string start or after a space) gets the prefix
    # when it contains a digit directly preceded by a character that
    # is not a letter, digit or ":".
    s = gensub(/(^|[ _])([^ _]*[^A-Za-z0-9:][0-9])/, "\\1~\\2", "g", s);

    # Convert "u:" and "ue:" to "ü" and "üe".  The following character
    # may be a letter or any tone digit 0-5, the same digit range that
    # the syllable pattern above accepts (so "lu:0" is converted too).
    s = gensub(/[u][:]([a-z0-5])/, "ü\\1", "g", s);
    s = gensub(/[u][e][:]([a-z0-5])/, "üe\\1", "g", s);

    return s;
}