# Last edited on 2000-01-30 10:50:07 by stolfi

# Usage: "gawk -f factor-elements.gawk ..."

function factor_text(x,   out, tok) {
  # Factors the text "x" into its QOKOKOKOF elements and returns the
  # factored string.
  #
  # Input: uncapitalized EVA text. "{...}" comments and "!" fillers are
  # stripped first; after that only the characters "-=/,. *?%" and "a-z"
  # are accepted, anything else is reported through error().
  # NOTE(review): error() is not defined in this file; presumably it is
  # supplied by another file loaded alongside this one -- confirm.
  #
  # Output: runs of the separators "-", "=", "/", ",", "." and " " are
  # copied through verbatim.  Every stretch between separators becomes
  #
  #     {Q} O {K} O {K} ... O
  #
  # where {Q} is "{q}" for a leading "q" or "{_}" when there is none,
  # each O is a maximal run of [aoy] or "_" when that slot is empty, and
  # each {K} is one main letter in braces.  A main letter is either
  # [dlrsxvnmgj] together with any "i"s directly before it, or any other
  # single character that is neither a separator nor one of [aoy].
  # In this variant "e", "ch", "sh" and "h" each count as an independent
  # main letter.
  #
  # Example: "qokeedy" -> "{q}o{k}_{e}_{e}_{d}y".
  #
  # "out" and "tok" are local variables, not arguments.

  # Drop inline comments and filler marks, then validate what is left.
  gsub(/{[^{}]*}/, "", x);
  gsub(/[!]/, "", x);
  if (match(x, /[^-=\/,. *?%a-z]/)) {
    error(("invalid char in word \"" x "\""));
  }

  # Fold the digraphs "ch" and "sh" into single placeholder letters so
  # that the scanner below can treat each of them as one character.
  gsub(/ch/, "C", x);
  gsub(/sh/, "S", x);

  out = "";
  while (x != "") {
    # Debugging aid: printf "x = [%s]\n", x > "/dev/stderr";

    # A run of separators is passed through unchanged.
    if (match(x, /^[-=\/,. ]+/)) {
      out = out substr(x, 1, RLENGTH);
      x = substr(x, RLENGTH + 1);
      continue;
    }

    # Q slot: the optional initial "q".
    tok = "_";
    if (match(x, /^[q]/)) {
      tok = substr(x, 1, RLENGTH);
      x = substr(x, RLENGTH + 1);
    }
    out = out "{" tok "}";

    # Alternate O slots and K letters until a separator or end of text.
    for (;;) {
      # O slot: a run of [aoy], or "_" when empty.
      tok = "_";
      if (match(x, /^[aoy][aoy]*/)) {
        tok = substr(x, 1, RLENGTH);
        x = substr(x, RLENGTH + 1);
      }
      out = out tok;

      # K slot: first try a main letter with its "i" prefix, then any
      # other single non-separator, non-[aoy] character.  Whichever
      # match() succeeds leaves RLENGTH set for the substr() calls below.
      if (!match(x, /^[i]*[dlrsxvnmgj]/) && !match(x, /^[^-=\/,. aoy]/)) {
        break;
      }
      out = out "{" substr(x, 1, RLENGTH) "}";
      x = substr(x, RLENGTH + 1);
    }
  }

  # Expand the placeholder letters back into their digraphs.
  gsub(/C/, "ch", out);
  gsub(/S/, "sh", out);
  return out;
}