#! /usr/bin/gawk -f
# Last edited on 2025-12-08 04:04:25 by stolfi

# Factors a text by placing "{}" around each element of the 2025 word model.
# Each recognized element is wrapped with "{}"; anything that cannot be
# parsed is wrapped with "{*...}".
#
# To be included in factor-field-general etc.

function factor_text(x,   y,e)
{
  # Decomposes "x" into its 2025 word paradigm elements.
  #
  # Parameters:
  #   x     the text to factor.  Documented as uncapitalized EVA without
  #         alignment fillers.  NOTE(review): several patterns below also
  #         accept the capitals S, H, C, I, T, K, P, F, W, Z -- confirm
  #         whether capitalized input is actually expected.
  #   y, e  local variables (awk idiom); callers must not pass them.
  #
  # Returns the factored string.  The input is consumed left to right;
  # at each position the FIRST matching branch below wins:
  #
  #   copied through unchanged:
  #     - one of the characters  - . , « = » /
  #     - an already-braced group "{...}" (no nested braces)
  #     - the three-character markers "<%>", "<$>", "<|>"
  #     - an inline comment "<!...>"
  #   wrapped as "{element}":
  #     - a single letter from "aoyqlmngvxrd"
  #     - one to three "i" followed by "n", "m" or "r"
  #     - "s"/"S", then any run of "h"/"H" ending in lowercase "h",
  #       then up to two "e"
  #     - a lone "s"
  #     - "?" followed by up to two "e"
  #     - one of "CcIi", one of "TtKkPpFfWwZz", a run of "h"/"H" ending
  #       in lowercase "h", then up to two "e"
  #     - one of "CcIi", a run of "h"/"H" ending in lowercase "h",
  #       then up to two "e"
  #     - one of "TtKkPpFfWwZz" followed by up to two "e"
  #   wrapped as "{*c}":
  #     - any other single character "c" (unparsed glyph)
  #
  # Every branch consumes at least one character, so the loop terminates.

  y = "";
  while (x != "") {
    # printf "x = [%s]\n", x > "/dev/stderr";
    if (match(x, /^[-.,«=»\/]/)) {
      # Punctuation / separator: pass through.
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y e );
    } else if (match(x, /^[{][^{}]*[}]/)) {
      # Already-braced group: pass through.
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y e );
    } else if (match(x, /^[<][%$|][>]/)) {
      # "<%>", "<$>" or "<|>" marker: pass through.
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y e );
    } else if (match(x, /^[<]![^<>]*[>]/)) {
      # Inline comment "<!...>": pass through.
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y e );
    } else if (match(x, /^[aoyqlmngvxrd]/)) {
      # Single-letter element.
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y "{" e "}" );
    } else if (match(x, /^([i]|[i][i]|[i][i][i])[nmr]/)) {
      # "in", "iin", "iiin" and the "m"/"r" variants.
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y "{" e "}" );
    } else if (match(x, /^[Ss][Hh]*[h]([e][e]|[e]|)/)) {
      # "sh" family with optional "e"/"ee" suffix.
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y "{" e "}" );
    } else if (match(x, /^[s]/)) {
      # Lone "s" (not followed by "h", else the branch above took it).
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y "{" e "}" );
    } else if (match(x, /^[?]([e][e]|[e]|)/)) {
      # "?" with optional "e"/"ee" suffix.
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y "{" e "}" );
    } else if (match(x, /^[CcIi][TtKkPpFfWwZz][Hh]*[h]([e][e]|[e]|)/)) {
      # e.g. "ckh", "cthe", "ikhee".
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y "{" e "}" );
    } else if (match(x, /^[CcIi][Hh]*[h]([e][e]|[e]|)/)) {
      # e.g. "ch", "che", "chee".  The class here is [Hh]; it used to be
      # written "[[Hh]", which also accepted a literal "[" and so let
      # strings like "c[h" pass as a valid element.
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y "{" e "}" );
    } else if (match(x, /^[TtKkPpFfWwZz]([e][e]|[e]|)/)) {
      # e.g. "k", "te", "pee".
      e = substr(x, 1, RLENGTH); x = substr(x, RLENGTH+1);
      y = ( y "{" e "}" );
    } else {
      # Nothing matched: flag one character as unparsed and move on.
      e = substr(x, 1, 1); x = substr(x, 2);
      y = ( y "{*" e "}" );
    }
  }
  return y;
}