#! /usr/bin/gawk -f
# Last edited on 2026-03-02 08:13:03 by stolfi

# GAWK functions to map Voynichese words to "roots" in
# a way that makes languages A and B more similar to each other.

function root_from_word(wd,   rt)
  {
    # Returns the "root" of the string {wd}: a reduced form obtained by
    # lowercasing {wd}, stripping markup, deleting some letters and letter
    # groups, and collapsing others into the class letters 'K' and 'D'.
    #
    # The characters '-', ',' and '.' are never removed, so {wd} may hold
    # several words separated by them; every root that ends up empty
    # (including the root of an empty {wd}) is returned as 'Z'.
    #
    # The parameter {rt} is a local work variable; callers pass only {wd}.
    #
    # The order of the steps matters: the later patterns rely on what
    # the earlier ones have already removed or rewritten.  The upper-case
    # class letters cannot clash with input because of the {tolower}.

    rt = tolower(wd)

    # Remove inline comments "<!...>" and one-character "<.>" tags:
    gsub(/[<][!][^<>]*[>]/, "", rt)
    gsub(/[<].[>]/, "", rt)

    # Remove the markers '«', '=', '»':
    # NOTE(review): matching the non-ASCII characters as single characters
    # depends on this file's encoding agreeing with gawk's locale -- confirm.
    gsub(/[«=»]/, "", rt)

    # Map three-digit "&NNN;" codes (the ';' is optional) to '?':
    gsub(/[&][0-9][0-9][0-9][;]?/, "?", rt)

    # Drop the braces of "{...}" groups but keep their contents:
    rt = gensub(/[{]([^{}]*)[}]/, "\\1", "g", rt)

    # Map rare characters @b, @j, etc to '?'.
    # @y must NOT be in this class: it is deleted by the next step, and
    # mapping it to '?' here first would make that deletion dead code.
    gsub(/[bjuvx]/, "?", rt)

    # Delete @q, @a, @o, @y:
    gsub(/[qoay]/, "", rt)

    # Eliminate codas:
    gsub(/[i]*[nmg]/, "", rt)
    gsub(/[i][i]?r/, "", rt)

    # Eliminate benches:
    gsub(/ee[e]?/, "", rt)
    gsub(/[ics]h[e]?/, "", rt)
    gsub(/c'h[e]?/, "", rt)

    # Collapse all gallows to 'K':
    # Assume 'w' and 'z' are puffs with hooks.
    gsub(/[wztkpf][e]?/, "K", rt)

    # Delete gallows platforms:
    gsub(/[ic]Kh[he]?/, "K", rt)

    # Map all dealers to D:
    gsub(/[i]*[dlrs]/, "D", rt)

    # Map empty roots to 'Z':
    # Multiple times because each may catch only half: the matches of
    # one pass cannot overlap, so in a run of separators every other gap
    # is left for the second pass.
    rt = gensub(/([-,.]|^)([-,.]|$)/, "\\1Z\\2", "g", rt)
    rt = gensub(/([-,.]|^)([-,.]|$)/, "\\1Z\\2", "g", rt)

    return rt
  }