#! /usr/bin/gawk -f
# Last edited on 2004-02-26 16:33:41 by stolfi

# Sampling functions for rugg/hnd
# Gordon Rugg's fake voynichese, manual version.
# To be included in wds-to-tlw

# Checks the section name requested for this sample; defines no patterns.
#   smp - sample name (not used by this function).
#   sec - section name; the only value accepted is "tot.1".
# Any other section is reported through data_error(), which is not defined
# in this file (presumably supplied by the including program -- confirm).
function smp_define_patterns(smp,sec)
{
  # No patterns needed
  if (sec != "tot.1") { data_error(("invalid section \"" sec "\"")); }
}

# Decides the class of one input word from its location and current type.
#   smp, sec, curlin - not used by this function.
#   cursec - location tag of the word; tested against two regexps below.
#   type   - incoming word type; "p" is treated as punctuation.
#   wd     - the word itself; "=" is the paragraph break.
# Returns:
#   "n"  if cursec does not start with "{p}{f" + digits + a lowercase
#        letter, or if the word is punctuation other than "=";
#   "x"  if cursec does not end with "{tx}";
#   type unchanged otherwise.
# NOTE(review): the meaning the caller attaches to "n" and "x" is not
# visible here; verify against wds-to-tlw.
function smp_reclassify_word(smp,sec,cursec,curlin,type,wd)
{
  # Literal braces are backslash-escaped so that no awk can read them as
  # interval expressions ("{n,m}" repetition operators).

  # Take all folios (there is nothing else, still...)
  if (cursec !~ /^\{p\}\{f[0-9]+[a-z]/) { return "n"; }
  # Within the selected sections, reject anything that is not text
  if (cursec !~ /\{tx\}$/) { return "x"; }
  # Discard punctuation other than parag breaks:
  if ((type == "p") && (wd != "=")) { return "n"; }
  return type;
}

# Word clean-up hook: this sample applies no correction.
#   smp, sec, type - not used by this function.
#   wd             - the word to fix.
# Returns wd unchanged.
function smp_fix_word(smp,sec,type,wd)
{
  # Do nothing.
  return wd;
}

# Tells whether a word is acceptable for this sample.
#   smp, sec, type - not used by this function.
#   wd             - the word to test.
# Returns 1 if wd is non-empty and consists only of characters in [a-z],
# 0 otherwise.
function smp_is_good_word(smp,sec,type,wd)
{
  # Accept only lowercase alpha.
  return (wd ~ /^[a-z]+$/);
}