# Last edited on 2002-01-16 02:46:25 by stolfi

# Sampling functions for chrc/red
# Chinese - Dream of the Red Mansion in GB code,
# converted to Voynichese-looking Roman-like code

# To be included in select-evt-lines, fix-raw-words, select-gud-bad-words

function select_evt_line(subsec,chapter,unit,linenum)
{
  # Decides whether an input line belongs to the sample.
  #
  #   subsec   subsection name; only "bod.1" is recognized here.
  #   chapter  not used by this sample.
  #   unit     unit type tag; running text has a tag that starts with "P".
  #   linenum  not used by this sample.
  #
  # Returns 1 if the line is running text (unit type "P"), 0 otherwise.
  # Any other subsection is reported through arg_error(), which is
  # defined by the including script (presumably it aborts; verify there).

  # Must be a comparison ("=="): a plain "=" would assign "bod.1" to
  # subsec and always be true, silently accepting every subsection.
  if (subsec == "bod.1")
    {
      # Consider any part, running text (unit type "P"):
      return (unit ~ /^[P]/);
    }
  else
    {
      arg_error(("bad subsection \"" subsec "\""));
    }
}

function fix_raw_word(word)
{
  # Normalizes one raw word produced by the word table.
  #
  # Assume that the word table maps each GB byte-pair that corresponds to
  # a Chinese character to a string `@xxxxx' where `xxxxx' are lowercase
  # letters; and any other GB codes (punctuation, non-Chinese letters,
  # etc.) to `*DELETE*'.
  #
  # Returns the word without its leading `@'; the deletion markers
  # `*DELETE*' and `*delete*' unchanged; and "??" for anything else.

  if (word ~ /^[@]/)
    {
      # Just remove the `@'. The pattern is anchored, so at most one
      # replacement can happen and sub() is sufficient.
      sub(/^[@]/, "", word);
    }
  else if ((word != "*DELETE*") && (word != "*delete*"))
    {
      # Neither a mapped character nor a deletion marker: flag it.
      word = "??";
    }
  return word;
}

function define_patterns()
{
  # No patterns needed for this sample; the function is intentionally
  # empty.
}

function is_good_word(word)
{
  # Returns 1 if `word' is non-empty and consists only of the letters
  # a o y d s r l c i e k t p h, else 0.
  # The Voynichese-like pseudo-Roman code uses these lowercase letters only.
  return (word ~ /^[aoydsrlciektph]+$/);
}