# Last edited on 2002-01-16 02:59:10 by stolfi

# Sampling functions for latn/ptt
# The "Pentateuch" in Latin (Vulgate), all lowercase.

# To be included in select-evt-lines, fix-raw-words, select-gud-bad-words

function select_evt_line(subsec,chapter,unit,linenum)
{
  # Decides whether a text line belongs to the sample for {subsec}.
  # Each book is treated as its own subsection, and the result is
  # the outcome of matching {chapter} against the letter class tied
  # to that book:
  #
  #   gen.1 -> a,  exo.1 -> b,  lev.1 -> c,  num.1 -> d,  deu.1 -> e
  #
  # The {linenum} argument is accepted but not consulted here.

  # Only running text (unit type "P") is ever selected.  This test
  # comes first, so a non-"P" unit yields 0 even when {subsec} is
  # not one of the known subsections.
  if (unit !~ /^[P]/) { return 0; }

  if (subsec == "gen.1") { return (chapter ~ /^[a]/); }
  if (subsec == "exo.1") { return (chapter ~ /^[b]/); }
  if (subsec == "lev.1") { return (chapter ~ /^[c]/); }
  if (subsec == "num.1") { return (chapter ~ /^[d]/); }
  if (subsec == "deu.1") { return (chapter ~ /^[e]/); }

  # Unknown subsection: report it through {arg_error}, which is
  # defined outside this file (presumably it aborts -- confirm in
  # the including script).  No explicit value is returned after it.
  arg_error(("bad subsection \"" subsec "\""));
}

function fix_raw_word(word)
{
  # Returns {word} folded to lower case (just in case the input
  # contains upper-case letters).
  return tolower(word);
}

function define_patterns()
{
  # No patterns needed for this text; intentionally empty.
}

function is_good_word(word)
{
  # Returns 1 when {word} is a non-empty string made only of the
  # lowercase letters a-z other than "j" and "w", and 0 otherwise.
  # The text uses neither hyphen nor apostrophe, so those are
  # not accepted either.
  return (word ~ /^[a-ik-vx-z]+$/);
}