# Last edited on 2002-01-16 14:15:28 by stolfi
# The New Testament in Greek, lowercased, in an ad-hoc encoding.

# Sampling functions for grek/nwt
# To be included in select-evt-lines, fix-raw-words, select-gud-bad-words

function select_evt_line(subsec,chapter,unit,linenum)
{
  # Decides whether a text line belongs to the sample.
  #
  #   subsec   - subsection name; one of "mat.1", "mrk.1", "luk.1", "joh.1".
  #   chapter  - chapter tag; only its first character is examined.
  #   unit     - unit tag; only its first character is examined.
  #   linenum  - accepted for interface compatibility; not used here.
  #
  # Returns 1 if the line is selected, 0 otherwise.
  #
  # Make each of the four Gospels into a subsection;
  # Consider only running text (unit type "P"):
  if (unit !~ /^[P]/) { return 0; }

  # Each subsection accepts only chapters whose tag starts with its own letter.
  if (subsec == "mat.1")
    { return (chapter ~ /^[A]/); }
  else if (subsec == "mrk.1")
    { return (chapter ~ /^[B]/); }
  else if (subsec == "luk.1")
    { return (chapter ~ /^[C]/); }
  else if (subsec == "joh.1")
    { return (chapter ~ /^[D]/); }
  else
    {
      # arg_error is supplied by the including script.
      # NOTE(review): presumably it aborts the program; if it returns,
      # this function falls through and yields an uninitialized value.
      arg_error(("bad subsection \"" subsec "\""));
    }
}

function fix_raw_word(word)
{
  # Normalizes a raw word and returns the result.
  #
  # Map upper case to lower case (just in case),
  # Map digraphs that stand for single Greek letters to single bytes.
  #
  # NOTE(review): the line break between the comment marker above and the
  # tolower() call was reconstructed; confirm the call is meant to be active.
  word = tolower(word);
  gsub(/ph/, "f", word); # Actually done already in the source
  gsub(/th/, "ð", word);
  gsub(/ps/, "ç", word);
  gsub(/ch/, "q", word);
  return word;
}

function define_patterns()
{
  # No patterns needed
}

function is_good_word(word)
{
  # Returns 1 if `word` is non-empty and consists only of accepted letters,
  # 0 otherwise.
  #
  # Accept only lowercase bytes that stand for Greek letters:
  # No [chjqvwy], but [ë] (eta), [ô] (omega), and [fðçq] as above.
  # (The range k-u below does admit "q", the code produced for "ch".)
  # The text uses neither hyphen nor apostrophe.
  return (word ~ /^[abd-gik-uxzëôðç]+$/);
}