# Last edited on 2002-01-16 02:48:57 by stolfi

# Sampling functions for engl/cul
# English - Culpeper's herbal, mapped to lowercase.
#
# To be included in select-evt-lines, fix-raw-words, select-gud-bad-words

function select_evt_line(subsec,chapter,unit,linenum)
{
  # Decides whether a text line belongs to the sample named by `subsec`.
  #
  #   subsec   - subsection tag: "pre.1", "her.1" or "rec.1".
  #   chapter  - chapter code of the line; only its first letter is tested.
  #   unit     - unit type of the line; only its first letter is tested.
  #   linenum  - not used by this function.
  #
  # Returns 1 if the line is selected, 0 otherwise.
  #
  # Only running text (unit type "P") is ever taken.  The opening quote
  # (part "a") is omitted; the author's prologue ("b"), the herbal
  # proper ("h") and the recipes section ("r") are sampled as three
  # separate subsections.

  # Anything that is not running text is rejected outright.
  if (unit !~ /^[P]/) { return 0; }

  # Each subsection accepts exactly one leading chapter letter.
  if (subsec == "pre.1") { return (chapter ~ /^[b]/); }
  if (subsec == "her.1") { return (chapter ~ /^[h]/); }
  if (subsec == "rec.1") { return (chapter ~ /^[r]/); }

  # Unknown subsection tag.  arg_error() is supplied by the including
  # script (presumably it reports the message and aborts - confirm there).
  arg_error(("bad subsection \"" subsec "\""));
}

function fix_raw_word(word)
{
  # Normalizes a raw word: folds it to lower case and breaks it at
  # hyphens by turning each "-" into a newline.  A word made up only
  # of hyphens (or an empty word) is returned with its hyphens intact.
  #
  # Returns the normalized string.

  word = tolower(word);

  # Nothing but hyphens (or nothing at all): leave it alone.
  if (word ~ /^[-]*$/) { return word; }

  gsub(/[-]/, "\n", word);
  return word;
}

function define_patterns()
{
  # This sample needs no patterns, so there is nothing to set up.
}

function is_good_word(word)
{
  # Tells whether `word` is acceptable; returns 1 if so, 0 if not.
  #
  # A good word has at least one lowercase letter a-z, and every
  # apostrophe in it is either immediately followed by a letter or is
  # the final character.  Hence two apostrophes are never adjacent,
  # while a leading and/or a trailing apostrophe is fine ("'tis", "an'").

  return (word ~ /^([']?[a-z])+[']?$/);

  # Alternative that also admits hyphenated compounds, each component
  # of which must contain at least one letter (currently disabled):
  #
  # return (word ~ /^(([']?[a-z])+[']?)([-]([']?[a-z])+[']?)*$/);
}