# Last edited on 2002-01-16 02:49:53 by stolfi

# Sampling functions for engn/wow
# English - proper names from Wells's "War of the Worlds",
# mapped to lowercase.
#
# To be included in select-evt-lines, fix-raw-words, select-gud-bad-words

function select_evt_line(subsec,chapter,unit,linenum)
{
  # Decides whether a line of the text belongs to subsection {subsec}.
  # Returns 1 if the line is to be selected, 0 otherwise.
  #
  # Omit opening quote (part "a"),
  # Take running text (unit type "P") in the story proper
  # (parts [bc]), as a single subsection.
  #
  # The only subsection defined for this sample is "bod.1"; any other
  # name is reported through {arg_error}.  The {linenum} parameter is
  # not used by this sample.

  # Must be a comparison ("=="): an assignment here would always yield
  # the non-empty string "bod.1", i.e. true, so that bad subsection
  # names would be silently accepted and never reported.
  if (subsec == "bod.1")
    { return ((chapter ~ /^[bc]/) && (unit ~ /^[P]/)); }
  else
    { # {arg_error} is defined by the including script; presumably it
      # aborts the run, since no value is returned after it.
      arg_error(("bad subsection \"" subsec "\""));
    }
}

function fix_raw_word(word)
{
  # Returns the cleaned-up version of {word}, or "*DELETE*" if the
  # word is to be discarded.
  #
  # Assumes that the word table has already
  # been used to map all non-names in initial-caps to "*DELETE*".
  # Now map words in all uppers or all lowers to "*DELETE*"
  # and map the rest to lower case.
  # Keep hyphens for now.

  # A word is kept only if it contains an uppercase letter immediately
  # followed by a lowercase one (as in a capitalized name).
  if (word !~ /[A-Z][a-z]/)
    { word = "*DELETE*"; }
  else
    { word = tolower(word); }
  return word;
}

function define_patterns()
{
  # No patterns needed
}

function is_good_word(word)
{
  # Returns 1 if {word} is an acceptable word, 0 otherwise.
  #
  # Accept only lowercase alpha, plus apostrophe.
  # Apostrophes can't be doubled.
  # Note that 'tis OK to begin an' end with apostrophe!
  # return (word ~ /^([']?[a-z])+[']?$/);
  #
  # The following allows hyphenated words.
  # Note that each word of
  # a hyphenated compound must contain at least one letter.
  return (word ~ /^(([']?[a-z])+[']?)([-]([']?[a-z])+[']?)*$/);
}