# Last edited on 2002-01-16 02:46:25 by stolfi
# Sampling functions for chrc/red
# Chinese - Dream of the Red Mansion in GB code,
# converted to Voynichese-looking Roman-like code
# To be included in select-evt-lines, fix-raw-words, select-gud-bad-words

function select_evt_line(subsec,chapter,unit,linenum)
{
  # Decides whether a text line belongs to the sample for subsection `subsec'.
  # Returns 1 if the line is selected, 0 otherwise.
  #
  #   subsec   subsection tag; only "bod.1" is recognized here.
  #   chapter  not used by this sampling function.
  #   unit     unit tag; a line is selected when it starts with "P"
  #            (running text).
  #   linenum  not used by this sampling function.
  #
  # Any other `subsec' is reported through `arg_error' (defined by the
  # including script).

  # NOTE: this must be the comparison `==', not the assignment `='.
  # An assignment here would overwrite `subsec' with "bod.1", always
  # evaluate as true, and make the error branch below unreachable.
  if (subsec == "bod.1")
    { 
      # Consider any part, running text (unit type "P"):
      return (unit ~ /^[P]/);
    }
  else
    { arg_error(("bad subsection \"" subsec "\"")); }
}

function fix_raw_word(word)
{
  # Normalizes one raw word produced by the word table.
  #
  # The word table is assumed to map each GB byte-pair that encodes a
  # Chinese character to a string `@xxxxx', where `xxxxx' are lowercase
  # letters, and every other GB code (punctuation, non-Chinese letters,
  # etc.) to `*DELETE*'.
  #
  # Result:
  #   `@xxxxx'                  -> `xxxxx' (the leading `@' is dropped)
  #   `*DELETE*' or `*delete*'  -> returned unchanged
  #   anything else             -> `??'

  if (word !~ /^[@]/)
    { 
      # Not a Chinese character code: keep deletion markers, flag the rest.
      if ((word == "*DELETE*") || (word == "*delete*")) { return word; }
      return "??";
    }

  # The word is known to start with `@' here; return everything after it.
  return substr(word, 2);
}

function define_patterns() {
  # Intentionally empty: this sampling module does not define any patterns.
}

function is_good_word(word)
{ 
  # Returns 1 when `word' is non-empty and consists solely of the letters
  # listed in the pattern below (all lowercase, as used by the
  # Voynichese-like pseudo-Roman code); returns 0 otherwise.
  if (word ~ /^[aoydsrlciektph]+$/) { return 1; }
  return 0;
}