#! /usr/bin/gawk -f
# Last edited on 2004-02-26 16:33:41 by stolfi
# Sampling functions for rugg/hnd
# Gordon Rugg's fake voynichese, manual version.
# To be included in wds-to-tlw

function smp_define_patterns(smp,sec)
{
  # This sample set uses no patterns, so there is nothing to set up;
  # the only work done here is validating the section name {sec}.
  # The single accepted section is "tot.1"; any other value is reported
  # through {data_error}. The {smp} argument is not used.
  if (sec == "tot.1") { return; }
  data_error(("invalid section \"" sec "\""));
}

function smp_reclassify_word(smp,sec,cursec,curlin,type,wd)
{
  # Decides the type code for word {wd}, given its incoming {type} and
  # the location string {cursec} where it was found.  Returns either
  # "n", "x", or the incoming {type} unchanged.  The arguments {smp},
  # {sec} and {curlin} are not consulted.

  # The location must start with "{p}{f", then digits, then a lowercase
  # letter (i.e. any folio); anything else yields "n".
  if (! (cursec ~ /^{p}{f[0-9]+[a-z]/)) { return "n"; }

  # Inside a folio, only locations ending in "{tx}" (text) are kept;
  # everything else yields "x".
  if (! (cursec ~ /{tx}$/)) { return "x"; }

  # Punctuation (type "p") survives only when it is the parag break "=";
  # all other punctuation yields "n".  Non-punctuation keeps its type.
  return (((type != "p") || (wd == "=")) ? type : "n");
}

function smp_fix_word(smp,sec,type,wd)
{
  # Identity mapping: this sample set applies no corrections, so the
  # word {wd} is handed back exactly as received.  The arguments
  # {smp}, {sec} and {type} are not consulted.
  return (wd);
}

function smp_is_good_word(smp,sec,type,wd)
{
  # A good word is a non-empty string made up solely of the letters
  # a-z.  Returns 1 for a good word and 0 otherwise.  The arguments
  # {smp}, {sec} and {type} are not consulted.
  if (wd ~ /^[a-z]+$/) { return 1; }
  return 0;
}