#! /usr/bin/gawk -f
# Last edited on 2002-03-01 23:18:47 by stolfi

# Extracts the bare text lines from an EVMT file, 
# discarding comments, page headers, and locators.

# Input filter: a line is dropped before any text extraction when it is
#   - a comment line (`#` in the first column),
#   - empty or made up solely of spaces, or
#   - a `<...>` locator followed by nothing except optional spaces.
/^#/ || /^ *$/ || /^<[^>]*> *$/ { next }

# Reduces one EVMT data line to its bare text and returns the result,
# which may be the empty string.  Steps, in order:
#   1. drop the leading `<...>` locator and the spaces that follow it;
#   2. delete `{...}` inline comments, innermost first, repeating until
#      no comment is left so that any nesting depth is handled;
#   3. delete every `!` character;
#   4. replace each run of `-`, `/`, `=`, `.`, `,` and space by one `.`,
#      then trim `.` from both ends of the line;
#   5. rewrite each `*` and `%` as `?`.
function evmt_bare_text(line) {
  sub(/^<[^>]*> */, "", line);
  # Each successful pass removes at least two characters, so this ends.
  while (gsub(/{[^{}]*}/, "", line) > 0) { }
  gsub(/!+/, "", line);
  gsub(/[-\/=., ]+/, ".", line);
  sub(/^[.]+/, "", line);
  sub(/[.]+$/, "", line);
  gsub(/[*%]/, "?", line);
  return line;
}

# Every line reaching this rule is treated as a data line: print its bare
# text, unless nothing is left once locator, comments, `!` characters and
# separators are gone (such a line would only add a blank output line).
{
  text = evmt_bare_text($0);
  if (length(text) > 0) print text;
  next;
}