#! /bin/csh -f
# Last edited on 2004-02-04 08:08:03 by stolfi

# Extracts words from the raw Chinese pentateuch

# Filter: reads the raw text on stdin, writes one token per line on stdout.
#
# The gawk stage treats each input line as follows (first match wins):
#   - empty or all-space lines, and lines whose first non-space
#     character is "#", are dropped;
#   - lines containing "@fix " are dropped;
#   - lines containing "@chapter": spaces are turned into "_" and braces
#     into spaces, so that fields 2..5 are the brace-delimited arguments
#     (chnum, book, chtit, entit -- presumably chapter number, book tag,
#     Chinese title and English title; verify against the input files).
#     The output is the lines "book:chnum", chtit, chnum, "~", "=";
#   - lines containing "@verse": every "?" is padded with spaces; if the
#     whole line has the form @verse{LETTERS}{DIGITS}{DIGITS} TEXT it is
#     rewritten as the lines "LETTERS:DIGITS:DIGITS", TEXT, "~", "=";
#     otherwise it is printed as is;
#   - every other line is printed unchanged.
# The tr stage then turns every "_" and every space into a newline, and
# the final grep removes the empty lines that result.
#
# NOTE(review): entit is parsed but never printed, and chnum is printed
# twice -- confirm that the last printf argument should not be entit.
#
# Quoting: csh requires a backslash before each newline inside a quoted
# string.  A single trailing backslash therefore gives gawk a plain
# newline, while a doubled one leaves a backslash-newline, which gawk
# reads as a statement continuation.
#
# Literal braces in the verse pattern are written as bracket expressions
# so that they can never be taken for regexp interval operators.

gawk \
     ' /^ *([\#]|$)/ { next; } \
       /[@]fix / {  next; } \
       /[@]chapter/ { \
         gsub(/[ ]/, "_"); \
         gsub(/[{}]/, " "); \
         chnum = $2; book = $3; chtit = $4; entit = $5;  \
         gsub(/[_]/, " ", chtit); gsub(/[_]/, " ", entit); \
         printf "%s:%s\n%s\n%s\n~\n=\n", book, chnum, chtit, chnum; \
         next; \
       } \
       /[@]verse/ { \
         gsub(/[?]/, " ? "); \
         $0 = gensub(/^ *[@]verse[{]([A-Z]+)[}][{]([0-9]+)[}][{]([0-9]+)[}] *(.*)$/, \\
           "\\1:\\2:\\3\n\\4\n~\n=", \\
           "g", $0); \
         print; next; \
       } \
       /^ *[@]/ { print; next; } \
       /./ { print; next; } \
     ' \
  | tr '_ ' '\012\012' \
  | grep -E -v '^ *$'