#! /bin/csh -f
# Last edited on 2004-02-04 08:08:03 by stolfi

# Extracts words from the raw Chinese pentateuch.
#
# Filter: reads the raw text on standard input and writes one token per
# line on standard output.
#
# The gawk program handles each input line with the first rule that matches:
#
#   1. Lines that are empty, all blanks, or whose first non-blank character
#      is "#" are discarded.
#   2. Lines containing "@fix " are discarded.
#   3. Lines containing "@chapter": every blank is turned into "_" and every
#      brace into a blank, so that each brace group becomes one awk field.
#      Fields 2, 3 and 4 are taken as chapter number, book and chapter title
#      (underscores in the title are turned back into blanks).  The output is
#      "BOOK:CHNUM", the title, the chapter number, "~" and "=", one per line.
#      NOTE(review): field 5 (presumably an English title) is not emitted and
#      the chapter number is printed twice; this is the historical behavior,
#      kept as is -- confirm that it is intended.
#   4. Lines containing "@verse": every "?" is padded with blanks; then a
#      line of the form "@verse{BOOK}{CH}{VERSE} text" is rewritten as
#      "BOOK:CH:VERSE", the text, "~" and "=", one per line.  A line that
#      does not have that form is printed with only the "?" padding applied.
#   5. Any other line is printed unchanged.
#
# After gawk, "tr" turns every underscore and blank into a newline, and
# "grep" removes the empty or all-blank lines that result.
#
# Quoting notes for maintainers: inside the single-quoted gawk program each
# line ends in a backslash because csh requires it to continue a quoted
# string across lines; the doubled backslash at the end of the two gensub
# lines leaves one backslash for gawk, which reads it as a line continuation.
# Literal braces in the verse pattern are written as bracket expressions so
# that they can never be read as interval expressions.

gawk \
  ' /^ *([\#]|$)/ { next; } \
    /[@]fix / { next; } \
    /[@]chapter/ { \
      gsub(/[ ]/, "_"); \
      gsub(/[{}]/, " "); \
      chnum = $2; book = $3; chtit = $4; \
      gsub(/[_]/, " ", chtit); \
      printf "%s:%s\n%s\n%s\n~\n=\n", book, chnum, chtit, chnum; \
      next; \
    } \
    /[@]verse/ { \
      gsub(/[?]/, " ? "); \
      $0 = gensub(/^ *[@]verse[{]([A-Z]+)[}][{]([0-9]+)[}][{]([0-9]+)[}] *(.*)$/, \\
        "\\1:\\2:\\3\n\\4\n~\n=", \\
        "g", $0); \
      print; next; \
    } \
    { print; } \
  ' \
  | tr '_ ' '\012\012' \
  | grep -E -v '^ *$'