#! /bin/csh -f
# Last edited on 2004-01-30 06:02:57 by stolfi

# Extracts words from the raw (UTF-8) Quran, for checking purposes.
# Converts them to JSAR.
#
# Input is read from standard input; the result is written to standard
# output, one token per line.
#
# Pipeline stages:
#
#   1. gawk: cleans up the markup of the raw file.
#        - Lines that are blank, or whose first non-space character
#          is "#", are discarded.
#        - "@chapter{A}{B}{C}" is rewritten as "A. C=".
#        - "@verse{A}{B}" is rewritten as "A.B. ".
#        - A line consisting only of "@=" is rewritten as "=".
#        - Any line that still begins with "@" is discarded.
#        - In every remaining line each brace group "{...}" that
#          contains no nested braces is deleted, and the line is printed.
#
#   2. html-to-hexbytes, hexbytes-to-jsar: external filters that are not
#      defined in this file; presumably they perform the re-encoding
#      to JSAR (to be confirmed against those tools).
#
#   3. gawk: replaces every "=" (together with any underscores just
#      before it) by "_÷_=_", then turns every blank and underscore
#      into a line break.
#
#   4. grep: drops lines that are empty or contain only blanks.
#
# Notes:
#   - This is a csh script, so each line break inside the quoted gawk
#     programs must be preceded by a backslash.
#   - Literal braces and "#" in the patterns are written as bracket
#     expressions ("[{]", "[}]", "[#]") so that they are never parsed
#     as interval expressions or as unknown escape sequences.
#   - "grep -E" is used in place of the obsolescent "egrep".

gawk \
  ' /^ *([#]|$)/ { next; } \
    /^ *[@]chapter/ { \
      $0 = gensub(/@chapter *[{](.*)[}][{].*[}][{](.*)[}]/, "\\1. \\2=", "g", $0); \
    } \
    /^ *[@]verse/ { \
      $0 = gensub(/@verse *[{](.*)[}][{](.*)[}]/, "\\1.\\2. ", "g", $0); \
    } \
    /^ *[@]= *$/ { $0 = "="; } \
    /^ *[@]/ { next; } \
    /./ { gsub(/[{][^{}]*[}]/, ""); print; next; } \
  ' \
  | html-to-hexbytes \
  | hexbytes-to-jsar \
  | gawk \
      ' /./ { \
          gsub(/[_]*[=]/, "_÷_=_"); \
          gsub(/[ _]/, "\n"); \
          print; next; \
        } \
      ' \
  | grep -E -v '^ *$'