#! /bin/csh -f
# Last edited on 2004-01-30 06:55:52 by stolfi

# Extracts words from the raw (HTML) Quran, for checking purposes.
# Converts them to JSAR.
#
# Usage: a pure filter -- reads the raw text on stdin, writes one word
# per line on stdout.
#
# Pipeline stages:
#
#   1. gawk (markup preprocessing):
#        - lines that are empty or whose first non-space character is "#"
#          are dropped;
#        - "@verse" lines: the "@verse" tag and every "{" are deleted and
#          every "}" becomes ".", then the line is printed;
#        - "@chapter" lines: braces become blanks, then field 2 ("cn") and
#          field 4 ("t") are emitted as "cn." / "#@@ t" / "÷" / "="
#          on four separate lines (presumably chapter number and title --
#          confirm against the input format);
#        - "@=" lines are replaced by the two lines "÷" and "=";
#        - any other non-empty line is copied unchanged.
#   2. har-to-hexbytes | hexbytes-to-jsar: external filters, not defined
#      here (presumably transliterate the text into the JSAR encoding).
#   3. gawk (word splitting): "#@@" markers are blanked out; leading and
#      trailing runs of "_"/blank are stripped and every inner run becomes
#      a newline, yielding one word per line.
#   4. grep -E -v: removes the lines left empty by stage 3.
#
# NOTE: inside the single-quoted gawk programs every line must end with a
# backslash -- csh does not allow a bare newline inside a quoted string.
#
# NOTE(review): the "÷" in the printf strings must stay in whatever byte
# encoding har-to-hexbytes expects -- confirm before re-encoding this file.
#
# The last stage uses "grep -E" rather than "egrep": it is the POSIX
# spelling, and recent GNU grep prints an obsolescence warning for "egrep".

gawk \
  ' /^ *([\#]|$)/ { next; } \
    /[@]verse/ { \
      gsub(/[@]verse/, ""); gsub(/[{]/, ""); gsub(/[}]/, "."); \
      print; next; \
    } \
    /[@]chapter/ { \
      gsub(/[{}]/, " "); cn=$2; t=$4; \
      printf "%s.\n#@@ %s\n÷\n=\n", cn, t; next; \
    } \
    /[@][=]/ { printf "÷\n=\n"; next; } \
    /./ { printf "%s\n", $0; } \
  ' \
  | har-to-hexbytes \
  | hexbytes-to-jsar \
  | gawk \
      ' /[#][@][@]/ { gsub(/[#][@][@]/, " "); } \
        /./ { \
          gsub(/[_ ]+$/, ""); \
          gsub(/^[ ]*[_ ]*/, ""); gsub(/[_ ]+/, "\n"); \
          print; next; \
        } \
      ' \
  | grep -E -v '^ *$'