#! /bin/csh -f
# Last edited on 2004-01-30 02:48:48 by stolfi

# Extracts words from the raw (UTF-8) Quran, for checking purposes.
# Converts them to JSAR.

# Filter: reads the raw Quran source on standard input and writes one
# token per line on standard output.  Stages of the pipeline:
#
#   1. gawk: discards blank lines and lines starting with "#"; rewrites
#      "@chapter{A}{B}{C}" as "A. C=" and "@verse{A}{B}" as "A.B. ";
#      turns a line consisting of "@=" into "="; drops every other line
#      that still starts with "@"; finally strips any remaining
#      (non-nested) "{...}" groups and prints the line.
#   2. utf-8-to-hexbytes, hexbytes-to-jsar: external filters, not defined
#      here; presumably they re-encode the UTF-8 text as JSAR by way of
#      a hex dump -- confirm against those tools.
#   3. gawk: replaces each "=" (with any underscores before it) by
#      "_÷_=_", then turns every blank and underscore into a newline,
#      so that each token ends up on a line of its own.
#   4. grep: removes the empty lines produced by the splitting.
#
# Braces in the regexps are written as "[{]" and "[}]" so that they are
# always taken literally and never parsed as interval expressions.
#
# NOTE(review): the "÷" marker had been garbled into two Thai letters
# by an encoding mishap (UTF-8 bytes C3 B7 read in a Thai code page);
# it is restored here.  Confirm which byte encoding the consumers of
# this output expect for that character.
#
# This is a csh script: every line inside the quoted awk programs must
# keep its trailing backslash.
gawk \
    ' /^ *([\#]|$)/ { next; } \
      /^ *[@]chapter/ { \
        $0 = gensub(/@chapter *[{](.*)[}][{].*[}][{](.*)[}]/, "\\1. \\2=", "g", $0); \
      } \
      /^ *[@]verse/ { \
        $0 = gensub(/@verse *[{](.*)[}][{](.*)[}]/, "\\1.\\2. ", "g", $0); \
      } \
      /^ *[@]= *$/ { $0 = "="; } \
      /^ *[@]/ { next; } \
      /./ { gsub(/[{][^{}]*[}]/, ""); print; next; } \
    ' \
  | utf-8-to-hexbytes \
  | hexbytes-to-jsar \
  | gawk \
    ' /./ { \
        gsub(/[_]*[=]/, "_÷_=_"); \
        gsub(/[ _]/, "\n"); \
        print; next; \
      } \
    ' \
  | grep -E -v '^ *$'