#! /bin/csh -f
# Last edited on 2004-02-02 06:30:20 by stolfi

# Extracts words from the raw (hexbytes) Quran, for checking purposes.
# Converts them to JSAR.

# Filter: reads the marked-up text on standard input and writes one token
# per line on standard output.
#
# Stages, in order:
#   1. grep -E -v : drop lines that are empty or contain only spaces, and
#      lines whose first non-space character is '#'.  The bracket
#      expression '[\#]' also matches a leading backslash, so such lines
#      are dropped as well.
#   2. sed :
#        - delete every line containing '@item{' or '@fix';
#        - rewrite a line consisting of '@=' (plus optional trailing
#          spaces) as ' ! =';
#        - replace '@chinword{*}{...}' and any spaces after it by ' * ';
#        - replace '@chinword{A}{B}' and any spaces after it by ' B',
#          i.e. keep only the second argument;
#        - put a space on each side of every '-' so that it comes out as
#          a separate token.
#   3. tr : turn every '_' and every space into a newline.
#   4. grep -E -v : discard the empty lines produced by stage 3.
#
# 'grep -E' is used instead of the obsolescent 'egrep' command, and the
# first stage reads standard input directly (no leading 'cat').
#
# NOTE(review): the '.*' in the two @chinword patterns is greedy, so a
# line holding more than one @chinword{...}{...} is rewritten as a single
# match.  Presumably the input has at most one per line -- confirm.

grep -E -v -e '^ *([\#]|$)' \
  | sed \
       -e '/@item{/d' \
       -e '/@fix/d' \
       -e 's:^@= *$: ! =:g' \
       -e 's:@chinword{[*]}{\(.*\)} *: * :g' \
       -e 's:@chinword{\(.*\)}{\(.*\)} *: \2:g' \
       -e 's:[-]: - :g' \
  | tr '_ ' '\012\012' \
  | grep -E -v '^ *$'