#! /bin/csh -f 
# Last edited on 1998-07-14 23:19:48 by stolfi

set usage = "$0 NAME INI FIN  SYMBOL..."

# Applies the Reeds compression cycle several times, starting
# from file NAME-INI.sig and stopping once NAME-FIN.sig has been
# written (INI and FIN are decimal stage numbers, normally given
# as 2 digits; file names always use the 2-digit zero-padded form).
#
# Each cycle NN -> MM = NN+1 does the following:
#   1. counts the digraphs of NAME-NN.sig into NAME-NN.cts,
#      sorted by decreasing count;
#   2. takes the next unused SYMBOL as the replacement for the
#      most frequent digraph, and records that choice in NAME-MM.dic;
#   3. writes NAME-MM.sig with that digraph replaced by the SYMBOL;
#   4. rebuilds NAME-MM.txt from NAME-MM.sig.
#
# At least FIN - INI SYMBOLs must be supplied (one per cycle).
# "gather-tuples" and "replace-signif-digraph" are external
# programs that must be reachable through the command path.

if ( $#argv < 3 ) then
  echo "usage: ${usage}"; exit 1
endif

# Nothing below needs filename expansion, whereas symbols and
# digraphs may well contain "*", "?" or "[": turn globbing off so
# that such data is never expanded against the current directory.
set noglob

set name = "$1"; shift;
set ini = "$1"; shift;
set fin = "$1"; shift;
# ":q" keeps every remaining argument as exactly one symbol.
if ( $#argv > 0 ) then
  set symbs = ( $argv:q )
else
  set symbs = ( )
endif

# INI and FIN must be plain unsigned decimal numbers.
foreach num ( "${ini}" "${fin}" )
  echo "${num}" | grep '^[0-9][0-9]*$' > /dev/null
  if ( $status != 0 ) then
    echo "${0}: INI and FIN must be unsigned decimal numbers"
    echo "usage: ${usage}"; exit 1
  endif
end

# Work with plain decimal counters: zero-padded values such as "08"
# are read as (invalid) octal by printf and by some csh versions,
# while expr always reads its operands as decimal.
set k = `expr "${ini}" + 0`
set kfin = `expr "${fin}" + 0`

# Each cycle consumes one symbol; refuse to start a run that would
# otherwise abort half-way with a "Subscript out of range" error.
if ( ${k} < ${kfin} ) then
  @ ncycles = ${kfin} - ${k}
  if ( ${ncycles} > ${#symbs} ) then
    echo "${0}: need ${ncycles} SYMBOLs but only ${#symbs} given"
    echo "usage: ${usage}"; exit 1
  endif
endif

while ( ${k} < ${kfin} )
  # n = 2-digit tag of the input stage, m = tag of the output stage.
  set n = `printf "%02d" ${k}`
  @ k++
  set m = `printf "%02d" ${k}`

  if ( ! -r "${name}-${n}.sig" ) then
    echo "${0}: cannot read ${name}-${n}.sig"; exit 1
  endif

  echo " " 
  echo "  counting digraphs in ${name}-${n}.sig..." 
  # Tuples containing the filler "_" are discarded; the remaining
  # ones are counted and listed with the largest count first.
  cat "${name}-${n}.sig" \
    | gather-tuples \
        -v order=2 \
        -v filler='_' \
        -v lowercase=0 \
    | grep -v '_' \
    | sort | uniq -c | expand \
    | sort -k1,1nr \
    > "${name}-${n}.cts"

  # top[1] = count, top[2] = most frequent digraph.
  set top = ( `head -n 1 "${name}-${n}.cts"` )
  if ( ${#top} < 2 ) then
    echo "${0}: no digraphs found in ${name}-${n}.sig"; exit 1
  endif

  # Take the next replacement symbol off the front of the list.
  set s = "$symbs[1]"; shift symbs

  printf "#   %s = %s %7d\n" "$s" "$top[2]" "$top[1]" > "${name}-${m}.dic"
  printf "  %s = %s %7d\n" "$s" "$top[2]" "$top[1]"
  echo "  replacing ${top[2]} by $s in ${name}-${n}.sig..." 
  cat "${name}-${n}.sig" \
    | replace-signif-digraph "$top[2]" "$s" \
    > "${name}-${m}.sig"

  echo "  reconstructing ${name}-${m}.txt..." 
  # Drop the first character of every line, join all lines, and
  # then turn each carriage return (octal 015) into a newline.
  cat "${name}-${m}.sig" \
    | sed -e 's/^.//' \
    | tr -d '\012' \
    | tr '\015' '\012' \
    > "${name}-${m}.txt"
end