The number of raw tokens taken for each sample is adjusted so that
  the number of good tokens matches (as far as possible) the number of
  good tokens in the "prs" or "lab" samples, as appropriate.
  The following numbers refer to SYMBOL and ALPHA tokens only.
  
    # Record the raw-token truncation count for each sample: every command
    # below overwrites that sample's trunc-raw.num file with a single integer.
    # NOTE(review): paths look like dat/<language>/<text>/<section>.1 --
    # confirm against the actual dataset layout.
    echo 35040 > dat/engl/wow/tot.1/trunc-raw.num
    
    echo  3200 > dat/engl/cul/pre.1/trunc-raw.num 
    echo 23054 > dat/engl/cul/her.1/trunc-raw.num
    echo  8985 > dat/engl/cul/rec.1/trunc-raw.num
    
    echo  9118 > dat/latn/ptt/gen.1/trunc-raw.num
    echo  7254 > dat/latn/ptt/exo.1/trunc-raw.num
    echo  4981 > dat/latn/ptt/lev.1/trunc-raw.num
    echo  6984 > dat/latn/ptt/num.1/trunc-raw.num
    echo  6690 > dat/latn/ptt/deu.1/trunc-raw.num
    
    echo  9921 > dat/grek/nwt/mat.1/trunc-raw.num
    echo  6156 > dat/grek/nwt/mrk.1/trunc-raw.num
    echo 10525 > dat/grek/nwt/luk.1/trunc-raw.num
    echo  8425 > dat/grek/nwt/joh.1/trunc-raw.num
    
    echo 35041 > dat/span/qvi/tot.1/trunc-raw.num

    echo 36963 > dat/arab/qur/tot.1/trunc-raw.num

    echo 35307 > dat/geez/gok/tot.1/trunc-raw.num

    echo 35027 > dat/viet/ptt/tot.1/trunc-raw.num

    echo 35048 > dat/tibe/vim/tot.1/trunc-raw.num

    echo 35041 > dat/tibe/ccv/tot.1/trunc-raw.num

    echo 35027 > dat/chin/ptt/tot.1/trunc-raw.num

    echo 35027 > dat/chin/red/tot.1/trunc-raw.num

    # NOTE(review): chrc/red and enrc/wow are presumably the roman-coded
    # variants of chin/red and engl/wow (same counts) -- confirm.
    echo 35027 > dat/chrc/red/tot.1/trunc-raw.num
    echo 35040 > dat/enrc/wow/tot.1/trunc-raw.num

    echo 35027 > dat/rugg/sfw/tot.1/trunc-raw.num
    echo 35027 > dat/rugg/hnd/tot.1/trunc-raw.num

    # NOTE(review): the much smaller count suggests these two are sized
    # against the "lab" samples rather than the "prs" ones -- confirm.
    echo  1003 > dat/engl/wnm/tot.1/trunc-raw.num
    echo  1003 > dat/engl/cnp/tot.1/trunc-raw.num


# NOT DONE #############################################################
# 
# ROMAN NUMERALS
# 
#   Generating the Roman numerals 0-999, additive system:
#   [Now replaced by roman-coded texts such as enrc/wow and 
#   chrc/red]
#   
#     /bin/rm .roman-old.nums
#     foreach u ( '' I II III IIII V VI VII VIII VIIII )
#       foreach d ( '' X XX XXX XXXX L LX LXX LXXX LXXXX )
#         foreach c ( '' C CC CCC CCCC D DC DCC DCCC DCCCC )
#           echo "#$c$d$u" >> .roman-old.nums
#         end
#       end
#     end
# 
#   Generating the Roman numerals 0-999, subtractive system:
# 
#     /bin/rm .roman-new.nums
#     foreach u ( '' I II III IV V VI VII VIII IX )
#       foreach d ( '' X XX XXX XL L LX LXX LXXX XC )
#         foreach c ( '' C CC CCC CD D DC DCC DCCC CM )
#           echo "#$c$d$u" >> .roman-new.nums
#         end
#       end
#     end
# 
# 
# JUNK
#     
#   Generating the TeX-formatted summary:
#   [Now superseded by per-language summaries]
#   
#     foreach kind ( raw gud bad )
#       set ifile = ".summary-${kind}"
#       set tfile = "otherlangs-${kind}-tw-summary.tex"
#       printf "%% Created by Note-101.txt\n" > ${tfile}
#       printf "%%\n" >> ${tfile}
#       cat ${ifile} \
#         | gawk \
#             ' /./ { \
#                 smp = $1; tks = $2; wds = $3; \
#                 gsub(/[\/]/, "", smp); \
#                 printf "\\def\\%sGudTks{%d}\n",smp,tks; \
#                 printf "\\def\\%sGudWds{%d}\n",smp,wds; \
#               } \
#             ' \
#         >> ${tfile}
#       update-paper-include ${tfile} ${tbldir}/
#     end
# 
#   Take a small sample from each language, and pretend it is labels:
#   [Now replaced by more realistic samples such as engl/wnm and engl/cnp]
# 
#     set ratio = \
#       `gawk -v nprose=${nprose} -v nlabs=${nlabs} 'BEGIN{print nlabs/(nprose - 2*nlabs);}'`
#     echo ${ratio}
#     
#     foreach lp ( eng.0 lat.0 )
#       set lng = "${lp:r}"; set plus = "${lp:e}"
#       @ ntake = ${nlabs} + ${plus}
#       set ifile = "dat/${lng}/prose/raw.tks"
#       set ofile = "dat/${lng}/labs/raw.tks"
#       echo "${ifile} -> ${ofile}"
#       cat ${ifile} \
#         | gawk -v ratio=${ratio} '(rand() <= ratio){ print; }' \
#         | head -${ntake} \
#         > ${ofile}
#       cat ${ofile} | egrep -v '[^a-z]' > .gud
#       dicio-wc ${ofile} .gud
#     end