Hacking at the Voynich manuscript - Side notes
109 Computing the token entropy

Last edited on 2025-05-04 16:59:05 by stolfi

INTRODUCTION

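  In this note we compute the first-order token entropy (that is,
  the entropy of the distribution of single tokens, taken from
  per-word frequency files) for Voynichese and, for comparison,
  for other languages.  The results are written out as TeX macros.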
  
SETTING UP THE ENVIRONMENT

  Links:

  Symbolic links, created in the current directory, to the shared
  tools two directory levels up and to the "data" and "lang"
  entries of the sibling directories "../100" and "../101".  In the
  part of this note visible here, only "compute-entropy" and "lang"
  are actually used by the commands.
  
    ln -s ../../compute-cum-cum-freqs
    ln -s ../../compute-cum-freqs
    ln -s ../../compute-freqs
    ln -s ../../combine-counts
    ln -s ../../compute-entropy
    ln -s ../../remove-freqs
    ln -s ../../totalize-fields
    ln -s ../../select-units
    ln -s ../../words-from-evt
    ln -s ../../format-counts-packed
    
    ln -s ../100/data
    ln -s ../101/lang

  Paper directories:

  Judging by the path names, these are the destinations in the
  paper's tree for automatically generated tables and figures.
  Only "tbldir" is used by the commands visible in this note (as
  the target of the final "mv").  NOTE: the leading "???" looks
  like a placeholder marking the paths as unverified; presumably
  it must be replaced by the real prefix before the "mv" can
  succeed -- TODO confirm.

    set tbldir = "???/home/staff/stolfi/papers/voynich-words/techrep/tables/auto"
    set figdir = "???/home/staff/stolfi/papers/voynich-words/techrep/figures/auto"

ENTROPIES OF TOKENS

  For each corpus key "<wkind>.<lang>" we feed the first field of
  every non-empty line of "lang/<lang>/<wkind>/gud.wfr" to
  "compute-entropy" (presumably that field is the per-word count
  or frequency -- TODO confirm against the ".wfr" format), and save
  whatever it prints as the TeX macro "\tkentropy<wkind><lang>",
  e.g. "\tkentropytextvoyn".  The macro file is then moved into
  "${tbldir}", keeping a backup of any previous version ("mv -b").

  The scratch file ".tmp" is removed after each use, and a corpus
  for which "compute-entropy" printed nothing is reported and
  skipped instead of silently producing an empty macro.
  
    set ofile = "token-entropies.tex"; echo "${ofile}"
    /bin/rm -f ${ofile}
    foreach kf ( text.voyn labs.voyn text.engl text.latn )
      set wkind = "${kf:r}"; set lang = "${kf:e}"
      gawk '/./{ print $1; }' lang/${lang}/${wkind}/gud.wfr \
        | compute-entropy \
        > ".tmp"
      set entropy = ( `cat .tmp` )
      /bin/rm -f .tmp
      if ( $#entropy == 0 ) then
        echo "** ${kf}: compute-entropy gave no output, macro not written"
      else
        echo "${kf}: ${entropy}"
        printf '\\def\\tkentropy'"${wkind}${lang}"'{'"${entropy}"'}\n' >> ${ofile}
      endif
    end
    /bin/mv -bv ${ofile} ${tbldir}/