# Makefile to build the word-map table
# Last edited on 2012-05-05 19:18:34 by stolfilocal

# Remove a half-written target if its recipe fails, so that a truncated
# table is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# "all" is a command name, not a file.
.PHONY: all

TBL := word-map.tbl
SRC := source/main.wds

MAKEFILE := word-map.make

all: ${TBL}

# STOLFIHOME is not defined in this file; presumably it is supplied by
# the environment or on the command line -- TODO confirm.
WORKBIN := ${STOLFIHOME}/projects/voynich/work
BANKBIN := ${STOLFIHOME}/projects/voynich/work

# We extract the list of Chinese ideograms from the plaintext itself,
# using the same sample-functions we used for the pinyinified Chinese
# version (chin/red); except that the word table only maps punctuation
# to "*DELETE*" (just in case those words are not marked "p" in the
# source "main.wds"). So the output is GB codes instead of Pinyin.

CHINSMP := chin/red
CHINAWK := ../../${CHINSMP}/sample-fns.gawk
CHINTBL := gb-punct-delete.tbl

# Scratch file holding the shuffled token list. Its name is derived from
# the target so that concurrent builds of different tables do not
# clobber each other (a fixed name under /tmp is shared by everyone).
CHINTKS := ${TBL}.tks.tmp

# We assign Roman-style codes to the ideograms of the "Red Mansion"
# in random order, while retaining all copies of each ideogram.
# Therefore the most frequent ideograms will tend to get
# low codes, but codeword length will be uniform throughout the
# whole encoded text.
#
# Recipe steps:
#   1. Run the source through "wds-to-tlw" with the Chinese sample
#      functions; keep the third field of every record whose first
#      field is "a", tag it with a rand() key, sort on that key, strip
#      the key, and save the shuffled list in ${CHINTKS}.  (gawk's
#      rand() is not re-seeded here, so the order is the same on every
#      run.)
#   2. Display the first 1000 shuffled tokens through
#      "format-words-filled" (presumably just for visual inspection).
#   3. Feed the shuffled list to "roman-code-words"; keep only the lines
#      between the "# BEGIN DICTIONARY" and "# END DICTIONARY" markers,
#      drop the markers, strip the leading "#" and blanks, and, on
#      lines without "*DELETE*", replace the first run of blanks by
#      " @".  The result is the table ${TBL}.
#   4. Remove the scratch file.

${TBL}: ${SRC} ${MAKEFILE} \
          ${WORKBIN}/wds-to-tlw \
          ${CHINAWK} ${CHINTBL} \
          ${WORKBIN}/roman-code-words \
          ${WORKBIN}/roman-encoding.gawk \
          ${WORKBIN}/roman-encoding-pseudo-voynich-1.gawk
	cat ${SRC} \
	  | ${WORKBIN}/wds-to-tlw \
	      -f ${CHINAWK} \
	      -v table=${CHINTBL} \
	      -v smp="${CHINSMP}" \
	      -v sec="tot.1" \
	  | gawk '($$1 == "a") { printf "%10.8f %s\n", rand(), $$3; }' \
	  | sort -b -k1,1g \
	  | gawk '//{ print $$2; }' \
	  > ${CHINTKS}
	head -1000 ${CHINTKS} | ${WORKBIN}/format-words-filled
	cat ${CHINTKS} \
	  | ${WORKBIN}/roman-code-words \
	      -f ${WORKBIN}/roman-encoding.gawk \
	      -f ${WORKBIN}/roman-encoding-pseudo-voynich-1.gawk \
	      -v honorCase=1 \
	  | sed \
	      -e '/^# BEGIN DICTIONARY/,/^# END DICTIONARY/!d' \
	      -e '/DICTIONARY/d' \
	      -e 's/^#[ ]*//' \
	      -e '/[*]DELETE[*]/!s/[ ][ ]*/ @/' \
	  > ${TBL}
	/bin/rm -f ${CHINTKS}