# Makefile to build the word-map table
# Last edited on 2012-05-05 20:18:34 by stolfilocal
#
# Overview: two frequency-ordered word lists are built (one from the
# English source, one from the Vietnamese sample), and then handed to
# "make-word-subst-table", whose output is the word-map table ${OUTTBL}.

# Final product of this makefile.
OUTTBL := word-map.tbl

# Name of this makefile; listed as a prerequisite so that editing the
# makefile forces the derived files to be rebuilt.
MAKEFILE := word-map.make

# "all" and "wordlist" are command-style goals, not files.
.PHONY: all wordlist

all: ${OUTTBL}

# Directory holding the helper tools used by the recipes below.
WORKBIN := ${STOLFIHOME}/projects/voynich/work
# NOTE(review): BANKBIN has the same value as WORKBIN and is not
# referenced anywhere else in this file - confirm whether it is still needed.
BANKBIN := ${STOLFIHOME}/projects/voynich/work

# We obtain the list of English words from the plaintext itself, using
# the same sample-functions that we used for the plaintext sample
# (engl/wow).

ENGLSMP := engl/wow
ENGLSRC := source/main.wds
ENGLAWK := ../../${ENGLSMP}/sample-fns.gawk
ENGLTBL := ../../${ENGLSMP}/word-map.tbl
ENGLDIC := .engl.dic

# Pipeline: run the source through wds-to-tlw, keep field 3 of the
# records whose field 1 is "a", count the occurrences of each distinct
# word, sort by decreasing count, and keep only the word column.
# The second recipe line just displays the first 100 entries.
${ENGLDIC}: ${ENGLSRC} ${MAKEFILE} \
            ${WORKBIN}/wds-to-tlw \
            ${ENGLAWK} ${ENGLTBL}
	cat ${ENGLSRC} \
	  | ${WORKBIN}/wds-to-tlw \
	      -f ${ENGLAWK} \
	      -v table=${ENGLTBL} \
	      -v smp="${ENGLSMP}" \
	      -v sec="tot.1" \
	  | gawk '($$1 == "a") { print $$3; }' \
	  | sort | uniq -c | expand \
	  | sort -b -k1,1nr \
	  | gawk '/./ { print $$2; }' \
	  > $@
	head -100 $@ | ${WORKBIN}/format-words-filled

# We obtain the list of Vietnamese words from the Vietnamese
# Pentateuch sample (viet/ptt), using the same sample-functions that
# we used for that sample, with compounding so that it has about the
# same number of distinct words as the English sample:

VIETSMP := viet/ptt
VIETSRC := ../../${VIETSMP}/source/main.wds
VIETAWK := ../../${VIETSMP}/sample-fns.gawk
VIETTBL := ../../${VIETSMP}/word-map.tbl
VIETDIC := .viet.dic

# Same pipeline as for the English list, with an extra "enlarge-lexicon"
# stage (num=4900) inserted before the counting step.
#
# The prerequisite list names ${VIETTBL}, the table that the recipe
# actually reads.  It used to name ${TBL}, which is only assigned further
# down (inside the sub-make section) and is therefore empty when this
# rule is read, so changes to the table did not trigger a rebuild.
#
# NOTE(review): "enlarge-lexicon" is a prerequisite file in the current
# directory but is invoked by bare name - presumably "." is in PATH; confirm.
${VIETDIC}: ${VIETSRC} ${MAKEFILE} \
            ${WORKBIN}/wds-to-tlw \
            ${VIETAWK} ${VIETTBL} \
            enlarge-lexicon
	cat ${VIETSRC} \
	  | ${WORKBIN}/wds-to-tlw \
	      -f ${VIETAWK} \
	      -v table=${VIETTBL} \
	      -v smp="${VIETSMP}" \
	      -v sec="tot.1" \
	  | gawk '($$1 == "a") { print $$3; }' \
	  | enlarge-lexicon -v num=4900 \
	  | sort | uniq -c | expand \
	  | sort -b -k1,1nr \
	  | gawk '/./ { print $$2; }' \
	  > $@
	head -100 $@ | ${WORKBIN}/format-words-filled

# Then we feed the two lists to the script make-word-subst-table,
# passing the English list as "old" and the Vietnamese list as "new".
#
# NOTE(review): like "enlarge-lexicon", the script is a prerequisite file
# in the current directory but is run by bare name - confirm PATH setup.

${OUTTBL}: ${ENGLSRC} ${VIETSRC} ${MAKEFILE} \
           ${WORKBIN}/wds-to-tlw \
           ${ENGLDIC} \
           ${VIETDIC} \
           make-word-subst-table
	make-word-subst-table \
	    -v old=${ENGLDIC} \
	    -v new=${VIETDIC} \
	    -v order=2 \
	  > $@

######################################################################
# Sub-make - caller must define
#   ${SRC} = the "main.wds" source file,
#   ${SMP} = the sample directory (e.g. "engl/wow") for sample-functions, and
#   ${DIC} = the name of the output file (wordlist in order of decr. freq.)
#
# The three assignments below are placeholders; values given on the make
# command line override them, which is what enables the section.

SMP := SMP.IS.UNDEFINED
SRC := SRC.IS.UNDEFINED
DIC := DIC.IS.UNDEFINED

ifneq "/${SMP}" "/SMP.IS.UNDEFINED"

  AWK := ../../${SMP}/sample-fns.gawk
  TBL := ../../${SMP}/word-map.tbl

  # NOTE(review): no rule for ${DIC} is visible in this file, and ${SRC},
  # ${AWK} and ${TBL} are not used by any rule here - confirm whether the
  # recipe lives elsewhere or this section is a leftover.
  wordlist: ${DIC}

endif
# End of ${SMP} / ${SRC} / ${DIC} submake
######################################################################