# Makefile to build the word-map table
# Last edited on 2023-05-10 16:17:17 by stolfi

# Name of the final product, and the name of this makefile.  The rules
# in this file list ${MAKEFILE} as a prerequisite, so editing the
# makefile forces the derived files to be rebuilt.
OUTTBL := word-map.tbl
MAKEFILE := word-map.make

# "all" is a command name, not a file: declare it phony so that a stray
# file called "all" in this directory cannot make the default goal look
# up to date.
.PHONY: all
all: ${OUTTBL}

# Directory holding the helper scripts invoked by the recipes.
# BANKBIN points at the same directory; it is not referenced by the
# rules visible in this part of the file.
WORKBIN := ${STOLFIHOME}/projects/voynich/work
BANKBIN := ${STOLFIHOME}/projects/voynich/work

# We obtain the list of English words from the plaintext itself, using
# the same sample-functions that we used for the plaintext sample
# (engl/wow).

# English side: sample name, word source, the sample's function library
# and word-map table, and the name of the word list produced here.
ENGLSMP := engl/wow
ENGLSRC := org/main.wds
ENGLAWK := ../../${ENGLSMP}/sample-fns.gawk
ENGLTBL := ../../${ENGLSMP}/word-map.tbl
ENGLDIC := .engl.dic

# Word list in order of decreasing frequency: run the source through
# wds-to-tlw, keep field 3 of the records whose field 1 is "a", count
# the occurrences of each word, sort by decreasing count, and write out
# the words alone.  The last two commands print the number of entries
# and a formatted view of the first 100.
${ENGLDIC}: ${ENGLSRC} ${MAKEFILE} ${WORKBIN}/wds-to-tlw ${ENGLAWK} ${ENGLTBL}
	cat ${ENGLSRC} \
	  | ${WORKBIN}/wds-to-tlw \
	      -f ${ENGLAWK} \
	      -v table=${ENGLTBL} \
	      -v smp="${ENGLSMP}" \
	      -v sec="tot.1" \
	  | gawk '($$1 == "a") { print $$3; }' \
	  | sort | uniq -c | expand \
	  | sort -b -k1,1nr \
	  | gawk '/./ { print $$2; }' \
	  > $@
	wc -l $@
	head -100 $@ | ${WORKBIN}/format-words-filled

# We obtain the list of Vietnamese words from the Vietnamese
# Pentateuch sample (viet/ptt), using the same sample-functions that
# we used for that sample, with compounding so that it has about the
# same number of distinct words as the English sample:

# Vietnamese side: sample name, word source, the sample's function
# library and word-map table, and the name of the word list produced here.
VIETSMP := viet/ptt
VIETSRC := ../../${VIETSMP}/org/main.wds
VIETAWK := ../../${VIETSMP}/sample-fns.gawk
VIETTBL := ../../${VIETSMP}/word-map.tbl
VIETDIC := .viet.dic

# Word list in order of decreasing frequency, built like the English
# one but with an extra enlarge-lexicon stage that is given the number
# of lines of ${ENGLDIC} as its "num" parameter.
#
# Prerequisite notes:
#   * ${VIETTBL} is the table actually passed to wds-to-tlw below, so it
#     is the one that must be tracked (the generic ${TBL} is not yet
#     defined at this point of the file and would expand to nothing).
#   * ${ENGLDIC} is read by the recipe to obtain "num", so it must exist
#     and be up to date before this rule runs.
${VIETDIC}: ${VIETSRC} ${MAKEFILE} \
            ${WORKBIN}/wds-to-tlw \
            ${VIETAWK} ${VIETTBL} \
            ${ENGLDIC} \
            enlarge-lexicon
	cat ${VIETSRC} \
	  | ${WORKBIN}/wds-to-tlw \
	      -f ${VIETAWK} \
	      -v table=${VIETTBL} \
	      -v smp="${VIETSMP}" \
	      -v sec="tot.1" \
	  | gawk '($$1 == "a") { print $$3; }' \
	  | enlarge-lexicon -v num=`cat ${ENGLDIC} | wc -l` \
	  | sort | uniq -c | expand \
	  | sort -b -k1,1nr \
	  | gawk '/./ { print $$2; }' \
	  > ${VIETDIC}
	wc -l ${VIETDIC}
	head -100 ${VIETDIC} | ${WORKBIN}/format-words-filled

# Then we feed the two lists to the script make-word-subst-table.gawk:

# Final table: run make-word-subst-table.gawk with the English list as
# "old", the Vietnamese list as "new" and order=2, capturing its
# standard output as the target.
${OUTTBL}: ${ENGLSRC} ${VIETSRC} ${MAKEFILE} ${WORKBIN}/wds-to-tlw \
           ${ENGLDIC} ${VIETDIC} make-word-subst-table.gawk
	make-word-subst-table.gawk \
	    -v old=${ENGLDIC} -v new=${VIETDIC} -v order=2 \
	  > $@

######################################################################
# Sub-make - caller must define 
#   ${SRC} = the "main.wds" source file,
#   ${SMP} = the sample directory (e.g. "engl/wow") for sample-functions, and
#   ${DIC} = the name of the output file (wordlist in order of decr. freq.)

# Placeholder defaults.  A caller overrides them on the make command
# line (command-line assignments take precedence over these); the
# sentinel values make an omitted parameter easy to spot.
SMP := SMP.IS.UNDEFINED
SRC := SRC.IS.UNDEFINED
DIC := DIC.IS.UNDEFINED

# The definitions below are active only when a real ${SMP} was supplied.
ifneq "/${SMP}" "/SMP.IS.UNDEFINED"

# Sample-functions library and word-map table of the chosen sample.
AWK := ../../${SMP}/sample-fns.gawk
TBL := ../../${SMP}/word-map.tbl

# "wordlist" is a command name, not a file: declare it phony so that a
# stray file called "wordlist" cannot mask the request to build ${DIC}.
.PHONY: wordlist
wordlist: ${DIC}

endif
# End of ${SMP} / ${SRC} / ${DIC} submake
######################################################################