# Last edited on 2002-01-17 04:38:49 by stolfi 
# Voynichese token/word length distributions
# 
# Makefile for computing the Voynichese token and word length
# histograms for text, labels, and everything together, in terms of
# basic glyphs and OKOKOKO elements, for raw, good, and bad
# subsamples. It also generates comparison plots of the good and raw
# histograms (basic glyphs only), in order to ensure that the
# good-word selection did not introduce any significant bias in the
# length statistics.

# Book tag; it is the second component of every sample path
# (sample/${LANG}/${BOOK}/tot.t).
BOOK := vms

# Sub-language tags; the "all" goal runs one series of sub-makes per tag,
# passing it down as LANG.
LANGS := voyp voyl voyn

# Default goal. For each sub-language in ${LANGS} it re-invokes this
# makefile
#   - once per (element type, sample quality) pair with goal
#     "single-countfile", which builds the count file and then the
#     token and word histograms and average-length files;
#   - once per counting mode (t = tokens, w = words) with goal
#     "single-cmp-plot", which builds the raw-vs-good comparison plot.
# The comparison plots come after the count files because they read the
# ".lhi" histograms produced by the "single-countfile" passes.
#
# Every sub-make is followed by "|| exit 1": without it the shell loop
# would carry on after a failure and make would only see the status of
# the very last iteration.
.PHONY: all
all:
	for lang in ${LANGS}; do \
	  for elem in basic oko; do \
	    for qual in raw gud bad; do \
	      ${MAKE} LANG=$$lang ELEM=$$elem QUAL=$$qual \
	        -f vms-length-hists.make single-countfile || exit 1; \
	    done; \
	  done; \
	  for tkwd in t w; do \
	    ${MAKE} LANG=$$lang TKWD=$$tkwd \
	      -f vms-length-hists.make single-cmp-plot || exit 1; \
	  done; \
	done

# Root of the paper tree that receives the generated files.
PAPER_DIR := /home/staff/stolfi/papers/voynich-stats/techrep

# Export locations inside the paper tree: comparison plots go under
# ${FIG_DIR}, average-length TeX definitions under ${TBL_DIR}.
FIG_DIR := ${PAPER_DIR}/figures/auto
TBL_DIR := ${PAPER_DIR}/tables/auto

######################################################################
# Recursive make for each Voynichese sub-language.
# Caller must define 
#   ${LANG} = "voyp", "voyl", etc.;
#   ${BOOK} = "vms" only for now (this is the default when not given).
#
# Sentinel default. An assignment on the make command line (LANG=voyp)
# takes precedence over this one, so the guard below holds only when the
# caller supplied ${LANG}; everything down to the final "endif" of the
# file is skipped otherwise.
# NOTE(review): LANG is also the usual locale environment variable, and
# make hands the value set here to the commands it runs -- confirm that
# none of the tools depends on the locale.
LANG := LANG.IS.UNDEFINED
ifneq "${LANG}" "LANG.IS.UNDEFINED"

# Directory, relative to "sample/", holding the files of this
# sub-language and book.
SUBDIR := ${LANG}/${BOOK}/tot.t

######################################################################
# Recursive make for each Voynichese sub-language and element type.
# Caller must define 
#   ${LANG} = "voyp", "voyl", etc.;
#   ${ELEM} = "basic" or "oko".
#
# Sentinel default, superseded when the caller passes ELEM=... on the
# make command line; the section down to the matching "endif" is
# parsed only in that case.
ELEM := ELEM.IS.UNDEFINED
ifneq "${ELEM}" "ELEM.IS.UNDEFINED"

######################################################################
# Recursive make for each Voynichese sub-language, element type, 
# and sample quality. Caller must define 
#   ${LANG} = "voyp", "voyl", etc.;
#   ${ELEM} = "basic" or "oko".
#   ${QUAL} = "raw", "gud", or "bad";
#
# Sentinel default, superseded when the caller passes QUAL=... on the
# make command line; the section down to the matching "endif" is
# parsed only in that case.
QUAL := QUAL.IS.UNDEFINED
ifneq "${QUAL}" "QUAL.IS.UNDEFINED"

# Input of the counting pipeline for this sub-language and sample quality.
WFR_FILE := sample/${SUBDIR}/${QUAL}.wfr
# Output of the counting pipeline; also depends on the element type.
CTS_FILE := sample/${SUBDIR}/${QUAL}-fact-${ELEM}.cts

# Builds the count file for the given ${LANG}/${ELEM}/${QUAL}, then
# re-invokes this makefile once per counting mode (t = tokens,
# w = words) with goal "single-hist".
# "|| exit 1" makes the loop stop at the first failing sub-make; with a
# plain ";" only the status of the last iteration would reach make.
# The recipe ends on "done" with no trailing backslash, so it does not
# depend on the following line being blank.
.PHONY: single-countfile
single-countfile: ${CTS_FILE}
	for tkwd in t w; do \
	  ${MAKE} LANG=${LANG} QUAL=${QUAL} ELEM=${ELEM} TKWD=$$tkwd \
	    -f vms-length-hists.make single-hist || exit 1; \
	done

# Builds the count file from the ".wfr" file by piping it through
#   capitalize-ligatures (told to work on field 3),
#   factor-field-general with the ${ELEM}-specific factoring script
#     (input field 3, output field 4),
#   a gawk filter that keeps fields 1, 3 and 4,
#   compute-elem-counts.
# The tools and the factoring script are listed as prerequisites, so a
# change to any of them triggers a rebuild; they are looked up in the
# current directory for that purpose.
# NOTE(review): the recipe calls the tools by bare name, so it presumably
# relies on "." being in PATH -- confirm.
#
# The output goes to a temporary file that is renamed onto the target
# only on success, so a failed run cannot leave a truncated target that
# looks up to date. Only the exit status of the last pipeline stage is
# checked by the shell.
${CTS_FILE}: ${WFR_FILE} \
             capitalize-ligatures \
             factor-field-general factor-text-${ELEM}.gawk \
             compute-elem-counts vms-length-hists.make
	@echo "${WFR_FILE} -> ${CTS_FILE}"
	cat $< \
	  | capitalize-ligatures -v field=3 \
	  | factor-field-general \
	      -f factor-text-${ELEM}.gawk -v inField=3 -v outField=4 \
	  | gawk '//{ print $$1, $$3, $$4; }' \
	  | compute-elem-counts \
	  > $@.tmp \
	  || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

######################################################################
# Recursive make for each Voynichese sub-language, 
# element type, sample quality, and token/word counting.
# Caller must define 
#   ${LANG} = "voyp", "voyl", etc.;
#   ${ELEM} = "basic" or "oko".
#   ${QUAL} = "raw", "gud", or "bad";
#   ${TKWD} = "t" (tokens) or "w" (words).
#
# Sentinel default, superseded when the caller passes TKWD=... on the
# make command line; the section down to the matching "endif" is
# parsed only in that case.
TKWD := TKWD.IS.UNDEFINED
ifneq "${TKWD}" "TKWD.IS.UNDEFINED"

# Length histogram for the selected quality, element type and counting mode.
LHI_FILE := sample/${SUBDIR}/${QUAL}-fact-${ELEM}-${TKWD}.lhi

# Average-length TeX definition: the copy kept with the samples, and the
# location under ${TBL_DIR} given to update-paper-include.
AVG_TEX := sample/${SUBDIR}/${QUAL}-fact-${ELEM}-${TKWD}-avlen.tex
AVG_TEX_EXP := ${TBL_DIR}/${SUBDIR}/${QUAL}-fact-${ELEM}-${TKWD}-avlen.tex

# Builds the length histogram and the average-length TeX file for the
# given ${LANG}/${ELEM}/${QUAL}/${TKWD}. Declared phony so that a stray
# file named "single-hist" cannot make the goal look up to date.
.PHONY: single-hist
single-hist: ${LHI_FILE} ${AVG_TEX}

# Builds the length histogram from the count file. The gawk filter
# keeps fields 3 and 4 and sets the first output field to field 1 of
# the input in token mode (TKWD = "t") or to the constant 1 in word
# mode; the result is fed to compute-elem-count-distrib.
# NOTE(review): field 1 is presumably the occurrence count of the word,
# which is what makes "t" count tokens and "w" count distinct words --
# confirm against compute-elem-counts.
#
# The output goes to a temporary file that is renamed onto the target
# only on success, so a failed run cannot leave a truncated target that
# looks up to date. The finished histogram is echoed to the terminal.
${LHI_FILE}: ${CTS_FILE} \
             compute-elem-count-distrib \
             vms-length-hists.make
	@echo "${CTS_FILE} -> ${LHI_FILE}"
	cat $< \
	  | gawk -v tkwd="${TKWD}" \
	      '/./{ print (tkwd == "t" ? $$1 : 1), $$3, $$4; }'  \
	  | compute-elem-count-distrib \
	  > $@.tmp \
	  || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
	cat $@

# Computes the average length from the histogram and writes it as a
# one-line TeX macro definition
#   \def\<lang><book>Avg<Tk|Wd>N<elem>{<average>}
# Lines starting with "#" are skipped; on every other non-empty line,
# field 2 is added to the total and weights field 1 in the sum, so the
# average is sum(field1*field2)/sum(field2), printed with two decimals.
# NOTE(review): the macro name does not include ${QUAL}, so the raw, gud
# and bad files all define the same macro -- confirm this is intended.
#
# An empty histogram is reported explicitly instead of failing with a
# bare division by zero. The output goes to a temporary file that is
# renamed onto the target only on success; a truncated target left by a
# failed run would look up to date and update-paper-include would then
# never be run for it.
${AVG_TEX}: ${LHI_FILE}
	cat $< \
	  | gawk \
	      -v lg=${LANG} -v bk=${BOOK} -v ek=${ELEM} -v tw=${TKWD} \
	      ' /^[#]/{next;} \
	        /./{ t+= $$2; e += $$2*$$1; } \
	        END { \
	          if (t == 0) { \
	            print "empty histogram: cannot compute average length" > "/dev/stderr"; \
	            exit 1; \
	          } \
	          xtw = ( tw == "t" ? "Tk" : "Wd" ); \
	          printf "\\def\\%s%sAvg%sN%s{%.2f}\n", lg, bk, xtw, ek, e/t; \
	        } \
	      ' \
	  > $@.tmp \
	  || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
	cat $@
	update-paper-include $@ ${AVG_TEX_EXP}

endif
# End of ${LANG}/${ELEM}/${QUAL}/${TKWD} recursion
######################################################################

endif
# End of ${LANG}/${ELEM}/${QUAL} recursion
######################################################################

endif
# End of ${LANG}/${ELEM} recursion
######################################################################


######################################################################
# Recursive make for each Voynichese sub-language and token/word counting.
# Caller must define 
#   ${LANG} = "voyp", "voyl", etc.;
#   ${TKWD} = "t" (tokens) or "w" (words).
#
# Sentinel default, superseded when the caller passes TKWD=... on the
# make command line; the section down to the matching "endif" is
# parsed only in that case. (The ELEM/QUAL section sets the same
# sentinel when it is active; the repetition here is needed because
# this section is also reached without ELEM and QUAL.)
TKWD := TKWD.IS.UNDEFINED
ifneq "${TKWD}" "TKWD.IS.UNDEFINED"

# The two basic-glyph histograms compared in the plot: raw and good samples.
LHI_FILES := \
  sample/${SUBDIR}/raw-fact-basic-${TKWD}.lhi \
  sample/${SUBDIR}/gud-fact-basic-${TKWD}.lhi
  
# Comparison plot: the copy kept with the samples, and the location
# under ${FIG_DIR} given to update-paper-include.
HIST_EPS := sample/${SUBDIR}/cleanup-${TKWD}-len-cmp.eps
HIST_EPS_EXPORT := ${FIG_DIR}/${SUBDIR}/cleanup-${TKWD}-len-cmp.eps

# Builds the raw-vs-good comparison plot for the given ${LANG}/${TKWD}.
# Declared phony so that a stray file named "single-cmp-plot" cannot
# make the goal look up to date.
.PHONY: single-cmp-plot
single-cmp-plot: ${HIST_EPS}

# Options handed to compare-elem-count-distribs; the plot for the
# "voyl" sub-language additionally gets -noylabels.
PLOT_OPTS := -freqs -size 1.25,1.00
ifeq (${LANG},voyl)
  PLOT_OPTS += -noylabels
endif

# Plots the raw ('full') and good ('clean') basic-glyph histograms
# together with compare-elem-count-distribs, then hands the plot to
# update-paper-include for export to ${HIST_EPS_EXPORT}.
# When this section is reached through the "all" goal, ELEM and QUAL
# are not set, so this invocation has no rule for the ".lhi"
# prerequisites: they must already have been built by the
# "single-countfile" passes.
#
# The plot is written to a temporary file that is renamed onto the
# target only on success, so a failed run cannot leave a truncated
# target that looks up to date.
${HIST_EPS}: ${LHI_FILES} \
             compare-elem-count-distribs \
             vms-length-hists.make
	@echo "generating ${HIST_EPS}..."
	compare-elem-count-distribs ${PLOT_OPTS} \
	    sample/${SUBDIR}/raw-fact-basic-${TKWD}.lhi 'full'    1.00 1 1 \
	    sample/${SUBDIR}/gud-fact-basic-${TKWD}.lhi 'clean'   1.00 2 2 \
	  > $@.tmp \
	  || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
	update-paper-include $@ ${HIST_EPS_EXPORT}

endif
# End of ${LANG}/${TKWD} recursion
######################################################################

endif
# End of ${LANG} recursion
######################################################################
