# Last edited on 2002-01-17 03:53:05 by stolfi
# Token/word length distributions for reference languages
# 
# Makefile for computing the token and word length
# histograms for the "gud" subset of a reference language sample,
# in terms of a specified factoring into elements.

# Default goal.  It builds nothing: the real entry points
# ("single-sampelem" and "single-hist") need variables that must be
# supplied by the caller, so a bare "make" only prints a reminder.
# Declared phony so that a stray file named "all" cannot hide it.
.PHONY: all
all: 
	@echo "make what?"

# Root of the paper's directory tree, and the sub-directories derived
# from it.  ${TBL_DIR} is the base of ${AVG_TEX_EXP}; ${FIG_DIR} is not
# used by any rule visible in this file.
PAPER_DIR := /home/staff/stolfi/papers/voynich-stats/techrep
FIG_DIR := ${PAPER_DIR}/figures/auto
TBL_DIR := ${PAPER_DIR}/tables/auto

######################################################################
# Caller must define (on the "make" command line)
#   ${LANG} = "engl", "chin", etc.;
#   ${BOOK} = "wow", "red", etc.;
#   ${ELEM} = "trivial", "viqr", etc. (element factorization);
#
# Sentinel-default idiom: each variable is first assigned a placeholder
# value in the makefile.  A definition given on the "make" command line
# overrides that assignment, so each "ifneq" below succeeds only when the
# caller really supplied the variable.  The tests are nested (BOOK is
# examined only if LANG was given, ELEM only if BOOK was given); the three
# conditionals opened here are closed by the three "endif"s at the end of
# the file.
# NOTE(review): LANG is also the standard locale environment variable; a
# command-line LANG=... is presumably exported to the recipe commands
# too -- confirm that this is harmless for the tools invoked below.
LANG := LANG.IS.UNDEFINED
ifneq "${LANG}" "LANG.IS.UNDEFINED"
BOOK := BOOK.IS.UNDEFINED
ifneq "${BOOK}" "BOOK.IS.UNDEFINED"
ELEM := ELEM.IS.UNDEFINED
ifneq "${ELEM}" "ELEM.IS.UNDEFINED"

# Script handed to "factor-field-general" with "-f"; selected by ${ELEM}.
FACTOR_AWK := factor-text-${ELEM}.gawk

# Directory of this language/book sample, relative to "sample/".
SUBDIR := ${LANG}/${BOOK}/tot.t

# Input and output of the factoring/counting pipeline defined below.
WFR_FILE := sample/${SUBDIR}/gud.wfr
CTS_FILE := sample/${SUBDIR}/gud-fact-${ELEM}.cts

# Entry point for one ${LANG}/${BOOK}/${ELEM} combination: makes sure
# ${CTS_FILE} is up to date, then re-invokes this makefile once with
# TKWD=t and once with TKWD=w to build the "single-hist" goal.
#
# The loop stops at the first failing sub-make ("|| exit 1"); without
# that, the status of the "for" command is that of its last iteration
# only, and a failure of the TKWD=t run would go unnoticed.
# ELEM is passed explicitly, like LANG and BOOK, instead of relying on
# its implicit propagation to the sub-make.
.PHONY: single-sampelem
single-sampelem: ${CTS_FILE}
	for tkwd in t w; do \
	  ${MAKE} LANG=${LANG} BOOK=${BOOK} ELEM=${ELEM} TKWD=$$tkwd \
	    -f other-length-hists.make single-hist \
	    || exit 1; \
	done

# Builds ${CTS_FILE} from ${WFR_FILE}:
#   1. "factor-field-general" runs ${FACTOR_AWK} with inField=3 and
#      outField=4 (presumably: factor field 3 and store the result in
#      field 4 -- confirm against that script);
#   2. gawk keeps fields 1, 3 and 4 of every line;
#   3. "compute-elem-counts" produces the final file.
# The tools and this makefile are prerequisites, so editing any of them
# forces a rebuild.
#
# The output goes to a temporary file that is renamed only after the
# pipeline succeeded; a failed run therefore cannot leave a truncated
# ${CTS_FILE} that later runs would consider up to date.
${CTS_FILE}: ${WFR_FILE} \
             factor-field-general ${FACTOR_AWK} \
             compute-elem-counts other-length-hists.make
	@echo "${WFR_FILE} -> ${CTS_FILE}"
	cat ${WFR_FILE} \
	  | factor-field-general \
	      -f ${FACTOR_AWK} -v inField=3 -v outField=4 \
	  | gawk '//{ print $$1, $$3, $$4; }' \
	  | compute-elem-counts \
	  > ${CTS_FILE}.tmp
	mv -f ${CTS_FILE}.tmp ${CTS_FILE}

######################################################################
# Caller must define 
#   ${LANG} = "engl", "chin", etc.;
#   ${BOOK} = "wow", "red", etc.;
#   ${ELEM} = "trivial", "viqr", etc. (element factorization);
#   ${TKWD} = "t" (tokens) or "w" (words).
#
# Sentinel default: the section below is active only when the caller
# supplies TKWD (a command-line definition overrides this assignment).
# The conditional is closed by the first "endif" further down.
TKWD := TKWD.IS.UNDEFINED
ifneq "${TKWD}" "TKWD.IS.UNDEFINED"

# ${SUBDIR} and ${CTS_FILE} are inherited from the enclosing ${ELEM}
# section; the identical second definition of CTS_FILE that used to be
# here was redundant and has been dropped.

# Length histogram for the selected ${TKWD}.
LHI_FILE := sample/${SUBDIR}/gud-fact-${ELEM}-${TKWD}.lhi

# TeX macro file with the average length: local copy, and the copy
# under ${TBL_DIR} that "update-paper-include" maintains.
AVG_TEX := sample/${SUBDIR}/gud-fact-${ELEM}-${TKWD}-avlen.tex
AVG_TEX_EXP := ${TBL_DIR}/${SUBDIR}/gud-fact-${ELEM}-${TKWD}-avlen.tex

# Entry point for one ${TKWD} value: builds the length histogram and the
# TeX file with the average length.  Declared phony because it names a
# task, not a file.
.PHONY: single-hist
single-hist: ${LHI_FILE} ${AVG_TEX}

# Builds the length histogram ${LHI_FILE} from ${CTS_FILE}.
# For every non-empty line the gawk filter emits fields 1, 3 and 4,
# except that field 1 is replaced by the constant 1 when TKWD is not
# "t" (presumably field 1 is an occurrence count, so that tokens are
# weighted by it and words count once each -- confirm against
# "compute-elem-counts").  The result is fed to
# "compute-elem-count-distrib" and finally displayed.
#
# The output goes to a temporary file that is renamed only after the
# pipeline succeeded, so a failed run cannot leave a truncated
# ${LHI_FILE} that later runs would consider up to date.
${LHI_FILE}: ${CTS_FILE} \
             compute-elem-count-distrib \
             other-length-hists.make
	@echo "${CTS_FILE} -> ${LHI_FILE}"
	cat ${CTS_FILE} \
	  | gawk -v tkwd="${TKWD}" \
	      '/./{ print (tkwd == "t" ? $$1 : 1), $$3, $$4; }'  \
	  | compute-elem-count-distrib \
	  > ${LHI_FILE}.tmp
	mv -f ${LHI_FILE}.tmp ${LHI_FILE}
	cat ${LHI_FILE}

# Computes the weighted average of field 1 of ${LHI_FILE}, using field 2
# as the weight (lines starting with "#" and empty lines are skipped),
# and writes it as a one-line TeX macro definition
#   \def\<LANG><BOOK>Avg<Tk|Wd>N<ELEM>{<average, 2 decimals>}
# where "Tk" is used for TKWD=t and "Wd" otherwise.  The file is then
# displayed and handed to "update-paper-include" together with
# ${AVG_TEX_EXP}.
#
# An empty histogram (total weight 0) is reported explicitly instead of
# letting gawk abort on the division e/t.  The output goes to a
# temporary file that is renamed only on success, so a failed run cannot
# leave an empty ${AVG_TEX} that later runs would consider up to date.
${AVG_TEX}: ${LHI_FILE}
	cat ${LHI_FILE} \
	  | gawk \
	      -v lg=${LANG} -v bk=${BOOK} -v ek=${ELEM} -v tw=${TKWD} \
	      ' /^[#]/{next;} \
	        /./{ t+= $$2; e += $$2*$$1; } \
	        END { \
	          if (t == 0) { \
	            print "** empty length histogram, no average" > "/dev/stderr"; \
	            exit 1; \
	          } \
	          xtw = ( tw == "t" ? "Tk" : "Wd" ); \
	          printf "\\def\\%s%sAvg%sN%s{%.2f}\n", lg, bk, xtw, ek, e/t; \
	        } \
	      ' \
	  > ${AVG_TEX}.tmp
	mv -f ${AVG_TEX}.tmp ${AVG_TEX}
	cat ${AVG_TEX}
	update-paper-include ${AVG_TEX} ${AVG_TEX_EXP}

endif
# End of ${LANG}/${BOOK}/${ELEM}/${TKWD} recursion
######################################################################

endif
endif
endif
# End of ${LANG}/${BOOK}/${ELEM} section
######################################################################