# Last edited on 2012-05-06 00:14:39 by stolfilocal 
# Makefile to compute the length distributions for 
# phonetic components ("slots") of Vietnamese words

# Name of this makefile.  It is used twice below: as the argument of
# "-f" when the "all" rule re-invokes make, and as a prerequisite of
# the data files, so that they are rebuilt when the rules change.
# The name is relative to the directory where make is run.
MAKEFILE := slot-length-stats.make

######################################################################
# Caller must define, on the make command line:
#   ${LANG} = "voyn", "chin", etc.;
#   ${BOOK} = "wow", "vms", etc.;
#   ${ELEM} = "bgly", "qoko", "viqr", etc.

# Guard idiom: each required parameter is first assigned a sentinel
# value.  A definition given on the make command line takes precedence
# over the assignment in this file, so the sentinel survives only when
# the caller did not supply the variable.  In that case the "ifneq"
# is false and everything down to its matching "endif" (near the end
# of this file) is skipped.  Note that a value coming only from the
# environment is overridden by the assignment here (unless make is
# run with -e).

LANG := LANG.IS.UNDEFINED
ifneq "${LANG}" "LANG.IS.UNDEFINED"

BOOK := BOOK.IS.UNDEFINED
ifneq "${BOOK}" "BOOK.IS.UNDEFINED"

ELEM := ELEM.IS.UNDEFINED
ifneq "${ELEM}" "ELEM.IS.UNDEFINED"

# Default goal: re-invokes this makefile once for every combination
# of count type (TKWD = t, w) and slot (SLOT = inic, vows, tone, finc),
# asking each sub-make to build the "single" target.
#
# Every combination is attempted even if an earlier one fails, but
# the first... rather, the most recent non-zero exit status of a
# sub-make is remembered in ${status} and returned at the end, so
# that a failure is reported to the caller instead of being masked
# by a later successful iteration.
#
# "all" names an action, not a file, hence the .PHONY declaration.
.PHONY: all
all:
	status=0; \
	for tkwd in t w; do \
	  for slot in inic vows tone finc; do \
	    ${MAKE} \
	      LANG=${LANG} BOOK=${BOOK} ELEM=${ELEM} \
	      SLOT=$$slot TKWD=$$tkwd \
	      -f ${MAKEFILE} single \
	    || status=$$?; \
	  done; \
	done; \
	exit $$status

######################################################################
# Subsection for word weights; caller must define 
# ${LANG}, ${BOOK}, ${ELEM}, and also 
#   ${TKWD} = "t" (tokens) or "w" (words),
#   ${SLOT} = "inic" (initial consonant), etc.
#
# Same guard idiom as for the other parameters: the sentinel value
# remains only when the variable was not given on the make command
# line, and then the rules down to the matching "endif" are skipped.
# The "all" rule supplies both variables when it re-invokes make.
TKWD := TKWD.IS.UNDEFINED
ifneq "${TKWD}" "TKWD.IS.UNDEFINED"

SLOT := SLOT.IS.UNDEFINED
ifneq "${SLOT}" "SLOT.IS.UNDEFINED"

# Fixed path components of the sample being processed.
QUAL := gud
SECK := tot.1

# Sample directory; like all the names below, it is relative to "dat/".
SMP_DIR := $(LANG)/$(BOOK)/$(SECK)

# Input file read by the ".cts" rule.
WFR_FILE := $(SMP_DIR)/$(QUAL).wfr

# gawk file handed to "extract-slot" through its "-f" option.
SLOT_FNS := $(LANG)/extract-slot-$(SLOT)-fact-$(ELEM).gawk

# The two generated files share one stem and differ only in extension.
SLOT_OUT_STEM := $(SMP_DIR)/$(QUAL)-slot-$(SLOT)-fact-$(ELEM)-$(TKWD)
CTS_FILE := $(SLOT_OUT_STEM).cts
LHI_FILE := $(SLOT_OUT_STEM).lhi

# Builds the ".lhi" file for one (LANG, BOOK, ELEM, TKWD, SLOT)
# combination.  "single" names an action, not a file; declaring it
# phony keeps a stray file called "single" from disabling the build.
.PHONY: single
single: dat/${LHI_FILE}

# Builds the ".cts" file from the ".wfr" file:
#   1. gawk, for every non-empty line, prints field 1 unchanged when
#      TKWD is "t" and the constant 1 otherwise, followed by fields
#      2 and 3;
#   2. the result is filtered by "extract-slot" with the gawk file
#      dat/${SLOT_FNS};
#   3. the output is sorted by field 1 (numeric, decreasing), then
#      field 3 (numeric), then field 2.
#
# The pipeline writes to "$@.tmp", which is renamed to the target only
# if the pipeline's exit status is zero.  This way a failed or
# interrupted run never leaves behind a truncated target that make
# would later regard as up to date.  (With the default shell the
# pipeline's status is that of its last command, "sort".)
dat/${CTS_FILE}: dat/${WFR_FILE} ${MAKEFILE} \
                 extract-slot dat/${SLOT_FNS}
	@echo "$< -> $@"
	@cat $< \
	  | gawk -v tkwd=${TKWD} \
              '/./{ print (tkwd == "t" ? $$1 : 1), $$2, $$3; }' \
	  | extract-slot -f dat/${SLOT_FNS} \
	  | sort -b -k1,1nr -k3,3n -k2,2 \
	  > $@.tmp \
	  || { rm -f $@.tmp; exit 1; }
	@mv -f $@.tmp $@
	@dicio-wc $@
        
# Builds the ".lhi" file from the ".cts" file:
#   1. gawk, for every non-empty line, deletes all "{" and "}"
#      characters from field 2 and prints the line;
#   2. the lines are sorted by field 2;
#   3. the result is piped through "compute-elem-count-distrib" with
#      sampleSep set to "|".
# Afterwards a header line and the contents of the new file are
# printed to standard output.
#
# The pipeline writes to "$@.tmp", which is renamed to the target only
# if the pipeline's exit status is zero.  This way a failed or
# interrupted run never leaves behind a truncated target that make
# would later regard as up to date.  (With the default shell the
# pipeline's status is that of its last command.)
dat/${LHI_FILE}: dat/${CTS_FILE} ${MAKEFILE} \
                 compute-elem-count-distrib
	@echo "$< -> $@"
	@cat $< \
	  | gawk '/./{gsub(/[{}]/,"",$$2); print;}' \
	  | sort -b -k2,2 \
	  | compute-elem-count-distrib -v sampleSep='|' \
	  > $@.tmp \
	  || { rm -f $@.tmp; exit 1; }
	@mv -f $@.tmp $@
	@echo "# ${LANG}/${BOOK}/${SECK}/${QUAL} slot = ${SLOT} counts = ${TKWD}"
	@cat $@
        
# Closes the guard on ${SLOT}.
endif
# Closes the guard on ${TKWD}.
endif
# End of ${LANG}/${BOOK}/${ELEM}/${TKWD}/${SLOT} rules
######################################################################

# Closes the guard on ${ELEM}.
endif
# Closes the guard on ${BOOK}.
endif
# Closes the guard on ${LANG}.
endif
# End of ${LANG}/${BOOK}/${ELEM} rules
######################################################################