# Last edited on 2002-01-18 14:49:55 by stolfi
# Makefile for computing frequencies of basic glyphs

# Default goal: build the joined token/word tables for each glyph kind
# by re-invoking this makefile with GLYPH_KIND set on the command line.
# Declared phony because "all" names a command, not a file to be built.
.PHONY: all
all:
	${MAKE} GLYPH_KIND=main -f glyph-freqs.make joined
	${MAKE} GLYPH_KIND=rare -f glyph-freqs.make joined

######################################################################
# Recursive make (caller must define ${GLYPH_KIND})
#
# Sentinel default.  A GLYPH_KIND=... definition given on the make
# command line takes precedence over this assignment, so the guarded
# section below is active only in such invocations.  (A GLYPH_KIND that
# is merely set in the environment is replaced by this assignment
# unless make is run with -e.)
GLYPH_KIND := GLYPH_KIND.IS.UNDEFINED
ifneq "${GLYPH_KIND}" "GLYPH_KIND.IS.UNDEFINED"

ifeq "${GLYPH_KIND}" "main"
  # Settings for the "main" glyph set.

  # Options forwarded to the counting and formatting scripts.
  NCOLS_SINGLE := 2
  SHOW_FREQS := 1
  SHOW_BAD_WORDS := 1

  # Comma-separated glyph list, passed to the scripts as elemList.
  GLYPHS := e,i,o,a,y,q,l,d,r,s,n,m,Ch,Sh,k,t,CKh,CTh,f,p,CFh,CPh

  # Input: the precomputed word frequency table for the entire VMS.
  # NOTE(review): LANG is also the POSIX locale environment variable; if
  # it is set in the caller's environment, GNU make passes this value on
  # to the recipe commands -- confirm that this is harmless.
  LANG := voyn
  BOOK := maj
  SUBDIR := ${LANG}/${BOOK}/tot.1
  SOURCE_WFR := dat/${SUBDIR}/gud.wfr
endif

ifeq "${GLYPH_KIND}" "rare"
  # Settings for the "slightly-weird" glyph set: weirdos that occur at
  # least a few times, or look like they belong to a valid series.

  # Options forwarded to the counting and formatting scripts.
  NCOLS_SINGLE := 4
  SHOW_FREQS := 0
  SHOW_BAD_WORDS := 0

  # Comma-separated glyph list, passed to the scripts as elemList.
  GLYPHS := g,x,CHh,SHh,CKHh,CTHh,CFHh,CPHh,IKh,ITh,IFh,IPh,IKHh,ITHh,IFHh,IPHh

  # Input: Takahashi's version of the text, because these glyphs were
  # omitted or invalidated in some of the other versions (including
  # the majority one).
  # NOTE(review): LANG is also the POSIX locale environment variable; if
  # it is set in the caller's environment, GNU make passes this value on
  # to the recipe commands -- confirm that this is harmless.
  LANG := voyn
  BOOK := tak
  SUBDIR := ${LANG}/${BOOK}/tot.1
  SOURCE_WFR := dat/${SUBDIR}/gud.wfr
endif

# Fail fast when no settings block matched ${GLYPH_KIND}: with an empty
# ${SUBDIR} every path below would degenerate (e.g. "dat//...") and the
# recipes would run with an empty ${SOURCE_WFR}.
ifeq "$(strip ${SUBDIR})" ""
  $(error Unsupported GLYPH_KIND "${GLYPH_KIND}": expected "main" or "rare")
endif

# Common prefix of all output files for this glyph kind.  The files
# themselves live under dat/, with TeX copies under exp/.
OUT_PRF := ${SUBDIR}/${GLYPH_KIND}-glyph

# The two per-TW frequency tables (TW=t and TW=w) that get joined.
TW_FRQ_FILES := \
  dat/${OUT_PRF}-t.frq \
  dat/${OUT_PRF}-w.frq

# Joined table and its TeX rendering (names are relative to dat/).
JFR_JOINED := ${OUT_PRF}-tw.jfr
TEX_JOINED := ${OUT_PRF}-tw-freqs.tex

# Entry point for one glyph kind: the joined table plus its TeX form.
# Declared phony because no file named "joined" is ever produced.
.PHONY: joined
joined: dat/${JFR_JOINED} dat/${TEX_JOINED}

# Build the per-TW tables by re-invoking this makefile once with TW=t
# and once with TW=w (goal "single").  The loop stops at the first
# failing sub-make and hands its exit status back to make; without the
# explicit exit, only the status of the last iteration would count.
# Declared phony because no file named "singles" is ever produced.
.PHONY: singles
singles:
	for tw in t w; do \
	  ${MAKE} TW=$$tw -f glyph-freqs.make single || exit $$?; \
	done

# Merge the two per-TW frequency tables into one file, matching lines
# on field 3 of each input.  Every output line carries fields 1 and 2
# of the first file, fields 1 and 2 of the second, then the join field.
# The "singles" prerequisite is never created as a file, so this rule
# runs on every invocation.
dat/${JFR_JOINED}: singles
	@echo "${OUT_PRF}-{t,w}.frq -> $@"
	join -1 3 -2 3 -o 1.1,1.2,2.1,2.2,0 ${TW_FRQ_FILES} > $@

# Render the joined table as a TeX fragment.  The formatter script, the
# gawk file it is given via -f, and this makefile are prerequisites, so
# a change to any of them triggers a rebuild.  The final step hands the
# result to update-paper-include together with its exp/ counterpart
# (presumably refreshing the copy there -- confirm against that script).
dat/${TEX_JOINED}: dat/${JFR_JOINED} \
    tex-format-elem-tw-freqs parse-elem-list.gawk \
    glyph-freqs.make
	@echo "$< -> $@"
	mkdir -p exp/${SUBDIR}
	cat $< \
	  | tex-format-elem-tw-freqs -f parse-elem-list.gawk \
	      -v elemList="${GLYPHS}" -v ncols=2 \
	      -v showCounts=1 -v showFreqs=${SHOW_FREQS} \
	  > $@
	update-paper-include $@ exp/${TEX_JOINED}

######################################################################
# Recursive make (caller must define ${GLYPH_KIND} and ${TW})
#
# Sentinel default.  A TW=... definition given on the make command line
# (as the "singles" recipe does) takes precedence over this assignment,
# so the guarded section below is active only in such invocations.
TW := TW.IS.UNDEFINED
ifneq "${TW}" "TW.IS.UNDEFINED"

# Names for the current ${TW}: the common prefix, the TeX fragment and
# the frequency table.  The rules below prepend dat/ (and exp/ for the
# TeX file) to these names.
OUT_PRF_TW := ${OUT_PRF}-${TW}
TEX_SINGLE := ${OUT_PRF}-${TW}-freq.tex
FRQ_SINGLE := ${OUT_PRF}-${TW}.frq

# Entry point for one ${TW} value: the frequency table and its TeX form.
# Declared phony because no file named "single" is ever produced.
.PHONY: single
single: dat/${FRQ_SINGLE} dat/${TEX_SINGLE}

# Produce the glyph frequency table for the current ${TW} from the word
# frequency table ${SOURCE_WFR}.  The input is piped through the
# ligature and factoring scripts (which work on field 3 and are told to
# use field 4 as output).  The gawk stage then keeps, for every
# non-empty line, field 1 when TW is "t" or the constant 1 otherwise,
# followed by field 4.  count-elems and compute-freqs turn that into
# the final table.
# All scripts used and this makefile are listed as prerequisites, so
# editing any of them triggers a rebuild.
dat/${FRQ_SINGLE}: ${SOURCE_WFR} \
    capitalize-ligatures \
    factor-field-general factor-text-eva-to-basic.gawk \
    count-elems parse-elem-list.gawk compute-freqs \
    glyph-freqs.make
	@echo "$< -> $@"
	cat $< \
	  | capitalize-ligatures -v field=3 \
	  | factor-field-general -f factor-text-eva-to-basic.gawk \
	      -v inField=3 -v outField=4 \
	  | gawk -v tw="${TW}" \
	      '/./{ print (tw == "t" ? $$1 : 1), $$4; }' \
	  | count-elems -f parse-elem-list.gawk \
	      -v elemList="${GLYPHS}" -v joinRepeats=0 \
	      -v showBadWords=${SHOW_BAD_WORDS} \
	  | compute-freqs \
	  > $@

# Render the per-TW frequency table as a TeX fragment, using the column
# count and display switches chosen for this glyph kind.  The final
# step hands the result to update-paper-include together with its exp/
# counterpart (presumably refreshing the copy there -- confirm against
# that script).
dat/${TEX_SINGLE}: dat/${FRQ_SINGLE} \
    tex-format-elem-freqs parse-elem-list.gawk \
    glyph-freqs.make
	@echo "$< -> $@"
	mkdir -p exp/${SUBDIR}
	cat $< \
	  | tex-format-elem-freqs -f parse-elem-list.gawk \
	      -v elemList="${GLYPHS}" -v ncols=${NCOLS_SINGLE} \
	      -v showCounts=1 -v showFreqs=${SHOW_FREQS} \
	      -v showClasses=0 -v showHeader=0 \
	  > $@
	update-paper-include $@ exp/${TEX_SINGLE}

endif
# End of ${TW} recursion
######################################################################

endif
# End of ${GLYPH_KIND} recursion
######################################################################