# Last edited on 2025-05-01 18:23:18 by stolfi
# Creates the VMS samples:

# Makefile passed with "-f" to every recursive ${MAKE} call below
# (presumably this very file -- confirm against its actual name).
MAKEFILE := vms-edition.make

# ${MAKERULES} is listed as a prerequisite of most derived files, so
# they become out of date whenever the file named here is edited.
# The commented-out empty definition presumably disables that.
# MAKERULES := 
MAKERULES := ${MAKEFILE}

# Name of the subdirectory of "dat/" and "exp/" used by all rules below.
LANG := voyn

# Top-level entry points; none of them names a real file.
# NOTE(review): "export" is declared phony, but no top-level "export:"
# rule is visible in this file -- confirm whether one is missing.
.PHONY: all source export clean

# Trap spurious "make"s: a bare "make" behaves like "make source".
all: source

# "make source" creates the LaTeX source files by re-running
# ${MAKEFILE} with ACTION=source.  "work" has no rule in this file,
# so it must already exist.  "-R" disables make's built-in variables
# in the sub-make.
source: work
	${MAKE} -R -f ${MAKEFILE} ACTION=source everything

# Remove derived files from "dat", by re-running ${MAKEFILE}
# with ACTION=clean:
clean:
	${MAKE} -R -f ${MAKEFILE} ACTION=clean everything
        
# Per-language directories under "dat" and "exp".  Plain "mkdir" is
# used, so the parent directory must already exist.
dat/${LANG}:
	mkdir $@

exp/${LANG}:
	mkdir $@

# ${UTYPE_TBL} holds fields 2 and 6 of every non-empty line of
# ${UNIT_TBL}, which is read as a colon-separated table.  It is a
# prerequisite of the "raw.evt" rule and is passed to "select-units"
# as its "table" variable.
#
# The recipe previously had no rule header, so it was not attached to
# any target and nothing could build ${UTYPE_TBL}.
#
# NOTE(review): ${UNIT_TBL} is not defined anywhere in this file; it is
# assumed to come from the command line or the environment.  The guard
# below stops the recipe instead of letting "cat" wait on stdin when
# it is empty.
UTYPE_TBL := unit-to-type.tbl

${UTYPE_TBL}: ${UNIT_TBL}
	@test -n "${UNIT_TBL}" \
          || { echo "** UNIT_TBL is not defined, cannot build ${UTYPE_TBL}" 1>&2; exit 1; }
	cat ${UNIT_TBL} \
          | gawk -v FS=":" '/./{print $$2,$$6}' \
          > ${UTYPE_TBL}

######################################################################
# Rules for a given ${ACTION} ("source", "export", "clean")
#
# Guard idiom: an "ACTION=..." given on the command line overrides the
# assignment below, so the rules of this section are only read by the
# recursive invocations that pass an ACTION.  The matching "endif" is
# at the end of the file.
ACTION := ACTION.IS.UNDEFINED
ifneq "${ACTION}" "ACTION.IS.UNDEFINED"

# The various "books" (actually views of the same book, the VMS).
# The "sectioned" ones are worth analyzing separately by subsection.
#
SECTIONED_BOOKS := maj prs lab
UNSECTIONED_BOOKS := tak ini fin mid 

# "ev-pos-${ACTION}" names the wrap-up rule of the current action.
.PHONY: everything ev-recurse ev-pos-${ACTION}

# Contents of the file "subsections.tags", read once when the
# makefile is parsed; used as the subsection list of sectioned books.
VMS_SUBSECS := ${shell cat subsections.tags}

# "everything" applies ${ACTION} to every book and then runs the
# wrap-up rule of that action.
everything: ev-recurse ev-pos-${ACTION}

# One sub-make per book, goal "single-book".  Sectioned books get the
# full subsection list, unsectioned books get an empty one.
# ${MAKE} must appear literally in the recipe so that make treats
# these lines as recursive invocations.
ev-recurse:
	for bk in ${SECTIONED_BOOKS}; do \
          ${MAKE} -R -f ${MAKEFILE} ACTION=${ACTION} \
            BOOK=$$bk SUBSECS="${VMS_SUBSECS}" \
            single-book; \
        done
	for bk in ${UNSECTIONED_BOOKS}; do \
          ${MAKE} -R -f ${MAKEFILE} ACTION=${ACTION} \
            BOOK=$$bk SUBSECS="" \
            single-book; \
        done

# Wrap-up for ACTION=source.  For each subsection, print the sum of
# field 1 over the non-empty lines of the "prs" and "lab" word tables
# taken together, then the same sum for the "maj" table.
ev-pos-source:
	for sec in ${VMS_SUBSECS}; do \
          printf "\n%-32s" "voyn/{prs,lab}/$$sec/raw.wfr: "; \
          cat dat/${LANG}/prs/$$sec/raw.wfr dat/${LANG}/lab/$$sec/raw.wfr \
            | gawk '/./{t += $$1;} END{print t}' ; \
          printf "%-32s" "voyn/maj/$$sec/raw.wfr: " ; \
          cat dat/${LANG}/maj/$$sec/raw.wfr \
            | gawk '/./{t += $$1;} END{print t}' ; \
        done

# No wrap-up is needed for the other two actions.
ev-pos-export:

ev-pos-clean:

######################################################################
# Rules for given ${ACTION}/${BOOK} where
#   ${BOOK} = book to make ("prs", "lab", "maj", etc.)
#
# Guard idiom: only a sub-make given "BOOK=..." on its command line
# reads the rules of this section (the command line overrides the
# assignment below).
BOOK := BOOK.IS.UNDEFINED
ifneq "${BOOK}" "BOOK.IS.UNDEFINED"

# Directory of this book, relative to "dat/" or "exp/".
BOOK_DIR := ${LANG}/${BOOK}

# Nothing depends on the "dat/${LANG}" and "exp/${LANG}" rules, so the
# parent directory may be missing here; "-p" creates it as well.
dat/${BOOK_DIR}: ; mkdir -p dat/${BOOK_DIR}
exp/${BOOK_DIR}: ; mkdir -p exp/${BOOK_DIR}

# Per-book subsection lists (relative to "dat/"): the full list, and
# the list without the tags that start with "unk" or "xxx".
BOOK_SUBSEC_LIST := ${BOOK_DIR}/subsections.tags
BOOK_SUBSEC_OK_LIST := ${BOOK_DIR}/subsections-ok.tags
DAT_BOOK_SUBSEC_LISTS := dat/${BOOK_SUBSEC_LIST} dat/${BOOK_SUBSEC_OK_LIST}

######################################################################
# Rules for given ${ACTION}/${BOOK}/${SUBSECS} where
#   ${SUBSECS} = blank-separated list of subsection samples to
#     create for that book, excluding "tot.1".
#
# Guard idiom: only a sub-make given "SUBSECS=..." on its command line
# (possibly as an empty list) reads the rules of this section.
SUBSECS := SUBSECS.IS.UNDEFINED
ifneq "${SUBSECS}" "SUBSECS.IS.UNDEFINED"
        
# Path stems, relative to "dat/" or "exp/", of the two per-book
# reports; the rules below add the ".txt" and ".tex" extensions.
BOOK_RGB_CTS := ${BOOK_DIR}/raw-gud-bad-tw-counts
BOOK_SUMM := ${BOOK_DIR}/raw-gud-bad-tw-summary

# Report files required by "sb-pos-source".
DAT_BOOK_TARGETS := \
  dat/${BOOK_RGB_CTS}.tex \
  dat/${BOOK_SUMM}.tex

.PHONY: \
  single-book sb-recurse \
  sb-pre-${ACTION} sb-pos-${ACTION} \
  sb-show-sizes sb-check-subsec-total

# Goal of the per-book sub-makes.  The three steps are listed in the
# order in which a serial make runs them: preparation, one sub-make
# per subsection, wrap-up.
single-book: sb-pre-${ACTION} sb-recurse sb-pos-${ACTION}

# One sub-make per subsection sample, goal "single-subsec"; the
# whole-book sample "tot.1" is always processed last.
# ${MAKE} must appear literally in the recipe so that make treats
# this line as a recursive invocation.
sb-recurse: dat/${BOOK_DIR} exp/${BOOK_DIR}
	for ss in ${SUBSECS} tot.1; do \
          ${MAKE} -R -f ${MAKEFILE} ACTION=${ACTION} \
            BOOK=${BOOK} SUBSEC=$$ss \
            single-subsec; \
        done

# ACTION=source, before the subsections: write the subsection lists.
sb-pre-source: ${DAT_BOOK_SUBSEC_LISTS}

# ACTION=source, after the subsections: print the size reports and
# build the per-book report files.
sb-pos-source: sb-show-sizes sb-check-subsec-total ${DAT_BOOK_TARGETS}

# Directories of all samples of this book: one per subsection,
# followed by the whole-book sample "tot.1".
SB_SAMPLE_DIRS := ${foreach S,${SUBSECS} tot.1,dat/${BOOK_DIR}/${S}}

# Run "dicio-wc" on each kind of derived file across all samples.
# For the three ".wfr" kinds only fields 1 and 4 of each non-empty
# output line are kept.
sb-show-sizes:
	@dicio-wc ${addsuffix /raw.evt,${SB_SAMPLE_DIRS}}
	@dicio-wc ${addsuffix /raw.lts,${SB_SAMPLE_DIRS}}
	@dicio-wc ${addsuffix /raw.tks,${SB_SAMPLE_DIRS}}
	@dicio-wc ${addsuffix /raw.wfr,${SB_SAMPLE_DIRS}} \
          | gawk '/./{ printf "    %8s %s\n", $$1,$$4;}'
	@dicio-wc ${addsuffix /gud.wfr,${SB_SAMPLE_DIRS}} \
          | gawk '/./{ printf "    %8s %s\n", $$1,$$4;}'
	@dicio-wc ${addsuffix /bad.wfr,${SB_SAMPLE_DIRS}} \
          | gawk '/./{ printf "    %8s %s\n", $$1,$$4;}'

# Consistency check: print the sum of field 1 over the non-empty lines
# of all per-subsection "raw.wfr" files, then the same sum for the
# "tot.1" sample, so that the two totals can be compared.
# The "/dev/null" operand keeps "cat" from reading stdin when
# ${SUBSECS} is empty.
#
# The target used to be spelled "+sb-check-subsec-total", which did not
# match the name used in ".PHONY" and in "sb-pos-source"; the check was
# therefore never run.
sb-check-subsec-total: 
	@printf "\n%-24s" "dat/${BOOK_DIR}/*/raw.wfr: "
	@cat ${foreach S,${SUBSECS},dat/${BOOK_DIR}/${S}/raw.wfr} /dev/null \
          | gawk '/./{t += $$1;} END{print t}' 
	@printf "%-24s" "dat/${BOOK_DIR}/tot.1/raw.wfr: "
	@cat dat/${BOOK_DIR}/tot.1/raw.wfr \
          | gawk '/./{t += $$1;} END{print t}' 
        
# ACTION=export: nothing to prepare; afterwards hand the two per-book
# report files in "dat" to "update-paper-include", together with
# their counterparts in "exp".
sb-pre-export:
sb-pos-export:
	update-paper-include dat/${BOOK_RGB_CTS}.tex exp/${BOOK_RGB_CTS}.tex
	update-paper-include dat/${BOOK_SUMM}.tex    exp/${BOOK_SUMM}.tex

# ACTION=clean: nothing to prepare; afterwards delete the per-book
# subsection lists.  The recipe used to name ${BOOK_SUBSEC_LISTS},
# which is not defined anywhere, so "rm -f" received no operands and
# the lists were never removed.
# NOTE(review): the per-book report files (".txt"/".tex") are not
# removed here -- confirm whether that is intended.
sb-pre-clean:
sb-pos-clean:
	-rm -f ${DAT_BOOK_SUBSEC_LISTS}

# Full subsection list of this book: the words of ${SUBSECS}, one per
# line (an empty ${SUBSECS} yields a single empty line).
dat/${BOOK_SUBSEC_LIST}: dat/${BOOK_DIR} ${MAKERULES}
	echo "${SUBSECS}" | tr ' ' '\012' > $@

# Same list without the tags that start with "unk" or "xxx".
dat/${BOOK_SUBSEC_OK_LIST}: dat/${BOOK_SUBSEC_LIST}
	cat $< | egrep -v '^(unk|xxx)' > $@

# Per-book counts table, written by "count-raw-gud-bad-toks-wrds" from
# the book directory and the sample names, then echoed to the terminal
# with all "/" characters removed and a six-space indent.
# NOTE(review): the per-sample data files are not listed as
# prerequisites; only the book directory is -- confirm this is enough
# to trigger rebuilds.
dat/${BOOK_RGB_CTS}.txt: ${MAKERULES} dat/${BOOK_DIR} count-raw-gud-bad-toks-wrds
	@echo " "
	@echo "      Good/bad statistics for ${BOOK_DIR}:"
	@echo " "
	count-raw-gud-bad-toks-wrds dat/${BOOK_DIR} ${SUBSECS} / tot.1 > $@
	cat $@ | sed -e 's:/::g' -e 's/^/      /'

# TeX version of the counts table: the ".txt" table piped through
# "tex-format-raw-gud-bad-counts".
dat/${BOOK_RGB_CTS}.tex: ${MAKERULES} dat/${BOOK_RGB_CTS}.txt tex-format-raw-gud-bad-counts
	@echo " "
	@echo " dat/${BOOK_RGB_CTS}.txt -> dat/${BOOK_RGB_CTS}.tex"
	@echo " "
	cat dat/${BOOK_RGB_CTS}.txt | tex-format-raw-gud-bad-counts > $@

# TeX summary of the counts table: the ".txt" table piped through
# "tex-format-raw-gud-bad-summary", which receives the sample name
# "${LANG}${BOOK}" in its "sample" variable.
# The progress message now shows the path that is actually written
# (with the "dat/" prefix), as the counts-table rule does.
dat/${BOOK_SUMM}.tex: ${MAKERULES} dat/${BOOK_RGB_CTS}.txt \
	    tex-format-raw-gud-bad-summary
	@echo " "
	@echo " dat/${BOOK_RGB_CTS}.txt -> dat/${BOOK_SUMM}.tex"
	@echo " "
	cat dat/${BOOK_RGB_CTS}.txt \
	  | tex-format-raw-gud-bad-summary \
              -v sample=${LANG}${BOOK} \
	  > dat/${BOOK_SUMM}.tex

endif
# End of ${ACTION}/${BOOK}/${SUBSECS} rules
######################################################################

######################################################################
# Rules for given ${ACTION}/${BOOK}/${SUBSEC} where
#   ${SUBSEC} is a specific subsection (possibly "tot.1").
#
# Guard idiom: only a sub-make given "SUBSEC=..." on its command line
# reads the rules of this section.
SUBSEC := SUBSEC.IS.UNDEFINED
ifneq "${SUBSEC}" "SUBSEC.IS.UNDEFINED"
        
# Directory of this subsection sample, relative to "dat/" or "exp/".
SUBSEC_DIR := ${BOOK_DIR}/${SUBSEC}
        
# Plain "mkdir" is used, so the book directory must already exist.
dat/${SUBSEC_DIR}: ; mkdir dat/${SUBSEC_DIR}
exp/${SUBSEC_DIR}: ; mkdir exp/${SUBSEC_DIR}

# Comma-separated lists of unit types.  The per-book ${UTYPES} is built
# from them and passed to "select-units" in its "types" variable.
PRS_UTYPES := parags,starred-parags,circular-lines,circular-text,radial-lines,titles
LAB_UTYPES := labels,words

# Choose the input EVT file (${SOURCE_EVT}) and the transcriber tag
# (${TRANS_TAG}) for this book and subsection.  The "tak" book has a
# source file only for the whole-book sample "tot.1".

ifeq "${BOOK}" "tak"
  TRANS_TAG := H
  ifneq "${SUBSEC}" "tot.1"
    SOURCE_EVT := SOURCE_EVT.NOT.DEFINED
  else
    SOURCE_EVT := work/L16+H-eva/text16e6.evt
  endif
else
  TRANS_TAG := A
  ifneq "${SUBSEC}" "tot.1"
    SOURCE_EVT := work/Notes/045/subsecs-m/${SUBSEC}.evt
  else
    SOURCE_EVT := work/Notes/045/only-m.evt
  endif
endif

# Per-book settings:
#   ${UTYPES}   = unit types given to "select-units";
#   ${OWN_EVT}  = YES if the book builds its own "raw.evt", NO if it
#                 reuses the one of the "prs" book;
#   ${LINE_SEL} = extra options given to "words-from-evt".
# A book name that is not listed leaves all three unset.

ifeq "${BOOK}" "tak"
  OWN_EVT := YES
  UTYPES := ${PRS_UTYPES},${LAB_UTYPES}
  LINE_SEL :=
else ifeq "${BOOK}" "maj"
  OWN_EVT := YES
  UTYPES := ${PRS_UTYPES},${LAB_UTYPES}
  LINE_SEL :=
else ifeq "${BOOK}" "prs"
  OWN_EVT := YES
  UTYPES := ${PRS_UTYPES}
  LINE_SEL :=
else ifeq "${BOOK}" "lab"
  OWN_EVT := YES
  UTYPES := ${LAB_UTYPES}
  LINE_SEL :=
else ifeq "${BOOK}" "ini"
  OWN_EVT := NO
  UTYPES := ${PRS_UTYPES}
  LINE_SEL := -v omitMedial=1 -v omitFinal=1
else ifeq "${BOOK}" "mid"
  OWN_EVT := NO
  UTYPES := ${PRS_UTYPES}
  LINE_SEL := -v omitInitial=1 -v omitFinal=1
else ifeq "${BOOK}" "fin"
  OWN_EVT := NO
  UTYPES := ${PRS_UTYPES}
  LINE_SEL := -v omitInitial=1 -v omitMedial=1
endif

ifeq "${OWN_EVT}" "YES"

  # This book gets a private copy of the EVT file, with the specified
  # units and subsection.  Lines whose ";x>" tag character is not
  # ${TRANS_TAG} are dropped, and all weirdos are converted to basic
  # EVA characters, or to "*" where that is impossible.

  RAW_EVT := dat/${SUBSEC_DIR}/raw.evt

${RAW_EVT}: ${SOURCE_EVT} ${MAKERULES} basify-weirdos select-units ${UTYPE_TBL}
	@echo "${SOURCE_EVT} -> ${RAW_EVT}"
	cat $< \
          | egrep -v '[;][^'"${TRANS_TAG}"'][>]' \
          | sed -e 's/[&][*!][*!][*!][*!;]/*!!!!/g' \
          | basify-weirdos \
          | select-units -v types="${UTYPES}" -v table=${UTYPE_TBL} \
          > $@

else

  # This book reuses the EVT file made for the "prs" book and this
  # same subsection, on the assumption that it contains the units
  # that ${BOOK} needs.  "make-prs" is phony, so the sub-make below
  # is started on every run.

  RAW_EVT := dat/${LANG}/prs/${SUBSEC}/raw.evt

.PHONY: make-prs

${RAW_EVT}: make-prs

make-prs:
	${MAKE} -R -f ${MAKEFILE} ACTION=source \
          BOOK=prs SUBSEC=${SUBSEC} ${RAW_EVT}

endif

# Extract raw token stream with locations from EVT file.
# "words-from-evt" is run with showParags=1 and showLocation=1, plus
# the per-book options in ${LINE_SEL}.  The gawk filter then writes
# three fields per line:
#   for a non-blank input line:  <field 1> 2 <field 2>
#   for a blank input line:      <last field 1 seen> 1 =
# Before any non-blank line, "last field 1 seen" is "f0.P0.0".
# The backslash-newlines inside the quoted gawk program reach gawk
# as written, so that text must not be reflowed.

RAW_LTS := dat/${SUBSEC_DIR}/raw.lts
 
${RAW_LTS}:  ${RAW_EVT} ${MAKERULES} \
              words-from-evt
	@echo "${RAW_EVT} -> ${RAW_LTS}"
	cat ${RAW_EVT} \
          | words-from-evt \
              -v showParags=1 \
              ${LINE_SEL} \
              -v showLocation=1 \
          | gawk \
              ' BEGIN { c = "f0.P0.0"; } \
                /^ *$$/{ print c, "1", "="; next; } \
                /./{ c = $$1; print $$1, "2", $$2; next; } \
              ' \
          > ${RAW_LTS}

# Raw token stream without locations: field 3 of every non-empty
# line of ${RAW_LTS}.

RAW_TKS := dat/${SUBSEC_DIR}/raw.tks

${RAW_TKS}: ${RAW_LTS} ${MAKERULES}
	@echo "${RAW_LTS} -> ${RAW_TKS}"
	cat $< | gawk '/./ { print $$3; }' > $@

# Count raw word occurrences and compute their relative frequencies.
# Lines containing "=" are dropped first; the counts are sorted by
# decreasing count, then by word, before going to "compute-freqs".

RAW_WFR := dat/${SUBSEC_DIR}/raw.wfr

${RAW_WFR}: ${RAW_TKS} ${MAKERULES} compute-freqs
	@echo "${RAW_TKS} -> ${RAW_WFR}"
	cat $< \
          | egrep -v '=' \
          | sort | uniq -c | expand \
          | sort -b -k1nr -k2 \
          | compute-freqs \
          > $@
 
# Split the raw word table into good words and bad words.  Both rules
# run "select-good-words" on ${RAW_WFR} with inField=3 and differ
# only in the value of "writeBad" (0 for good, 1 for bad).

GUD_WFR := dat/${SUBSEC_DIR}/gud.wfr
BAD_WFR := dat/${SUBSEC_DIR}/bad.wfr

${GUD_WFR}: ${RAW_WFR} ${MAKERULES} select-good-words
	@echo "${RAW_WFR} -> ${GUD_WFR}"
	cat $< | select-good-words -v inField=3 -v writeBad=0 > $@

${BAD_WFR}: ${RAW_WFR} ${MAKERULES} select-good-words
	@echo "${RAW_WFR} -> ${BAD_WFR}"
	cat $< | select-good-words -v inField=3 -v writeBad=1 > $@

# Files built by "ss-source" and deleted by "ss-clean" for this sample.
# The private "raw.evt" is included only when this book builds its
# own; otherwise ${RAW_EVT} belongs to the "prs" book and is left
# alone.  The test used to compare the literal string "OWN_EVT" with
# "YES", which is never true, so "raw.evt" was never listed.
DERIVED_FILES := ${RAW_TKS} ${RAW_LTS} ${RAW_WFR} ${GUD_WFR} ${BAD_WFR}
ifeq "${OWN_EVT}" "YES"
  DERIVED_FILES := ${RAW_EVT} ${DERIVED_FILES}
endif

# "single-subsec" is the goal of the sub-makes started by "sb-recurse";
# "ss-${ACTION}" names one of the three rules below.
.PHONY: single-subsec ss-${ACTION}

# Make sure both sample directories exist, then perform the action.
# The directories are prerequisites for every action, "clean" included.
single-subsec: dat/${SUBSEC_DIR} exp/${SUBSEC_DIR} ss-${ACTION}

# ACTION=source: build all derived files of this sample.
ss-source: ${DERIVED_FILES}

# ACTION=export: nothing to do at the subsection level.
ss-export: 

# ACTION=clean: delete the derived files of this sample.  The leading
# "-" makes make ignore a failure of "rm".
ss-clean: 
	-rm -f ${DERIVED_FILES}

endif
# End ${ACTION}/${BOOK}/${SUBSEC} rules
######################################################################

endif
# End ${ACTION}/${BOOK} rules
######################################################################

endif
# End ${ACTION} rules
######################################################################