#! /bin/bash -eu
# Last edited on 2025-12-05 21:54:42 by stolfi

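# Reads from {stdin} an EVT or IFF file where each data line starts with
# "<{FNUM}." where {FNUM} matches "f[0-9]+[rv][0-9]*", and consists of
# exactly two blank-separated fields (the locator and the text).
# Discards blank lines, #-comments, and page headers "<{FNUM}>".
# Prefixes each remaining line with "{PNUM}.{LSEQ}" and a space, where
# {PNUM} is "p[0-9][0-9][0-9]" and {LSEQ} is the line number, zero-padded
# to three digits "[0-9][0-9][0-9]" (the final output uses "%03d").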
# Uses "fnum_to_pnum.tbl".
# Writes the result to {stdout}.

# Enable strict mode here as well as on the "#!" line, so that it still
# applies when the script is run as "bash SCRIPT" (which ignores the "#!"
# options).  {pipefail} makes a failure of any pipeline stage, such as
# {map_field.gawk}, abort the script instead of being masked by the last stage.
set -euo pipefail

# Scratch file holding the validated lines.  The whole input is checked
# before anything is written to {stdout}, so an invalid line aborts the
# script with no partial output.  A private {mktemp} file avoids clobbering
# by concurrent runs; the trap removes it on every exit path.
tmp="$(mktemp)"
trap 'rm -f -- "${tmp}"' EXIT

# Stage 1: validate each data line and emit "{FNUM} {LSEQ} {LINE}".
gawk \
  ' # Skip blank lines and #-comments.
    /^[ ]*([#]|$)/ { next; }

    # Skip page headers "<{FNUM}>".
    /^[<]f[0-9]+[rv][0-9]*[>]/ { next; }

    # Data lines "<{FNUM}.{...}> {TEXT}".
    /^[<]f[0-9]+[rv][0-9]*[.]/ {
      # Exactly two fields are required: the locator and the text.
      if (NF != 2) { printf "** invalid NF = [%s]\n", $0 > "/dev/stderr"; exit(1) }

      # {FNUM} is the locator without the leading "<" and without
      # everything from the first "." onwards.
      fnum = $1; gsub(/</, "", fnum); gsub(/[.].*/, "", fnum);

      # {LSEQ} is what follows "<{FNUM}." in the locator, minus any tail
      # that starts at "." or ";" and runs to ">".  The "%02d" conversion
      # keeps only the leading number of what is left (0 if there is none).
      lseq = $1; gsub(/<f[0-9]+[rv][0-9]*[.]/, "", lseq);
      gsub(/[.;].*>/, "", lseq); lseq = sprintf("%02d", lseq);

      print fnum, lseq, $0;
      next;
    }

    # Anything else is a malformed line.
    { printf "** invalid line = [%s]\n", $0 > "/dev/stderr"; exit(1) }
  ' \
  > "${tmp}"

# Stage 2: map {FNUM} to {PNUM} through "fnum_to_pnum.tbl", then assemble
# the final "{PNUM}.{LSEQ} {LOCATOR} {TEXT}" line.
# NOTE(review): the field numbers below assume that {map_field.gawk} inserts
# the mapped value as a new field 1, giving the fields
# {PNUM} {FNUM} {LSEQ} {LOCATOR} {TEXT}, and substitutes {defSubst} for an
# {FNUM} that is not in the table -- confirm against {map_field.gawk}.
map_field.gawk \
    -v inField=1 -v outField=1 \
    -v table=fnum_to_pnum.tbl \
    -v defSubst='p???' \
  < "${tmp}" \
  | gawk '{ printf "%s.%03d %s %s\n", $1, $3, $4, $5 }'
  
  

