#! /usr/bin/gawk -f
# Last edited on 2025-12-08 15:21:40 by stolfi

# Reads from {stdin} an EVT or IFF file and writes a cleaned-up version
# to {stdout}.
#
# Discards #-comments and blank lines.
# Discards page header lines "<{FNUM}>".
# On every data line, removes all inline comments ("<!...>"), Rene's
# "location" codes ("P0", "P+", etc), Rene's variable assignments
# ("<@X=Y>"), and Stolfi's temporary parag markers ("<|>", "<:>").
# Also maps figure intrusions "<->" to "-".
# Also lowercases the whole data field and removes unnecessary {}
# around common compounds; other {}-groups become "?".
# Also maps "w", "z" back to "p", "f".
# Also maps weirdo codes ("&NNN;", "@NNN;", etc.) to plain glyphs or "?",
# consuming the ";" after the code.
#
# Leaves the head/tail parag markers "<%>", "<$>".
# Leaves the circular text "known start" marker "<=>".
#
# Any line that is not a comment, a blank line, a page header, or a
# data line is reported on {stderr} and aborts the run with status 1.
#
# NOTE(review): this file had been collapsed onto a single line (so the
# "#!" comment swallowed the whole program) and every "<letter...>" run
# had been stripped from it.  The line structure and the regexps tagged
# "NOTE(review)" below were reconstructed; check them against a pristine
# copy.  An earlier version of this header also mentioned removing
# alignment marks ([«=»]); no such step survives in the code -- confirm.

/^[ ]*([#]|$)/ {
  # Discard blank lines and comments:
  next;
}

// {
  # Fix Rene's f-num for the big fold-out:
  # NOTE(review): both the tail of the pattern and the replacement were
  # lost; "<fRos" -> "<f86v" is a guess, confirm the intended f-number.
  gsub(/^<fRos/, "<f86v", $0)
}

# NOTE(review): pattern reconstructed (only "/^" and ">/" survived).
/^<f[0-9]+[rv][0-9]*>/ {
  # Discard page header lines:
  next;
}

# NOTE(review): pattern reconstructed (only "/^" survived); it is assumed
# to select data lines, i.e. a locator "<f{PAGE}.{LINE}...>" and the text.
/^<f[0-9]+[rv][0-9]*[.]/ {
  # Remove Rene's location codes from the locator:
  # NOTE(review): only the tail `[>]/, ">", $0)` of this call survived;
  # the head of the regexp is a guess.
  gsub(/[,][^<>]*[>]/, ">", $0)

  # Remove inline comments:
  gsub(/<![^<>]*>/, "", $0)

  # Now there should be no blanks in the data field:
  if (NF != 2) {
    printf "** invalid NF = [%s]\n", $0 > "/dev/stderr";
    exit(1)
  }

  # Remove Rene's variable assignments:
  gsub(/<@[A-Z]=[A-Z0-9]>/, "", $0)

  # Remove Stolfi's temporary parag markers:
  gsub(/<[|:]>/, "", $2)

  # Turn Rene's figure intrusion gaps into "-" as in Stolfi's
  gsub(/[<][-][>]/, "-", $2)

  # Map data field to lowercase:
  $2 = tolower($2)

  # Undo Stolfi's remap of hooked @p/@f to @w/@z:
  gsub(/w/, "p", $2)
  gsub(/z/, "f", $2)

  # Remove braces from standard ligatures (the trailing group keeps the
  # character that follows the "}", which must not be another "h"):
  $2 = gensub(/{([csi][tkpf]?[h]*)}([^h]|$)/, "\\1\\2", "g", $2)

  # Map non-standard ligatures to "?":
  gsub(/[{][^{}]*[}]/, "?", $2)

  # Map some weirdos to normal glyphs:
  gsub(/e[']/, "s", $2)
  gsub(/a[']/, "?", $2)
  gsub(/o[']/, "?", $2)
  gsub(/y[']/, "?", $2)
  gsub(/q[']/, "?", $2)

  gsub(/[&@]cs[;]/, "sh", $2)
  gsub(/[&@]152[;]?/, "d", $2)
  gsub(/[&@]176[;]?/, "k", $2)
  gsub(/[&@]206[;]?/, "r", $2)
  gsub(/[&@]208[;]?/, "n", $2)
  gsub(/[&@]221[;]?/, "o", $2)
  gsub(/[&@]222[;]?/, "y", $2)

  # Map all other weirdos to "?" (three-digit codes first, so that a
  # missing ";" does not make the generic rule eat following text):
  $2 = gensub(/[@&][0-9][0-9][0-9][;]?/, "?", "g", $2)
  $2 = gensub(/[@&][^;]*[;]/, "?", "g", $2)

  print;
  next;
}

// {
  # Anything that reached this point matched none of the rules above:
  printf "** invalid line = [%s]\n", $0 > "/dev/stderr";
  exit(1)
}