#! /usr/bin/gawk -f
# Last edited on 2025-06-27 10:55:26 by stolfi

# To be called by {replace_star_ids.sh}.
#
# Reads an EVA-ish transcription of the SPS, with one data line per text line.
# Replaces each comment "# Star ..." by an inline comment that carries the
# star ID, inserted right after the "<%>" marker of the next parag head line.
# Uses the {VPOS} data and the "star-props.txt" file to identify the star.
#
# The caller must define with "-v" the variable {table_file} which is
# a table that maps star {VPOS} values to star IDs.  More precisely,
# each entry of the table must be "{PAGE}:{VPOS} {PAGE}:{STID}" where
# {PAGE} is a page f-number, {VPOS} is a string "[-+][0-9][.][0-9]",
# and {STID} is "S{NN}" where {NN} is a sequential number of the star in the
# page, formatted with "%02d".
#
# NOTE(review): the "# Star" rule below only accepts {VPOS} strings with two
# extra leading digits ("[0-9][0-9][-+][0-9][.][0-9]"), which disagrees with
# the format stated above.  Confirm which one is intended.
#
# NOTE(review): {arg_error}, {data_error} and {read_table} are not defined in
# this file; presumably they are supplied by a library that the caller loads
# together with this script.
#
# NOTE(review): the copy of this file that was reviewed had lost every piece
# of text of the form "<!...>" or "<f...>".  The affected spots (the header
# of the data line rule, the extraction of {loc} and {text}, the "already
# assigned" test, and the contents of {stcomm}) were reconstructed and are
# marked individually below.  Check them against a pristine copy.

BEGIN {
  abort = -1;
  if (table_file == "") { arg_error(("must define {table_file}")); }

  # Counts for the whole input file:
  nlines = 0;        # Lines in the input file.
  nstars = 0;        # Number of stars mapped.
  nshorts = 0;       # Number of short lines.
  nperfs = 0;        # Number of "# PERFECT PARAG" lines found.
  npuffs = 0;        # Number of lines with one-leg gallows [pfwz] found.
  nleads = 0;        # Number of parag head line markers "<%>" found.
  ntails = 0;        # Number of parag tail line markers "<$>" found.

  cur_stid = "";     # Star ID obtained from the last "# Star" comment and not used yet.
  cur_page = "";     # Current page number, or "" before the first page.

  # Counts for the current page:
  nstars_page = 0;   # Number of stars found in current page.
  nshorts_page = 0;  # Number of short lines found in current page.
  nperfs_page = 0;   # Number of "# PERFECT PARAG" lines found in current page.
  npuffs_page = 0;   # Number of lines with one-leg gallows [pfwz] found in current page.
  nleads_page = 0;   # Number of parag head line markers "<%>" found in current page.
  ntails_page = 0;   # Number of parag tail line markers "<$>" found in current page.

  last_was_tail = 1; # Last line was a parag tail (or first of page).

  # Load the table that maps "{PAGE}:{VPOS}" to "{PAGE}:{STID}":
  invert = 0;
  verbose = 1;
  split("", vstable);
  read_table(table_file, invert, vstable, verbose);
}

# Stop at once if an error was flagged through {abort}:
(abort >= 0) { exit abort; }

# Count every input line:
// { nlines++; }

# Blank lines are normalized to empty lines:
/^ *$/ { print ""; next; }

# Page header line:
/^[<>]f[0-9]+[rv][>]/ {
  # Close the previous page, if any, and save the new page number:
  if (cur_page != "") { finish_page(); }
  cur_page = $1;
  gsub(/[<>]/, "", cur_page);
  start_page();
  print;
  next;
}

# "# Star, ... vpos {VPOS} ..." comment line:
/^[#] *[Ss]tar[ ]*[,]/ {
  nstars++; nstars_page++;
  if (cur_page == "") { data_error(("'# Star' comment before first page")); }

  # Extract the {VPOS} string that follows the "vpos" keyword.
  # {vpos} is cleared first so that a comment that fails to parse
  # cannot be looked up with the {VPOS} of the previous star.
  vpos = "";
  lin = $0;
  if (match(lin, /vpos/)) {
    lin = substr(lin, RSTART + RLENGTH);
    gsub(/^[ ]+/, "", lin);
    if (match(lin, /^[0-9][0-9][-+][0-9][.][0-9]/)) {
      vpos = substr(lin, RSTART, RLENGTH);
    } else {
      data_error(("invalid {vpos} format"));
    }
  } else {
    data_error(("no 'vpos' keyword found"));
  }

  # Map the {VPOS} to a star ID and save it for the next parag head:
  if (vpos != "") {
    key = (cur_page ":" vpos);
    if (key in vstable) {
      val = vstable[key];
      stid = val;
      gsub(/^f[0-9]*[rv][:]/, "", stid);
      if (! match(stid, /^S[0-9][0-9]$/)) {
        data_error(("bad star id from table '" val "'"));
      }
      if (val != (cur_page ":" stid)) {
        data_error(("bad table value '" val "'"));
      }
      if (cur_stid != "") {
        data_error(("unassigned star '" cur_stid "'"));
      }
      cur_stid = stid;
    } else {
      data_error(("key '" key "' not found in star table"));
    }
  }
  print;
  next;
}

# "# PERFECT ..." comments are counted, then copied by the next rule:
/^[#] *PERFECT/ { nperfs++; nperfs_page++; }

# Any other comment line is copied unchanged:
/^[#]/ { print; next; }

# Data line: a locator followed by the transcribed text.
# NOTE(review): the pattern of this rule, the extraction of {loc} and {text},
# and the regex of the "already assigned" test were missing from the reviewed
# copy and are reconstructions.  Confirm them against the pristine source.
/^<f[0-9]+[rv]/ {
  loc = $1;
  text = $0;
  sub(/^[^ ]+[ ]*/, "", text);

  if (match(text, /[<][!]S[0-9][0-9][>]/)) {
    data_error(("line already has an assigned star"));
  }

  # The guillemets are tested with {index} rather than with a bracket
  # expression: in a byte-oriented locale "[«]" matches any byte of the
  # UTF-8 encoding, and "«" and "»" share their first byte.
  if (index(text, "«") > 0) { nshorts++; nshorts_page++; }
  if (index(text, "»") > 0) { data_error(("indented line")); }

  if (has_puffs(text)) { npuffs++; npuffs_page++; }

  if (match(text, /[<][%][>]/)) {
    # Line is a parag head.
    if (RSTART != 1) { data_error(("parag head marker not initial")); }
    nleads++; nleads_page++;
    if (! last_was_tail) { data_error(("parag head does not follow a tail")); }
    if (cur_stid != "") {
      # Insert the saved star code:
      # NOTE(review): the reviewed copy had {stcomm = ("")} here, so no star
      # ID was ever written.  The "<!...>" form is a reconstruction based on
      # the embedded comment syntax accepted by {has_puffs}; confirm it.
      stcomm = ("<!" cur_stid ">");
    } else {
      stcomm = "";
    }
    gsub(/[<][%][>]/, ("<%>" stcomm), text);
    # printf " %s %s\n", loc, stcomm > "/dev/stderr";
    cur_stid = ""; # To avoid double assignment.
  } else {
    # Line is not parag head.
    if (cur_stid != "") {
      data_error(("'# Star' comment not followed by parag head"));
    }
  }

  if (match(text, /[<][$][>]/)) {
    # Line is parag tail.
    if (RSTART + RLENGTH - 1 != length(text)) {
      data_error(("parag tail marker not final"));
    }
    ntails++; ntails_page++;
    last_was_tail = 1;
  } else {
    last_was_tail = 0;
  }

  printf "%-18s %s\n", loc, text;
  next;
}

END {
  if (abort >= 0) { exit abort; }
  if (cur_page != "") { finish_page(); }

  # Report the totals for the whole file:
  printf "\n" > "/dev/stderr";
  printf "%-10s -", "TOTAL" > "/dev/stderr";
  printf " %3d stars", nstars > "/dev/stderr";
  printf " %3d short lines", nshorts > "/dev/stderr";
  printf " %3d puff lines", npuffs > "/dev/stderr";
  printf " %3d perfect", nperfs > "/dev/stderr";
  printf " %3d heads", nleads > "/dev/stderr";
  printf " %3d tails", ntails > "/dev/stderr";
  printf "\n" > "/dev/stderr";
}

function finish_page(   ) {
  # Checks that the current page {cur_page} ended cleanly (last line was a
  # parag tail, no star ID left pending) and prints its counts to stderr.
  if (! last_was_tail) { data_error(("page does not end with parag tail")); }
  if (cur_stid != "") { data_error(("unassigned star '" cur_stid "'")); }
  printf "%-10s -", ("page " cur_page) > "/dev/stderr";
  printf " %3d stars", nstars_page > "/dev/stderr";
  printf " %3d short lines", nshorts_page > "/dev/stderr";
  printf " %3d puff lines", npuffs_page > "/dev/stderr";
  printf " %3d perfect", nperfs_page > "/dev/stderr";
  printf " %3d heads", nleads_page > "/dev/stderr";
  printf " %3d tails", ntails_page > "/dev/stderr";
  printf "\n" > "/dev/stderr";
}

function start_page() {
  # Resets the per-page counts and the per-page parsing state.
  nstars_page = 0;
  nshorts_page = 0;
  nperfs_page = 0;
  npuffs_page = 0;
  nleads_page = 0;
  ntails_page = 0;
  cur_stid = "";
  last_was_tail = 1;
}

function has_puffs(traw,    tc) {
  # Tests whether {traw} has any [pfwz] letters outside comments.
  # Returns the result of {match}: nonzero if such a letter exists, else 0.
  tc = traw;
  # Remove the markers "<->", "<%>", "<$>" and the embedded comments "<!...>":
  gsub(/[<]([-%$]|[!][^<>]*)[>]/, "", tc);
  if (match(tc, /[>]/)) { data_error(("unclosed embedded comment")); }
  return match(tc, /[pfwz]/);
}