#! /usr/bin/gawk -f
# Last edited on 2004-07-15 03:42:49 by stolfi

# Reads text lines that begin with a locator of the form
#   <fNNx.T.NN;V>
# (page "fNNx", unit tag "T", line number "NN", version tag "V"),
# checks that the page and unit tag agree with the "unit" variable,
# cleans up the text, and writes one output record per input line:
#
#   unum pnum fnum utag lnum vtag text ucmt
#
# with fields separated by OFS.  Blank lines, "#" comment lines and
# page/unit header lines are skipped; any other line is a fatal error.
#
# Required variables (set with "-v"):
#   unum   copied verbatim to the first output field
#   unit   "fnum.utag"; must contain exactly one "."
#   pnum   copied verbatim to the second output field
#   ucmt   copied to the last output field after "_" cleanup

BEGIN {
  usage = ( ARGV[0] " -v unum=NNNN -v unit=fNNx.Y -v pnum=pNNN -v ucmt=bla_bla" );

  # "abort" stays negative until an error function sets it.
  abort = -1;

  if (unum == "") { arg_error("must define \"-unum\""); }
  if (unit == "") { arg_error("must define \"-unit\""); }
  if (pnum == "") { arg_error("must define \"-pnum\""); }
  if (ucmt == "") { arg_error("must define \"-ucmt\""); }

  # Split unit into fnum and utag:
  nu = split(unit, ufld, /[.]/);
  if (nu != 2) { arg_error("bad unit name"); }
  fnum = ufld[1];
  utag = ufld[2];

  # Clean up the comment: drop leading and trailing "_" and collapse
  # runs of "_" into one (presumably "_" stands in for a blank).
  gsub(/[_]+$/, "", ucmt);
  gsub(/^[_]+/, "", ucmt);
  gsub(/[_][_]+/, "_", ucmt);
}

# Safety net: if an error was flagged, stop before handling any record.
(abort >= 0) { exit abort; }

# Blank lines and "#" comment lines.
/^ *([#]|$)/ { next; }

/^[<]f[0-9]+[rv]?[1-6]?(|[.][A-Za-z][A-Za-z0-9]?)[>]/ {
  # Page/unit header lines
  next;
}

/^ *[<]f[0-9]+[rv][1-6]?[.][A-Za-z][A-Za-z0-9]?[.][0-9]+[a-z]?[;][A-Za-z][>]/ {
  # The locator is the first blank-delimited field, minus its brackets.
  loc = $1;
  gsub(/[<>]/, "", loc);

  # The text is whatever follows the locator.
  lin = $0;
  gsub(/^ *[<][^ <>]*[>] */, "", lin);

  # Locator parts: page, unit tag, line number, version tag.
  nfld = split(loc, fld, /[.;]/);
  if (nfld != 4) { data_error(("bad locator = \"" loc "\"")); }
  if (fld[1] != fnum) { data_error(("inconsistent fnum \"" fld[1] "\"")); }
  if (fld[2] != utag) { data_error(("inconsistent utag \"" fld[2] "\"")); }
  lnum = fld[3];
  vtag = fld[4];

  # Remove inline comments, fillers, and end-of-line markers:
  gsub(/[{][^{}]*[}]/, "", lin);
  gsub(/[!% ]/, "", lin);
  gsub(/ *[-=] *$/, "", lin);

  # Map every remaining "-", "." and "," to ".":
  gsub(/[-.,]/, ".", lin);

  # Write line:
  print unum, pnum, fnum, utag, lnum, vtag, lin, ucmt;
  next;
}

# Anything that reached this point matched none of the rules above.
// {
  data_error(("unrecognized line format \"" $0 "\""));
}

# Reports an input error as "FILENAME:FNR: ** msg" on stderr and
# terminates with status 1.
function data_error(msg)
{
  printf "%s:%d: ** %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit 1;
}

# Reports a bad or missing command-line variable on stderr, followed
# by the usage summary built in BEGIN, and terminates with status 1.
function arg_error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit 1;
}