#! /n/gnu/bin/gawk -f
# Validates the format of the original interlinear file.
# Usage: $0 < infile >& bugs
#
# Every input record is classified by the pattern rules at the bottom of
# this file.  Blank lines, "#" comments and panel / sub-panel declarations
# are accepted silently; text lines are handed to checktext(); anything
# else is reported as "bad format".
#
# A text line consists of a locator such as "<f1r.P.1;H>", blank padding
# up to and including column 19, and the text proper starting in column 20.

# Reports a problem with the current input record: echoes the record
# itself to standard output and writes "line N: msg" to /dev/stderr
# (the usage line above merges both streams into one file).
function error(msg)
{
  printf "%s\n", $0;
  printf "line %d: %s\n", NR, msg > "/dev/stderr";
}

# Validates one text line.
#   lin            - the complete input line (locator, padding and text).
#   res, txt, pad  - local variables (awk idiom: extra parameters); callers
#                    pass only lin.
# Returns 1 if the line is well formed and 0 otherwise.  Every problem
# found is reported through error(), so one line can yield several messages.
function checktext(lin,    res, txt, pad)
{
  res = 1

  if (length(lin) <= 19) { error("missing text"); res = 0 }

  # The pattern is anchored with "^", so match() returns either 1 or 0.
  if (match(lin, /^<f[0-9][0-9]*[vr]*[0-9]*\.[A-Za-z][A-Za-z0-9]*\.[0-9][0-9]*[ab]*;[A-Z]>/) == 1)
    {
      # Everything between the end of the locator and column 19 must be
      # blank.  The reference string is built with sprintf so that its
      # width cannot be damaged by whitespace mangling of this file.
      # The check is skipped for a bad locator because RLENGTH is -1 then.
      pad = substr(sprintf("%19s", ""), 1, 19 - RLENGTH)
      if (substr(lin, RLENGTH + 1, 19 - RLENGTH) != pad)
        { error("blanks missing"); res = 0 }
    }
  else
    { error("bad location format"); res = 0 }

  # The text proper must start exactly in column 20.
  if (substr(lin, 20, 1) == " ") { error("too many blanks"); res = 0 }

  txt = substr(lin, 20, length(lin) - 19)

  # Strip the constructs that may legitimately contain other characters:
  # brace-delimited spans, bracketed "[x|y]" pairs, "!" characters, and
  # trailing blanks.
  gsub(/[{][^}]*[}]/, "", txt);
  gsub(/\[[-*%A-Z.24678]*[|][-*%A-Z.24678]*\]/, "", txt);
  gsub(/!+/, "", txt);
  gsub(/ *$/, "", txt);

  # What remains must consist solely of valid characters and end with one
  # of "-", "=" or "%".  The trailing "$" matters: without it anything
  # following the first terminator character would go unchecked.
  if (txt !~ /^[-*%A-Z.!24678]*[-=%]$/)
    { error("invalid char in text"); res = 0 }

  return res
}

# blank lines
/^ *$/ { next }

# comment
/^#/ { next }

# panel declaration
/^<f[0-9][0-9]*[rv][0-9]*[ab]?> *$/ { next }

# sub-panel location declaration
/^<f[0-9][0-9]*[rv][0-9]*[ab]?\.[A-Za-z][A-Za-z0-9]*> *$/ { next }

# line of anonymous text
# NOTE(review): the locator pattern inside checktext() requires a unit name
# between the page and the line number, so every line selected here is
# reported as "bad location format" - confirm whether that is intended.
/^<f[0-9][0-9]*[rv][0-9]*[ab]?\.[0-9][0-9]*[abc]?;[A-Z]> / {
  checktext($0)
  next
}

# line of text in sub-page location
# NOTE(review): this rule allows an [ab] panel suffix after the page that
# the locator pattern inside checktext() does not accept - confirm.
/^<f[0-9][0-9]*[rv][0-9]*[ab]?\.[A-Za-z][A-Za-z0-9]*\.[0-9][0-9]*[a]?;[A-Z]> / {
  checktext($0)
  next
}

# anything else
/./ { error("bad format"); next }