#! /n/gnu/bin/gawk -f

# Validates the format of the original interlinear file.
# Usage: $0 < infile >& bugs

# Reports a problem found in the current input record.
#   msg  - short description of what is wrong with the record.
# The record itself ($0) is echoed on standard output, and a message
# tagged with the record number (NR) is written to "/dev/stderr".
function error(msg)
{
  # Show the offending record verbatim.
  printf("%s\n", $0)

  # Follow it with the diagnostic, on the error stream.
  printf("line %d: %s\n", NR, msg) > "/dev/stderr"
}

# Validates one text line of the interlinear file.
#
#   lin  - the complete line: a locator "<f...;X>" starting in column 1,
#          blank padding through column 19, and the text from column 20 on.
#
# Returns 1 if the line passed every check, 0 otherwise.  Each failed
# check is reported through error().
#
# The extra parameters (res, txt, loclen) are local variables; callers
# pass only `lin'.
function checktext(lin,    res, txt, loclen)
{
  res = 1

  if (length(lin) <= 19)
    { error("missing text"); res = 0 }

  # The locator pattern accepts every form that the dispatching rules
  # accept: an optional panel suffix [ab], an optional ".<sub-location>"
  # component (absent on anonymous text lines), and line-number suffixes
  # a, b or c.  match() returns 0 when the locator is malformed.
  if (match(lin, /^<f[0-9][0-9]*[vr]*[0-9]*[ab]?(\.[A-Za-z][A-Za-z0-9]*)?\.[0-9][0-9]*[abc]*;[A-Z]>/)) {
    loclen = RLENGTH
    # Everything between the end of the locator and column 19 must be
    # blank.  Only meaningful when the locator matched; otherwise RLENGTH
    # is -1 and the comparison would yield a spurious error.
    if (substr(lin, loclen+1, 19-loclen) != substr("                    ", 1, 19-loclen))
      { error("blanks missing"); res = 0 }
  } else {
    error("bad location format"); res = 0
  }

  # The text checks apply only when there is text; an empty text has
  # already been reported as "missing text" above.
  if (length(lin) > 19) {
    # The text must start exactly in column 20.
    if (substr(lin, 20, 1) == " ")
      { error("too many blanks"); res = 0 }

    txt = substr(lin, 20)
    # Discard {...} comments, [x|y] alternative readings, "!" fillers
    # and trailing blanks before checking the remaining characters.
    gsub(/[{][^}]*[}]/, "", txt)
    gsub(/\[[-*%A-Z.24678]*[|][-*%A-Z.24678]*\]/, "", txt)
    gsub(/!/, "", txt)
    gsub(/  *$/, "", txt)

    # The whole remaining text must consist of valid characters and end
    # with a terminator (-, = or %).  The pattern is anchored at both
    # ends so that an invalid character after the first terminator is
    # detected too.
    if (txt !~ /^[-*%A-Z.!24678]*[-=%]$/)
      { error("invalid char in text"); res = 0 }
  }

  return res
}

# Lines with nothing to validate: blank lines and "#" comments.
/^ *$/ ||
/^#/ {
  next
}

# Declarations, which carry no text: a panel declaration, or a
# sub-panel location declaration.
/^<f[0-9][0-9]*[rv][0-9]*[ab]?> *$/ ||
/^<f[0-9][0-9]*[rv][0-9]*[ab]?\.[A-Za-z][A-Za-z0-9]*> *$/ {
  next
}

# Text lines, either anonymous or inside a sub-page location; the
# layout and contents of the whole line are checked by checktext().
/^<f[0-9][0-9]*[rv][0-9]*[ab]?\.[0-9][0-9]*[abc]?;[A-Z]> / ||
/^<f[0-9][0-9]*[rv][0-9]*[ab]?\.[A-Za-z][A-Za-z0-9]*\.[0-9][0-9]*[a]?;[A-Z]> / {
  checktext($0)
  next
}

# Any other non-empty line fits none of the known formats.
/./ {
  error("bad format")
  next
}