#! /usr/bin/gawk -f
# Last edited on 1999-04-15 05:37:17 by stolfi

# Start-up: validates the command-line variables, writes the leading
# output lines and initialises the per-file state used by the rules.
#
#   fnum    (required) passed on to expand_refs() for every text line.
#   exdent  (optional, default 0) number of leading columns removed
#           from every text line.
BEGIN {
  # Stays at -1 until one of the error functions sets it to 1.
  abort = -1;

  usage = ( \
    "format-html-sections \\\n" \
    " -v fnum=NUM \\\n" \
    " -v exdent=NUM \\\n" \
    " < INFILE.txt >> OUTFILE.html" \
  );

  if (exdent == "") exdent = 0;
  if (fnum == "") arg_error("must define \"fnum\"");

  # NOTE(review): these literals hold no visible markup; presumably an
  # opening tag was intended here -- confirm against the expected output.
  print "";
  print "\n";

  lastEdited = "";    # Text of the "# Last edited" line, if any.
  nBlanks = 0;        # Blank text lines seen but not yet written.
  ignoreBlanks = 1;   # Blank lines are not counted until a text line is seen.
  refsection = 0;     # Becomes 1 once the "# References:" header is seen.
}
# Lines that are empty or contain only spaces are skipped.
$0 ~ /^ *$/ {
  next;
}

# Every remaining line must start with "#".
$0 ~ /^[^#]/ {
  error("bad line format");
}

# Remember the "Last edited" line for the END rule; it is not body text.
$0 ~ /^# *Last edited/ {
  lastEdited = $0;
  next;
}

# A "#" alone is a blank text line.  It is only counted here; the count
# is written out later, just before the next non-blank line.
$0 ~ /^# *$/ {
  if (! ignoreBlanks) nBlanks += 1;
  next;
}

# Section markers.  These rules do not end with "next", so the line is
# still offered to the rules that follow.
$0 ~ /^# Description:/ {
  pagelinks = 1;
}

$0 ~ /^# References:/ {
  refsection = 1;
}
# Section header: "# " followed by a non-blank character, with ":" as
# the last non-blank character of the line.
$0 ~ /^# [^ ].*: *$/ {
  print "";
  print "";

  # Strip the leading "#" and blanks, then the trailing colons and blanks.
  title = $0;
  sub(/^# */, "", title);
  sub(/[: ]*$/, "", title);

  # NOTE(review): "title" is computed but the format below is empty, so
  # nothing is written for it; presumably a heading was meant to be
  # emitted here -- confirm against the expected output.
  printf "";
  next;
}
# Any line that reaches this point is about to produce output: first
# write out the blank lines that were counted but held back.
{
  for (; nBlanks > 0; nBlanks--) { printf "\n"; }
}

# Ordinary text line: replace the "#" by a blank, drop "exdent" leading
# columns (which must all be blank), convert it and print it.
/^#/ {
  sub(/^#/, " ");
  if (substr($0, 1, exdent) !~ /^[ ]*$/) { error("bad indentation"); }
  $0 = substr($0, exdent + 1);

  $0 = expand_refs(highlight_headword(protect($0)), fnum, refsection);
  print;

  # From now on blank text lines are significant.
  ignoreBlanks = 0;
  next;
}

# Safety net for anything the rules above did not consume.
{
  error("bad line format");
}
# Writes the trailing output lines, including the saved "Last edited"
# line (minus its leading "#") when one was seen.
#
# NOTE(review): many string literals in this rule and in the format_*
# functions below are empty (""), so the wrapping they apply is a no-op
# as written.  Presumably markup was intended in those places -- confirm
# against the expected output before relying on them.
END {
  # awk still runs END after "exit".  The error functions set "abort"
  # before exiting, so stop here instead of writing a normal trailer.
  if (abort >= 0) { exit abort; }
  print "";
  print "";
  # The rule for "# Last edited" lines stores the text in "lastEdited".
  if (lastEdited != "")
    { print ("" substr(lastEdited,2) "\n"); }
  print "";
}

# Returns "txt" with "&", "<" and ">" replaced by the corresponding
# HTML character entities.  "&" is handled first so that the "&" of the
# entities inserted afterwards is not escaped again.
function protect(txt)
{
  # In a gsub replacement a bare "&" means "the matched text"; "\\&"
  # yields a literal ampersand.
  gsub(/[&]/, "\\&amp;", txt);
  gsub(/</, "\\&lt;", txt);
  gsub(/>/, "\\&gt;", txt);
  return (txt);
}

# Rewrites a leading "* HEADWORD:" item (optionally indented) as
# "HEADWORD:" with the same indentation, dropping the "*" marker.
# Lines that do not have that shape are returned unchanged.
function highlight_headword(txt)
{
  # Third argument 1: replace the first (and only possible) match.
  return gensub(/^([ ]*)[*][ ]*([^:]*):/, "\\1\\2:", 1, txt);
}

# Scans "txt" from left to right and passes each recognised reference
# to the matching format_* function, copying all other text unchanged:
#   f<digits>(r|v)[1-6] with optional "[n]" or "[n,m]"  -> format_page_ref
#   ftp:... or http:... up to the next blank             -> format_www_ref
#   [digits and commas]   -> format_bib_entry if "refsection" is true,
#                            format_bib_ref otherwise
#   ¶{genus:species}                                     -> format_species_ref
#   µ{unit}                                              -> format_unit_ref
# Returns the converted text.
function expand_refs(txt,fnum,refsection,   ref,out)
{
  out = "";
  while (txt != "")
    {
      if (match(txt, /^f[0-9]+[rv][1-6]?(|[[][0-9]+(|[,][0-9]+)[]])/))
        {
          ref = substr(txt,1,RLENGTH);
          txt = substr(txt,RLENGTH+1);
          out = (out format_page_ref(ref,fnum));
        }
      else if (match(txt, /^(ftp|http):[^ ]*/))
        {
          ref = substr(txt,1,RLENGTH);
          txt = substr(txt,RLENGTH+1);
          out = (out format_www_ref(ref));
        }
      else if (match(txt, /^[[][0-9,]+[]]/))
        {
          # Keep only what is between the brackets.
          ref = substr(txt,2,RLENGTH-2);
          txt = substr(txt,RLENGTH+1);
          if (refsection)
            { ref = format_bib_entry(ref); }
          else
            { ref = format_bib_ref(ref); }
          out = (out ref);
        }
      else if (match(txt, /^[¶][{][^{}]*[:][^{}]*[}]/))
        {
          # Skip the two-character opener and the closing brace.
          ref = substr(txt,3,RLENGTH-3);
          txt = substr(txt,RLENGTH+1);
          out = (out format_species_ref(ref));
        }
      else if (match(txt, /^[µ][{][^{}]*[}]/))
        {
          ref = substr(txt,3,RLENGTH-3);
          txt = substr(txt,RLENGTH+1);
          out = (out format_unit_ref(ref,fnum));
        }
      else if (match(txt, /^.[^[fh¶µ]*/))
        {
          # Plain text: one character (whatever it is) plus everything up
          # to the next character that could start a reference.
          out = (out substr(txt,1,RLENGTH));
          txt = substr(txt,RLENGTH+1);
        }
      else
        { program_error(( "bad char: " out "»" txt )); }
    }
  return out;
}

# Formats an "ftp:" or "http:" reference.
function format_www_ref(ref)
{
  return "" ref "";
}

# Formats a citation list.  "ref" is the text between the brackets:
# numbers separated by commas.  Returns "[n1,n2,...]".
function format_bib_ref(ref,   out,sep,item)
{
  out = "[";
  while (ref != "")
    {
      if (match(ref, /^[0-9]+/))
        {
          item = substr(ref,1,RLENGTH);
          ref = substr(ref,RLENGTH+1);
          if (out != "[") { out = (out ","); }
          out = ( out "" item "" );
        }
      else
        {
          # Leading, doubled or trailing separators: skip them.  Without
          # this the loop would never consume the text and never end.
          sub(/^[^0-9]+/, "", ref);
        }
    }
  return (out "]");
}

# Formats the label of an entry in the reference list.  "ref" must be
# a single number; anything else is a program error.
function format_bib_entry(ref,   out,sep,item)
{
  if (! match(ref, /^[0-9]+$/))
    { program_error(( "bad bib entry: " ref )); }
  return ( "[" ref "]" );
}

# Formats a page reference such as "f12r" or "f12v3[4,5]".  A reference
# whose page part equals "fnum" is returned unchanged; otherwise the
# page part and the remaining tail are formatted separately.
function format_page_ref(ref,fnum,   fref,tail)
{
  if (match(ref, /^f[0-9]+[rv][1-6]?/))
    {
      fref = substr(ref,1,RLENGTH);
      tail = substr(ref,RLENGTH+1);
    }
  else
    { program_error(("bad page ref \"" ref "\"")); }
  if (fref == fnum)
    { return ref; }
  else
    { return ("" fref "" tail); }
}

# Formats a unit reference (the text inside "µ{...}").
# NOTE(review): "fref" and "uref" are computed but do not appear in the
# returned string; presumably they were meant to build a link target --
# confirm against the expected output.
function format_unit_ref(ref,fnum,   fref,uref)
{
  if (match(ref, /^f[0-9]+[rv][1-6]?[.]$/))
    {
      fref = substr(ref,1,RLENGTH-1);
      uref = substr(ref,RLENGTH+1);
    }
  else
    {
      fref = fnum;
      uref = ref;
    }
  return ("" ref "");
}

# Formats a species reference "genus:species" as "genus species".
function format_species_ref(ref,   genus,species)
{
  if (match(ref, /[:]/))
    {
      genus = substr(ref,1,RSTART-1);
      species = substr(ref,RSTART+1);
    }
  else
    { program_error("bad species ref"); }
  return("" genus " " species "");
}

# Reports a problem with the current input line and terminates.
function error(msg)
{
  printf "line %d:%s\n", NR, msg >> "/dev/stderr";
  abort = 1;
  exit abort;
}

# Reports a problem with the command-line variables, shows the usage
# text prepared in BEGIN, and terminates.
function arg_error(msg)
{
  printf "%s\n", msg >> "/dev/stderr";
  printf "usage: %s\n", usage >> "/dev/stderr";
  abort = 1;
  exit abort;
}

# Reports an internal inconsistency and terminates.
function program_error(msg)
{
  printf "program error: %s\n", msg >> "/dev/stderr";
  abort = 1;
  exit abort;
}