#! /usr/bin/gawk -f
# Last edited on 2002-03-05 03:30:53 by stolfi

BEGIN{
  abort = -1;
  usage = ( \
    "cat INFILE \\\n" \
    " | format-soc \\\n" \
    " -v title=STRING \\\n" \
    " -v showWords=BOOL \\\n" \
    " -v showWeak=BOOL \\\n" \
    " > OUTFILE " \
  );

  # Reads a file containing lines of the form
  #
  #   SEC USEQ FNUM UNIT LINE TRAN FPOS RPOS PFRST PLAST WORD HEAD TAG
  #   1   2    3    4    5    6    7    8    9     10    11   12   13
  #
  # Assumes that the file is sorted by HEAD, then some fields,
  # then USEQ (FNUM and UNIT), NLIN, TRAN.  Prints all entries
  # with the same HEAD, in compact format, suppressing repeated fields.
  # If `showWords' is TRUE, prints words too, else locations only.
  # If `showWeak' is FALSE, omits weak matches.

  # Defaults for the command-line (-v) options.
  if (title == "") { title = "Occurrences of selected words"; }
  if (showWords == "") { showWords = 1; }
  if (showWeak == "") { showWeak = 1; }

  output_html_header(title);

  out_line_indent = 2;
  out_line_width = 56; # Not counting indentation
}

# Once an error has been flagged, stop processing input.
(abort >= 0) { exit abort; }

# Main rule: one occurrence record with exactly 13 fields.
(NF == 13){
  sec = $1; useq = $2; fnum = $3; unit = $4; nlin = $5; tran = $6;
  fpos = $7; rpos = $8; pfrst = $9; plast = $10;
  word = $11; head = $12; tag = $13;

  # Tag 0 entries are dropped when `showWeak' is off.
  if ((tag == 0) && (! showWeak)) { next; }

  # A new HEAD closes the previous group and resets every "previous
  # value" variable, so that all fields get printed in full again.
  if (head != ohead)
    { if (ohead != "") { finish_head(); }
      start_head(head);
      ohead = head;
      osec = ""; otag = ""; oword = "";
      ofnum = ""; ounit = ""; onlin = ""; otran = "";
    }

  # `otran' is non-empty iff an entry was already printed in this group.
  if (otran != "") { print_comma(); }

  if (tag != otag)
    { if (otag != "") { close_font(); }
      open_font(tag_color(tag));
      otag = tag;
    }

  if (showWords)
    { if (word != oword) { print_word(word); oword = word; } }

  # Each field is printed only when it differs from the previous entry;
  # printing a field forces all the finer-grained fields to be reprinted.
  if (sec != osec)
    { print_sec(sec); osec = sec;
      ofnum = ""; ounit = ""; onlin = ""; otran = "";
    }
  if (fnum != ofnum)
    { print_fnum(fnum); ofnum = fnum;
      ounit = ""; onlin = ""; otran = "";
    }
  if (unit != ounit)
    { print_unit(unit); ounit = unit;
      onlin = ""; otran = "";
    }
  if (nlin != onlin)
    { print_nlin(nlin); onlin = nlin;
      otran = "";
    }
  print_tran(tran); otran = tran;
  next;
}

END {
  if (abort >= 0) { exit abort; }
  if (ohead != "") { finish_head(); }
  output_html_trailer();
}

# Writes the document preamble; called once from BEGIN with the page title.
# NOTE(review): the format string visible in this copy contains only
# newline escapes and no conversion for `title' -- presumably markup
# was lost from it; confirm against the original script.
function output_html_header(title)
{
  printf "\n\n\n", title;
}
# Prints the banner that opens the group for `head': "=== HEAD " followed
# by enough "=" signs to reach 60 - 5 - length(head) of them (none if that
# is not positive), ends the line, and starts a fresh indented output line.
# `pad' and `k' are local variables.
function start_head(head,   pad, k)
{
  printf "=== %s ", head;
  pad = 60 - 5 - length(head);
  k = 0;
  while (k < pad)
    { printf "="; k++; }
  printf "\n";
  newline();
}
# Maps a TAG value (0, 1 or 2) to a six-digit hex color string.
# For any other value the function falls off the end and returns
# the empty (uninitialized) value.
function tag_color(tag)
{
  if (tag == 0) { return "00aa00"; }
  if (tag == 1) { return "ff7700"; }
  if (tag == 2) { return "ffbb00"; }
}
# Called before printing text that should appear in color `clr'
# (a hex color string as returned by tag_color / sec_color).
# NOTE(review): the format string is empty in this copy of the file, so
# nothing is printed and `clr' is ignored -- presumably the markup it
# contained was lost; confirm against the original script.
function open_font(clr)
{
printf "", clr;
}
# Prints the section code `sec' followed by one blank, bracketed by
# open_font/close_font with the color chosen by sec_color.
# `clr' is a local variable.
function print_sec(sec,   clr)
{
  clr = sec_color(sec);
  open_font(clr);
  print_string(sec " ");
  close_font();
}
# Maps a section code to a six-digit hex color string; codes other than
# the six listed below get the fallback color "88ccff".
# `tbl' is a local lookup table.
function sec_color(sec,   tbl)
{
  tbl["her"] = "00cc00";
  tbl["str"] = "ff00ff";
  tbl["ast"] = "00ddff";
  tbl["bio"] = "ffaa66";
  tbl["unk"] = "aaaaaa";
  tbl["zod"] = "ffff00";
  if (sec in tbl) { return tbl[sec]; }
  return "88ccff";
}
# Prints the FNUM field as is, through the line-wrapping printer.
function print_fnum(fnum) {
  print_string(fnum);
}
# Prints the UNIT field.  When the global `ounit' is empty the unit is
# prefixed with "."; otherwise it is printed bare.
# `txt' is a local variable.
function print_unit(unit,   txt)
{
  txt = unit;
  if (ounit == "") { txt = "." unit; }
  print_string(txt);
}
# Prints the line-number field.  When the global `onlin' is empty the
# value is prefixed with "."; otherwise it is printed bare.
# `txt' is a local variable.
function print_nlin(nlin,   txt)
{
  txt = nlin;
  if (onlin == "") { txt = "." nlin; }
  print_string(txt);
}
# Prints the TRAN field.  When the global `otran' is empty the value is
# prefixed with ":"; otherwise it is printed bare.
# `txt' is a local variable.
function print_tran(tran,   txt)
{
  txt = tran;
  if (otran == "") { txt = ":" tran; }
  print_string(txt);
}
# Prints `word' enclosed in parentheses, through the line-wrapping printer.
# `wrapped' is a local variable.
function print_word(word,   wrapped)
{
  wrapped = "(" word ")";
  print_string(wrapped);
}
# Called after a colored span, as the counterpart of open_font.
# NOTE(review): the printf string is empty in this copy of the file, so
# nothing is printed -- presumably the closing markup was lost;
# confirm against the original script.
function close_font()
{
printf "";
}
# Prints the separator between two entries: a comma, then either a blank
# (if at least one more column fits within `out_line_width') or a line
# break.  Keeps the global column counter `cur_line_width' up to date.
function print_comma()
{
  printf ",";
  cur_line_width += 1;
  if (cur_line_width + 1 <= out_line_width)
    { printf " ";
      cur_line_width += 1;
      return;
    }
  newline();
}
# Prints `str' without splitting it.  If the current line is non-empty and
# `str' would run past `out_line_width', a new line is started first.
# Adds length(str) to the global column counter `cur_line_width'.
# `len' is a local variable.
function print_string(str,   len)
{
  len = length(str);
  if (cur_line_width > 0)
    { if (cur_line_width + len > out_line_width) { newline(); } }
  printf "%s", str;
  cur_line_width = cur_line_width + len;
}
# Ends the current output line, writes `out_line_indent' blanks of
# indentation, and resets the column counter (indentation is not counted).
function newline()
{
  printf "\n";
  printf "%*s", out_line_indent, "";
  cur_line_width = 0;
}
# Ends the group of the current HEAD: calls close_font if a tag color is
# active (global `otag' non-empty), then prints two newlines.
function finish_head()
{
  if (otag == "")
    { printf "\n\n";
      return;
    }
  close_font();
  printf "\n\n";
}
# Writes the end of the document; called from the END rule with no
# arguments, so `title' is never set and is not used in the body.
# NOTE(review): the string contains only newlines in this copy of the
# file -- presumably closing markup was lost; confirm against the
# original script.
function output_html_trailer(title)
{
printf "\n\n\n";
}
# Catch-all rule: any non-empty record that reaches this point was not
# consumed by an earlier rule (the NF == 13 rule always ends with `next'),
# so it is reported as a data error.
/./{ data_error("bad line type"); }
# Reports a problem with the current input record on standard error,
# quoting the record number FNR, records failure in the global `abort',
# and terminates with exit status 1.
function data_error(msg)
{
  abort = 1;
  printf "*** line %d: %s\n", FNR, msg > "/dev/stderr";
  exit abort;
}