#! /usr/bin/gawk -f
# Last edited on 2002-03-05 03:30:53 by stolfi

# format-soc: prints a compact, colorized HTML listing of word occurrences,
# grouped by head word.
#
# Reads a file containing lines of the form
#
#   SEC USEQ FNUM UNIT LINE TRAN FPOS RPOS PFRST PLAST WORD HEAD TAG
#   1   2    3    4    5    6    7    8    9     10    11   12   13
#
# Assumes that the file is sorted by HEAD, then some fields,
# then USEQ (FNUM and UNIT), NLIN, TRAN.  Prints all entries
# with the same HEAD, in compact format, suppressing repeated fields.
#
# Variables settable with `-v':
#   title      page title (default "Occurrences of selected words").
#   showWords  if true, prints words too, else locations only (default 1).
#   showWeak   if false, omits weak matches, i.e. those with TAG = 0
#              (default 1).
# The boolean options are tested with awk truth rules, so pass 0 or 1;
# any other non-empty string (including "FALSE") counts as true.
#
# NOTE(review): the copy of this script that was reviewed had been collapsed
# onto three physical lines and had the markup inside its string literals
# stripped (e.g. `printf "", clr' in open_font).  The HTML tags below are a
# minimal reconstruction inferred from the function names and from the
# reliance on literal newlines and spaces for layout -- confirm against the
# original before relying on the exact markup.

BEGIN {
  abort = -1;
  # Usage text, kept for reference; nothing in this script prints it.
  usage = ( \
    "cat INFILE \\\n" \
    " | format-soc \\\n" \
    " -v title=STRING \\\n" \
    " -v showWords=BOOL \\\n" \
    " -v showWeak=BOOL \\\n" \
    " > OUTFILE " \
  );

  if (title == "") { title = "Occurrences of selected words"; }
  if (showWords == "") { showWords = 1; }
  if (showWeak == "") { showWeak = 1; }

  output_html_header(title);
  out_line_indent = 2;
  out_line_width = 56; # Not counting indentation
}

# Stop processing as soon as an error has been flagged.
(abort >= 0) { exit abort; }

# Regular data record.
(NF == 13) {
  sec = $1;   useq = $2;  fnum = $3;   unit = $4;   nlin = $5;  tran = $6;
  fpos = $7;  rpos = $8;  pfrst = $9;  plast = $10;
  word = $11; head = $12; tag = $13;

  if ((tag == 0) && (! showWeak)) { next; }

  if (head != ohead) {
    # New head word: close the previous group and forget all "previous
    # field" state so that every field is printed in full again.
    if (ohead != "") { finish_head(); }
    start_head(head);
    ohead = head;
    osec = ""; otag = ""; oword = "";
    ofnum = ""; ounit = ""; onlin = ""; otran = "";
  }

  # otran is non-empty iff an entry was already printed under this head.
  if (otran != "") { print_comma(); }

  if (tag != otag) {
    if (otag != "") { close_font(); }
    open_font(tag_color(tag));
    otag = tag;
  }

  if (showWords) {
    if (word != oword) { print_word(word); oword = word; }
  }

  # Print only the location fields that changed.  A change at one level
  # clears the remembered values of all finer levels, which forces them
  # to be printed (with their leading separator) too.
  if (sec != osec) {
    print_sec(sec);
    osec = sec; ofnum = ""; ounit = ""; onlin = ""; otran = "";
  }
  if (fnum != ofnum) {
    print_fnum(fnum);
    ofnum = fnum; ounit = ""; onlin = ""; otran = "";
  }
  if (unit != ounit) {
    print_unit(unit);
    ounit = unit; onlin = ""; otran = "";
  }
  if (nlin != onlin) {
    print_nlin(nlin);
    onlin = nlin; otran = "";
  }
  print_tran(tran);
  otran = tran;
  next;
}

# Any other non-empty line is malformed (13-field lines never get here
# because the rule above ends with `next').
/./ { data_error("bad line type"); }

END {
  if (abort >= 0) { exit abort; }
  if (ohead != "") { finish_head(); }
  output_html_trailer();
}

function output_html_header(title)
{
  # Writes the page preamble.  The body is wrapped in <pre> because the
  # layout below is done with literal newlines and space indentation.
  # The title is inserted verbatim (not escaped).
  printf "<html>\n<head><title>%s</title></head>\n<body>\n<pre>\n", title;
}

function start_head(head,   n,i)
{
  # Prints a banner line "=== HEAD ====..." padded with "=" according to
  # the length of HEAD, then starts the first indented output line.
  printf "=== %s ", html_escape(head);
  n = 60 - 5 - length(head);
  for (i = 1; i <= n; i++) { printf "="; }
  printf "\n";
  newline();
}

function tag_color(tag)
{
  # Returns the RGB hex color for match tag TAG.
  # Tags other than 0, 1, 2 yield an empty result; open_font copes with that.
  if (tag == 0)
    { return "00aa00"; }
  else if (tag == 1)
    { return "ff7700"; }
  else if (tag == 2)
    { return "ffbb00"; }
}

function open_font(clr)
{
  # Opens a font element with color CLR (six hex digits, no "#").
  # With an empty CLR a plain <font> is emitted, so that the matching
  # close_font still balances and no malformed color attribute is written.
  if (clr == "")
    { printf "<font>"; }
  else
    { printf "<font color=\"#%s\">", clr; }
}

function close_font()
{
  # Closes the innermost font element opened by open_font.
  printf "</font>";
}

function print_sec(sec)
{
  # Prints the section code SEC plus a blank, in the section's own color.
  open_font(sec_color(sec));
  print_string((sec " "));
  close_font();
}

function sec_color(sec)
{
  # Returns the RGB hex color for section code SEC; unlisted codes get
  # the default "88ccff".
  if (sec == "her")
    { return "00cc00"; }
  else if (sec == "str")
    { return "ff00ff"; }
  else if (sec == "ast")
    { return "00ddff"; }
  else if (sec == "bio")
    { return "ffaa66"; }
  else if (sec == "unk")
    { return "aaaaaa"; }
  else if (sec == "zod")
    { return "ffff00"; }
  else
    { return "88ccff"; }
}

function print_fnum(fnum)
{
  print_string(fnum);
}

# The next three functions look at the global "previous value" variable
# (not yet updated by the caller): when it is empty the field starts a
# fresh sub-list and gets its leading separator.

function print_unit(unit)
{
  if (ounit == "")
    { print_string(("." unit)); }
  else
    { print_string(unit); }
}

function print_nlin(nlin)
{
  if (onlin == "")
    { print_string(("." nlin)); }
  else
    { print_string(nlin); }
}

function print_tran(tran)
{
  if (otran == "")
    { print_string((":" tran)); }
  else
    { print_string(tran); }
}

function print_word(word)
{
  print_string(("(" word ")"));
}

function print_comma()
{
  # Prints the entry separator: a comma followed by either a blank or,
  # when the line is full, a line break.
  printf ",";
  cur_line_width++;
  if (cur_line_width + 1 > out_line_width)
    { newline(); }
  else
    { printf " "; cur_line_width++; }
}

function print_string(str,   n)
{
  # Prints STR, first breaking the line if STR would overflow it.
  # The width is counted on the raw text (what the reader sees);
  # escaping is applied only to what is written out.
  n = length(str);
  if ((cur_line_width > 0) && (cur_line_width + n > out_line_width)) { newline(); }
  printf "%s", html_escape(str);
  cur_line_width += n;
}

function html_escape(str)
{
  # Returns STR with "&", "<" and ">" replaced by HTML entities.
  # "&" must be handled first so the entities added below are not
  # escaped again.  In a gsub replacement "\\&" denotes a literal "&".
  gsub(/&/, "\\&amp;", str);
  gsub(/</, "\\&lt;", str);
  gsub(/>/, "\\&gt;", str);
  return str;
}

function newline()
{
  # Starts a new output line, indented by out_line_indent blanks.
  printf "\n%*s", out_line_indent, "";
  cur_line_width = 0;
}

function finish_head()
{
  # Ends the current head group: closes the pending tag font, if any,
  # and leaves a blank line.
  if (otag != "") { close_font(); }
  printf "\n\n";
}

function output_html_trailer(title)
{
  # Closes the elements opened by output_html_header.
  # The TITLE parameter is unused; it is kept for interface compatibility.
  printf "</pre>\n</body>\n</html>\n";
}

function data_error(msg)
{
  # Reports MSG with the current line number on stderr and aborts
  # with exit status 1.
  printf "*** line %d: %s\n", FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}