#! /n/gnu/bin/gawk -f
# Last edited on 1998-07-12 15:00:34 by stolfi

# Colorizes the output of "extract-signif-chars" by a tuple-indexed table.

BEGIN {
  # Command-line synopsis; all options are passed as "-v" assignments.
  usage = ( \
      "colorize-text-by-tuple \\\n" \
      "  -v order=ORDER \\\n" \
      "  [ -v filler=CHAR ] \\\n" \
      "  [ -v lowercase=BOOL ] \\\n" \
      "  -v table=TABLE \\\n" \
      "  [ -v default=DEFDIGS ] \\\n" \
      "  [ -v dmax=DMAX ] \\\n" \
      "  [ -v ymin=YMIN ] \\\n" \
      "  [ -v ymax=YMAX ] \\\n" \
      "  < SIGFILE > COLFILE" \
    );
  #
  # Purpose: context-dependent coloring of the letters of a text.
  #
  # TABLE is a file of "DIGS WORD" pairs; DIGS is a string of digits,
  # WORD a string of letters, and each has exactly ORDER characters.
  #
  # SIGFILE is the output of extract-signif-chars.  Call its
  # significant (class 3) characters "txt[0..N-1]".  A parallel
  # sequence of numbers "val[0..N-1]" starts out as all zeros; for
  # every window "txt[i..i+ORDER-1]" that equals some WORD of TABLE,
  # the digits of the matching DIGS are added, position by position,
  # to "val[i..i+ORDER-1]".
  #
  # Each "txt[i]" is then written to standard output, wrapped in HTML
  # font markup whose color is derived from the final "val[i]": the
  # value is clipped to 0..DMAX, mapped onto the intensity range
  # YMIN..YMAX (both within 0..1), and pseudo-colored.
  #

  # "abort" < 0 means "no error so far"; error() overwrites it.
  abort = -1;
  check_options();

  # Output state: no <font> element open yet, tuple window empty.
  current_color = "";
  init_tup();

  # Palette indexed by the quantized "val[i]":
  n_colors = 10;
  i = 0;
  while (i < n_colors)
    { xcolor[i] = xcolor_from_val(i/(n_colors-1), ymin, ymax);
      i++;
    }

  # Fixed colors for text that is not scored:
  ignored_color = xcolor_from_r_g_b_y(1,1,1, (2*ymin+ymax)/3);
  comment_color = xcolor_from_r_g_b_y(0,1,0, ymin);
}
    
/^[0]/ {
  # Class 0 record: the rest of the line is passed along as
  # decoration (uncolored glue); no character enters the tuple window.
  if (abort >= 0)
    { exit(abort); }
  push_deco(decode(substr($0, 2)));
  next;
}

/^[1]/ {
  # Class 1 record: feed a single filler character into the tuple
  # window (shown as "_"), then attach the rest of the line as
  # decoration.
  if (abort >= 0)
    { exit(abort); }
  push_char(filler, "_");
  push_deco(decode(substr($0, 2)));
  next;
}

/^[2]/ {
  # Class 2 record: feed "order-1" filler characters into the tuple
  # window, so that no tuple spans this point, then attach the rest of
  # the line as decoration.
  if (abort >= 0)
    { exit(abort); }
  i = 1;
  while (i < order)
    { push_char(filler, "_");
      i++;
    }
  # On the very first input record a newline precedes the decoration.
  if (NR == 1)
    { push_deco("\n"); }
  push_deco(decode(substr($0, 2)));
  next;
}

/^[3]/ {
  # Class 3 record: one significant character, found right after the
  # class digit.  "c" is the character as it will be printed, "m" its
  # image under "map" (the form used for table lookup).
  c = substr($0, 2, 1);
  m = map[c];
  # The filler is reserved for padding and must not occur in the text.
  if (m == filler)
    { error(("\"filler\" character found on input")); }
  push_char(m, c);
  next;
}

END {
  # If error() was called earlier, just propagate its exit status.
  if (abort >= 0)
    { exit(abort); }
  # Print the characters still held in the tuple window.
  flush_tup();
  # Printing with an empty color closes the last open <font> element.
  print_text("\n", "");
}

function init_tup(   k)
{
  # Resets the sliding tuple window:
  #   "tup"  becomes "order-1" blanks (placeholders for the characters
  #          that have not arrived yet);
  #   "val", "ext", "glu" become empty arrays;
  #   "wait" is the number of characters still to be pushed before
  #          "tup" holds a full tuple.
  split("", ext);
  split("", glu);
  split("", val);
  wait = order-1;
  tup = "";
  k = 1;
  while (k < order)
    { tup = (tup " ");
      k++;
    }
}

function push_char(m, c,   i,d,ds)
{
  # Appends one character to the tuple window.
  #
  #   m  the character as used for table lookup; must be exactly one
  #      character (an empty "m" is reported through error()).
  #   c  the external representation of that character, i.e. what is
  #      eventually printed for it.
  #
  # Steps:
  #   1. Sets "ext[order] = c", "glu[order] = """ and "val[order] = 0",
  #      and appends "m" to "tup".
  #   2. If the window is full ("wait == 0"), looks "tup" up in "dgs"
  #      (falling back to "default" when absent), adds the digits of
  #      the result to "val[1..order]", and prints the head of the
  #      window ("ext[1]" and "glu[1]").
  #      Otherwise only decrements "wait"; nothing is printed.
  #   3. Shifts the window left by one position, dropping entry 1.
  
  # printf "push_char(\"%s\", \"%s\")\n", m,c > "/dev/stderr";
  if (m == "") { error("push_char: empty m"); }
  
  # extend current tuple with new character:
  tup = (tup m); ext[order] = c; glu[order] = ""; val[order] = 0;
  if (wait == 0)
    { 
      # Now "tup" must have "order" characters. 
      # Find its digit string "ds" and add it to the "val" buffer:
      if (tup in dgs) { ds = dgs[tup]; } else { ds = default; }
      for (i=1; i<=order; i++) 
        { d = substr(ds,i,1); val[i] += d; } 
      # write out the first character of "tup":
      print_tup_head();
    }
  else
    { wait--; }
  # Shift buffer left by 1:
  pop_tup();
}

function push_deco(s)
{
  # Attaches the decoration string "s" to the output stream.
  #
  # With "order <= 1" nothing is buffered, so "s" is printed at once
  # in the "ignored" color.  Otherwise "s" is appended to the glue of
  # slot "order-1" of the window and gets printed together with that
  # slot's character.
  if (order <= 1)
    { print_text(s, ignored_color);
      return;
    }
  glu[order-1] = (glu[order-1] s);
}

function print_tup_head(  v)
{
  # Prints the first character of the window, colored according to
  # "val[1]", followed by its glue in the "ignored" color.

  # Quantize "val[1]" (nominally in 0..dmax) to a palette index ...
  v = int(val[1]/dmax*(n_colors-1));
  # ... and clip that index to 0..n_colors-1.
  if (v < 0)
    { v = 0; }
  else if (v >= n_colors)
    { v = n_colors-1; }

  print_text(ext[1], xcolor[v]);
  print_text(glu[1], ignored_color);
}

function pop_tup(  k)
{
  # Drops the first position of the window: "tup" loses its first
  # character, and the entries of "ext", "glu" and "val" all move down
  # by one index.  Slot "order" ends up absent from the three arrays.
  tup = substr(tup, 2, order-1);
  k = 1;
  while (k < order)
    { ext[k] = ext[k+1];
      glu[k] = glu[k+1];
      val[k] = val[k+1];
      k++;
    }
  delete val[order];
  delete glu[order];
  delete ext[order];
}

function flush_tup(   k)
{
  # Prints and removes the "order-1" positions still held in the
  # window at end of input.  Each of them is expected to hold a filler
  # character; anything else is reported as an internal error.
  k = 1;
  while (k < order)
    { if (substr(tup,1,1) != filler)
        { error(("internal error 1")); }
      print_tup_head();
      pop_tup();
      k++;
    }
}


function decode(str)
{
  # Returns a copy of "str" in which every carriage return (octal
  # 015) has been replaced by a newline.
  gsub(/\015/, "\n", str);
  return (str);
}

function check_options(   i,c,ucs,lcs,uc,lc,lin,fld,d,w,nlin,status)
{
  # Validates the option variables and fills in their defaults:
  #
  #   "order" "filler" "lowercase" "table" "default" 
  #   "dmax" "ymin" "ymax" 
  #
  # Also defines two global tables:
  #
  #   "map"  maps each character to the form used for table lookup
  #          (its lowercase equivalent when "lowercase" is positive,
  #          otherwise the character itself);
  #   "dgs"  maps each WORD read from the "table" file to its DIGS.
  #
  # Any problem is reported through error(), which does not return.
  #
  # NOTE(review): newer gawk releases reserve the word "default" for
  # the switch statement; confirm that the gawk used to run this
  # script still accepts it as a variable name.
  
  if (order == "") { error("should define \"order\""); } 
  if ((order < 1) || (order > 20)) { error("funny \"order\""); } 
    
  if (filler == "") { filler = "_"; }
  if (length(filler) != 1)
    { error(("the \"filler\" should be a single char")); }

  # --- lowercase mapping ----------------------------------------------
  # Start from the identity mapping on character codes 0..255.
  split("", map);
  for (i=0;i<256;i++) { c = sprintf("%c", i); map[c] = c; }
  
  if (lowercase == "") { lowercase = 1; }
  if (lowercase > 0) 
    { ucs = "ABCDEFGHIJKLMNOPQRSTUVWXYZАБВГДЕЖЗИЙКЛМНОПРСТУФХЦШЩЪЫЬЭЮ";
      lcs = "abcdefghijklmnopqrstuvwxyzабвгдежзийклмнопрстуфхцшщъыьэю";
      for (i=1;i<=length(ucs);i++)
        { uc = substr(ucs,i,1); lc = substr(lcs,i,1);
          map[uc] = lc;
        }
    }

  # --- tuple table ----------------------------------------------------
  if (table == "") { error("should define \"table\""); }  

  # "getline var < file" yields 1 per line read, 0 at end of file and
  # -1 on error, so the result must be compared with 0 explicitly;
  # a bare "while (getline ...)" would spin forever on an unreadable
  # file.  It does not update NR either, hence the counter "nlin".
  nlin = 0;
  while ((status = (getline lin < table)) > 0)
    { nlin++;
      split(lin, fld);
      if ((3 in fld) || !(2 in fld)) { error("file " table " line " nlin ": bad format"); }
      d = fld[1];
      w = fld[2];

      if (length(w) != order) 
        { error("file " table " line " nlin ": wrong word length"); } 

      if (length(d) != order) 
        { error("file " table " line " nlin ": wrong digs length"); } 
      if (d !~ /^[0-9][0-9]*$/) 
        { error("file " table " line " nlin ": bad digs"); } 
      dgs[w] = d;
    }
  if (status < 0) { error("cannot read file " table); }
  close(table);

  # --- scoring and color defaults -------------------------------------
  # The default digit string is "order" nines.
  if (default == "") { for (i=1;i<=order;i++) { default = (default "9"); } }
  if (dmax == "") { dmax = 9; } 
  if (ymin == "") { ymin = 0.40; } 
  if (ymax == "") { ymax = 1.00; } 

  # "dmax" is used as a divisor, and "ymin"/"ymax" enter a ratio and a
  # logarithm, so none of them may be zero or negative.
  if (dmax+0 <= 0) { error("\"dmax\" should be positive"); }
  if ((ymin+0 <= 0) || (ymax+0 <= 0))
    { error("\"ymin\" and \"ymax\" should be positive"); }
}

function print_text(str, color)
{
  # Writes "str" to standard output in the given color (a hex string
  # for the "color" attribute of <font>; "" means no font element).
  # The global "current_color" tracks the currently open <font>
  # element, so markup is emitted only when the color changes.
  # An empty "str" prints nothing and leaves "current_color" alone.
  if (str == "")
    { return; }

  if (color != current_color)
    { # Close the old font element, if any, and open the new one.
      if (current_color != "") { printf "</font>"; }
      if (color != "") { printf "<font color=#%s>", color; }
      current_color = color;
    }

  # Escape the HTML metacharacters; "&" goes first so that the
  # ampersands of the other two entities are not escaped again.
  gsub(/[&]/, "\\&amp;", str);
  gsub(/[<]/, "\\&lt;", str);
  gsub(/[>]/, "\\&gt;", str);
  printf "%s", str;
}

function error(msg)
{
  # Prints "msg" on the standard error stream, records the failure in
  # the global "abort", and terminates the program.  Never returns.
  #
  # The exit status is given here explicitly: when this function is
  # reached from the END rule (e.g. through flush_tup), "exit" ends
  # the program at once, so the status passed here is the final one.
  # From BEGIN or a main rule, "exit" transfers control to the END
  # rule, which exits with "abort" again.
  printf "%s\n", msg >>  "/dev/stderr";
  abort = 1;
  exit abort;
}

function abs(x)
{
  # Absolute value of "x".
  if (x >= 0)
    { return x; }
  return -x;
}

function rgb_from_hue(rgb, h,   hf, hi)
{
  # Stores in "rgb[0..2]" the fully saturated color of hue "h".
  #
  #   rgb  array that receives the red, green and blue components,
  #        each in the range 0..1;
  #   h    hue, taken modulo 1: 0 is red, 1/3 is green, 2/3 is blue.
  #
  # The hue circle is divided into six sectors; "hi" is the sector
  # index and "hf" the fractional position inside that sector.

  # Reduce "h" to the range 0..1:
  while (h >= 1) { h = h - 1; }
  while (h < 0) { h = h + 1; }
  h = 6*h;
  hi = int(h); hf = h - hi;
  # Rounding can bring the reduced hue up to exactly 1 (for instance
  # when "h" is a tiny negative number), which yields sector 6.  That
  # is the same color as the start of sector 0.
  if (hi >= 6) { hi = 0; hf = 0; }
  if (hi == 0)
    { rgb[0] = 1;    rgb[1] = hf;   rgb[2] = 0;    }
  else if (hi == 1)
    { rgb[0] = 1-hf; rgb[1] = 1;    rgb[2] = 0;    }
  else if (hi == 2)
    { rgb[0] = 0;    rgb[1] = 1;    rgb[2] = hf;   }
  else if (hi == 3)
    { rgb[0] = 0;    rgb[1] = 1-hf; rgb[2] = 1;    }
  else if (hi == 4)
    { rgb[0] = hf;   rgb[1] = 0;    rgb[2] = 1;    }
  else if (hi == 5)
    { rgb[0] = 1;    rgb[1] = 0;    rgb[2] = 1-hf; }
}

function y_from_rgb(rgb,   y)
{
  # Intensity of the color "rgb[0..2]": a weighted sum of the red,
  # green and blue components with weights 0.30, 0.60 and 0.10.
  y = 0.30*rgb[0];
  y = y + 0.60*rgb[1];
  y = y + 0.10*rgb[2];
  return y;
}

function rgb_fix_y(rgb, y,   yy, ar, aw)
{
  # Mixes white or black into the color "rgb[0..2]", in place, so
  # that its intensity (as given by y_from_rgb) becomes "y".
  #
  #   rgb  array with the red, green and blue components;
  #   y    the desired intensity.
  #
  # The color is left untouched when it already has intensity "y".
  yy = y_from_rgb(rgb);
  if (yy < y)
    { # Mix white: the result is "ar*rgb + aw*white", with "ar + aw = 1".
      # A color whose intensity is exactly 1 cannot be brightened (the
      # mixing weights would require a division by zero); leave it.
      if (yy == 1) { return; }
      ar = (1-y)/(1-yy);
      aw = (y-yy)/(1-yy);
      rgb[0] = ar*rgb[0] + aw;
      rgb[1] = ar*rgb[1] + aw;
      rgb[2] = ar*rgb[2] + aw;
    }
  else if (yy > y)
    { # Mix black: black contributes nothing, so just scale by "ar".
      # Here "yy > y", so "yy" is zero only for a negative "y".
      ar = y/yy;
      rgb[0] = ar*rgb[0];
      rgb[1] = ar*rgb[1];
      rgb[2] = ar*rgb[2];
    }
}

function gamma(r)
{
  # Intensity correction applied to a color component before it is
  # converted to an 8-bit value: the square root of "r".
  return (sqrt(r));
}

function xcolor_from_rgb(rgb,   k, hex)
{
  # Converts the color "rgb[0..2]" to a hexadecimal string "rrggbb".
  # Each component goes through gamma(), is scaled by 255 and rounded
  # to the nearest integer, and is written as two or more hex digits.
  hex = "";
  for (k = 0; k <= 2; k++)
    { hex = (hex sprintf("%02x", int(gamma(rgb[k])*255 + 0.5))); }
  return hex;
}

function xcolor_from_val(v,ymin,ymax,    y,rgb)
{
  # Returns the hexadecimal color string for the value "v".
  #
  #   v           value, clipped here to the range 0..1;
  #   ymin, ymax  intensities assigned to "v = 0" and "v = 1".
  #
  # The hue is "0.6667 - v", and the intensity is interpolated
  # geometrically between "ymin" and "ymax".
  if (v < 0)
    { v = 0; }
  else if (v > 1)
    { v = 1; }

  y = ymin*exp((v)*log(ymax/ymin));
  rgb_from_hue(rgb, 0.6667 - v);
  rgb_fix_y(rgb, y);
  # printf "v = %7.3f  y = %6.4f  rgb = (%6.4f,%6.4f,%6.4f)\n", \
  #   v, y, rgb[0], rgb[1], rgb[2] > "/dev/stderr";
  return xcolor_from_rgb(rgb);
}

function xcolor_from_r_g_b_y(r,g,b,y,   rgb)
{
  # Returns the hexadecimal color string for the color with
  # components "r", "g", "b" after its intensity is adjusted to "y"
  # by mixing in white or black.
  split("", rgb);
  rgb[0] = r;
  rgb[1] = g;
  rgb[2] = b;
  rgb_fix_y(rgb, y);
  return xcolor_from_rgb(rgb);
}