# Last edited on 2002-01-16 01:51:27 by stolfi

# Defines functions to perform Roman-like number encoding and decoding.
# To be included in gawk programs with the sub-package roman-encoding-XXX.gawk, 
# for some XXX. The client must call renc_init() from that sub-package 
# before any use of renc_encode or renc_decode.

function renc_encode(num, n,res,d0,d1,d2,d3)
{
  n = num;
  d0 = (n % renc_base0); n = (n - d0)/renc_base0;
  d1 = (n % renc_base1); n = (n - d1)/renc_base1;
  d2 = (n % renc_base2); n = (n - d2)/renc_base2;
  d3 = (n % renc_base3); n = (n - d3)/renc_base3;
  d4 = (n % renc_base4); n = (n - d4)/renc_base4;
  if (n != 0) { renc_error(("index " num " is too big to encode")); }
  res = "";
  res = renc_add_digit(res, renc_digs4[d4]);
  res = renc_add_digit(res, renc_digs3[d3]);
  res = renc_add_digit(res, renc_digs2[d2]);
  res = renc_add_digit(res, renc_digs1[d1]);
  res = renc_add_digit(res, renc_digs0[d0]);
  return res;
}

function renc_add_digit(code,dgcode,  fld)
{
  # Adds a digit "dgcode" around the given "code" word.
  # "dgcode" must have a ":" in it that specifies where to insert "code".
  split(dgcode, fld, ":");
  return ( fld[1] code fld[2] );
}

function renc_decode(code,errcode,  c,num,d0,d1,d2,d3)
{
  # If the "code" is a valid code, returns its numeric value.
  # Otherwise returns the arbitrary string "errcode". 
  
  c = ( code ":0" )
  c = renc_remove_digit(c, renc_digs0, renc_mul0);
  c = renc_remove_digit(c, renc_digs1, renc_mul1);
  c = renc_remove_digit(c, renc_digs2, renc_mul2);
  c = renc_remove_digit(c, renc_digs3, renc_mul3);
  c = renc_remove_digit(c, renc_digs4, renc_mul4);
  if (c !~ /[:]/) { return errcode; }
  return 0 + substr(c,2);
}

function renc_remove_digit(cdn,digs,mul,   \
  code,num,ndgmax,skip,dgval,nc,i,dgcode,ndg,fld,npr,nsf \
)
{ 
  # The "cdn" argument should have the form "code:num" where "code" is a
  # codeword and "num" a decimal number. Removes a maximal-length
  # prefix-suffix pair from of "code" that matches some digit pattern
  # "digs[i]". Then returns "codeN:numN" where "codeN" is "code" with
  # "digs[i]" removed, "numN = num = mul*i". Each digit pattern must
  # have a ":" separating the prefix and suffix parts.

  split(cdn,fld,":");
  code = fld[1]; num = fld[2];

  ndgmax = -1;
  skip = -1;
  dgval = -1;
  nc = length(code);
  for (i in digs)
    { dgcode = digs[i];
      ndg = length(dgcode) - 1;
      if (ndg <= nc)
        { split(dgcode, fld, ":");
          npr = length(fld[1]); nsf = length(fld[2]);
          if ((substr(code,1,npr) == fld[1]) && (substr(code,nc-nsf+1) == fld[2]))
            { if (ndg > ndgmax) { ndgmax = ndg; skip = npr; dgval = i; } }
        }
    }
  if (ndgmax >= 0)
    { return (substr(code, skip+1, nc-ndgmax) ":" num + mul*dgval); }
  else
    { # renc_error(("no digit matches \"" code "\""));
      return "";
    }
}

function renc_error(msg)
{ 
  printf "%s\n", msg > "/dev/stderr";
  abort = 1;
  exit 1
}