# Last edited on 2002-01-16 01:25:12 by stolfi

# To be included in gawk programs together with roman-encoding.gawk
# Defines renc_init() for Voynichese-looking Roman-like numerals

function renc_init()
{
  # Sets up the global tables of a five-digit, mixed-radix numeral system
  # whose digits are spelled with Voynichese-looking letters.
  #
  # For each digit position K = 0..4 this defines:
  #   renc_baseK   number of distinct values of that digit (6 for all),
  #   renc_mulK    positional weight, i.e. the product of the lower bases,
  #   renc_digsK   0-indexed table with the spelling of each digit value.
  # Every spelling contains one ":"; presumably it separates the part that
  # goes into the word's prefix from the part that goes into its suffix
  # (the "split prefix/suffix digits" scheme) -- confirm in renc_encode().
  #
  # Design intent (from the original author): the length distribution of
  # the codes, sampled over the entire range, should be quite similar to
  # the word length distribution of Voynichese.
  #
  # The "lengths" notes below give how many spellings have 0, 1, 2
  # letters, not counting the ":" itself.

  # All five positions use the same radix.
  renc_base0 = 6;
  renc_base1 = 6;
  renc_base2 = 6;
  renc_base3 = 6;
  renc_base4 = 6;

  # Positional weights: 1, 6, 36, 216, 1296.
  renc_mul0 = 1;
  renc_mul1 = renc_mul0*renc_base0;
  renc_mul2 = renc_mul1*renc_base1;
  renc_mul3 = renc_mul2*renc_base2;
  renc_mul4 = renc_mul3*renc_base3;

  # Digit 0 -- lengths (0,3,3)
  renc_init_digits(renc_digs0, ":o a: :y y:o a:y o:y");

  # Digit 1 -- lengths (1,4,1)
  renc_init_digits(renc_digs1, ": :d :s d: s: d:s");

  # Digit 2 -- lengths (1,4,1)
  renc_init_digits(renc_digs2, ": :r :l r: l: l:r");

  # Digit 3 -- lengths (1,3,2)
  renc_init_digits(renc_digs3, ": :e :i c: c:i c:e");

  # Digit 4 -- lengths (0,3,3)
  renc_init_digits(renc_digs4, "k: t: p: k:h t:h p:h");

  # Largest encodable number (6^5 - 1 = 7775) and its code.
  # renc_encode() is not defined in this file; per the header it is
  # expected to come from roman-encoding.gawk.
  renc_max_num = renc_mul4 * renc_base4 - 1;
  renc_max_code = renc_encode(renc_max_num);
}

function renc_init_digits(digs, list,    tok, n, i)
{
  # Private helper of renc_init(): stores the blank-separated words of
  # {list} into the array {digs} at indices 0, 1, 2, ...
  # {tok}, {n}, {i} are local work variables, not arguments.
  n = split(list, tok, " ");
  for (i = 1; i <= n; i++) { digs[i-1] = tok[i]; }
}