#! /usr/bin/python -t
# _*_ coding: iso-8859-1 _*_
# Last edited on 2023-01-19 00:19:32 by stolfi

# Identification strings for this module.  {MODULE_NAME} is also used by
# {error()} as the prefix of the messages it writes to {stderr}.
MODULE_NAME = "txtable"
MODULE_DESC = "Tools for handling plain-text tables"
MODULE_VERS = "1.0"

# !!! TO DO : accept {infoColSep,headerColSep,ruleColSep,commentSep} as parameters.
# !!! TO DO : extend empty rule fields to blanks, non-empty ones to dashes.
# !!! TO DO : write the {MODULE_INFO} string.

# NOTE(review): the character after "Copyright" looks like a copyright sign
# that was decoded with the wrong 8-bit charset -- confirm the file encoding
# against the coding declaration at the top of the file before changing it.
MODULE_COPYRIGHT = "Copyright Š 2008 State University of Campinas"

# Placeholder: the module description text has not been written yet (see TO DO above).
MODULE_INFO = "!!! MODULE_INFO to be written"

import sys
import re
import string
from decimal import *

# MODULE FUNCTIONS 

def parse_row(lin):
  """Cleans up the text line {lin} and splits it into its main parts.

  Returns the 5-tuple {(tag, ind, fld, cmt, err)}:
    {tag} the line tag, a one-character string: ' ' for a blank line,
          '+' for a rule line, '!' for a header line, '|' for an info line.
    {ind} the indentation of the line body (an integer; 0 for blank lines).
    {fld} the list of fields obtained by splitting the body at every
          occurrence of {tag} (an empty list for blank lines).
    {cmt} the trailing comment, including the blanks that precede the '#'.
    {err} an error message (a string), or {None} if all went well.

  Blanks inside the fields are NOT stripped.  Note that the line '  # foo'
  yields {(' ', 0, [], '  # foo', None)}: the blanks before the '#' belong
  to the comment, not to the indentation.
  """

  # Normalize the raw text while preserving its visual layout: no-break
  # spaces and form feeds become blanks, carriage returns vanish, TABs
  # are expanded, and trailing blanks are dropped.
  text = re.sub(r"[\240\014]", " ", lin)
  text = re.sub(r"[\015]", "", text)
  text = text.expandtabs()
  text = re.sub(r"[ ]+$", "", text)

  # Separate the body from the comment (the first '#' starts the comment):
  parts = re.search(r"^(.*?)([ ]*([#].*|))$", text)
  if parts is None : prog_error("duh?")
  body, cmt = parts.group(1), parts.group(2)

  # A line with an empty body is a blank line, whatever its comment:
  if body == "" :
    return " ", 0, [ ], cmt, None

  # Peel off the margin; its width is the indentation:
  margin = re.search(r"^([ ]*)(.*)$", body)
  if margin is None : prog_error("deh?")
  ind = margin.end(1)
  body = margin.group(2)
  if body == "" : prog_error("boh?")

  # Classify the body; the order of the tests matters:
  err = None
  if re.match(r"^[-+ ]*$", body) is not None :
    # Only dashes, pluses and blanks: a rule line.
    tag = "+"
  elif re.search(r"[!]", body) is not None :
    # Has a '!': a header line, suspicious if it also has a '|'.
    tag = "!"
    if re.search(r"[|]", body) is not None :
      err = "ambiguous header/info row"
  else :
    # Anything else is an info line.
    tag = "|"

  # The tag character is also the column separator of the line:
  fld = re.split("[" + tag + "]", body)
  if len(fld) < 1 : prog_error("beh?")

  return tag, ind, fld, cmt, err
# ----------------------------------------------------------------------
   
def field_tot_rank(xv) :
  """Returns the totalling-rank of the field string {xv}, or {None}.

  Leading blanks are ignored.  If what remains is 'total', 'Total' or
  'TOTAL' (optionally followed by blanks), preceded by zero or more
  'sub', 'Sub' or 'SUB' prefixes (each optionally followed by '-' or
  '_'), the result is the number of such prefixes.  Any other string
  yields {None}.
  """

  # Ignore leading blanks:
  rest = re.sub(r"^[ ]+", "", xv)

  # Peel off the "sub" prefixes one at a time, counting them:
  prefix = re.compile(r"^(SUB|Sub|sub)[-_]?")
  rank = 0
  hit = prefix.match(rest)
  while hit :
    rank += 1
    rest = rest[hit.end():]
    hit = prefix.match(rest)

  # What is left must be the word "total" itself:
  if re.match(r"(TOTAL|Total|total)[ ]*$", rest) :
    return rank
  return None
# ----------------------------------------------------------------------

def numeric_value(xv, frSep, thSep, altZero) :
  """Returns the numeric value of the field {xv} as a {Decimal}, or {None}.

  Parameters:
    {xv}      the field text; surrounding blanks are ignored.  Assumed
              to be non-blank.
    {frSep}   the fraction separator (one character), or {None} if
              fractions are not allowed.
    {thSep}   the thousands separator (one character), or {None}.
    {altZero} an alternative spelling of zero; {None} or '' for none.

  The result is {Decimal(0)} if the field is exactly {altZero} or a lone
  {frSep}.  Otherwise thousands separators are removed by {strip_thsep()},
  {frSep} is turned into '.', and the text must then be an optional sign
  followed by digits with at most one '.' (digits on at least one side).
  Returns {None} if the field is not a well-formed number.  A negative
  zero is normalized to plain zero.
  """

  # Strip leading and trailing blanks:
  xv = re.sub(r"^[ ]+", "", xv)
  xv = re.sub(r"[ ]+$", "", xv)

  if (altZero != "" and xv == altZero) or xv == frSep :
    # Field is a variant of zero:
    return Decimal(0)
  if not re.search(r"[0-9]", xv) :
    # Field contains no digits:
    return None

  # May be a fractional number, possibly with {thSep,frSep}.
  xv = strip_thsep(xv, thSep)
  if frSep is None :
    # There must be no fraction point:
    if "." in xv : return None
  elif frSep != "." :
    # A literal "." would be ambiguous once {frSep} is mapped to ".":
    if "." in xv : return None
    # Change the {frSep}, if present, to ".":
    # (str methods are used here because the {string} module functions
    # of the same names do not exist in Python 3.)
    xv = xv.replace(frSep, ".")

  # Now parse as a US-style number:
  if re.match(r"^[-+]?[0-9]+([.][0-9]*|)$", xv) :
    # Sign, nonempty integer, optional "." and fraction.
    v = Decimal(xv)
  elif re.match(r"^[-+]?[.][0-9]+$", xv) :
    # Sign, ".", nonempty fraction.
    v = Decimal(xv)
  else :
    return None

  # Normalize minus-zero to ordinary zero:
  if v == Decimal(0) : v = Decimal(0)
  return v
# ----------------------------------------------------------------------

def format_value(v, pr, th, ps, frSep, thSep, altZero):
  """Typesets the decimal numeric value {v} with the given format parameters.

  Parameters:
    {pr}      number of digits after the fraction point; {None} or
              negative for integer format.
    {th}      if true, thousands-separators are inserted every three
              digits, counting from the fraction point.
    {ps}      if true, positive values get an explicit '+'.
    {frSep}   character to use as fraction separator, or {None}.
    {thSep}   character to use as thousands-separator, or {None}.
    {altZero} special representation for the value 0, or {None}.

  A zero value is delegated to {format_zero_value()} and never gets
  thousands-separators; any other value goes through
  {format_nonzero_value()} and then, if {th} is true, {insert_thsep()}.
  """

  # Zero has its own conventions:
  if v == Decimal(0) :
    return format_zero_value(v, pr, frSep, altZero)

  text = format_nonzero_value(v, pr, ps, frSep)
  # Insert the thousands separator only if requested:
  return insert_thsep(text, pr, thSep) if th else text
# ----------------------------------------------------------------------

def format_zero_value(v, pr, frSep, altZero) :
  """Formats a zero value ({v} itself is not inspected).

  The text used is, in order of preference:
    (1) {altZero}, if it is not {None};
    (2) the fraction separator {frSep}, if {pr} is a non-negative integer;
    (3) a single '0'.
  In cases (1) and (2), if {pr} is positive, {pr} blanks are appended so
  that the result lines up with the other numbers of the column.
  """

  fractional = (pr is not None) and (pr >= 0)

  if altZero is not None :
    # Case (1): the client's preferred spelling of zero.
    text = altZero
  elif not fractional :
    # Case (3): integer column, plain "0" with no padding.
    return "0"
  else :
    # Case (2): fractional column, a lone fraction separator.
    if frSep is None or len(frSep) != 1 : prog_error("wee!")
    text = frSep

  # Blanks standing in for the missing fraction digits:
  padding = pr if (pr is not None and pr > 0) else 0
  return text + " " * padding
# ----------------------------------------------------------------------

def format_nonzero_value(v, pr, ps, frSep) :
  """Formats the nonzero decimal value {v}.

  If {pr} is {None} or negative, {v} is rounded (half-up) to an integer
  and printed without a fraction point.  Otherwise it is rounded
  (half-up) to {pr} fraction digits and printed with {frSep} as the
  fraction separator; with {pr} equal to 0 the separator is still
  printed, after the last digit.  In either case, if the rounded value
  is positive and {ps} is true, an explicit '+' is prepended.
  """

  as_integer = (pr is None) or (pr < 0)

  if as_integer :
    # Integer ("d"-like) format:
    v = v.to_integral(rounding=ROUND_HALF_UP)
    xv = str(v)
  else :
    if frSep is None or len(frSep) != 1 : prog_error("woo!")
    # Fixed-point ("f"-like) format; only the exponent of the quantum matters:
    quantum = Decimal((0, (1,), -pr))
    v = v.quantize(quantum, rounding=ROUND_HALF_UP)
    xv = str(v)
    if pr == 0 : xv += "."

  if v > 0 and ps : xv = "+" + xv

  # Sanity check that the text reads back as the value.
  # NOTE(review): {v} was already replaced by its rounded value above, so
  # this compares the text with the rounded value, not with the value
  # the caller passed in -- confirm whether that is what was intended.
  if Decimal(xv) != v : prog_error("unexpected rounding of Ť%sť to Ť%sť" % (v, xv))

  # Swap the "." for the client's fraction separator, if different:
  if (not as_integer) and frSep is not None and frSep != "." :
    xv = xv.replace(".", frSep, 1)
  return xv
# ----------------------------------------------------------------------

def insert_thsep(xv, pr, ch) :
  """Inserts the thousands-separator {ch} into the formatted number {xv}.

  Assumes that {xv} was formatted with precision {pr}, with some
  one-character fraction separator and no thousands-separators.  The
  single non-digit character {ch} is inserted every three digits on both
  sides of the fraction separator, counting from it.  If {pr} is {None}
  or negative, {xv} is treated as an integer.
  """

  if ch is None or len(ch) != 1 : prog_error("ulp!")

  if pr is None or pr < 0 :
    # No fraction part at all:
    return insert_thsep_int(xv, ch)

  # The last {pr} characters are the fraction digits, and the character
  # just before them is the fraction separator:
  cut = len(xv) - pr
  ip, pt, fp = xv[0 : cut-1], xv[cut-1 : cut], xv[cut:]
  return insert_thsep_int(ip, ch) + pt + insert_thsep_frac(fp, ch)
# ----------------------------------------------------------------------

def insert_thsep_int(ip, ch) :
  """Inserts the thousands-separator {ch} into the integer part {ip} of a number.

  {ip} may start with a sign.  {ch} must be non-empty and must not be a
  digit.  Groups of three digits are split off from the right end of the
  leading run of digits until fewer than four digits remain ungrouped.
  """

  # Sign and digits, then the last three digits of the ungrouped run,
  # then whatever was already grouped (which starts with a non-digit):
  pattern = re.compile(r"^([-+]?[0-9]+)([0-9][0-9][0-9])(|[^0-9].*)$")

  found = pattern.match(ip)
  while found :
    head, triple, tail = found.groups()
    ip = head + ch + triple + tail
    found = pattern.match(ip)
  return ip
# ----------------------------------------------------------------------

def insert_thsep_frac(fp, ch) :
  """Inserts the thousands-separator {ch} into the fraction part {fp} of a number.

  {fp} is expected to be a string of digits, optionally preceded by one
  non-digit character (which is kept as is).  {ch} must be non-empty and
  must not be a digit.  The digits are split into groups of three
  counting from the left, i.e. from the fraction point, so '1234567'
  becomes '123,456,7' when {ch} is ','.  Strings with three digits or
  fewer, or that do not have the expected shape, are returned unchanged.
  """

  shape = re.match(r"^([^0-9]?)([0-9]+)$", fp)
  if not shape or len(shape.group(2)) <= 3 :
    # Nothing to separate:
    return fp

  lead, digits = shape.group(1), shape.group(2)
  # Cut ALL groups of three, not just the first one:
  groups = [ digits[k : k+3] for k in range(0, len(digits), 3) ]
  return lead + ch.join(groups)
# ----------------------------------------------------------------------

def strip_thsep(xv, thSep) :
  """Removes the thousands-separators {thSep} from {xv}.

  Only occurrences of {thSep} that lie between two digits are removed;
  any other occurrence is left in place.  If {thSep} is {None}, {xv} is
  returned unchanged; otherwise {thSep} must be a single character (it
  is a program error if it is not).
  """

  if thSep is None : return xv
  if len(thSep) != 1 : prog_error("eek!")

  # The separator is escaped so that characters that are special in a
  # regular expression ('^', ']', '\\', '.', ...) are taken literally.
  # Lookarounds keep the neighbouring digits out of the match, so all
  # separators are removed in a single pass.
  pat = r"(?<=[0-9])" + re.escape(thSep) + r"(?=[0-9])"
  return re.sub(pat, "", xv)
# ----------------------------------------------------------------------

def get_precision(xv, frSep) :
  """Returns the precision (number of fraction digits) of the number text {xv}.

  Assumes that any thousands-separators have already been removed.
  Trailing blanks are ignored.  The result is the number of characters
  that follow the first occurrence of {frSep}, which must be a single
  character; or -1 if {frSep} is {None} or does not occur in {xv}.
  """

  if frSep is None : return -1
  if len(frSep) != 1 : prog_error("ook!")

  # Trailing blanks do not count as fraction digits:
  text = re.sub(r"[ ]+$", "", xv)

  # Everything after the first separator is the fraction part:
  head, sep, fraction = text.partition(frSep)
  if sep == "" :
    return -1
  return len(fraction)
# ----------------------------------------------------------------------
 
def error(msg) :
  """Reports the user-level error {msg} and terminates the program.

  The message is written to {stderr}, prefixed with {MODULE_NAME}; the
  process then exits with status 1.
  """
  text = "%s: %s\n" % (MODULE_NAME, msg)
  sys.stderr.write(text)
  raise SystemExit(1)
# ----------------------------------------------------------------------

# THE TABLE CLASS

class TxTable :
  """A table of numeric and alphabetic items, built from parsed text rows.

  Rows are appended with {add_row()}.  {choose_column_formats()} decides
  which columns are numeric and how they are formatted,
  {recompute_and_reformat_values()} rewrites the fields accordingly
  (recomputing totals), and {printout()} writes the table to {stdout}.
  """

  def __init__(tbl, frSep, thSep, altZero) :
    """Creates an empty table.

    Parameters:
      {frSep}   fraction separator: a single character that is not a
                blank, digit, '-', '+', '|' or '!'; or {None} if
                fractions are not allowed.
      {thSep}   thousands separator: a single character that is not a
                digit, '-', '+', '|' or '!', and differs from {frSep};
                or {None} if not allowed.
      {altZero} preferred representation of zero: a string that contains
                none of '+', '|', '!' or the digits 1-9; or {None}.

    Calls {error()}, which terminates the program, if any parameter is
    not acceptable.
    """
    tbl.debug = 0

    if frSep is not None :
      if len(frSep) != 1 or re.match(r"[-+|!0-9 ]", frSep) :
        error("invalid fraction separator")
    if thSep is not None :
      if len(thSep) != 1 or re.match(r"[-+|!0-9]", thSep) :
        error("invalid thousands separator")
      if thSep == frSep :
        error("fraction and thousands separators must be distinct")
    if altZero is not None :
      # The forbidden characters may not occur ANYWHERE in the string:
      if re.search(r"[+|!1-9]", altZero) :
        error("invalid alternate zero format")

    tbl.frSep = frSep      # Fraction separator (one char, or {None} if no fracs allowed).
    tbl.thSep = thSep      # Thousands separator (one char, or {None} if not allowed).
    tbl.altZero = altZero  # Preferred representation for zero, or {None} if none.

    tbl.ind = -1    # Indentation of table, or -1 if not known.
    tbl.ncols = -1  # Number of columns in table, or -1 if not known.
    tbl.nrows = 0   # Number of rows in table.
    # These items are indexed by {[i]} where {i} is row in {0..nrows-1}:
    tbl.tag = [ ]   # Tag of row {i} (a single char, " ", "!", "+", or "|").
    tbl.cmt = [ ]   # Comment of row {i}.
    tbl.trk = [ ]   # Total-rank of row {i}, or {None} if it is not a total row.

    # These items are indexed by {[j]} where {j} is a column in {0..ncols-1}:
    tbl.nump = [ ]  # Tells whether column is numeric (false or true, or {None} if unknown).
    tbl.prec = [ ]  # Number of digits after fraction sep or -1 if all ints (int, or {None}).
    tbl.thfg = [ ]  # Tells whether to use thousands-separators in this column (or {None}).
    tbl.psfg = [ ]  # Tells whether nonzero values should have explicit sign (or {None}).

    # These items are indexed by {[i][j]} where {i} is row and {j} is column.
    # {fld[i]} is an empty list and {val[i]} is {None} when {tag[i]} is " ".
    tbl.fld = [ ]   # Table entry in row {i} and column {j} (printable string).
    tbl.val = [ ]   # Table values (decimal numbers or {None}).
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  def add_row(tbl, tag, ind, fld, cmt):
    """Appends a new row to the table.

    The row's components {tag,ind,fld,cmt} are as returned by
    {parse_row()}.  The list {fld} is stored in the table as is (not
    copied), and for info rows its elements are replaced by their
    blank-stripped versions.

    If the row is not blank, sets {tbl.ncols} from {len(fld)} (first
    non-blank row) or checks it against {len(fld)} (later rows), and
    lowers {tbl.ind} to {ind} if needed.  For info rows, also sets the
    numeric values {tbl.val[i][j]} of all fields, using the current
    attributes {frSep,thSep,altZero}, and sets {tbl.trk[i]} to the
    total-rank of the row, if any field is a (sub)total label.  In
    header and rule rows all values are {None}.

    Calls {error()} if the number of fields is inconsistent with that of
    the previous rows.
    """

    # Get the index of this new row:
    i = tbl.nrows

    # Extend the row lists (exactly one new entry each) and save the row's attributes:
    tbl.tag.append(tag)
    tbl.cmt.append(cmt)
    tbl.fld.append(fld)
    tbl.val.append(None)
    tbl.trk.append(None)

    # Dispatch on {tag}:
    if tag == " " :
      # Blank row, must have zero fields and zero indent:
      if len(fld) != 0 : prog_error("xii!")
      if ind != 0 : prog_error("xoo!")
    elif tag in ("+", "|", "!") :
      # Non-blank row, must have at least one field:
      if len(fld) <= 0 : prog_error("xee!")
      # Check/set {tbl.ncols}, update {tbl.ind}:
      if tbl.ncols == -1 :
        # First non-blank row:
        if tbl.ind >= 0 : prog_error("eta!")
        tbl.ncols = len(fld)
        tbl.ind = ind
        # Allocate the column format attribute lists:
        tbl.nump = [ None ] * tbl.ncols
        tbl.prec = [ None ] * tbl.ncols
        tbl.thfg = [ None ] * tbl.ncols
        tbl.psfg = [ None ] * tbl.ncols
      else :
        if tbl.ind < 0 : prog_error("eca!")
        if ind < tbl.ind : tbl.ind = ind
        if tbl.ncols != len(fld) :
          error("row %d: inconsistent number of columns = %d %d\n  %s" % (i+1, tbl.ncols, len(fld), fld[0]))

      # Obtain the numeric values of fields, and set {tbl.trk[i]}:
      tbl.val[i] = [ None ]*tbl.ncols
      if tag == "|" :
        for j in range(tbl.ncols) :
          # Strip leading and trailing blanks from field:
          xvij = tbl.fld[i][j]
          xvij = re.sub(r"[ ]+$", "", xvij)
          xvij = re.sub(r"^[ ]+", "", xvij)
          tbl.fld[i][j] = xvij
          tbl.val[i][j] = numeric_value(xvij, tbl.frSep, tbl.thSep, tbl.altZero)
          # If several fields are (sub)total labels, the last one wins:
          trk = field_tot_rank(xvij)
          if trk is not None : tbl.trk[i] = trk

    else :
      # Unexpected tag:
      prog_error("noo! tag = Ť%sť" % tag)

    # One more row:
    tbl.nrows += 1
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  def printout(tbl):
    """Writes the table in its current state to {stdout}.

    The data for each row {i} is taken from {tbl.tag[i],tbl.fld[i],tbl.cmt[i]}.
    Non-blank rows are preceded by {tbl.ind} blanks, and their fields are
    separated by the row's tag character.  Each field is extended to the
    width of its column, as given by {tbl.column_widths()}: rule fields
    become strings of '-', fields in columns with true {tbl.nump[j]} are
    padded at left, and all other fields are padded at right.  Every row,
    blank or not, ends with its comment and a newline.
    """

    # Determine the column widths:
    colw = tbl.column_widths()

    # Print row by row:
    for i in range(tbl.nrows) :
      tag = tbl.tag[i]
      if tag != " " :
        # Non-blank row.
        # Print the table's indentation:
        if tbl.ind > 0 : sys.stdout.write(" "*tbl.ind)
        # Print the table fields:
        for j in range(tbl.ncols) :
          # Print the separator between columns:
          if j > 0 : sys.stdout.write("%s" % tag)
          # Print the field:
          xvij = tbl.fld[i][j]
          wdij = colw[j]
          if tag == "+" :
            # Replace field by a string of '-'s:
            sys.stdout.write("-"*wdij)
          elif (j < len(tbl.nump)) and tbl.nump[j] :
            # Pad at left, even the header:
            sys.stdout.write("%*s" % (wdij, xvij))
          else :
            # Pad at right:
            sys.stdout.write("%*s" % (-wdij, xvij))
      # Print the row's comment:
      sys.stdout.write(tbl.cmt[i])
      sys.stdout.write("\n")
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  def column_widths(tbl) :
    """Returns a list with the width of each column.

    The width of column {j} is the maximum length of {tbl.fld[i][j]}
    over all non-blank rows {i}.  Returns an empty list if the table has
    no columns yet.
    """

    if tbl.ncols <= 0 :
      return [ ]

    colw = [ 0 ]*tbl.ncols
    for i in range(tbl.nrows) :
      if tbl.tag[i] != " " :
        # Non-blank row.
        for j in range(tbl.ncols) :
          colw[j] = max(colw[j], len(tbl.fld[i][j]))
    return colw
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  def choose_column_formats(tbl) :
    """Determines the formatting attributes of each column.

    The formatting attributes of column {j} are
    {tbl.nump[j],tbl.prec[j],tbl.thfg[j],tbl.psfg[j]}, where
    {tbl.prec[j],tbl.thfg[j],tbl.psfg[j]} are set only if {tbl.nump[j]}
    is true.  Assumes that {val[i][j]} is meaningful only if {i} is an
    info row.  If {tbl.debug} is true, also writes the attributes chosen
    to {stderr}.
    """

    for j in range(tbl.ncols) :
      tbl.nump[j] = tbl.are_column_entries_numeric(j)
      if tbl.nump[j] :
        # Determine {tbl.prec[j],tbl.thfg[j],tbl.psfg[j]}:
        tbl.prec[j], tbl.thfg[j], tbl.psfg[j] = tbl.get_column_format_params(j)

      if tbl.debug :
        # Debugging printouts:
        sys.stderr.write("column %2d" % j)
        sys.stderr.write("  nump = %d" % tbl.nump[j])
        if tbl.nump[j] :
          sys.stderr.write("  prec = %2d" % tbl.prec[j])
          sys.stderr.write("  thfg = %d" % tbl.thfg[j])
          sys.stderr.write("  psfg = %d" % tbl.psfg[j])
        sys.stderr.write("\n")
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  def are_column_entries_numeric(tbl, j) :
    """Tells whether column {j} is numeric, from the looks of its info fields.

    Returns true iff the column has at least one non-empty info field
    with a numeric value in {tbl.val}, and no non-empty info field
    without one.
    """

    # Count numeric and non-numeric info fields in column:
    nnum = 0
    nalf = 0
    for i in range(tbl.nrows) :
      if tbl.tag[i] == "|" and not re.match(r"^[ ]*$", tbl.fld[i][j]) :
        # Non-empty info field; tally it as numeric or alphabetic:
        if tbl.val[i][j] is not None :
          nnum += 1
        else :
          nalf += 1
    # Decide whether column is numeric or alphabetic:
    if tbl.debug:
      sys.stderr.write("#! column %d nnum = %d nalf = %d\n" % (j,nnum,nalf))
    return nnum > 0 and nalf == 0
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  def get_column_format_params(tbl, j) :
    """Determines the formatting parameters of column {j}.

    Returns {prec,thfg,psfg}, determined from the textual forms
    {tbl.fld[i][j]} of the info fields in column {j} and their numeric
    values {tbl.val[i][j]}:
      {prec} the largest precision of any field, or -1 if no field has
             a fraction separator;
      {thfg} true if any field contains removable thousands-separators;
      {psfg} true if any field with nonzero value has a leading '+'.
    Fields equal to {tbl.altZero} are ignored.
    """
    prec = -1
    thfg = 0
    psfg = 0
    for i in range(tbl.nrows) :
      if tbl.tag[i] == "|" :
        # Info line:
        xvij = tbl.fld[i][j]
        vij = tbl.val[i][j]
        if xvij != tbl.altZero :
          # Remove the thousands separators, if any:
          yvij = strip_thsep(xvij, tbl.thSep)
          # If there were any, mark the column as needing them:
          thfg = thfg or (yvij != xvij)
          # Get the precision of this field, or -1 if integer:
          pri = get_precision(yvij, tbl.frSep)
          prec = max(prec, pri)
          if tbl.debug :
            sys.stderr.write("  row %d  xvij = Ť%sť  yvij = Ť%sť  prec = %d\n" % (i, xvij, yvij, pri))
          # Check whether there is a leading '+' on a nonzero value:
          psfg = psfg or (vij != 0 and re.match(r"[ ]*[+]", yvij) is not None)

    return prec, thfg, psfg
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  def recompute_and_reformat_values(tbl) :
    """Reformats all header and info fields of {tbl}, recomputing (sub)totals.

    This method (1) strips leading and trailing blanks from all info and
    header fields, (2) recomputes the numeric value of any non-empty
    field in a (sub)total row and a numeric column, (3) rebuilds every
    non-empty info field in each numeric column from its numeric value,
    according to the column's common format, and (4) adds one blank
    around every non-empty header and info field (except at the table
    edges).  It does not affect rule rows.

    Uses the numeric values {tbl.val[i][j]}, the column format parameters
    {tbl.nump[j],tbl.prec[j],tbl.thfg[j],tbl.psfg[j]}, and the row
    total-ranks {tbl.trk[i]}; updates {tbl.fld[i][j]} and {tbl.val[i][j]}.
    """

    for i in range(tbl.nrows) :
      if tbl.tag[i] not in ("!", "|") :
        # Blank or rule row, leave it alone:
        continue
      # Header or info row -- reformat all fields (recomputing totals, if any):
      for j in range(tbl.ncols) :
        xvij = tbl.fld[i][j]
        vij = tbl.val[i][j]
        if tbl.debug :
          sys.stderr.write("Ť%sť = %s" % (xvij, vij))
        # Reformat the element, without any padding:
        if tbl.nump[j] and tbl.tag[i] == "|" :
          # Numeric column in info row.
          if re.match(r"^[ ]*$", xvij) :
            # Empty field; it must have no value, and stays empty:
            if vij is not None : prog_error("ixe!")
            xvij = ""
          else :
            if tbl.trk[i] is not None :
              # (Sub)total row, must recompute the value:
              vij = tbl.recompute_total(i,j)
            # Reformat the value in the column's common format:
            pr = tbl.prec[j]
            th = tbl.thfg[j]
            ps = tbl.psfg[j]
            xvij = format_value(vij,pr,th,ps,tbl.frSep,tbl.thSep,tbl.altZero)
        else :
          # Non-numeric column, or header row:
          # Just strip surrounding blanks:
          xvij = re.sub(r"^[ ]+", "", xvij)
          xvij = re.sub(r"[ ]+$", "", xvij)

        # Add padding where needed:
        if xvij != "" :
          # Non-empty field, add padding blanks except at table edges:
          if j > 0 : xvij = " " + xvij
          if j < tbl.ncols-1 : xvij = xvij + " "

        # Store the recomputed/reformatted field:
        tbl.fld[i][j] = xvij
        tbl.val[i][j] = vij
        if tbl.debug :
          sys.stderr.write(" --> Ť%sť = %s\n" % (xvij, vij))
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  def recompute_total(tbl,i,j) :
    """Computes the correct value of the (sub)total in row {i}, column {j}.

    Returns the sum (a {Decimal}) of the numeric values of all elements
    of column {j} that lie in non-total info rows above row {i}, going
    up until the first total row whose rank is less than or equal to
    that of row {i}, or until the top of the table.  Total rows of
    higher rank, and header, rule and blank rows, are skipped.

    Row {i} must be a total row and column {j} must be numeric; anything
    else is a program error.
    """

    if tbl.trk[i] is None : prog_error("ahh!")
    if not tbl.nump[j] : prog_error("fee!")
    total = Decimal(0)
    k = i - 1
    while k >= 0 :
      if tbl.tag[k] == '|' :
        # Info row:
        if tbl.trk[k] is None :
          # Ordinary line, accumulate it:
          if tbl.val[k][j] is not None : total += tbl.val[k][j]
        elif tbl.trk[k] <= tbl.trk[i] :
          # (Sub)total line of same or lower rank, stop:
          break
        # Else it is a (sub)total line of higher rank, which is ignored.
      k -= 1
    return total
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

def prog_error(msg):
  """Reports the internal (program) error {msg} and aborts.

  The message is written to {stderr} followed by a newline, with no
  prefix; the process then exits with status 1.
  """
  text = "%s\n" % msg
  sys.stderr.write(text)
  raise SystemExit(1)

# DOCUMENTATION

# Reference text describing the table format handled by this module:
# line structure, row types, fields, numeric columns, (sub)totals,
# canonical formatting, and the customization parameters.
format_INFO = \
  "  Each input line is parsed into three parts: the /margin/, the" \
  " /body/, and the /comment/.\n" \
  "\n" \
  "  If the line contains a '#' character, the comment consists of" \
  " all blanks that precede the '#', the '#' itself, and everything that" \
  " follows the '#'. Otherwise the comment part consists of" \
  " all trailing blanks in the line.\n" \
  "\n" \
  "  The margin then consists of the leading blanks that are not part of" \
  " the comment. The body is anything that is not margin or comment.\n" \
  "\n" \
  "ROW TYPES\n" \
  "  The body of each input line is classified into four line" \
  " types, each identified by a one-character /tag/:\n" \
  "\n" \
  "   a /blank line/, if the body is empty (tag = ' ').\n" \
  "\n" \
  "   an /info line/, if it contains at least one '|' (tag = '|');\n" \
  "\n" \
  "   a /header line/, if it contains at least one '!' and no '|' (tag = '!')\n" \
  "\n" \
  "   a /rule line/, if it contains at least one '+' and only blanks, '-' or '+' (tag = '+');\n" \
  "\n" \
  "  Also, any non-blank line that does not contain any explicit column separator" \
  " ('+', '!', or '|') is assumed to be an info line (tag '|').  Ditto for" \
  " a line that contains  both '|' and '!'.\n" \
  "\n" \
  "TABLE FIELDS\n" \
  "  Each line of the input contains zero or more /fields/. A blank" \
  " line has zero fields; otherwise, the occurrences of the tag in the" \
  " line's body split it into one or more fields.\n" \
  "\n" \
  " Fields are numbered from left to right, starting at 1.\n" \
  "\n" \
  " Note that if the body begins (resp. ends) with the tag character," \
  " the first (resp. last) field of that line will be empty. Note also" \
  " that one cannot have a header line with exactly one field, because" \
  " it will be parsed as an info line.\n" \
  "\n" \
  " TABLE ROWS AND COLUMNS\n" \
  "  A table is a set of lines (/rows/)  such that all non-blank" \
  " lines have the same number of fields.\n" \
  "\n" \
  " Thus, each table either has only blank lines, or has a well-defined" \
  " and positive number of columns, and a positive number of non-blank" \
  " rows. The columns are numbered from left to right, starting at 1.\n" \
  "\n" \
  " COLUMN NAMES\n" \
  "  If the table has a header row, each field of that row is taken" \
  " as the /name/ of that column, provided it is non-empty and" \
  " distinct from all previous headers. Otherwise the column remains" \
  " nameless. (Column names are relevant only for some programs, e.g." \
  " {table-join}.)\n" \
  "\n" \
  "NUMERIC AND ALPHABETIC FIELDS\n" \
  "  A field of an info row is considered /numeric/ if it is non-empty" \
  " and consists of an optional sign, followed by one or more decimal" \
  " digits, possibly with {frSep} (decimal point) and/or {thSep}" \
  " (thousands separator) characters. The {thSep} character may appear" \
  " only between two digits. The /numeric value/ of such a field is" \
  " the decimal integer or fraction obtained by removing any {thSep}" \
  " characters and replacing the {frSep} character by '.'. The {altZero}" \
  " string, if not empty, and a single {frSep} are also accepted as" \
  " numeric fields, with numeric value 0. A field is considered" \
  " alphabetic if it is non-empty and not numeric.\n" \
  "\n" \
  "NUMERIC AND ALPHABETIC COLUMNS\n" \
  "  Each column of a table is then classified as /numeric/ or /alphabetic/." \
  " A column is assumed to be numeric if its info rows contain" \
  " at least one numeric field and no alphabetic fields.  Note that" \
  " any non-empty, non-numeric field in an info row marks" \
  " the whole column as alphabetic.\n" \
  "\n" \
  " For each numeric column, the program also defines the /precision/" \
  " as being the maximum number of digits after the decimal point in" \
  " any info field, or -1 if no info field has an explicit decimal point." \
  " It also defines a /thousands-flag/, which is true if" \
  " and only if {thSep} is not empty and the column contains an" \
  " info field with thousands-separators.  It also sets" \
  " the /plus-flag/ if any nonzero info field in the column has an" \
  " explicit '+' sign.\n" \
  "\n" \
  "(SUB)TOTAL ROWS\n" \
  "  A /total row/ is an info row that contains the string 'TOTAL'," \
  " 'total', or 'Total' as one of its fields, prefixed by zero or more" \
  " instances of 'SUB', 'Sub', or 'sub' (with or without joining" \
  " hyphens). The number of such prefixes is the /rank/ of that total" \
  " row.\n" \
  "\n" \
  " A total row is /consistent/ if the numeric value of every field of" \
  " that row that belongs to a numeric column is equal to the sum of" \
  " the values of all fields in that column that are not total rows and" \
  " lie strictly between that row and the previous total row with the" \
  " same or lower rank (or the top of the table if there is no such" \
  " previous total row).\n" \
  "\n" \
  "CANONICAL FIELD FORMAT\n" \
  "  To print a table in its /canonical format/, the leading and" \
  " trailing blanks in each field are discarded (so a field which is all" \
  " blanks is assumed to be empty).  Then, every non-empty field" \
  " in an info row and a numeric column is replaced by its numeric value," \
  " converted to a string according to the column's format (the column's consensus precision," \
  " thousands-flag, and plus-sign-flag attributes).  Then, every non-empty field in an" \
  " info or header row is padded with an extra blank at the left (except" \
  " for the first field of the row) and one extra blank at the right" \
  " (except for the last field).\n" \
  "\n" \
  "NUMERIC FIELD FORMATTING\n" \
  "  When converting the numeric value 0 to its canonical representation," \
  " if the {altZero} parameter string is not empty, the result is the" \
  " {altZero} string followed by {max(0,prec)} blanks, where {prec} is the" \
  " column's precision; otherwise, if {prec} is not -1, it is a single" \
  " {frSep} followed by {prec} blanks; otherwise it is just '0'.\n" \
  "\n" \
  " When converting a nonzero numeric value to its canonical" \
  " representation, the value is first printed with {sprintf} using '%d'" \
  " or '%+d' format if {prec} is -1, or '%#.{prec}f' format or" \
  " '%#+.{prec}f' format if {prec} is non-negative; where the '+' form is" \
  " used iff the column's plus-flag is set. Then, if {thSep} is" \
  " non-empty and the column's thousands-flag is set, the character" \
  " {thSep} is inserted to separate the digits of the integer and fraction" \
  " parts in groups of three, starting at the fraction point.\n" \
  "\n" \
  "COLUMN WIDTHS\n" \
  "  The /column width/ of each column is defined as the maximum" \
  " length of any of its fields, including info, header, and rule" \
  " rows. The /table margin/ is also defined as the shortest margin" \
  " of any non-blank row in the table.\n" \
  "\n" \
  "COLUMN WIDTH REGULARIZATION\n" \
  "  To /regularize/ a column, every field in it is extended so as to match" \
  " the column width {wd}. In a rule row, that means replacing the" \
  " field by a string of {wd} '-'s. In a header or info row, that" \
  " means extending with blanks until its length is {wd}; the blanks are" \
  " added at the left in numeric columns, and at the right in" \
  " non-numeric columns.\n" \
  "\n" \
  "CUSTOMIZATION\n" \
  "  Clients can change the fraction separator {frSep}, the thousands separator {thSep}, the alternate zero representation {altZero}, the field separators {infoColSep}, {ruleColSep} and {headerColSep}, and the comment-lead character {commentSep}, at table initialization time.\n" \
  "\n" \
  "  There are some constraints on those parameters, necessary to ensure unambiguous parsing and avoid misleading results.  The {frSep} and {thSep} parameters must be the {None} value or" \
  " distinct single characters, not in [-+0123456789]. The {frSep}" \
  " character must be non-blank.  If {frSep} is {None}, numeric fields" \
  " may not have fractional parts.  If {thSep} is {None}, numeric fields" \
  " may not have thousands-separators.  The {altZero} string," \
  " if not {None}, must not contain [+123456789] (but may contain '-' and/or '0'," \
  " or embedded blanks).\n" \
  "\n" \
  "  The column separator {infoColSep} cannot be {None}; it must be a single character -- non-blank, not in [-+0123456789], distinct from {frSep} and {thSep} (if they are not {None}) and not present in {altZero} (if it is not {None}).\n" \
  "\n" \
  "  If the {headerColSep} is {None}, header lines are not allowed. Otherwise, {headerColSep} must be a single character -- non-blank, not in [-+0123456789], distinct from {infoColSep}, {frSep} and {thSep} (if they are not {None}) and not present in {altZero} (if it is not {None}).\n" \
  "\n" \
  "  If the {ruleColSep} is {None}, rule lines are not allowed.  Otherwise, {ruleColSep} must be a single character -- non-blank, not in [-0123456789] (but may be '+'), distinct from {infoColSep}, {headerColSep}, {frSep} and {thSep} (if they are not {None}) and not present in {altZero} (if it is not {None}).\n" \
  "\n" \
  "  If {commentSep} is {None}, comments are not allowed.  Otherwise the {commentSep} must be a single character -- non-blank, not in [-+0123456789], and distinct from {infoColSep}, {headerColSep}, {ruleColSep}, {frSep}, and {thSep} (if they are not {None}). "

def parse_args(pp) :
  "Parses command line arguments for text tables.\n" \
  "\n" \
  "  Expects an {ArgParser} instance {pp} containing the arguments," \
  " still unparsed.  Returns {tblSep,frSep,thSep,altZero,err}, where" \
  " {err} is an error message, if any (a string) or {None}.\n" \
  "\n" \
  "  Recognized keywords, each followed by one value:\n" \
  "    '-tblSep'   sets {tblSep}; the default is '^[ ]*END_TABLE[ ]*$'.\n" \
  "    '-frSep'    sets the fraction separator {frSep}; the default" \
  " is '.'.  The spelling '-fracSep' is accepted as a synonym.\n" \
  "    '-thSep'    sets the thousands separator {thSep}; the default is ','.\n" \
  "    '-altZero'  sets the alternate zero string {altZero}; the" \
  " default is {None}.\n" \
  "\n" \
  "  After parsing, the values are checked: {frSep} and {thSep} must" \
  " each be {None} or a single character that is not a sign, a digit," \
  " or one of '|', '!', '#'; they must be distinct; and {frSep} must" \
  " be non-blank.  The {altZero} string, if not {None}, must not" \
  " contain any of '+', '1'..'9', '|', '!', and must not begin or end" \
  " with a blank.  The first violation found is described in {err};" \
  " the parsed values are returned in any case."

  # sys.stderr.write("called txtable.parse_args\n")

  if pp.keyword_present("-tblSep") :
    tblSep = pp.get_next()
  else :
    tblSep = r"^[ ]*END_TABLE[ ]*$"

  # The usage text of this module spells the keyword '-fracSep', while
  # this parser historically looked for '-frSep'; accept both spellings.
  if pp.keyword_present("-frSep") or pp.keyword_present("-fracSep") :
    # NOTE(review): {mayBeNone} presumably lets the user ask for {None} -- confirm in {ArgParser}.
    frSep = pp.get_next_char(mayBeNone = 1)
  else :
    frSep = "."

  if pp.keyword_present("-thSep") :
    thSep = pp.get_next_char(mayBeNone = 1)
  else :
    thSep = ","

  if pp.keyword_present("-altZero") :
    altZero = pp.get_next(mayBeNone = 1)
  else :
    altZero = None

  # Characters that a numeric separator may not be: signs and digits
  # (they would be confused with the number itself), plus '|', '!' and
  # '#', which this module reserves as column separators and comment lead.
  bad_sep_chars = r"[-+0-9|!#]"

  # Being optimistic:
  err = None

  if (frSep is not None) and \
     ((len(frSep) != 1) or frSep.isspace() or (re.search(bad_sep_chars, frSep) is not None)) :
    err = "invalid parameter {frSep} = \"%s\"" % (frSep,)
  elif (thSep is not None) and \
     ((len(thSep) != 1) or (re.search(bad_sep_chars, thSep) is not None) or (thSep == frSep)) :
    # A blank {thSep} is deliberately allowed; only {frSep} is required to be non-blank.
    err = "invalid parameter {thSep} = \"%s\"" % (thSep,)
  elif (altZero is not None) and \
     ((re.search(r"[+1-9|!]", altZero) is not None) or (re.search(r"^[ ]|[ ]$", altZero) is not None)) :
    err = "invalid parameter {altZero} = \"%s\"" % (altZero,)

  return tblSep, frSep, thSep, altZero, err
  # ----------------------------------------------------------------------

# Usage synopsis of the text-table options, for inclusion in a client
# program's help text.  There is one bracketed option per line; every
# line but the last ends with a shell-style continuation backslash.
# NOTE(review): the column-separator and comment options are listed here
# although {parse_args} in this module does not read them -- confirm
# whether they are parsed elsewhere.
options_HELP = " \\\n".join(
  "    [ %s ]" % opt for opt in (
    "-fracSep {FR_SEP_CHAR}",
    "-thSep {TH_SEP_CHAR}",
    "-altZero {ALT_Z_STRING}",
    "-infoColSep {INFO_SEP_CHAR}",
    "-headerColSep {HEADER_SEP_CHAR}",
    "-ruleColSep {RULE_SEP_CHAR}",
    "-commentSep {CMT_SEP_CHAR}",
  )
)

options_INFO = \
  "  -fracSep {FR_SEP_CHAR}\n" \
  "    Defines the character to use as a fraction separator.  The default is '.'.\n" \
  "\n" \
  "  -thSep {TH_SEP_CHAR}\n" \
  "    Defines the character to use as a thousands separator.  The default is ','.\n" \
  "\n" \
  "  -altZero {ALT_Z_STRING}\n" \
  "    Defines a preferred representation for zero in numeric fields.  The" \
  " default is '0' in all-integer columns, or a single {FR_SEP_CHAR} in" \
  " columns that contain fractional numbers.\n" \
  "\n" \
  "  -infoColSep {INFO_SEP_CHAR}\n" \
  "  -headerColSep {HEADER_SEP_CHAR}\n" \
  "  -ruleColSep {RULE_SEP_CHAR}\n" \
  "    These parameters define the characters to use as column separators" \
  " in normal (data) rows, in header rows, and in rule rows.  They" \
  " default to '|', '!', and '+', respectively.\n" \
  "\n" \
  "  -commentSep {CMT_SEP_CHAR}\n" \
  "    Defines the comment-introducing character. The default is '#'.\n" \