#! /usr/bin/python3 -t
# _*_ coding: iso-8859-1 _*_
# Last edited on 2022-09-29 08:57:27 by stolfi

PROG_NAME = "txtable-reformat"
PROG_DESC = "Recomputes and realigns tables in plain ascii format"
PROG_VERS = "1.0"

# !!! TO DO : add support for {infoColSep,headerColSep,ruleColSep,commentSep} options.

import sys
import re
import os
# Make the project-local libraries importable before importing them.
sys.path[1:0] = [ sys.path[0] + '/../lib', os.path.expandvars('${STOLFIHOME}/lib'), '.' ]
import txtable; from txtable import TxTable
import argparser; from argparser import ArgParser
from decimal import *

# The copyright sign is written as an escape so that the value of this
# string does not depend on the encoding the file is stored in.
PROG_COPYRIGHT = "Copyright \xa9 2008-06-30 by the State University of Campinas (UNICAMP)"

PROG_HELP = (
    PROG_NAME + " \\\n"
    + txtable.options_HELP + " \\\n"
    " [ -tableSep {TBL_SEP_REGEXP} ] \\\n"
    + argparser.help_info_HELP + " \\\n"
    " < INFILE \\\n"
    " > OUTFILE"
)

PROG_INFO = (
    "NAME\n"
    " " + PROG_NAME + " - " + PROG_DESC + ".\n"
    "\n"
    "SYNOPSIS\n"
    " " + PROG_HELP + "\n"
    "\n"
    "DESCRIPTION\n"
    " Reads a text file from {stdin}, containing zero or more"
    " tables in plain ascii format. Recomputes the derived fields"
    " (such as totals and sub-totals) in those tables, then"
    " prints the entries in each column properly aligned"
    " and formatted to a common format.\n"
    "\n"
    "EXAMPLE\n"
    "\n"
    " Here is a typical input:\n"
    "\n"
    " # ------------------------------------------\n"
    " # Financial results under new management\n"
    "\n"
    " +----+----+----+\n"
    " ! Year ! Kind ! Profits !\n"
    " +--+--+--+\n"
    " # With Microploft software:\n"
    " +--+--+--+\n"
    " | 2004| so-so | 34,567,221.00 |\n"
    " |2005 | terrible | 63339.0 | # Must check this!\n"
    " +---+----+---+\n"
    " | SUBTOTAL | - | 0 |\n"
    " +----+----+----+\n"
    " # With Linukes software:\n"
    " +--+--+---------------+\n"
    " | 2006 | good | +53000000 |\n"
    " | 2007 | could be better | 19,255,421 |\n"
    " +----+----+----+\n"
    " | SUBTOTAL | - | 0 |\n"
    " +----+----+----+\n"
    " | TOTAL | - | 9999 |\n"
    " +--+--+--+\n"
    "\n"
    " # ------------------------------------------\n"
    "\n"
    " Here is the corresponding output:\n"
    "\n"
    " # ------------------------------------------\n"
    " # Financial results under new management\n"
    "\n"
    " +----------+-----------------+-----------------+\n"
    " ! Year ! Kind ! Profits !\n"
    " +----------+-----------------+-----------------+\n"
    " # With Microploft software:\n"
    " +----------+-----------------+-----------------+\n"
    " | 2004 | so-so | +34,567,221.00 |\n"
    " | 2005 | terrible | +63,339.00 | # Must check this!\n"
    " +----------+-----------------+-----------------+\n"
    " | SUBTOTAL | - | +34,630,560.00 |\n"
    " +----------+-----------------+-----------------+\n"
    " # With Linukes software:\n"
    " +----------+-----------------+-----------------+\n"
    " | 2006 | good | +53,000,000.00 |\n"
    " | 2007 | could be better | +19,255,421.00 |\n"
    " +----------+-----------------+-----------------+\n"
    " | SUBTOTAL | - | +72,255,421.00 |\n"
    " +----------+-----------------+-----------------+\n"
    " | TOTAL | - | +106,885,981.00 |\n"
    " +----------+-----------------+-----------------+\n"
    "\n"
    " # ------------------------------------------\n"
    "\n"
    "TABLE SEPARATORS\n"
    " By default, tables are separated by lines whose only non-blank"
    " contents is the string 'END_TABLE'. Each table is processed"
    " independently. Each line of the file is a row of the table. In"
    " normal data rows, the fields are separated by '|'s. The table"
    " may also have header rows (with fields separated by '!'s) and"
    " rule rows (whose fields are strings of blanks or dashes, separated"
    " by '+'s) Within each table, all rows must have the same"
    " number of columns; there is no support for entries"
    " that span two or more columns.\n"
    "\n"
    "OVERVIEW OF TABLE PROCESSING\n"
    " For each column of each table, the program"
    " determines whether its data fields (excluding headers and"
    " rules) are numbers or non-numbers. If they are numbers, it also"
    " determines the consensus format --- namely, their maximum precision"
    " (number of fraction digits), whether the digits are grouped"
    " into thousands (as in '12,345,678.90') and whether the"
    " entries have explicit '+' signs. It then recomputes the"
    " numeric values in any (sub)total lines, to make"
    " them consistent. Finally, the program recasts all"
    " numeric values in each column to the common format, and"
    " prints the whole table to standard output, padding all fields"
    " so that each column is properly aligned.\n"
    "\n"
    "BLANK LINES AND COMMENTS\n"
    " The input file may contain blank lines. Any line (blank or not) may"
    " have a comment, that starts with the '#' character and extends to the"
    " end of the line. Blank lines and comments are"
    " written out unchanged, with their original indentation.\n"
    "\n"
    "TABLE INDENTATION\n"
    " In the output, non-blank table rows are indented by a uniform"
    " amount, equal to the smallest indentation of any non-blank input line.\n"
    "\n"
    "INPUT TABLE FORMAT\n"
    + txtable.format_INFO + "\n"
    "\n"
    "OPTIONS\n"
    + txtable.options_INFO + "\n"
    "\n"
    " -tableSep {TBL_SEP_REGEXP}\n"
    " Defines the table separator lines as being those that match the"
    " regular expression {TBL_SEP_REGEXP}. The default is '^[ ]*END_TABLE[ ]*$'.\n"
    "\n"
    "DOCUMENTATION OPTIONS\n"
    + argparser.help_info_INFO + "\n"
    "\n"
    "SEE ALSO\n"
    " txtable-join(1).\n"
    "\n"
    "AUTHOR\n"
    " Created 2008-06-30 by Jorge Stolfi, IC-UNICAMP.\n"
    "\n"
    "MODIFICATION HISTORY\n"
    " 2008-06-30 by J. Stolfi, IC-UNICAMP: created by"
    " translation of the GAWK script {txtable-g-reformat}.\n"
    "\n"
    "WARRANTY\n"
    " " + argparser.help_info_NO_WARRANTY + "\n"
    "\n"
    "RIGHTS\n"
    " " + PROG_COPYRIGHT + ".\n"
    "\n"
    " " + argparser.help_info_STANDARD_RIGHTS
)

# COMMAND ARGUMENT PARSING

pp = ArgParser(sys.argv, sys.stderr, PROG_HELP, PROG_INFO)
# NOTE(review): the {err} returned here is never inspected (and {arg_error}
# below is never called); presumably {ArgParser} reports bad arguments
# itself -- confirm against {txtable.parse_args}.
tblSep, frSep, thSep, altZero, err = txtable.parse_args(pp)
# Consume every "-m" keyword together with the argument that follows it.
while pp.keyword_present("-m"):
    pp.get_next()
pp.finish()

# Position in the input, used by {data_error} to locate the offending line.
in_file_name = "stdin"
in_line_number = 0
in_current_line = ""

# TESTS:
# sys.stderr.write(txtable.insert_thsep_int("123456789","/") + "\n")
#
# def vtest(xv, frs, ths, altz) :
#   v = txtable.numeric_value(xv, frs, ths, altz)
#   sys.stderr.write("xv = \xab%s\xbb v = \xab%s\xbb\n" % (xv, v))
#   for pr in -1, 0, 2, 6 :
#     for th in 0, 1 :
#       for ps in 0, 1 :
#         yv = txtable.format_value(v, pr, th, ps, frs, ths, altz)
#         sys.stderr.write(" pr = %d th = %d ps = %d xv = \xab%s\xbb\n" % (pr, th, ps, yv))
#   # ----------------------------------------------------------------------
#
# vtest("000'00", "'", ",", None)
# vtest("000'00", "'", ",", "---")
# vtest("---", "'", ",", "---")
# vtest("123,456,78'901,203", "'", ",", "---")
# vtest("-123,456,78'901,203", "'", ",", "---")

# FOR REAL:

class State:
    """Mutable holder for the table that is currently being accumulated."""
    tbl = None  # The current table.


state = State()


def data_error(msg):
    """Prints the error message {msg} about the current input line, and aborts.

    The message is written to {stderr}, prefixed with the input file name
    and line number and followed by the text of the current input line;
    the program then exits with status 1.
    """
    sys.stderr.write("%s:%d: ** %s\n" % (in_file_name, in_line_number, msg))
    sys.stderr.write(" %s\n" % in_current_line)
    sys.exit(1)


def arg_error(msg):
    """Prints the error message {msg} about the command line arguments, and aborts.

    The message and the usage synopsis {PROG_HELP} are written to {stderr};
    the program then exits with status 1.
    """
    sys.stderr.write("** %s\n" % msg)
    sys.stderr.write("usage: %s\n" % PROG_HELP)
    sys.exit(1)


def clear_table(state):
    """Clears the current table and sets its global format params.

    Replaces {state.tbl} by a fresh {TxTable} built from the command-line
    values {frSep}, {thSep} and {altZero}, with debugging turned off.
    """
    state.tbl = TxTable(frSep, thSep, altZero)
    state.tbl.debug = 0
    # ----------------------------------------------------------------------


def output_table(state):
    """Recomputes and reformats the table {state.tbl}, and prints it to {stdout}.

    When {state.tbl.debug} is set, the raw table is printed first, framed by
    marker lines, and a short diagnostic is written to {stderr}.
    """
    tbl = state.tbl
    if tbl.debug:
        # NOTE(review): the extent of this branch was reconstructed from a
        # copy of the file that had lost its indentation; the {stderr}
        # diagnostic is assumed to be debug-only -- confirm.
        sys.stdout.write("### RAW TABLE #############################################\n")
        tbl.printout()
        sys.stdout.write("### REFORMATTED TABLE #####################################\n")
        sys.stderr.write("formatting table with %d rows and %d cols\n" % (tbl.nrows, tbl.ncols))
    tbl.choose_column_formats()
    tbl.recompute_and_reformat_values()
    tbl.printout()
    # ----------------------------------------------------------------------


# LOOP ON INPUT LINES

# Compile the separator pattern once instead of handing it to {re.search}
# again for every input line.
tblSep_pattern = re.compile(tblSep)

clear_table(state)
for in_line_number, lin in enumerate(sys.stdin, start=1):
    in_current_line = lin
    if tblSep_pattern.search(lin):
        # Table separator: flush the table read so far, start a new one,
        # and copy the separator line itself to the output.
        output_table(state)
        clear_table(state)
        sys.stdout.write(lin)
    else:
        tag, ind, fld, cmt, err = txtable.parse_row(lin)
        if err is not None:
            data_error(err)
        state.tbl.add_row(tag, ind, fld, cmt)
# Flush the last table, which has no separator line after it.
output_table(state)
# sys.stderr.write("done.\n")