#! /usr/bin/gawk -f
# Last edited on 2008-07-01 20:08:59 by stolfi

# Reads a text file {INFILE} and formats the tables contained therein.
#
# WARNING --- this AWK script uses {double} for numeric
# computations, so it cannot handle large numbers and it
# may print fractions incorrectly.
#
# EXAMPLE
#
#   A typical input:
#
#     +----+----+----+
#     : Year : Kind : Profits :
#     +--+--+--+
#     # With Microploft software:
#     +--+--+--+
#     | 2004| so-so | 34,567,221.00 |
#     |2005 | terrible | 63339.0 | # Must check this!
#     +----+----+----+
#     | SUBTOTAL | - | 0 |
#     +----+----+----+
#     # With Linukes software:
#     +--+--+--+
#     | 2006 | good | +53000000 |
#     | 2007 | could be better | 19,255,421 |
#     +----+----+----+
#     | SUBTOTAL | - | 0 |
#     +----+----+----+
#     | TOTAL | - | 9999 |
#     +--+--+--+
#
#   Corresponding output:
#
#     (NOTE(review): no output sample was ever filled in here.)
#
# DESCRIPTION
#
#   The program reads the input file, parses it into zero or more
#   tables, as described in the {table-funcs.gawk} library.
#   Tables are separated by the {tblsep} regular expression
#   (which defaults to "^ *END_TABLE *$").
#
#   For each table, it determines the attributes of each table column.
#   It recomputes the numeric values in any (sub)total lines, to make
#   them consistent. It then recasts all fields to their canonical
#   format, and regularizes the field widths in each column.
#
#   Finally, the program outputs the whole table. Blank lines are
#   written out unchanged, with their original comments. Any other
#   output line consists of the (uniformized) table margin, followed
#   by the fields of that line, separated by the appropriate delimiter
#   character, and then the line's original comment.
#
#   Any blank lines before the first table are written out
#   unchanged.
#
# NOTE(review): the description above names {table-funcs.gawk} while the
# usage text names {txtable-g.gawk}; confirm which library is current.
# All {txtable_*} functions and the {row_*} / {tbl_*} variables used
# below are not defined in this file; presumably they come from that
# library, which must be loaded with an extra "-f" option.

BEGIN {
  # Negative means "no error so far"; presumably the library sets
  # {abort} to an exit status when something goes wrong -- TODO confirm.
  abort = -1;

  # Usage text, assembled one line at a time.
  USAGE =       "txtable-g-reformat \\\n";
  USAGE = USAGE " -f ${STOLFIHOME}/lib/txtable-g.gawk \\\n";
  USAGE = USAGE " [ -v frsep={CHAR}] \\\n";
  USAGE = USAGE " [ -v thsep={CHAR} ] \\\n";
  USAGE = USAGE " [ -v altzero={STRING} ] \\\n";
  USAGE = USAGE " [ -v tblsep={REGEXP} ] \\\n";
  USAGE = USAGE " < INFILE \\\n";
  USAGE = USAGE " > OUTFILE";

  # COMMAND ARGUMENT PARSING

  txtable_argument_parse();
  if (tblsep == "")
    tblsep = "^ *END_TABLE *$";   # Default table separator pattern.

  # Start with an empty table.
  txtable_clear();
}

# Stop at the first record seen after an error has been flagged.
abort >= 0 { exit abort }

# Table separator: flush the table collected so far, start a new one,
# and copy the separator line itself to the output.
$0 ~ tblsep {
  txtable_finish();
  txtable_clear();
  print;
  next;
}

# Any other input line: split it and append it to the current table.
# (A rule with no pattern runs for every record.)
{
  txtable_split_line($0);
  txtable_save_line(row_ind, row_tag, row_nf, row_fld, row_cmt);
  next;
}

END {
  if (abort >= 0)
    exit abort;

  # Flush whatever was collected after the last separator.
  txtable_finish();
  exit 0;
}

function txtable_finish()
{
  # Processes and prints the table accumulated so far: reports its size
  # on the standard error stream, then calls the column-attribute,
  # recompute/reformat and print helpers, in that order.

  # Debugging aid (disabled): dump the raw table before reformatting.
  # printf "### RAW TABLE #############################################\n";
  # txtable_print();
  # printf "### REFORMATTED TABLE #####################################\n";

  printf("formatting table with %d rows and %d cols\n", tbl_nrows, tbl_ncols) > "/dev/stderr";

  txtable_get_column_attributes(frsep, thsep, altzero);
  txtable_recompute_and_reformat_values(frsep, thsep, altzero);
  txtable_print();
}