#! /usr/bin/gawk -f
# Last edited on 2007-10-31 12:21:15 by stolfi

BEGIN {
  USAGE = ( "renumber-item.list < INFILE.txt > OUTFILE.txt" );

  # Reads a text file and renumbers every /item line/ --- a line that
  # seems to be part of a numbered list of items.

  # Assumes that an item line at level {r \geq 1} starts with
  # {2*(r-1) + ind} blanks, for some {ind}, followed by {r} unsigned
  # numbers, each terminated by '.' with no spaces between them, and
  # at least one additional blank.  The number {ind} defines the extra
  # indentation and must be the same for all item lines in the file;
  # it is taken from the first item line seen.

  # Item lines are renumbered sequentially, preserving the
  # indentation.  Each number is printed in plain decimal (no
  # zero-padding) followed by '.'.  Numbering at a level restarts from
  # 1 whenever an item of a shallower level intervenes.  If the level
  # jumps down by more than one step, the skipped levels are printed
  # as 0.

  # Lines that start with "#! " are interpreted as error messages
  # left by a previous run, and are silently removed.

  # Item-like lines whose indentation is too small or disagrees with
  # {ind} are copied unchanged, preceded by a "#! " warning line.

  # Beware with data lines that accidentally start with digits
  # followed by ". ", they will be interpreted as item lines.

  abort = -1;      # Abnormal exit flag; negative means "keep going".
  ind = -1;        # Extra indentation, initially undefined.
  curlev = 0;      # Level of previous item line; 0 = none seen yet.
  split("", inum); # {inum[0..curlev-1]} is the last item line number.
}

# Stop processing as soon as an abnormal exit has been requested.
(abort >= 0) { exit abort; }

# Drop warning lines left in the input by a previous run.
/^[#][!][ ]/ { next; }

// {
  # Parse the line, setting {lev}, {ind}, {txt}:
  parse_item_line_number($0);
  if (lev > 0)
    { # Going deeper: start the counters of the new levels at zero.
      while (curlev < lev) { inum[curlev] = 0; curlev++; }
      # Going shallower: forget the deeper levels, so that they are
      # reset by the loop above the next time they are entered.
      if (curlev > lev) { curlev = lev; }
      # Increment the item number at the current level:
      inum[lev-1]++;
      # Print the line with its original indentation:
      printf "%*s", ind + 2*(lev-1), "";
      for (i = 0; i < lev; i++) { printf "%d.", inum[i]; }
      printf " %s\n", txt;
    }
  else
    { # Not an item line: copy it unchanged.
      print;
    }
  next;
}

function parse_item_line_number(lin,   n,r,m)
{
  # Parses the line {lin} as an item line.
  # If it is an item line, sets the global {lev} to the numbering
  # level, and sets the global {txt} to the part that follows the
  # item number and the separating blank.  Also checks/updates the
  # global indentation {ind}.
  #
  # If {lin} is not an item line, or its indentation is unacceptable,
  # sets {lev} to 0 and leaves {txt} and {ind} untouched; in the
  # latter case a warning is also issued through {data_warning}.
  #
  # {n}, {r}, {m} are local work variables.

  lev = 0; # Default - not an item line.

  # Grab and remove the total indentation {n}:
  match(lin, /^[ ]*/);
  n = RLENGTH;
  lin = substr(lin, RSTART+RLENGTH);

  # Try to parse an item number from {lin}, counting its {r} fields:
  r = 0;
  while (match(lin, /^[0-9]+[.]/))
    { r++; lin = substr(lin, RSTART+RLENGTH); }

  # Check if it looks like an item line:
  if ((r > 0) && match(lin, /^[ ]/))
    { # Assume it is an item line.
      # Remove the single separating blank (any further blanks are
      # kept as part of the text):
      lin = substr(lin, RSTART+RLENGTH);
      # Compute the natural indentation {m} for level {r} items:
      m = 2*(r-1);
      # Check or grab the global indentation {ind}:
      if (n < m)
        { data_warning("insufficient indentation"); return; }
      else if (ind < 0)
        { # First item line seen, save {ind}:
          ind = n - m;
        }
      else if (ind != n-m)
        { data_warning("inconsistent indentation"); return; }
      # Return the results in the globals:
      lev = r;
      txt = lin;
    }
}

function data_warning(msg)
{
  # Writes a warning about the current input line to standard output,
  # as a line starting with "#! " so that a later run of this script
  # over the output removes it again.
  printf "#! %s:%d: ** %s\n", FILENAME, FNR, msg;
}

END {
  # Propagate the abnormal exit status, if one was set.
  if (abort >= 0) { exit abort; }
}