#! /usr/bin/gawk -f
# Last edited on 2022-12-11 04:37:25 by stolfi

# Reads an ".sdnd" file with 4 columns " {size} {date} {name} {dir}" where
# {name} is the last component of a pathname and {dir} is the start
# of the pathname, ending in "/", possibly "./".
#
# Finds any group of two or more consecutive lines that have the same
# {size} {date} {name} fields, where the size is 2 bytes or more.
#
# Outputs a ".sdndd" file with one line for each group,
# where the first 3 fields are {size} {date} {name}
# and the remaining fields are the {dir} fields in that group.
#
# Diagnostics are written to "/dev/stderr" only, so that standard output
# contains nothing but ".sdndd" data lines.  On a malformed record the
# script stops and exits with status 1.

BEGIN {
  split("", dirs);   # Make {dirs} an empty array.
  clear_group();
  abort = -1;        # Negative means "no error so far".
}

# Once an error status has been set, stop processing input.
(abort >= 0) { exit(abort); }

# Every input record: validate it, then append it to the current group,
# flushing the previous group first if the {size,date,name} key changed.
// {
  if (NF != 4) { data_error(("bad {NF} = " NF)); }
  sz = $1; dt = $2; fn = $3; dr = $4;
  if ((sz != osz) || (dt != odt) || (fn != ofn)) {
    close_group();
    clear_group();
  }
  osz = sz; odt = dt; ofn = fn;
  dirs[nd] = dr; nd++;
  next;
}

END {
  # An {exit} in a rule or function still runs the END rules, so the
  # error status must be re-checked here before flushing the last group.
  if (abort >= 0) { exit(abort); }
  close_group();
}

function clear_group() {
  # Resets the current-group state: the key {osz,odt,ofn} and the count
  # {nd} of directories collected.  Old entries of {dirs} are left in
  # place; only indices {0..nd-1} are ever read.
  osz = -1; odt = ""; ofn = "";
  nd = 0;
}

function close_group(    k) {
  # Writes the current group to standard output as one line
  # "{size} {date} {name} {dir[0]} {dir[1]} ...", but only if the
  # size is at least 2 and the group has at least 2 entries.
  # {k} is a local variable (extra-parameter idiom).
  if ((osz >= 2) && (nd >= 2)) {
    printf "%14d %s %s", osz, odt, ofn;
    for (k = 0; k < nd; k++) { printf " %s", dirs[k]; }
    printf "\n";
  }
}

function data_error(msg) {
  # Reports {msg} with the current file name and record number, followed
  # by the offending record, then sets {abort = 1} and exits.
  # Both lines go to "/dev/stderr" so the record dump does not end up
  # in the ".sdndd" output.
  printf "%s:%d: ** %s\n", FILENAME, FNR, msg > "/dev/stderr";
  printf " [[%s]]\n", $0 > "/dev/stderr";
  abort = 1;
  exit(abort);
}