#! /bin/bash

# Reads the output of {find-files-size-date}.  Prints a summary, by extension, with the number of files, total bytes, and max bytes. If a file does not have an extension, uses the last part of the file name.

gawk '//{ print $1, $3, ("@@@" $3); }' \
  | sed \
      -e 's:@@@[/]+:@@@:g' \
      -e 's:@@@[^ ]*[.]:@@@.:g' \
      -e 's:@@@[^ ]*[/]:@@@/:g' \
      -e 's:@@@::g' \
  | sort -k3,3 \
  | gawk \
      ' BEGIN { clearit("NONE") } 
        // { 
          sz=$1; fi=$2; ex=$3; 
          if (ex != exo) { 
            dumpit(); clearit(ex)
          } 
          n++; tsz += sz; if (sz > msz) { msz = sz; mfi = fi }
        } 
        END { dumpit() }
        
        function clearit(ex) {
          n=0; tsz = 0; 
          msz = -1; mfi = "NONE";
          exo = ex;
        }
        function dumpit() {
          if (exo != "NONE") { 
            tszM = tsz/1000000
            tszA = tszM/n
            printf "%10d %14.6f %12.3f %14d %s %s\n", n, tszM, tszA, msz, exo, mfi; }
        }
      ' \
  | sort -k2,2gr -k4 
