#! /bin/bash
# Last edited on 2024-01-19 16:21:32 by stolfi

# Usage: "cat {FILE.csdf} | $0 > .dir-sizes.sf"
#
# The input must have fields "{CKSUM} {SIZE} {DATE} {FILENAME}"
# where {FILENAME} begins with "/home/stolfi/" or "/home/stolfi2/stolfi/".
#
# The output has "{TOTSIZE} {DIR}" where {DIR} is a directory and
# {TOTSIZE} is the total file size in GB.  In the {DIR},
# "/home/stolfi/" is replaced by "@/"
# and "/home/stolfi2/stolfi/" by "=/".

# Sums the {SIZE} field (column 2) per directory group and lists the
# totals, largest first.  Totals are kept in an array keyed by the output
# {DIR}, so the records of one directory need not be contiguous in the
# input, and nothing is printed when the input is empty.
#
# NOTE(review): each sum is divided by 10^6 before printing; that is GB
# only if {SIZE} is given in KB -- confirm against the .csdf producer.
gawk \
    ' # Returns the output {DIR} for the file name {path}:
      #   "/home/stolfi/{D}/..."                   -> "@/{D}/"
      #   "/home/stolfi2/stolfi/{D}/..."           -> "=/{D}/"
      #   "/home/stolfi2/stolfi/projects/{P}/..."  -> "=/projects/{P}/"
      # A name with no "/" after the prefix (a file sitting directly in
      # the directory) is returned whole, without a trailing "/".  A name
      # with neither prefix is treated as relative to "@/".
      function dir_key(path,    root, head, cut) {
        root = "@/";
        if (! sub(/^[\/]home[\/]stolfi[\/]/, "", path)) {
          if (sub(/^[\/]home[\/]stolfi2[\/]stolfi[\/]/, "", path)) { root = "=/"; }
        }

        # Only the "projects" tree of the second home is split one level deeper.
        head = "";
        if ((root == "=/") && sub(/^projects[\/]/, "", path)) { head = "projects/"; }

        # Keep just the first remaining component, with its trailing "/".
        cut = index(path, "/");
        if (cut > 0) { path = substr(path, 1, cut); }

        return (root head path);
      }

      # Every record adds its size to the total of its directory group.
      { tot[dir_key($4)] += $2; }

      # The scan order of the array is unspecified; the sort below fixes it.
      END {
        for (dir in tot) { printf "%10.1f %s\n", tot[dir]/1000000, dir; }
      }
    ' \
  | sort -k1gr
