#! /bin/bash
# Last edited on 2017-05-05 03:29:10 by stolfilocal

# USAGE: ${cmd} {DIRECTORY_A} {DIRECTORY_B}

# Validate the command line before consuming it: with fewer than two
# arguments the directory names would be empty, the checks below would
# have nothing meaningful to test, and every later step would end up
# reading and writing paths directly under "/".
if [[ $# -lt 2 ]]; then
  echo "** usage: $0 {DIRECTORY_A} {DIRECTORY_B}" 1>&2 ; exit 1
fi

dira="$1"; shift; # Older backup dir.
dirb="$1"; shift; # Newer dir.

# Check that both directories exist and can be entered.  The expansions
# are quoted so that names containing blanks or glob characters are kept
# as single words.
for dir in "${dira}" "${dirb}" ; do
  if [[ ! -d "${dir}" ]]; then
    echo "** directory '${dir}' not found" 1>&2 ; exit 1
  fi
  # Stop right away if the directory cannot be entered (e.g. no permission).
  pushd "${dir}" || exit 1
  # The per-directory summary step below is currently disabled:
#   echo "=== directory ${PWD} ===========================" 1>&2   
#   summarize-files-in-current-dir.sh
  popd || exit 1
done

# NOTE(review): "bool" is an external tool not defined in this file;
# "bool 1.2 A B" presumably outputs the lines common to A and B -- confirm.
# Directory-derived paths are quoted so that names with blanks stay intact.

# Find files that have the same pathname relative to the two directories:
bool 1.2 "${dira}/.files.dir" "${dirb}/.files.dir" > .files-same-path.dir

# Find files that have the same checksum in both directories:
bool 1.2 "${dira}/.files.chk" "${dirb}/.files.chk" > .files-same-cksum.chk

# Files that have the same relative path and checksum are duplicated:
# NOTE(review): nothing visible in this script creates ".files-same-cksum.dir"
# in the current directory before this point (the step above writes
# ".files-same-cksum.chk", and the per-directory ".files-same-cksum.dir"
# files are only produced further down) -- confirm the intended operand.
bool 1.2 .files-same-path.dir .files-same-cksum.dir > .files-dup.dir

# For each directory, keep the lines of its ".files.csf" list that contain
# (as a fixed string, anywhere in the line) one of the entries listed in
# ".files-same-cksum.chk", sorted; then write field 3 of those lines
# (presumably the file name -- confirm) to a sorted ".dir" list.
for dir in "${dira}" "${dirb}" ; do
  # An empty name used to vanish from the unquoted word list; keep skipping it.
  [[ -n "${dir}" ]] || continue
  grep -F -f .files-same-cksum.chk "${dir}/.files.csf" | sort > "${dir}/.files-same-cksum.csf"
  gawk '//{ print $3; }' "${dir}/.files-same-cksum.csf" | sort > "${dir}/.files-same-cksum.dir"
done

# Pair up the entries of the two per-directory ".files-same-cksum.csf"
# lists on their first field.  Each joined line carries: the join key,
# field 2 of either side (compared as sizes below), and field 3 of either
# side.  The gawk stage reports malformed or inconsistent lines on stderr
# and writes "key size field3-of-A field3-of-B" to ".files-same-cksum.csj".
# The join fields are given with the POSIX "-1"/"-2" options, and the two
# operands are spelled out and quoted so directory names with blanks work.
join -1 1 -2 1 -o 0,1.2,2.2,1.3,2.3 \
    "${dira}/.files-same-cksum.csf" \
    "${dirb}/.files-same-cksum.csf" \
  | gawk \
      ' //{
          if (NF != 5) { printf "** bug NF: %s\n", $0 > "/dev/stderr"; }
          if ($2 != $3) { printf "** bug sizes differ: %s\n", $0 > "/dev/stderr"; }
          printf "%s %14d %s %s\n", $1, $2, $4, $5; next;
        }
      ' \
  > .files-same-cksum.csj
