#! /bin/bash
# Last edited on 2017-08-22 12:20:03 by jstolfi

# Usage: ${cmd} ${dir1} ${dir2}
#
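# Enumerates all plain files in ${dir2} and its subdirs.  Removes each file
# ${dir2}/${path}, if there is an identical file ${dir1}/${path}.
# Afterwards removes every directory under ${dir2} that is left empty
# (including ${dir2} itself, if it ends up empty).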

# Both directory arguments are mandatory; fail early with a usage
# message instead of a confusing "missing or not a directory" later on.
if [[ $# -lt 2 ]]; then
  echo "usage: ${0##*/} DIR1 DIR2" 1>&2
  exit 1
fi

# dir1 is the reference tree (never modified); dir2 is the tree to prune.
dir1="$1"; shift
dir2="$1"; shift

# Confirm from user:
# while [[ 1 ]]; do
#   echo "remove all files in ${dir2} with duplicates in ${dir1}? [y/n]" 1>&2
#   read ans rest
#   if [[ "/${ans}" == "/n" ]]; then
#     echo "aborted" 1>&2; exit 1
#   elif [[ "/${ans}" == "/y" ]]; then
#     break
#   else
#     echo "bad input" 1>&2 
#   fi
# done

# Both arguments must name existing directories.
if [[ ! -d "${dir1}" ]]; then echo "${dir1} missing or not a directory" 1>&2; exit 1; fi
if [[ ! -d "${dir2}" ]]; then echo "${dir2} missing or not a directory" 1>&2; exit 1; fi

# Refuse to run when both names resolve to the same directory: every file
# would compare equal to itself and the only copy would be deleted.
if [[ "${dir1}" -ef "${dir2}" ]]; then
  echo "${dir1} and ${dir2} are the same directory" 1>&2; exit 1
fi

# Private scratch directory (unpredictable name), removed on any exit path.
tmpdir="$(mktemp -d /tmp/rmdup.XXXXXX)" \
  || { echo "cannot create temporary directory" 1>&2; exit 1; }
trap 'rm -rf -- "${tmpdir}"' EXIT

# Prefix for temporary file names; used below as ${tmp}.<suffix>.
tmp="${tmpdir}/list"

# Collect the paths (relative to ${dir2}) of all plain files in ${dir2}.
# Entries are NUL-terminated so that blanks, tabs, newlines and glob
# characters in file names are preserved exactly.  The list is written
# out completely before any file is removed.
flist="${tmp}.dir"
( cd -- "${dir2}" && find . -type f -print0 ) \
  | sort -z \
  > "${flist}"

# Compare and delete.  The list is read on descriptor 3 so that the
# commands inside the loop still see the script's own standard input.
while IFS= read -r -d '' pt <&3; do
  # Drop the leading "./" that find puts on every path:
  pt="${pt#./}"
  # Check:
  if [[ -f "${dir1}/${pt}" ]]; then
    if cmp -- "${dir1}/${pt}" "${dir2}/${pt}"; then
      rm -v -- "${dir2}/${pt}"
    else
      echo "${dir2}/${pt} differs" 1>&2
    fi
  else
    echo "${dir2}/${pt} is unique" 1>&2
  fi
done 3< "${flist}"

# The list is no longer needed.
rm -f -- "${flist}"

# Remove empty directories under ${dir2} (and ${dir2} itself, if it ends
# up empty).  With -depth, find visits a directory only after its
# contents, so a parent that becomes empty once its children have been
# removed is caught in the same pass.  A directory that rmdir cannot
# remove is reported by rmdir and skipped; it is never retried, so the
# pass always terminates.  The ":?" aborts if ${dir2} is empty or unset,
# so the scan can never start at "/".
find "${dir2:?}/" -depth -type d -empty -exec rmdir -v -- {} \;
  
