# To be sourced (included) by {bash} scripts.
# Last edited on 2024-07-19 16:00:51 by stolfi
# 
# Defines two main shell functions (commands), "remdup" and "make_symlink".
#
# The command "remdup {DIR} {PATH}" removes "{DIR}/{PATH}", recursively
# if it is a directory. If "{DIR}/{PATH}" does not exist, a warning is
# printed and the script continues. Other conditions, like "{DIR}" not
# being a directory, are errors and interrupt the script.
# 
# This command is modified by commands "keep_item", "keep_in_dir", or
# "dont_keep" that are supposed to specify the path or folder with the
# retained copy of the files that are removed next. Each applies
# to the following "remdup" commands, until superseded by another 
# of these commands.
# 
# The command "keep_item {KP_ITEM}" specifies that, for any
# following "remdup {DIR} {PATH}", the file or folder
# "{DIR}/{PATH}" should be subtended by "{KP_ITEM}".
# 
# The command "keep_in_dir {KP_DIR}" specifies that, for any following
# "remdup {DIR} {PATH}", the file or folder "{DIR}/{PATH}" should
# be subtended by "{KP_DIR}/{PATH}".
# 
# A path "{A}" is subtended by a path "{B}" if they are both files
# and have the same content, or they are both folders and every
# plain file "{A}/{P}" has a plain file "{B}/{P}" with same
# checksum, size, and mod-date.
# 
# The command "dont_keep" specifies that any following "remdup
# {DIR} {PATH}" commands should be executed without checking for
# retained copies.
#
# The command "make_symlink {DIR} {PATH} {LINK}" creates a symlink
# "{DIR}/{LINK}" to "{DIR}/{PATH}". If {LINK} is omitted, the last
# component of {PATH} is used as the link name. Some consistency checks
# are done, e.g. "{DIR}/{LINK}" cannot be an existing file or directory,
# and "{DIR}/{PATH}" should be an existing file or directory.
#
# However, if "{DIR}/{LINK}" already exists and is a symlink whose
# target exists, it is accepted even if it does not point to
# "{DIR}/{PATH}"; a warning is printed and the script continues.

function dont_keep() {
  # Usage: dont_keep
  # Clears both global "keep" settings, {kpitem} and {kpdir}, so that
  # {check_kept} finds no retained copy to compare against.
  kpitem=""
  kpdir=""
}

function keep_in_dir() {
  # Usage: keep_in_dir {KP_DIR}
  # Records {KP_DIR} in the global {kpdir} as the folder that holds the
  # retained copies, and clears the global {kpitem}, so that only the
  # folder setting is in effect afterwards.
  kpdir="${1}"
  shift
  kpitem=""
}

function keep_item() {
  # Usage: keep_item {KP_ITEM}
  # Clears the global {kpdir} and records {KP_ITEM} in the global
  # {kpitem} as the single file or folder that is the retained copy,
  # so that only the item setting is in effect afterwards.
  kpdir=""
  kpitem="${1}"
  shift
}

function _comp_dirs_list() {
  # Usage: _comp_dirs_list {DIR} {OUT} {ERR}
  #
  # Private helper of {comp_dirs}. Runs "find_all_files_cksum_size.sh ./"
  # inside folder {DIR}, drops the lines that end with "~" (tilde files)
  # or that contain "/." followed by a letter, digit or "#" (dot files),
  # and writes the remaining lines, sorted, to file {OUT}. The standard
  # error output of the lister is saved in file {ERR}.
  #
  # Returns the status of the "cd && lister" stage, so that the caller
  # can tell a failed listing from an empty folder. (The status of the
  # "grep" stages is deliberately ignored: "grep" returns 1 when it
  # selects no lines, which is normal for an empty folder.)
  local dir="$1" out="$2" err="$3"
  # CDPATH is cleared (in the subshell only) so that "cd" neither prints
  # a path into the listing nor lands in some other folder.
  ( CDPATH=""; cd -- "${dir}" && find_all_files_cksum_size.sh ./ 2> "${err}" ) \
    | grep -E -v -e '[~]$' \
    | grep -E -v -e '/[.][a-zA-Z0-9#]' \
    | sort > "${out}"
  return "${PIPESTATUS[0]}"
}

function comp_dirs() {
  # Usage: comp_dirs {DIR1} {DIR2}
  #
  # Compares the contents of folders {DIR1} and {DIR2}, using the
  # listings produced by the external command
  # "find_all_files_cksum_size.sh" (presumably one line per file with
  # checksum, size and name, ignoring dates -- that script is not
  # part of this file). Ignores tilde files and dot files.
  #
  # Returns with status 0 if every listed file of {DIR1} is also
  # listed for {DIR2}, as determined by the external command
  # "bool 1-2".
  #
  # In every other case the function prints a message to stderr and
  # calls "exit 1" -- it does NOT return 1. Those cases are: an
  # argument is not a directory; the "bool" command is missing; the
  # temporary folder cannot be created; a listing fails; or some file
  # of {DIR1} is not in {DIR2}. Callers that must survive a failure
  # should therefore run it in a subshell, "( comp_dirs A B )".
  #
  # A failed listing is treated as an error rather than as an empty
  # folder; otherwise an unreadable {DIR1} would be reported as a
  # subset of anything.
  #
  # The temporary files live in a private folder created with
  # "mktemp -d", which is removed before the function returns or exits.

  local dir1="$1"; shift
  local dir2="$1"; shift

  if [[ ! ( -d "${dir1}" ) ]]; then echo "** \"${dir1}\" is not a directory"  1>&2; exit 1; fi
  if [[ ! ( -d "${dir2}" ) ]]; then echo "** \"${dir2}\" is not a directory"  1>&2; exit 1; fi

  # Without "bool" the difference file would be empty, which would be
  # mistaken for "no missing files".
  if ! command -v bool > /dev/null 2>&1; then
    echo "** {comp_dirs}: command \"bool\" not found" 1>&2; exit 1
  fi

  local tmpdir
  if ! tmpdir="$( mktemp -d "${TMPDIR:-/tmp}/comp_dirs.XXXXXX" )"; then
    echo "** {comp_dirs}: cannot create a temporary folder" 1>&2; exit 1
  fi
  local tmp1="${tmpdir}/1.csdf"    # Sorted listing of ${dir1}.
  local tmp2="${tmpdir}/2.csdf"    # Sorted listing of ${dir2}.
  local tmpd="${tmpdir}/d.csdf"    # Lines of ${tmp1} that are not in ${tmp2}.
  local tmpe1="${tmpdir}/e1.csdf"  # Error output of the lister for ${dir1}.
  local tmpe2="${tmpdir}/e2.csdf"  # Error output of the lister for ${dir2}.

  echo "comparing folder \"${dir1}\" with folder \"${dir2}\"..." 1>&2

  if ! _comp_dirs_list "${dir1}" "${tmp1}" "${tmpe1}"; then
    echo "** {comp_dirs}: could not list the files of \"${dir1}\"" 1>&2
    if [[ -s "${tmpe1}" ]]; then sed -e 's:^:  :' "${tmpe1}" 1>&2; fi
    rm -rf -- "${tmpdir}"
    exit 1
  fi
  if ! _comp_dirs_list "${dir2}" "${tmp2}" "${tmpe2}"; then
    echo "** {comp_dirs}: could not list the files of \"${dir2}\"" 1>&2
    if [[ -s "${tmpe2}" ]]; then sed -e 's:^:  :' "${tmpe2}" 1>&2; fi
    rm -rf -- "${tmpdir}"
    exit 1
  fi

  bool 1-2 "${tmp1}" "${tmp2}" > "${tmpd}"
  if [[ -s "${tmpd}" ]]; then
    echo "** files in ${dir1} that are not in ${dir2}:" 1>&2
    sort -b -k3 "${tmpd}" | sed -e 's:^:  :g' 1>&2
    rm -rf -- "${tmpdir}"
    exit 1
  else
    echo "is a proper subset" 1>&2
    rm -rf -- "${tmpdir}"
    return 0
  fi
}

function _check_kept_same_item() {
  # Usage: _check_kept_same_item {KILL} {KEEP}
  #
  # Private helper of {check_kept}. Returns 0 if {KILL} and {KEEP}
  # denote the same item, so that removing {KILL} would destroy {KEEP};
  # returns non-zero if they are distinct items.
  #
  # The paths are the same item if they are equal as strings, or if
  # they resolve to the same device and inode ("-ef") and either
  #   * one of them is a folder or a symbolic link, or
  #   * they are the same directory entry spelled in two ways (e.g.
  #     "d/f" and "./d//f"), as told by their physical parent folder
  #     and last component.
  # Two distinct hard links to the same plain file are NOT the same
  # item: removing one leaves the other intact.
  local kill="$1" keep="$2"
  if [[ "${keep}" == "${kill}" ]]; then return 0; fi
  if [[ ! ( "${keep}" -ef "${kill}" ) ]]; then return 1; fi
  if [[ ( -d "${kill}" ) || ( -L "${kill}" ) || ( -L "${keep}" ) ]]; then return 0; fi
  # Plain files sharing an inode: compare the physical locations.
  # CDPATH is cleared so that "cd" cannot wander off or print anything.
  local pkill pkeep
  pkill="$( CDPATH=""; cd -P -- "$( dirname -- "${kill}" )" > /dev/null 2>&1 && pwd -P )/$( basename -- "${kill}" )"
  pkeep="$( CDPATH=""; cd -P -- "$( dirname -- "${keep}" )" > /dev/null 2>&1 && pwd -P )/$( basename -- "${keep}" )"
  [[ "${pkill}" == "${pkeep}" ]]
}

function check_kept() {
  # Usage: check_kept {DIR} {NAM}
  #
  # Checks that the item to be deleted or moved, "{DIR}/{NAM}", is
  # equal to (but not the same item as) the item to be kept. The item
  # to be kept is "${kpdir}/{NAM}" if the global {kpdir} is not empty,
  # else "${kpitem}" if the global {kpitem} is not empty. If both are
  # empty, nothing is checked.
  #
  # A plain file must have the same contents ("cmp") as the kept file.
  # A folder must pass {comp_dirs} against the kept folder; that runs
  # in a subshell because {comp_dirs} calls "exit" on failure.
  #
  # Returns normally if the check passes or is not needed. On any
  # failure prints a message to stderr and calls "exit 1".

  if [[ $# -ne 2 ]]; then
    echo "** {check_kept} requires exactly two arguments" 1>&2; exit 1
  fi

  local dir="$1"; shift
  local nam="$1"; shift

  local kill="${dir}/${nam}"
  local keep
  if [[ "/${kpdir}" != "/" ]]; then
    keep="${kpdir}/${nam}"
  elif [[ "/${kpitem}" != "/" ]]; then
    keep="${kpitem}"
  else
    keep=""
  fi
  if [[ "/${keep}" != "/" ]]; then
    if [[ ( ! ( -f "${keep}" ) ) && ( ! ( -d "${keep}" ) ) ]]; then
      echo "** copy to be kept \"${keep}\" is neither file nor folder" 1>&2 ; exit 1
    elif _check_kept_same_item "${kill}" "${keep}"; then
      # Comparing strings alone would accept "./d/f" as a copy of "d/f".
      echo "** trying to delete the copy to be kept \"${kill}\"" 1>&2 ; exit 1
    else
      if [[ -f "${kill}" ]]; then
        if [[ ! ( -f "${keep}" ) ]]; then
          echo "** trying to replace a file \"${kill}\" with a folder \"${keep}\"" 1>&2; exit 1
        elif ( cmp -s -- "${kill}" "${keep}" ); then
          echo "keeping ${keep} (equal)" 1>&2
        else
          echo "** file \"${kill}\" differs from file \"${keep}\"" 1>&2; exit 1
        fi
      elif [[ -d "${kill}" ]]; then
        if [[ ! ( -d "${keep}" ) ]]; then
          echo "** trying to replace a folder \"${kill}\" with a file \"${keep}\"" 1>&2; exit 1
        elif ( comp_dirs "${kill}" "${keep}" ); then
          echo "keeping ${keep} (superset)" 1>&2
        else
          echo "** folder \"${kill}\" is not a subset of \"${keep}\"" 1>&2; exit 1
        fi
      else
        echo "** invalid file type \"${kill}\"" 1>&2; exit 1
      fi
    fi
  fi
}

function remdup() {
  # Usage: remdup {DIR} {NAM}
  #
  # Removes "{DIR}/{NAM}" (recursively if it is a folder), after
  # {check_kept} has accepted it against the retained copy currently
  # selected, if any.
  #
  # If "{DIR}/{NAM}" does not exist, prints a warning and returns 0.
  # If the global {dry} is set and non-zero, only prints the command
  # that would be executed.
  #
  # Prints a message and calls "exit 1" if the argument count is wrong,
  # an argument is empty, the item is a symlink or is neither a plain
  # file nor a folder, or {check_kept} fails.
  #
  # All paths are quoted, so names with blanks or glob characters
  # remove exactly that item and nothing else.

  if [[ $# -ne 2 ]]; then
    echo "** {remdup} requires exactly two arguments" 1>&2; exit 1
  fi

  local dir="$1"; shift
  local nam="$1"; shift

  if [[ ( "/${dir}" == "/" ) || ( "/${nam}" == "/" ) ]]; then
    echo "** argument of {remdup} is empty" 1>&2 ; exit 1
  fi

  local torem="${dir}/${nam}"
  local -a cmd=( )
  # The symlink test must come first: "-f" and "-d" follow symlinks.
  if [[ -L "${torem}" ]]; then
    echo "** item \"${torem}\" is a symlink" 1>&2; exit 1
  elif [[ -f "${torem}" ]]; then
    # Regular file:
    cmd=( rm -fv -- "${torem}" )
  elif [[ -d "${torem}" ]]; then
    # Directory:
    cmd=( rm -frv -- "${torem}" )
  elif [[ -e "${torem}" ]]; then
    echo "** item \"${torem}\" is neither a file nor a folder" 1>&2; exit 1
  else
    echo "!! item \"${torem}\" does not exist, ignored" 1>&2; return 0
  fi

  check_kept "${dir}" "${nam}" || exit 1

  # An unset or empty {dry} counts as 0 (not a dry run).
  if [[ ${dry:-0} -ne 0 ]]; then
    echo "will do: ${cmd[*]}" 1>&2
  else
    "${cmd[@]}"
  fi
}

function make_symlink() {
  # Usage: make_symlink {DIR} {DST} [ {LINK} ]
  #
  # Creates, inside folder {DIR}, a symbolic link called {LINK} whose
  # target is {DST}, taken relative to {DIR}. If {LINK} is omitted, the
  # last component of {DST} is used. {LINK} may not contain "/".
  #
  # If the global {dry} is set and non-zero, only prints the commands
  # that would be executed, skipping the existence checks below, and
  # returns 0.
  #
  # Otherwise:
  #   * if "{DIR}/{LINK}" is already a symlink with an existing target,
  #     prints a warning and returns 0, even if it points elsewhere;
  #   * if it is a symlink with a missing target, or exists but is not
  #     a symlink, or "{DIR}/{DST}" does not exist, prints a message
  #     and calls "exit 1";
  #   * else creates the link with "cd {DIR} && ln -s {DST} {LINK}" in
  #     a subshell, and returns that command's status.
  #
  # A wrong number of arguments, or {DIR} not being a folder, is also
  # an error that calls "exit 1".

  if [[ $# -lt 2 ]]; then
    echo "** {make_symlink}: wrong number of arguments" 1>&2 ; exit 1
  fi
  local dir="$1"; shift
  local dst="$1"; shift
  local link
  if [[ $# -eq 0 ]]; then
    link="${dst##*/}"
  elif  [[ $# -eq 1 ]]; then
    link="$1"; shift
  else
    echo "** {make_symlink}: wrong number of arguments" 1>&2 ; exit 1
  fi
  if [[ "@${link}" =~ / ]]; then
    echo "** {make_symlink}: invalid link name with '/'" 1>&2 ; exit 1
  fi

  if [[ ! ( -d "${dir}" ) ]]; then
    echo "** {make_symlink}: \"${dir}\" is not a folder" 1>&2 ; exit 1
  fi

  # Kept as arrays so that names with blanks stay single arguments.
  local -a cmds1=( cd -- "${dir}" )
  local -a cmds2=( ln -s -- "${dst}" "${link}" )

  # An unset or empty {dry} counts as 0 (not a dry run).
  if [[ ${dry:-0} -ne 0 ]]; then
    echo "will do: ( ${cmds1[*]} && ${cmds2[*]} )"  1>&2; return 0
  else
    if [[ -L "${dir}/${link}" ]]; then
      # "-e" follows the link, so it is false for a dangling symlink.
      if [[ ! ( -e "${dir}/${link}" ) ]]; then
        echo "** {make_symlink}: target of existing link \"${dir}/${link}\" does not exist" 1>&2 ; exit 1
      else
        echo "!! link \"${dir}/${link}\" already exists, ignored" 1>&2; return 0
      fi
    elif [[ -e "${dir}/${link}" ]]; then
      echo "** {make_symlink}: \"${dir}/${link}\" exists but is not a link" 1>&2 ; exit 1
    elif [[ ! ( -e "${dir}/${dst}" ) ]]; then
      echo "** {make_symlink}: intended target \"${dir}/${dst}\" does not exist" 1>&2 ; exit 1
    else
      echo "linking ${dir}/${link} to ${dir}/${dst}" 1>&2
      # Subshell: the "cd" must not change the caller's folder.
      # CDPATH is cleared so that "cd" goes to ${dir} and nowhere else.
      ( CDPATH=""; "${cmds1[@]}" && "${cmds2[@]}" )
    fi
  fi
}
