#! /bin/bash
# Last edited on 2024-04-02 15:55:06 by stolfi

# Program identification and help texts.
PROG_NAME=${0##*/}
PROG_DESC="a filter that reads the output of {find-all-files-size-date}, excludes junk and archives"
# One-line synopsis; printed after "usage:" when an argument is not recognized.
PROG_HELP=( "${PROG_NAME} [ -extilde] < {INFILE}.sdf > {OUTFILE}.sdf" )
# Manual-page style text, one array element per line of text; every element
# begins with a literal backslash-n sequence, so it needs an escape-interpreting
# printer (such as "echo -e") to be displayed.
PROG_INFO=(
  "\nNAME"
  "\n  ${PROG_NAME} - ${PROG_DESC}."
  "\n"
  "\nSYNOPSIS"
  "\n  ${PROG_HELP[@]}"
  "\n"
  "\nDESCRIPTION"
  "\n"
  "\n Reads a list of files from {stdin}. Discards uninteresting files, writes the rest to stdout."
  "\n"
  "\n The input list may be"
  "\n  - a plain list of file names;"
  "\n  - list of links with ' -> ';"
  "\n  - list of size-date-filename as produced by {find_all_files_size_date.sh}"
  "\n  - list of cksum-size-filename as produced by {find_all_files_cksum_size.sh}"
  "\n  - list of cksum-size-date-filename as produced by {find_all_files_cksum_size_date.sh}"
  "\n"
  "\n  Excludes from the list the known cache files such as from \".cache\" and \".thumbnail\" directories. "
  "\n"
  "\n Also excludes the executable binaries listed in {${HOME}/programs/binaries.dir}"
  "\n"
  "\n If the \"-extilde\" option is given, also excludes files that end with '~'."
  "\n"
  "\nSEE ALSO"
  "\n  find_all_files_size_date.sh, find_all_files_cksum_size.sh, find_all_files_cksum_size_date.sh"
  "\nAUTHOR"
  "\n  Created 2020-09-04 by Jorge Stolfi, Unicamp"
  "\n  Modified 2022-10-16 by J.Stolfi"
  "\n  Modified 2024-03-27 by J.Stolfi to use egrep instead of sed"
)

# Parse arguments:

# parse_args ARG...
#   Sets the global {extilde} to 1 if the tilde-exclusion option is present,
#   and to 0 otherwise.  The option is accepted both as "-extilde" (the
#   spelling shown in the usage and help texts) and as "-exclude-tilde"
#   (the only spelling that used to be accepted).
#   Any other argument prints a complaint and the usage line to stderr and
#   terminates the script with status 1.
parse_args() {
  extilde=0
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -extilde | -exclude-tilde)
        extilde=1
        shift
        ;;
      *)
        echo 'unrecognized argument "'"$1"'" ...' 1>&2
        printf 'usage:\n  %s\n' "${PROG_HELP[*]}" 1>&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Pattern file for the exclusion filter.
# It is created with mktemp, so its name is unpredictable and the file is
# guaranteed to be new and empty; a fixed "/tmp/<pid>_..." name could collide
# with a leftover file or be planted in advance as a symlink.  The file still
# lives directly under /tmp.
echo "excluding junk files ..." 1>&2
xfile="$(mktemp /tmp/xtrash.grep.XXXXXX)" || {
  echo "** cannot create the temporary pattern file" 1>&2
  exit 1
}

echo "preparing ${xfile} ..." 1>&2


# Files with blanks in name.
# The pattern appended is a backslash followed by a space (presumably the
# way the list producer writes a blank inside a file name -- to be confirmed).
printf '%s\n' '[\][ ]' >>"${xfile}"

# Trash top folders:

# top_trash_patterns
#   Writes to stdout the patterns for junk entries of the top folder.
#   Every pattern starts with "([ ]|^)", so it matches only at the start of
#   the line or right after a blank.
#   The dots in the ".gimp-2.N" names are written as "[.]" like all the
#   others, so that they match a literal dot only.
top_trash_patterns() {
  cat <<'EOF'
([ ]|^)[.]FBReader\b
([ ]|^)[.]Xauthority\b
([ ]|^)[.]adobe\b
([ ]|^)[.]audacity\b
([ ]|^)[.]cache\b
([ ]|^)[.]ccache\b
([ ]|^)[.]dasher\b
([ ]|^)[.]dbus\b
([ ]|^)[.]designer\b
([ ]|^)[.]dropbox\b
([ ]|^)[.]dt\b
([ ]|^)[.]eXtace\b
([ ]|^)[.]eggcups\b
([ ]|^)[.]emacs[.]d/session\b
([ ]|^)[.]emacs[.]d/[.]emacs[.]d\b
([ ]|^)[.]emacs[.]d/auto-save-list\b
([ ]|^)[.]esd_auth\b
([ ]|^)[.]esmtp_queue\b
([ ]|^)[.]evolution\b
([ ]|^)[.]fontconfig\b
([ ]|^)[.]fullcircle\b
([ ]|^)[.]galeon\b
([ ]|^)[.]gconf3\b
([ ]|^)[.]gconfd3\b
([ ]|^)[.]gegl\b
([ ]|^)[.]gimp-2[.]2\b
([ ]|^)[.]gimp-2[.]6\b
([ ]|^)[.]gimp-2[.]8\b
([ ]|^)[.]githubs\b
([ ]|^)[.]github_pat\b
([ ]|^)[.]gnome\b
([ ]|^)[.]gnupg\b
([ ]|^)[.]gnuplot\b
([ ]|^)[.]gstreamer\b
([ ]|^)[.]icedtea\b
([ ]|^)[.]icons\b
([ ]|^)[.]java\b
([ ]|^)[.]jetadmin\b
([ ]|^)[.]jpi_cache\b
([ ]|^)[.]jssc\b
([ ]|^)[.]kde\b
([ ]|^)[.]local\b
([ ]|^)[.]mailboxlist\b
([ ]|^)[.]mailcap\b
([ ]|^)[.]mcop\b
([ ]|^)[.]mcoprc\b
([ ]|^)[.]metacity\b
([ ]|^)[.]mime-types\b
([ ]|^)[.]mozilla\b
([ ]|^)[.]mplayer\b
([ ]|^)[.]nautilus\b
([ ]|^)[.]nedit\b
([ ]|^)[.]netscape\b
([ ]|^)[.]nv\b
([ ]|^)[.]openoffice\b
([ ]|^)[.]pki\b
([ ]|^)[.]povray\b
([ ]|^)[.]thumbnails\b
([ ]|^)[.]thunderbird\b
([ ]|^)[.]zoom/logs/crash
([ ]|^)ChromiumUnsnapped\b
([ ]|^)bin/i686-Linux
([ ]|^)lib/i686-Linux
([ ]|^)snap/chromium\b
EOF
}

top_trash_patterns >>"${xfile}"

# Trash subfolders of top ".config" folder.
# The list below holds only the tail of each pattern; the loop writes every
# tail after the common head "([ ]|^)[.]config/", one pattern per line.
while IFS= read -r entry; do
  printf '([ ]|^)[.]config/%s\n' "${entry}"
done >>"${xfile}" <<'EOF'
Qt
Skype\b
Trolltech
Webcamoid
akonadi
atril\b
autostart\b
bunkus[.]org
caja\b
calibre\b
celluloid\b
chromium\b
compiz\b
dnfdragora\b
enchant\b
evince\b
evolution\b
exaile\b
flowblade\b
folder-color\b
fontforge\b
gnote\b
google-chrome\b
google\b
guvcview
kalarmrc
kconf
libaccounts
libreoffice
macromedia\b
octave\b
pavucontrol
pluma\b
pulse\b
sealert\b
specialmailcollectionsrc\b
texlive2013\b
tilda\b
xfburn\b
xfce4\b
yelp
yumex
zoomus
EOF

# Mirror directories ("PUB/include" and "include" at the top level).
printf '%s\n' \
  '([ ]|^)PUB/include\b' \
  '([ ]|^)include\b' \
  >>"${xfile}"

# Expendable extensions, such as derived files.
# Each pattern is anchored with "$", so it matches only at the end of a line.
# The heredoc delimiter is quoted so that the shell copies the text verbatim.
cat <<'EOF' >>"${xfile}"
[.][ao]$
[.]pyc$
[.]io$
[.]ho$
[.]so$
[.]aux$
[.]errs$
[.]log$
[.]blg$
[.]toc$
[.]dvi$
[.]lof$
[.]lot$
[.]dmg$
[.]done$
[.]did$
[.]diff$
[.]prdiff$
[.]so[.][0-9][0-9]*$
[.]class$
EOF

# Trash subfolders of any folder.
# The head "([ /]|^)" lets a pattern match at the start of the line, after a
# blank, or after a "/", i.e. at the start of any path component.
# The heredoc delimiter is quoted so that the shell copies the text verbatim.
cat <<'EOF' >>"${xfile}"
([ /]|^)JUNK
([ /]|^)[.]macromedia/
([ /]|^)[.]svn([- /]|$)
([ /]|^)[.]git([- /]|$)
([ /]|^)__pycache__[/]
([ /]|^)akonadi
([ /]|^)dropbox
([ /]|^)exaile
([ /]|^)out[.]old/
([ /]|^)out[_0-9]*/
([ /]|^)testes/saida/
EOF

# Trash dotfiles in top folder and subfolders:

# dotfile_trash_patterns
#   Writes to stdout the patterns for junk dot-files and dot-folders.
#   Every pattern starts with "([ /]|^)", so it matches at the start of the
#   line, after a blank, or after a "/".
#   Changes with respect to the earlier list:
#     - the dots in "gimp-2.8", "rhn-applet.conf" and "rhopenoffice1.1" are
#       written as "[.]", so they match a literal dot only;
#     - "[.]deps[.]make$" lost a "\b" after the "$" that could never change
#       the result;
#     - "[.]dups\b" was dropped, since "[.]dups" already matches all it did.
dotfile_trash_patterns() {
  cat <<'EOF'
([ /]|^)[.][uvwxyz][uvwxyz][uvwxyz]*\b
([ /]|^)[.]aa[a]*\b
([ /]|^)[.]absent\b
([ /]|^)[.]adobe\b
([ /]|^)[.]all\b
([ /]|^)[.]asx
([ /]|^)[.]asz\b
([ /]|^)[.]audacity
([ /]|^)[.]bar\b
([ /]|^)[.]bashes\b
([ /]|^)[.]baz\b
([ /]|^)[.]bb[b]*\b
([ /]|^)[.]bigs\b
([ /]|^)[.]cache\b
([ /]|^)[.]cc[c]*\b
([ /]|^)[.]ccache\b
([ /]|^)[.]dasher\b
([ /]|^)[.]dbus\b
([ /]|^)[.]dd[d]*\b
([ /]|^)[.]deleted\b
([ /]|^)[.]deps[.]make$
([ /]|^)[.]designer\b
([ /]|^)[.]did\b
([ /]|^)[.]diff\b
([ /]|^)[.]dir
([ /]|^)[.]done\b
([ /]|^)[.]dropbox-dist\b
([ /]|^)[.]dropbox\b
([ /]|^)[.]dt\b
([ /]|^)[.]dup\b
([ /]|^)[.]dups
([ /]|^)[.]eXtace\b
([ /]|^)[.]eggcups\b
([ /]|^)[.]empty\b
([ /]|^)[.]esd_auth\b
([ /]|^)[.]esmtp_queue\b
([ /]|^)[.]evolution\b
([ /]|^)[.]extra\b
([ /]|^)[.]flowblade\b
([ /]|^)[.]fontconfig\b
([ /]|^)[.]foo\b
([ /]|^)[.]fullcircle\b
([ /]|^)[.]galeon\b
([ /]|^)[.]gconf3\b
([ /]|^)[.]gconfd3\b
([ /]|^)[.]gegl-
([ /]|^)[.]gimp-2[.]8\b
([ /]|^)[.]gitconfig\b
([ /]|^)[.]gnome2\b
([ /]|^)[.]gnome2_private\b
([ /]|^)[.]gnome3\b
([ /]|^)[.]gnome\b
([ /]|^)[.]gnupg\b
([ /]|^)[.]gnuplot_history\b
([ /]|^)[.]gstreamer-
([ /]|^)[.]icedteaplugin\b
([ /]|^)[.]icons\b
([ /]|^)[.]java\b
([ /]|^)[.]jetadmin\b
([ /]|^)[.]jpi_cache\b
([ /]|^)[.]jssc\b
([ /]|^)[.]junk\b
([ /]|^)[.]kde3\b
([ /]|^)[.]kde\b
([ /]|^)[.]lesshst\b
([ /]|^)[.]local\b
([ /]|^)[.]lock\b
([ /]|^)[.]macromedia\b
([ /]|^)[.]mailboxlist\b
([ /]|^)[.]mailcap\b
([ /]|^)[.]mcop\b
([ /]|^)[.]mcoprc\b
([ /]|^)[.]metacity\b
([ /]|^)[.]mime-types\b
([ /]|^)[.]miss
([ /]|^)[.]mozilla\b
([ /]|^)[.]mp4s
([ /]|^)[.]mplayer\b
([ /]|^)[.]nautilus\b
([ /]|^)[.]nedit\b
([ /]|^)[.]netscape6\b
([ /]|^)[.]nv\b
([ /]|^)[.]ok
([ /]|^)[.]openoffice
([ /]|^)[.]pee\b
([ /]|^)[.]pki\b
([ /]|^)[.]played\b
([ /]|^)[.]povray[/][.]povray\b
([ /]|^)[.]pretty\b
([ /]|^)[.]pulse\b
([ /]|^)[.]qt\b
([ /]|^)[.]redhat\b
([ /]|^)[.]rhn-applet[.]conf\b
([ /]|^)[.]rhopenoffice1[.]1\b
([ /]|^)[.]spamassasin\b
([ /]|^)[.]ssh2\b
([ /]|^)[.]ssh[/][.]ssh\b
([ /]|^)[.]sss\b
([ /]|^)[.]subversion\b
([ /]|^)[.]sversionrc\b
([ /]|^)[.]teminfo\b
([ /]|^)[.]temp\b
([ /]|^)[.]test\b
([ /]|^)[.]texlive2007\b
([ /]|^)[.]texlive2013\b
([ /]|^)[.]texlive2017\b
([ /]|^)[.]texlive\b
([ /]|^)[.]themes\b
([ /]|^)[.]thumbnails\b
([ /]|^)[.]tmp\b
([ /]|^)[.]to-add[0-9]*\b
([ /]|^)[.]to-del[0-9]*\b
([ /]|^)[.]to-fix[0-9]*\b
([ /]|^)[.]to-remove[0-9]*\b
([ /]|^)[.]toadd[0-9]*\b
([ /]|^)[.]todel[0-9]*\b
([ /]|^)[.]torem[0-9]*\b
([ /]|^)[.]torm[0-9]*\b
([ /]|^)[.]uha\b
([ /]|^)[.]units_history\b
([ /]|^)[.]ups\b
([ /]|^)[.]uu[u]*\b
([ /]|^)[.]vm\b
([ /]|^)[.]vnc\b
([ /]|^)[.]vv[v]*\b
([ /]|^)[.]w3\b
([ /]|^)[.]wapi\b
([ /]|^)[.]wget-hsts\b
([ /]|^)[.]xcdroast\b
([ /]|^)[.]xchat\b
([ /]|^)[.]xdvirc\b
([ /]|^)[.]xfig\b
([ /]|^)[.]xfigrc\b
([ /]|^)[.]xmms\b
([ /]|^)[.]xscreensaver\b
([ /]|^)[.]xsession-errors
EOF
}

dotfile_trash_patterns >>"${xfile}"

# Other trash files: names that begin with ",", names that begin with "#"
# on a line that ends with "#", and names made of two or more "=" at the
# end of a line.
printf '%s\n' \
  '([ /]|^)[,]' \
  '([ /]|^)[#].*[#]$' \
  '([ /]|^)[=][=][=]*$' \
  >>"${xfile}"

# Directories that are already backups (top-level names that begin with "pkg").
printf '%s\n' '([ ]|^)pkg' >>"${xfile}"

# Optionally, tilde files: when {extilde} is nonzero, also exclude every
# line that ends with "~".
if (( extilde != 0 )); then
  printf '%s\n' '[~]$' >>"${xfile}"
fi

# Binary executables:

# binary_patterns LISTFILE
#   Writes to stdout one exclusion pattern for each non-blank line of
#   LISTFILE, formed by putting "([ ]|^)(programs/.*|bin|lib)/" in front of
#   the line.  Blank lines are skipped: a blank line would otherwise turn
#   into the bare prefix, a pattern that excludes everything under the top
#   "bin/" and "lib/" folders and under the subfolders of "programs/".
#   The lines are used as they are, without escaping regex characters.
binary_patterns() {
  sed \
    -e '/^[[:space:]]*$/d' \
    -e 's@^@([ ]|^)(programs/.*|bin|lib)/@' \
    < "$1"
}

binlist="${HOME}/programs/binaries.dir"
if [[ -r "${binlist}" ]]; then
  binary_patterns "${binlist}" >>"${xfile}"
else
  # Best effort: without the list the filter still runs, it just does not
  # exclude the listed binaries.
  echo "** warning: cannot read ${binlist}, listed binaries are not excluded" 1>&2
fi
echo "see ${xfile}" 1>&2

# filter_junk PATFILE
#   Copies stdin to stdout, omitting every line that matches at least one of
#   the extended regular expressions listed in PATFILE (one per line).
#   Uses "grep -E" found through PATH: the separate "egrep" command is
#   obsolescent, and recent GNU grep releases print a warning whenever it
#   is run.
filter_junk() {
  grep -E -v -f "$1"
}

filter_junk "${xfile}"

echo "done." 1>&2

# The pattern file is left in place (removal is disabled), apparently so
# that it can be inspected after the run:
# /bin/rm -f ${xfile}
