#! /bin/bash
# Last edited on 2012-12-08 21:30:40 by stolfilocal

# Program identification and help text.
# PROG_NAME is the basename of the path this script was invoked with.
PROG_NAME=${0##*/}
PROG_DESC="find all shellscript-like files under current directory"

# One-line usage synopsis; shown after "usage:" when an unknown option is given.
PROG_HELP=(
  "${PROG_NAME} [ -notheses ] {DIR}.."
)

# Manpage-style description, one "\n"-prefixed line per array element
# (meant to be printed with "echo -e").  Keep the file patterns named here
# in sync with the "find" expressions further down.
PROG_INFO=(
  "\nNAME"
  "\n  ${PROG_NAME} - ${PROG_DESC}."
  "\n"
  "\nSYNOPSIS"
  "\n  ${PROG_HELP[@]}"
  "\n"
  "\nDESCRIPTION"
  "\n  Writes to stdout a list of all shellscript-like files"
  "\n  in the specified directories."
  "\n"
  "\n  Includes files with extensions"
  "\n  \".csh\" and \".sh\" as well as executable files"
  "\n  that have a line beginning with \"#!\".  Note that this"
  "\n  criterion will get also some AWK scripts."
  "\n"
  "\n  Also includes many human-readable files that may contain"
  "\n  shell commands, such as notebooks, README's,"
  "\n  \".gdbinit\" files, and files with extensions \".1\"."
  "\n"
  "\n  Excludes from the list files that have names"
  "\n  containing \"junk\", \"old\", or \"save\" (delimited"
  "\n  by periods, dashes, or slashes), in any capitalization; and"
  "\n  directory trees with certain roots (such as \"ARCHIVE\","
  "\n  \"GARBAGE\", \"IMPORT\", \"IMPORT-*\", \"*/PUB/include\","
  "\n  \"*/stolfi/include\", \"pkg\", \"posters/*/[0-9][0-9][-0-9]*[0-9]/\",  etc.)."
  "\n"
  "\n  This program does not list makefiles; see {find-all-makefiles}"
  "\n  for that."
  "\nOPTIONS"
  "\n  -notheses"
  "\n    Excludes subdirectories called \"theses\", too, in any"
  "\n    capitalization."
  "\n"
  "\nSEE ALSO"
  "\n  find(1)"
  "\nAUTHOR"
  "\n  Created 2006-05-08 by Jorge Stolfi, Unicamp"
)

# ----------------------------------------------------------------------
# INTERNAL OPTIONS

# ----------------------------------------------------------------------
# COMMAND LINE PARSING

# Parse command line switches: 
# "notheses" holds the command (as an array) used to filter path lists.
# It defaults to a pass-through ("cat"); the -notheses option replaces it
# with a case-insensitive inverted grep.
#
# notheses_regex is an ERE matching a whole path component named "theses"
# or "teses": the name must be delimited by a slash (or backslash) or by
# the start/end of the path, so names like "hipoteses/" are not hit.
notheses_regex='(^|[\/])(theses|teses)([\/]|$)'
notheses=( cat )
# bogus=( cat )
# Consume leading arguments that start with "-"; the rest are directories.
while [[ ( $# -ge 1 ) && ( "$1" == -* ) ]]; do
  if [[ "$1" == "-notheses" ]]; then
    notheses=( grep -E -v -i -e "${notheses_regex}" ); shift
#   elif [[ ( $# -ge 2 ) && ( "$1" == "-bogus" ) ]]; then
#     extrop=( "${extrop[@]}" "-bogus" "$2" ); shift; shift;
  else
    echo "unknown option $1" 1>&2
    echo -e "usage:\n  ${PROG_HELP[@]}" 1>&2 ; exit 1
  fi
done

# echo "notheses = ( ${notheses[*]} )" 1>&2

# Get positional parameters
# Prints the directory name $1 normalized to end in exactly one "/".
# An empty name is taken to mean the current directory ("./").
_normalize_dir() {
  local dir=$1
  [[ -n "${dir}" ]] || dir="./"
  # Strip every trailing slash, then put a single one back ("/" stays "/").
  while [[ "${dir}" == */ ]]; do
    dir=${dir%/}
  done
  printf '%s/\n' "${dir}"
}

# Build the list of directories to search from the remaining arguments.
# Each argument is handled as one word, so names containing blanks survive.
dirs=()
for dir_arg in "$@"; do
  dir_norm=$(_normalize_dir "${dir_arg}")
  # Run the name through the "notheses" filter command (set during option
  # parsing); it echoes the name back unless the name is to be excluded.
  dir_kept=$(printf '%s\n' "${dir_norm}" | "${notheses[@]}")
  if [[ -n "${dir_kept}" ]]; then
    dirs+=( "${dir_norm}" )
  fi
done
# With no (surviving) directory arguments, search the current directory.
if [[ ${#dirs[@]} == 0 ]]; then
  dirs=( ./ )
fi

echo "searching directories ${dirs[*]}" 1>&2

# # Check for leftover arguments:
# if [[ $# -ne 0 ]]; then
#   echo 'wrong number of arguments "'"$1"'" ...' 1>&2
#   echo -e "usage:\n  ${PROG_HELP[@]}" 1>&2 ; exit 1 
# fi

# END COMMAND LINE PARSING
# ----------------------------------------------------------------------

# Prefix for temporary file names
tmp="/tmp/$$"

echo "looking for shellscript-like files that are identifiable by name ..." 1>&2
find "${dirs[@]}" \
    -name 'ARCHIVE' -prune -o \
    -name 'GARBAGE' -prune -o \
    -name 'IMPORT' -prune -o \
    -name 'IMPORT-*' -prune -o \
    -name '00-TO-CD' -prune -o \
    -name '00-TARFILES-CONVERTED' -prune -o \
    -name 'hand-in' -prune -o \
    -name 'pkg' -prune -o \
    -wholename '*/projects/imgbank/*/[0-9][0-9][0-9]/*' -prune -o \
    -wholename '*/posters/*/[0-9][0-9][-0-9]*[0-9]/*' -prune -o \
    -wholename '*/stolfi/include' -prune -o \
    -wholename '*/PUB/include' -prune -o \
    -type f \
      \( \
        -name '*.sh' -o \
        -name '*.csh' -o \
        -name '*.1' -o \
        -name '.gdbinit' -o \
        -name '*Noteboo*[a-zA-Z0-9]' -o \
        -name '*README' -o \
        -name '*LEIAME' \
      \) -print \
  > ${tmp}.chm

echo "looking for executable files ..." 1>&2
find "${dirs[@]}" \
    -name 'ARCHIVE' -prune -o \
    -name 'GARBAGE' -prune -o \
    -name 'IMPORT' -prune -o \
    -name 'IMPORT-*' -prune -o \
    -name '00-TO-CD' -prune -o \
    -name '00-TARFILES-CONVERTED' -prune -o \
    -name 'hand-in' -prune -o \
    -name 'pkg' -prune -o \
    -wholename '*/projects/imgbank/*/[0-9][0-9][0-9]/*' -prune -o \
    -wholename '*/posters/*/[0-9][0-9][-0-9]*[0-9]/*' -prune -o \
    -type f \
      -perm -u=x \
      -print \
  | egrep -i -v -e '[-\/.](junk|save|old).*[\/.]' \
  | egrep -v -e '[~]$' \
  > ${tmp}.exs

echo "excluding excutable files that are binary programs ..." 1>&2
xargs \
  --delimiter='\12' \
  --max-args=300 \
  --no-run-if-empty \
  \
  egrep \
    --binary-files=without-match \
    --max-count 1 \
    --regexp '^[\#][\!]' \
    --files-with-matches \
  <  ${tmp}.exs \
  >> ${tmp}.chm 

echo "sorting file names and removing various trash files ..." 1>&2
cat ${tmp}.chm \
  | egrep -i -v -e '[-\/.](junk|save|old).*[\/.]' \
  | egrep -v -e '[\/]posters/.*/[0-9][0-9][-0-9]*[0-9]/' \
  | egrep -v -e '[~]$' \
  | ${notheses[@]} \
  | sed -e 's:^[.][\/]::' \
  | sort \
  | uniq

/bin/rm -f ${tmp}.chm ${tmp}.exs 
