#! /bin/bash
# Last edited on 2024-10-06 18:49:27 by stolfi
#
# Lists all source-like files under the given directories.
# Writes the full list to ".all-sources" and the Makefile/.c/.h subset
# to ".chfiles", both in the current directory.  Progress messages go
# to stderr.
#
# Strict mode ("set -e", "pipefail") is deliberately NOT enabled: grep
# exits with status 1 when it selects no lines, which is a normal
# outcome for the filters below.

PROG_NAME=${0##*/}
PROG_DESC="find all source-like files under given directories"
PROG_HELP=(
  "${PROG_NAME} [ -notheses ] [ -exclude {PATH_PAT} ].. {DIR}.."
)
PROG_INFO=(
  "\nNAME"
  "\n ${PROG_NAME} - ${PROG_DESC}."
  "\n"
  "\nSYNOPSIS"
  "\n ${PROG_HELP[*]}"
  "\n"
  "\nDESCRIPTION"
  "\n Writes to \".all-sources\" a list of all source-like files"
  "\n in the specified directories."
  "\n"
  "\n Includes files with standard source-file extensions, such as"
  "\n \".c\", \".h\" (C language), \".cpp\" (C++) \".awk\" (AWK), \".gawk\" (GAWK),"
  "\n \".pas\" (Pascal), \".f\" (FORTRAN), \".lsp\" (Lisp),"
  "\n \".el\" (Emacs Lisp), \".py\" (Pyton), \".sh\" (bash), \".csh\" (C shell),"
  "\n \".i3\", \".m3\", \".ig\", and \".mg\" (Modula-3)."
  "\n"
  "\n Also includes files called \"Makefile\" or with \".make\" extension,"
  "\n except \"Deps.make\"."
  "\n"
  "\n Excludes from the list garbage files. These include files that have names"
  "\n containing an iso-style date like \"2019-11-19\""
  "\n (delimited by dashes or slashes) or"
  "\n any of the strings \"junk\", \"old\", \"snap\", \"snapshot\", or \"save\""
  "\n (delimited by periods, dashes, or slashes), in any capitalization; and"
  "\n directory trees whose root is called \"00-ARCHIVE\","
  "\n \"00-MIRROR\", \"00-BACKUP\", \"00-RESTORE\","
  "\n \"GARBAGE\" \"IMPORT-*\", \"PUB/include\", \"pkg\","
  "\n \"posters/*/[0-9]*[0-9][0-9][-0-9]*[0-9]/\","
  "\n or \"hand-in\", in that capitalization."
  "\n"
  "\n Also writes to \".chfiles\" the subset"
  "\n of \".all-sources\" that are \"Makefile\" or"
  "\n have extension \".c\" or \".h\"."
  "\n"
  "\nOPTIONS"
  "\n -notheses"
  "\n Excludes subdirectories called \"theses\", too, in any"
  "\n capitalization."
  "\n"
  "\n -exclude {PATH_PAT}"
  "\n Excludes subdirectories whose full pathname"
  " matches {PATH_PAT}. Use \"**\" for any string"
  " including slashes, \"*\" for any string excluding slashes."
  "\n"
  "\nSEE ALSO"
  "\n find(1)"
  "\nAUTHOR"
  "\n Created 2006-05-08 by Jorge Stolfi, Unicamp"
)

# ----------------------------------------------------------------------
# INTERNAL OPTIONS

# Extended regex (used case-insensitively) that matches a path having a
# component named exactly "theses" or "teses".  The separator class
# accepts both "/" and "\".
THESES_REGEX='(^|[\/])(theses|teses)([\/]|$)'

# State filled in by parse_command_line:
notheses=0     # 1 when "-notheses" was given.
excludes=()    # Extra find(1) arguments: "-path PAT -prune -o" per "-exclude".
dirs=()        # Directories to search.

# ----------------------------------------------------------------------
# HELPERS

# Prints the message $1 and the usage line to stderr, then exits with status 1.
usage_error() {
  echo "$1" 1>&2
  echo -e "usage:\n ${PROG_HELP[*]}" 1>&2
  exit 1
}

# Parses the command line given as "$@" into the globals notheses,
# excludes and dirs.  Leading switches are consumed first; everything
# from the first non-switch argument on is taken as a directory.
#
# Each "-exclude" pattern is stored verbatim (no added quote characters)
# and is later passed to find(1) as a "-path" operand.  Note that for
# "-path" a "*" also matches "/".
#
# With "-notheses", directory arguments matching THESES_REGEX are
# dropped.  If no directory remains, the built-in defaults are used.
parse_command_line() {
  notheses=0
  excludes=()
  dirs=()

  local opt
  while [[ $# -ge 1 ]]; do
    opt="$1"
    case "${opt}" in
      -exclude)
        if [[ $# -lt 2 ]]; then
          usage_error "option -exclude requires an argument"
        fi
        excludes+=( -path "$2" -prune -o )
        echo "excluding '$2'" 1>&2
        shift 2
        ;;
      -notheses)
        notheses=1
        shift
        ;;
      -*)
        usage_error "unknown option ${opt}"
        ;;
      *)
        break
        ;;
    esac
  done

  # Positional parameters: kept as whole words, so names containing
  # blanks or glob characters survive intact.
  local dir
  for dir in "$@"; do
    [[ -n "${dir}" ]] || continue
    if (( notheses )) \
      && printf '%s\n' "${dir}" | grep -E -q -i -e "${THESES_REGEX}"; then
      continue
    fi
    dirs+=( "${dir}" )
  done

  echo "dirs = [${dirs[*]}]" 1>&2
  if [[ ${#dirs[@]} -eq 0 ]]; then
    dirs=( ./ /home/stolfi2/stolfi/ )
  fi
  echo "searching directories ${dirs[*]}" 1>&2
}

# Filter: copies stdin to stdout, dropping lines that match THESES_REGEX
# (case-insensitively) when notheses is 1; otherwise copies everything.
drop_theses() {
  if (( notheses )); then
    grep -E -v -i -e "${THESES_REGEX}"
  else
    cat
  fi
}

# Filter: removes a leading "./" and the known home-directory prefixes
# from each path on stdin.  Needs extended regexes ("-E") so that
# "(local)?" is an optional group rather than literal characters.
strip_home_prefixes() {
  sed -E \
    -e 's:^[.][/]::' \
    -e 's:^/home/[j]*stolfi(local)?/::' \
    -e 's:^/home/staff/[j]*stolfi(local)?/::' \
    -e 's:^/home/stolfi2/stolfi/::'
}

# Prints to stdout the regular files under the directories "$@" whose
# names look like sources, pruning the built-in garbage/data
# directories and the user's "-exclude" patterns (global excludes).
find_sources() {
  find "$@" \
    -name '*-dotfiles*' -prune -o \
    -name '.[a-zA-Z0-9]*' -prune -o \
    -name '00-ARCHIVE' -prune -o \
    -name '00-BACKUP' -prune -o \
    -name '00-MIRROR' -prune -o \
    -name '00-RESTORE' -prune -o \
    -name '00-TARFILES' -prune -o \
    -name '00-TARFILES-CONVERTED' -prune -o \
    -name '00-TO-CD' -prune -o \
    -name 'ARCHIVE' -prune -o \
    -name 'ChromiumUnsnapped' -prune -o \
    -name 'GARBAGE' -prune -o \
    -name 'IMPORT-*' -prune -o \
    -name 'JSLIBS-backup' -prune -o \
    -name 'JSLIBS-LATER' -prune -o \
    -name 'JUNK' -prune -o \
    -name 'MODPKG' -prune -o \
    -name 'OLD' -prune -o \
    -name 'SAVE' -prune -o \
    -name 'hand-in' -prune -o \
    -name 'include' -prune -o \
    -name 'out' -prune -o \
    -name 'packages' -prune -o \
    -name 'pkg' -prune -o \
    -name 'snap' -prune -o \
    \
    "${excludes[@]}" \
    -path '**/Download/books/everett-piraha' -prune -o \
    -path '**/Download/samsung-galaxy4/Pictures' -prune -o \
    -path '**/Download/videos' -prune -o \
    -path '**/Pictures/Webcam' -prune -o \
    -path '**/WhatsApp/Media' -prune -o \
    -path '**/bureau/diretoria' -prune -o \
    -path '**/mc857/2020-1/notas/ra' -prune -o \
    -path '**/posters/*/[0-9][0-9][-0-9]*[0-9]*' -prune -o \
    -path '**/programs/python/mc857-*' -prune -o \
    -path '**/projects/biobank/**/item' -prune -o \
    -path '**/projects/bitcoin/bitcointalk/Gavin_Andresen' -prune -o \
    -path '**/projects/bitcoin/wallets/raw/BitPay.com/[0-9][0-9]*' -prune -o \
    -path '**/projects/eleicoes/2010/dados-por-urna*' -prune -o \
    -path '**/projects/fragments/IAB-2002/2009-01-30-manaus/RAW' -prune -o \
    -path '**/projects/fragments/ceramic-3/data' -prune -o \
    -path '**/projects/fragments/glazed-1/data' -prune -o \
    -path '**/projects/image-collections/life' -prune -o \
    -path '**/projects/imgbank/**/[0-9][0-9][0-9]' -prune -o \
    -path '**/projects/imgbank3/**/[0-9][0-9][0-9]' -prune -o \
    -path '**/projects/musis/2009-12-11-ceaz-thesis' -prune -o \
    -path '**/projects/musis/[A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9][A-Z0-9]' -prune -o \
    -path '**/projects/neuromat/00-DATA/*/*runs' -prune -o \
    -path '**/projects/neuromat/00-DATA/*/*runs-[A-Z]' -prune -o \
    -path '**/projects/stereo-linescan/images' -prune -o \
    -path '**/projects/terras-indigenas/raw' -prune -o \
    -path '**/projects/text-tracking/FETCH' -prune -o \
    -path '**/projects/urna/urna-2010/dados-por-urna' -prune -o \
    -path '**/projects/usd-inflation/00-DATA/split' -prune -o \
    -path '**/projects/voynich/work/Notes/057/stats' -prune -o \
    -path '**/projects/wikimapia/obj' -prune -o \
    -type f \
    \( \
      -name '*.[chf]' -o \
      -name '*otebook.*[a-zA-Z0-9]' -o \
      -name 'Makefile' -o \
      -name '*README*' -o \
      -name '*.make' -o \
      -name '*.cc' -o \
      -name '*.el' -o \
      -name '*.cpp' -o \
      -name '*.pas' -o \
      -name '*.sh' -o \
      -name '*.csh' -o \
      -name '*.java' -o \
      -name '*.lsp' -o \
      -name '*.awk' -o \
      -name '*.gawk' -o \
      -name '*.nawk' -o \
      -name '*.py' -o \
      -name '*.pl' -o \
      -name '*.[im][3g]' \
    \) -print
}

# ----------------------------------------------------------------------
# COMMAND LINE PARSING

parse_command_line "$@"

# END COMMAND LINE PARSING
# ----------------------------------------------------------------------

# Private scratch directory (unpredictable name), removed on any exit.
tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/all-sources.XXXXXX") || {
  echo "cannot create temporary directory" 1>&2
  exit 1
}
trap 'rm -rf -- "${tmp_dir}"' EXIT
chm_file="${tmp_dir}/chm"

echo "looking for source files that are identifiable by name ..." 1>&2
find_sources "${dirs[@]}" > "${chm_file}"

echo "sorting file names and removing additional trash files ..." 1>&2
grep -E -i -v -e '(^|[-/.])(junk|save|snap|snapshot|old).*[-\/.]' < "${chm_file}" \
  | grep -E -v -e '[/]Deps.make$' \
  | grep -E -v -e '[~]$' \
  | drop_theses \
  | strip_home_prefixes \
  | sort -r \
  | uniq \
  > .all-sources

echo "separating C sources ..." 1>&2
grep -E -e 'Makefile|[.][ch]$' < .all-sources \
  | grep -E -v -e 'SAVE|JUNK|OLD' \
  | sort -r \
  | uniq \
  > .chfiles