#! /bin/bash
# Last edited on 2024-04-24 05:37:48 by stolfi
#
# Lists all ordinary files under the given directories, one per line, as
# "{SIZE} {MODTIME} {PATHNAME}" on stdout.  Progress and diagnostic
# messages (including any rejected file names) go to stderr.
#
# External tools used: GNU find(1) (for "-printf" and "-wholename"), sed,
# tr, mktemp, plus the helper programs {list_bad_filenames.gawk},
# {sdf-file-remove-nsec} and {sdf-file-put-nsec}, which must be in the
# command search path.

PROG_NAME=${0##*/}
PROG_DESC="find all files under given directories; print size, mod time, name"
PROG_HELP=(
  "${PROG_NAME} [-nsec] [-exclude {PATTERN} | -exclude-path {PATTERN} ].. {DIR}.."
)
PROG_INFO=(
  "\nNAME"
  "\n ${PROG_NAME} - ${PROG_DESC}."
  "\n"
  "\nSYNOPSIS"
  "\n ${PROG_HELP[@]}"
  "\n"
  "\nDESCRIPTION"
  "\n Writes to stdout a list of all ordinary files"
  "\n in the specified directories (which usually should be disjoint)."
  "\n"
  "\n For each file, prints: the size in bytes, the last-modified time"
  "\n (in the format YYYY-MM-DD-hhmmss, UTC timezone), and the file's pathname."
  "\n"
  "\n Rejects any directory or file names that contain blanks, line"
  "\n breaks, backslashes, single or double quotes. Assumes"
  "\n that the full paths found do not contain any double slashes \"//\"."
  "\n"
  "\n Does not list symbolic links, pipes, etc. Excludes a few"
  "\n trash directories that are known to contain many"
  "\n invalid file names, such as {.cache}, {cache}, {.config/chromium}, etc."
  "\n"
  "\nOPTIONS"
  "\n -nsec."
  "\n If this option is present, the time field ends with a"
  "\n fraction of second consisting of a '.' and nine decimal"
  "\n digits (nanoseconds). Otherwise the time field has just"
  "\n whole seconds."
  "\n"
  "\n -exclude {PATTERN}"
  "\n -exclude-path {PATTERN}"
  "\n Each occurrence of this option specifies the name"
  "\n of a file to be excluded from the listing. See the \"-name\" and"
  "\n \"-wholename\" options of \"find(1)\", respectively, for the syntax of {PATTERN}."
  "\n If the pattern matches a directory, also excludes all sub-directories and files therein."
  "\n"
  "\nSEE ALSO"
  "\n find(1), find_all_files_cksum_size.sh, find_all_links_size_date.sh"
  "\nAUTHOR"
  "\n Created 2007-01-17 by Jorge Stolfi, Unicamp"
)

# Prints the error message "$1" and the usage synopsis to stderr, then
# terminates the script with status 1.
usage_error() {
  echo "$1" 1>&2
  printf 'usage:\n %s\n' "${PROG_HELP[*]}" 1>&2
  exit 1
}

# ----------------------------------------------------------------------
# COMMAND LINE PARSING

# Filter applied to the final listing.  NOTE(review): judging by their
# names and by the "-nsec" help text, these helpers presumably drop or
# normalize the fractional seconds that "%TS" puts in the time field;
# they are not defined in this file -- confirm.
nseccmd=( sdf-file-remove-nsec )

# Extra "find" exclusion clauses, each of the form "{TEST} {PATTERN} -prune -o".
exclop=( )

# Parse command line switches.  Only arguments that BEGIN with "-" are
# switches; a directory name that merely contains "/-" is not one.
while [[ ( $# -ge 1 ) && ( "$1" == -* ) ]]; do
  case "$1" in
    -nsec)
      nseccmd=( sdf-file-put-nsec )
      shift
      ;;
    -exclude)
      if [[ $# -lt 2 ]]; then usage_error "missing {PATTERN} after option $1"; fi
      exclop+=( -name "$2" -prune -o )
      shift; shift
      ;;
    -exclude-path)
      if [[ $# -lt 2 ]]; then usage_error "missing {PATTERN} after option $1"; fi
      exclop+=( -wholename "$2" -prune -o )
      shift; shift
      ;;
    *)
      usage_error "unknown option $1"
      ;;
  esac
done

# Everything that remains is a directory name:
dirs=( "$@" )

# END COMMAND LINE PARSING
# ----------------------------------------------------------------------

echo "nseccmd = [" "${nseccmd[@]}" "]" 1>&2
echo "exclop = [" "${exclop[@]}" "]" 1>&2

# Regularize directory names: relative names get a leading "./", every
# name gets exactly one trailing "/", and any run of slashes is reduced
# to a single "/" (assume that "//" is same as "/").
#
# As in previous versions, an argument that contains blanks is taken as
# several blank-separated names, and empty names are dropped.  The names
# are read back with {mapfile}, so they are never subject to pathname
# (glob) expansion by the shell.
if [[ ${#dirs[@]} -gt 0 ]]; then
  mapfile -t dirs < <(
    printf '%s\n' "${dirs[@]}" \
      | tr ' \t' '\n\n' \
      | sed \
          -e '/^$/d' \
          -e 's:^\([^/]\):./\1:' \
          -e 's:[/]*$:/:' \
          -e 's:[/][/]*:/:g' \
          -e 's:^[.]/[.]/:./:'
  )
fi
if [[ ${#dirs[@]} -eq 0 ]]; then
  # Default is current directory:
  dirs=( ./ )
fi
echo "dirs = [" "${dirs[@]}" "]" 1>&2

# Scratch files live in a private, unpredictably named directory that is
# removed on every exit path (normal end, error, or signal-induced exit).
tmpdir="$(mktemp -d "${TMPDIR:-/tmp}/find_all_files.XXXXXXXX")" \
  || { echo "** cannot create temporary directory" 1>&2; exit 1; }
trap 'rm -rf -- "${tmpdir}"' EXIT

tfile="${tmpdir}/dir" # Output of {find}.
efile="${tmpdir}/err" # Errors.
gfile="${tmpdir}/gud" # Good files.

# Find files. Write size, moddate and wholename (between "//..//") to ${tfile}.
# The "//" delimiters let the checker below recognize names with embedded
# blanks or line breaks.  The {sed} step removes "/./" path components,
# which also strips the leading "./"; it is repeated because a single
# pass of "s///g" does not catch overlapping matches such as "/././".
# echo "finding files ..." 1>&2
export TZ=UTC   # So that "%T.." prints the modification time in UTC.
find "${dirs[@]}" \
    -type l -prune -o \
    -name 'cache' -prune -o \
    -name '.cache' -prune -o \
    -name '.local' -prune -o \
    -name '.thumbnails' -prune -o \
    -name '.macromedia' -prune -o \
    -name '.mozilla' -prune -o \
    -name '.xsession-errors' -prune -o \
    -name '.all-files-*' -prune -o \
    -wholename '*/testfn/bad' -prune -o \
    -wholename '*/Crash Reports' -prune -o \
    -wholename '*/ChromiumUnsnapped' -prune -o \
    -wholename '*/snap/chromium' -prune -o \
    -wholename '*/.config/chromium' -prune -o \
    "${exclop[@]}" \
    \( -type f -printf "%14s %TY-%Tm-%Td-%TH%TM%TS //%p//\n" \) \
  | sed -e 's:/[.]/:/:g' -e 's:/[.]/:/:g' -e 's:/[.]/:/:g' -e 's:/[.]/:/:g' \
  > "${tfile}"

# Check for bad characters and line breaks.  The checker receives the
# name of the error file in its variable {efile}; its stdout is saved
# as the list of good entries.
list_bad_filenames.gawk \
    -v efile="${efile}" \
  < "${tfile}" \
  > "${gfile}"
wc -l "${gfile}" 1>&2

# Older inline version of the check, kept for reference.
# NOTE(review): presumably equivalent to {list_bad_filenames.gawk} -- confirm.
# gawk \
#   -v efile="${efile}" \
#   ' (NF != 3) { printf "(0) %s\n", $0 > efile; next; } # Embedded blanks.
#     /[^ /][\/][\/][^ ]/ { printf "(1) %s\n", $0 > efile; next; } # Embedded "//".
#     /["\\]/ { printf "(2) %s\n", $0 > efile; next; } # Embedded doublequotes, backslashes.
#     /['"'"']/ { printf "(3) %s\n", $0 > efile; next; } # Embedded single quotes.
#     ($1 !~ /^[0-9]+$/) { printf "(4) %s\n", $0 > efile; next; } # Embedded newlines?
#     ($2 !~ /^(19[7-9][0-9]|20[0-3][0-9])-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])-[0-9]+([.][0-9]+|)$/) { printf "(5) %s\n", $0 > efile; next; } # Embedded newlines?
#     ($3 !~ /^[\/][\/].+[\/][\/]$/) { printf "(6) %s\n", $0 > efile; next; }
#     // { print; }
#   ' \
#   ${tfile} \
#   > ${gfile}

# If there were any bad filenames, show them:
if [[ -s "${efile}" ]]; then
  echo '** bad filenames found:' 1>&2
  cat "${efile}" 1>&2
fi

# Remove the "//" delimiters around each pathname, then hand the listing
# to the selected nanosecond filter, whose stdout is the final output.
# echo "fixing nanoseconds ..." 1>&2
sed -e 's: //: :' -e 's://$::' "${gfile}" \
  | "${nseccmd[@]}"

# The scratch directory is removed by the EXIT trap set above.
exit 0