# Last edited on 2024-08-05 16:28:55 by stolfi

# NOTE: this is a step-by-step notebook rather than a script to be run
# top to bottom: the "On manaus" and "On vostro" sections are executed
# on different hosts, and the text below records that the whole
# procedure was repeated by hand after a cleanup.

# SYNCHRONIZING MOVES AND DELETIONS BETWEEN VOSTRO AND MANAUS

# On manaus:
#
# Builds the sorted ".csdf" listing for this host.  Judging by how the
# fields are used further down, each record is
# "checksum size date name" -- the helper script is not shown here, so
# confirm against it.

  now="2024-08-03"
  cd ~/projects/wikipedia
  find_all_files_cksum_size_date.sh ./ ./chemistry/ \
    | sort -b \
    > ".all-files-wp-manaus-${now}.csdf"

# On vostro:

  now="2024-08-03"
  cd ~/projects/wikipedia
  find_all_files_cksum_size_date.sh ./ ./chemistry/ \
    | sort -b \
    > ".all-files-wp-vostro-${now}.csdf"

  # Fetch the manaus listing.  The transfer runs in the background;
  # it must have finished before the next step reads the file.
  now="2024-08-03"; rsync -avzu "${SMAN}:projects/wikipedia/.all-files-wp-manaus-${now}.csdf" ./ &

# Finding possibly duplicated files:

  for m in vostro manaus; do
    csdf_list_duplicated_files.gawk \
      < ".all-files-wp-${m}-${now}.csdf" \
      > ".dup-files-wp-${m}-${now}.csdf"
  done

# Extracting the non-junk files with checksum+size joined:
#
# Records ending in ".js", ".css", "~" or "#" are dropped, as are
# records whose second field (the size) is zero.  Fields 1 and 2 are
# fused into a single "checksum.size" key, with the size zero-padded to
# 14 digits, followed by fields 3 and 4.

  for m in vostro manaus; do
    grep -E -v -e '[.](js|css)$' -e '[~#]$' ".all-files-wp-${m}-${now}.csdf" \
      | gawk '($2+0 != 0) { printf "%s.%014d %s %s\n", $1, $2, $3, $4; }' \
      | sort \
      > ".all-${m}.xdf"
  done

# Extracting the differences:
#
# "bool" is an external tool not shown here; presumably "1-2" yields the
# lines of the first file that are absent from the second and "2-1" the
# converse -- TODO confirm.

  bool 1-2 .all-vostro.xdf .all-manaus.xdf > .only-vostro.xdf
  bool 2-1 .all-vostro.xdf .all-manaus.xdf > .only-manaus.xdf

# Joining the differences by checksum+size:
#
# File 1 is the manaus list and file 2 the vostro list.  Each output
# record is: key, manaus date, vostro date, manaus name, vostro name.
# Unpairable records are kept too (-a1 -a2), with "???" filling the
# fields of the missing side.

  join \
      -j1 -a1 -a2 -e '???' -o 0,1.2,2.2,1.3,2.3 \
      .only-manaus.xdf .only-vostro.xdf \
    > .join.xddff

# Checking for same checksum, size, name but different dates (except seconds):
#
# Only the first 15 characters of each date are compared, which is how
# the seconds are left out.  Records present on one host only ("???")
# and records whose names differ are skipped, so what remains are files
# with the same key and the same name whose dates disagree.
# NOTE(review): the name test follows the step description above (same
# name required); confirm that this is the intended selection.

  gawk '
      {
        dt1 = substr($2,1,15); dt2 = substr($3,1,15);
        na1 = $4; na2 = $5;
        if ((dt1 == "???") || (dt2 == "???")) { next; }
        if (na1 != na2) { next; }
        if (dt1 != dt2) { print; }
      }
    ' .join.xddff \
    > .difdates.xddff

# Created a file ".cleanup-2024-08-04.txt" to remove garbage and
# duplicate files.  Executed it on vostro and manaus.  Saved the logs.
# Repeated everything.

# MOVING ALL DOCS TO 00-DOCS
#
# The two list files are read one file name per line, so that names
# containing blanks or glob characters reach the mover script intact.
# Empty lines are ignored.  The subshell keeps the helper variables out
# of the interactive session.

  (
    mapfile -t doc_files < <(cat .pdf-files .html-files)
    for fp in "${doc_files[@]}"; do
      [[ -n "${fp}" ]] || continue
      move_to_00-DOCS.sh "${fp}"
    done
  )