#! /bin/tcsh -f
#
#   list all image files in chronological order
#   write a script for deleting anything that is backed up
#
#   usage:  this_script [disk_directory]
#
#   The optional argument is the directory tree to scan; it defaults to
#   /data/$USER.  Work files (purge.com, archive_logs.txt, archived.log,
#   system_dvds.log, notsystem_dvds.log, archive_counts.log, archive_info.txt,
#   archive_anomalies.txt, need_system_backup.txt, ondisk.log,
#   watchdoglogs.txt, recentlogs.txt) are written to the current directory.
#   The full image catalog goes to /data/log/image_history_<stream>_<date>.log
#
#   Nothing is deleted here: purge.com only lists the rm commands.
#
set disk = /data/$USER
if (-d "$1") set disk = "$1"

# see what time it is
set today = `date +"%m %d %y" | awk '{print $1+0"-"$2+0"-"$3}'`
set two_weeks_ago = `echo "puts [expr [clock seconds] - 2*7*24*60*60]" | tclsh`

echo "checking image files on $disk"

# stream name: last component of the disk path, with the top-level directory
# appended when that directory is not "data"
set stream = `echo "$disk" | awk -F "[ /]" '$NF==""{$NF=$(NF-1)} $2!="data"{$NF=$NF"_"$2} {print $NF}'`
set lastlog = /data/log/last_system_${stream}_backup.txt
set outlog = /data/log/image_history_${stream}_${today}.log

# find the last archived file date
# the record is either whitespace-separated or "|"-separated; in both cases
# the last field is taken as the name of the last archived file
echo "checking $lastlog"
grep -q "|" $lastlog >& /dev/null
if($status) then
    set lastfile = `awk '{print $NF}' $lastlog`
else
    set lastfile = `awk -F "[|]" '{print $NF}' $lastlog`
endif

# use the mtime of that file if it is still on disk, otherwise the mtime of
# the backup record itself
if(-e "$lastfile") then
    set lasttime = `echo "$lastfile" | awk 'NF>0{print "puts \"[file mtime "$0"] "$0"\""}' | tclsh | sort -n | tail -1 | awk '{print $1}'`
else
    set lasttime = `echo "$lastlog" | awk 'NF>0{print "puts \"[file mtime "$0"] "$0"\""}' | tclsh | sort -n | tail -1 | awk '{print $1}'`
endif

# last resort: look the file name up in the ten most recent watchdog logs
# (field 7 of a watchdog log line is treated as an epoch time)
if ("$lasttime" == "") then
    set stream = `echo "$disk" | awk -F "[ /]" '$NF==""{$NF=$(NF-1)} {print $NF}'`
    set recentlogs = `ls -1rt /data/log/watchdog.log.* | tail -10`
    set lasttime = `grep "$lastfile" $recentlogs | awk '{print int($7);exit}'`
endif

# an empty value would make the numeric comparison below a syntax error;
# epoch 0 means no image is old enough to be listed in purge.com
if ("$lasttime" == "") then
    echo "WARNING: cannot determine the time of the last backup; no files will be scheduled for deletion"
    set lasttime = 0
endif

set lastdate = `echo "puts [clock format $lasttime]" | tclsh`
echo "last backed-up file on $lastdate"

if($lasttime > $two_weeks_ago) then
    echo "will keep last two weeks on disk..."
    set lasttime = $two_weeks_ago
endif
set lastdate = `echo "puts [clock format $lasttime]" | tclsh`
echo "preparing to delete files before $lastdate"

# uncompress anything that was compressed
echo "looking for compressed images"
find "$disk" -name '*.img.Z' -exec uncompress \{\} \; -print
find "$disk" -name '*.img.gz' -exec gunzip \{\} \; -print
find "$disk" -name '*.img.bz2' -exec bunzip2 \{\} \; -print

# find all images
# $outlog columns: running_total GB epoch bytes weekday month day time year path
# purge.com gets an echo/rm pair for every image not newer than $lasttime,
# with shell metacharacters in the file name backslash-escaped
echo "cataloging all images..."
find "$disk" -name '*.img' -printf "%T@ %s %t %p\n" |\
sort -n |\
awk '{sum+=$2} {printf "%6.1f GB %s\n", sum/1024/1024/1024, $0}' |\
tee $outlog |\
awk -v lasttime=$lasttime '$3<=lasttime{\
    file=$NF;escfile="";for(i=1;i<=length(file);++i){c=substr(file,i,1);\
    if(c~/[\134\041\042#$&\047\050\051\052\140\073\077 ]/) c="\\"c;\
    escfile=escfile c}\
    print "echo "$1" GB freed up to", $6,$7,$8,$9;\
    print "echo removing", escfile; print "rm -f", escfile}' |\
cat >! purge.com

set images = `cat $outlog | wc -l`
echo "found $images images"

# everything below needs at least one catalogued image: the earliest image
# is an operand of find -newer and of the awk epoch filters
if ("$images" == "0") then
    echo "no images found on $disk ; nothing to check"
    exit 0
endif

set earliest_image = `awk 'NF>9{print $NF;exit}' $outlog`
set earliest_image_date = `awk 'NF>9{print $6,$7,$8,$9;exit}' $outlog`
set earliest_image_epoch = `awk 'NF>9{print $3;exit}' $outlog`

echo "checking archiver records"
# check the archiver logs
set depth = ""
set pattern = `echo "$disk" | awk '{gsub("/"," ");print}' | awk '{print $NF}'`
set years = `awk '{print $9}' $outlog | sort -u`
set searchpaths = ""
foreach year ( $years )
    if(! -e /archive/${year}) then
        # no per-year directory: search all of /archive/ instead
        set year = ""
        #set depth = "-maxdepth 1"
    endif
    set searchpaths = ( $searchpaths /archive/${year} )
end

# the search paths can repeat or contain one another (for example /archive/
# next to /archive/<year>), so identical find output lines are collapsed with
# sort -u; otherwise the same file list would be counted as several backups
# file lists that pre-date our earliest image are filtered out
echo "looking in $searchpaths for file lists containing $pattern "
find $searchpaths $depth -newer $earliest_image \
 \( -name 'files_????.list' -o -name 'files_?????.list' \) \
 -exec grep -q "$pattern" \{\} \; \
 -printf "%T@ %p\n" |\
sort -u |\
sort -n |\
awk -v epoch=$earliest_image_epoch '$1>epoch' |\
cat >! archive_logs.txt

set test = `cat archive_logs.txt | wc -l`
echo "found $test"

# filter out ones that still have iso files?

# record the "Date of backup"
# archived.log lines: ARCHIVED list_epoch list_serial_number <line of the file list>
cat archive_logs.txt |\
awk '{n=split($NF,w,"_");split(w[n],sn,".");print "DATE",$1,sn[1]; system("cat "$2)}' |\
awk '/^DATE/{date=$2;num=$3;next} \
     {print "ARCHIVED",date,num,$0}' |\
cat >! archived.log

set archived = `cat archived.log | wc -l`
set earliest_archive_epoch = `awk '{print $2;exit}' archived.log`
if ("$earliest_archive_epoch" != "") then
    set earliest_archive_date = `echo "$earliest_archive_epoch" | awk '{print "puts \"[clock format "$1"]\""}' | tclsh`
    echo "$archived images have been archived since $earliest_archive_date"
endif

# figure out which DVDs are system copies
awk '/submitting to robot as/{printf "SYSTEM_DVD %04d %s\n",$NF,FILENAME}' /data/log/backup_rimage_system* |\
cat >! system_dvds.log
set notsystem = `ls -1rt /data/log/backup_rimage_* | grep -v system`
awk '/submitting to robot as/{printf "NONSYSTEM_DVD %04d %s\n",$NF,FILENAME}' $notsystem |\
cat >! notsystem_dvds.log

# check how many times each image on disk has (possibly) been backed up
# on-disk names are mapped onto /data/... before the lookup, and a second
# form with the first directory below /data dropped is tried when the first
# form has no system-archive entry
cat system_dvds.log notsystem_dvds.log archived.log $outlog |\
awk '/^SYSTEM_DVD/{systemdvd[$2]=$3;next} \
/^NONSYSTEM_DVD/{nonsystemdvd[$2]=$3;next} \
/^ARCHIVED/{file=$NF;\
    archived[file]=$2;dvd[file]=$3;++count[file];\
    if(insystem[file]==""){insystem[file]=systemdvd[$3]};next} \
$2=="GB"{epoch=$3;file=$NF;\
    file=substr(file,index(file,"data")-1);\
    gsub("^/home/|^/data2/","/data/",file);\
    split(file,w,"/");\
    datafile="/data/"substr(file,length(w[2])+3);\
    if(insystem[file]==""){file=datafile};\
    printf "%d %d %d %s %s\n",count[file],(insystem[file]!=""),archived[file],dvd[file],file}' |\
cat >! archive_counts.log
# format: number_of_backups in_system_archive epoch_archived dvd_SN filename

# do not delete files that have not been backed up more than once
cat archive_counts.log |\
awk '$1==0 {print "no backups at all of",$NF;next}\
$1==1 && $2==1{print "only system backup of",$NF;next}\
$2==0{print "no system backup of",$NF,"user DVD: "$4;next}\
{print $1,"backups of",$NF}' |\
cat >! archive_info.txt
grep -v "backups of" archive_info.txt >! archive_anomalies.txt

cat archive_anomalies.txt |\
awk '/no backups at all/{print $NF}' |\
cat >! need_system_backup.txt
set backup_failures = `cat need_system_backup.txt | wc -l`
echo "$backup_failures images still need to be archived"
if ("$backup_failures" != "0") then
    echo "$backup_failures images still need to be archived" |\
    cat - need_system_backup.txt | less
endif

# now look for files that are out of place
# ondisk.log columns: epoch month day time year path
cat $outlog |\
awk '{print $3,$6,$7,$8,$9,$10}' |\
cat >! ondisk.log
set earliest = `sort -n ondisk.log | awk '{print $1;exit}'`
set newest = `sort -nr ondisk.log | awk '{print $1;exit}'`

# pick the watchdog logs that overlap the time span of the images on disk
set recentlogs = ""
echo -n "" >! recentlogs.txt
ls -1t /data/log/ |\
awk '/watchdog.log/{print "/data/log/"$0}' |\
cat >! watchdoglogs.txt
set lines = `cat watchdoglogs.txt | wc -l`
set line = 0
# logs are visited newest first
while ( $line < $lines )
    @ line = ( $line + 1 )
    set watchdog = `awk "NR==$line" watchdoglogs.txt`
    # skip a log whose last ten lines are all newer than the newest image
    set toonew = `tail -10 $watchdog | awk -v newest=$newest '$7>newest{toonew=1;exit} END{print toonew+0}'`
    if($toonew) continue
    # NOTE(review): the text between "$7" and "> recentlogs.txt" was missing
    # from this copy of the script.  The test below mirrors the toonew test
    # and the next two lines are the minimum needed by the later use of
    # $recentlogs and recentlogs.txt - confirm against the original.
    set tooold = `head -1 $watchdog | awk -v earliest=$earliest '$7<earliest{tooold=1;exit} END{print tooold+0}'`
    # prepend so the list ends up oldest first (assumed order - confirm)
    set recentlogs = ( $watchdog $recentlogs )
    echo "$watchdog" >> recentlogs.txt
    # a log that starts before the earliest image is the last one needed
    if("$tooold" == "1") break
end
echo "checking recent logs..."
# Replay the selected watchdog logs and emit one line per collected image:
#   epoch <four date/time fields from the log line> collected <image path>
# A movedExistingFile note inherits the collection date of the file it was
# moved from when that date is already known.  Every 100th record a progress
# line is printed to the terminal.
# /dev/null is listed first so that cat never falls back to reading standard
# input (and blocking) when $recentlogs is empty.
cat /dev/null $recentlogs |\
awk '/start_operation collectFrame /{filename=$15"/"$14".img";gsub("//","/",filename)}\
/operation_completed collectFrame/ {date[filename]=int($7)" "$2" "$3" "$4" "$6;\
    print date[filename],"collected",filename}\
/stog_note movedExistingFile/{gsub("//","/",$NF);\
    date[$NF]=int($7)" "$2" "$3" "$4" "$6;\
    orig=$NF;gsub("OVERWRITTEN_FILES/","",orig);\
    if(date[orig]){date[$NF]=date[orig]};\
    print date[$NF],"collected",$NF;\
}' |\
tee collected_unsorted.log |\
awk 'NR%100==0{n=split($NF,w,"/");printf "%s %s %s %s %d images collected: %-80s\r",$2,$3,$4,$5,NR,w[n]}'
sort -n collected_unsorted.log >! collected.log

# Compare what was collected with what is on disk.  collected.log is read
# first, so every "collected" record is known before the ondisk.log lines.
#   - same path, mtime within 100 s of collection: fine, print nothing
#   - neither path nor base name was ever collected: "never collected"
#   - base name collected exactly once under another path: "moved", plus a
#     suggested touch command when the mtime is more than 100 s off
#   - base name collected more than once: "multiplet"
# An on-disk file at its collected path whose mtime is more than 100 s after
# collection matches none of the rules and is not reported.
cat collected.log ondisk.log |\
awk '{n=split($NF,w,"/");base=w[n]}\
$6=="collected"{collected[$NF]=$1;truedate[$NF]=$2" "$3" "$4" "$5;\
    collected[base]=$1;fullname[base]=$NF;++seen[base];next}\
collected[$NF] && $1-collected[$NF]<=100{next}\
! collected[$NF] && ! collected[base]{print "never collected",$NF;next} \
! collected[$NF] && collected[base] && seen[base]==1{print "moved",fullname[base],"to",$NF;\
    if($1-collected[base]>100) print "touch -c --date=\""truedate[fullname[base]]"\"",$NF;next} \
! collected[$NF] && collected[base] && seen[base]>1{print "multiplet",fullname[base],"in",$NF;next}' |\
tee anomalies.log |\
less

#rm -f ondisk.log collected.log

echo ""
echo "all current image files are listed in $outlog "
echo "purge.com is a script that will delete all images prior to $lastdate"
echo "please edit purge.com to avoid deleting any important files! "
echo "then run: source purge.com"