#! /usr/bin/awk -f BEGIN { # # # Automatically adjusts SDCORR card in SCALA for "optimum" statistics # as given by agrovata. # # The provided script MUST generate mergeing statistics (agrovata or scala). # The following scripts must be creatable. # # test_script = "./" FILENAME "_test" # best_script = "./" FILENAME "_best" # # The latter will continuously be updated to the best SDCORR line # found so far. # # # # # # # # these are convergence criteria # specify the number of decimal places you want to refine # each variable to. sdfac_decimals = 2 sdprime_decimals = 0 sdadd_decimals = 2 # defaults for Golden Section search # go ahead and modify these if you're # SURE the best value is bracketed maximum_sdprime = 15 maximum_sdadd = 0.1 minimum_sdprime = 0 minimum_sdadd = 0 # alternately, you may specify "tune=factor" best_so_far = 1000000 line = 0 if(!debug) debug = 0 } # finish up initialization (for linux awk) NR==1{ if((!test_script)&&(FILENAME)) test_script = "./" FILENAME "_test" if((!best_script)&&(FILENAME)) best_script = "./" FILENAME "_best" if((!test_log)&&(FILENAME)) test_log = "./" FILENAME ".log" if(!test_script) test_script = "./scala.com_test" if(!best_script) best_script = "./scala.com_best" if(!test_log) test_log = "./scala.com.log" } ######################################################################## # Analyze the supplied script & copy it into memory # ######################################################################## # copy the script into an array { ++line; ++number_of_lines; script[line] = $0; # now make everything easier to search $0 = tolower($0); } # look for the "SDCORR" card in scala /^sdcorr/ { # get the current sd correction parameters sdfac = $2; if(NF == 4) { sdprime = $3; sdadd = $4; } else { # no sdprime specified sdprime = 0; sdadd = $3; } sdcorr_line = line; } END { ######################################################################## # Now begins the "real" program # ######################################################################## # user may specify "tuning mode" if(tune > 1) { if(sdprime) { maximum_sdprime = sdprime * tune; minimum_sdprime = sdprime / tune; } if(sdadd) { maximum_sdadd = sdadd * tune; minimum_sdadd = sdadd / tune; } } # signal to optimize ... max_sdfac = max_sdprime = max_sdadd = "restart" done = 0; while(!done) { # write the script WriteScript(test_script); if(value[script[sdcorr_line]] != 0) { # we have already caclulated this value RMSD_sigma = value[script[sdcorr_line]] if(debug>1) print "recall: " script[sdcorr_line] "\t\tRMSD(sigma): " RMSD_sigma*100 "\tbest so far: " best_so_far*100; } else { # we havn't run this pair before # run it, and filter output printf "trying: %s ", script[sdcorr_line] sdcorr_pad; GetResults(test_script); # remember all values obtained (to avoid repeats) value[script[sdcorr_line]] = RMSD_sigma; # print out rating of this run printf "RMS(scatter/sigma -1): %8.5f ", RMSD_sigma; # update absolute best result if(RMSD_sigma < best_so_far) { best_so_far = RMSD_sigma; best_sdcorr = script[sdcorr_line]; # write out best scripts immediately WriteScript(best_script); # print out the values used, and their effect printf "best so far: %8.5f ", best_so_far; } # finish the line print ""; } # pick next values based on last run... if(NextSDadd()) { # ...has converged # "inner" loop done, so pick next sdprime value if(NextSDprime()) { # ...has converged # both have converged, so we are done done = "true"; } # reoptimize sdadd with new sdprime max_sdadd = "restart"; } } # Main loop has exited. # parameters have converged, so finish up # update scala's output files to best values #GetResults(best_script); # update variables for output split(best_sdcorr, best); # sdfac = best[2]; sdfac = sprintf("%." sdfac_decimals "f", best_sdfac); sdprime = best[3]; sdadd = best[4]; # get rid of "test" script system("rm -f " test_script) # update the "best" script WriteScript(best_script); if(summary != "no") # option for cleaner output { print "\n\nSUMMARY:\n" printf("%s ==> RMSD = %.4f\n", best_sdcorr, value[best_sdcorr]); print "\n" print "****************************************" print "** BEST CARD FOUND **" printf "** %21s **\n", script[sdcorr_line]; print "****************************************" print "\n" # tell user what to do next print "***********************" print "*** ALL DONE!!! ***" print "***********************" print "Your new " FILENAME " can be found at: " best_script } } ######################################################################## # Functions used in this script # ######################################################################## ######################################################################### # NextSDprime() # # # # Updates optimizing parameters based on observed output from # # the agrovata script. # # # # uses: RMSD_sigma # # # # modifies: sdprime # # # # contains: max_sdprime # # min_sdprime # # best_sdprime # # # # value[] # # # ######################################################################### function NextSDprime() { # offset SDprime to keep from interfering with sdfac in value[] sdprime += 100; best_sdprime += 100; # this should be known first value[sdprime] = RMSD_sigma; # pick some reasonable limits if (max_sdprime == "restart") { min_sdprime = minimum_sdprime +100; value[max_sdprime] = 100000; best_sdprime = min_sdprime; value[best_sdprime] = 100000; max_sdprime = maximum_sdprime +100; value[max_sdprime] = 100000; } # use the Golden Section method to find minimum sdprime = GoldStep(min_sdprime, sdprime, best_sdprime, max_sdprime); min_sdprime = Gold_min; max_sdprime = Gold_max; best_sdprime = Gold_best; # just for monitoring... sdprime_step = sdprime - best_sdprime; # update move counter ++move; # now move sdprime back to true value sdprime -= 100; best_sdprime -= 100; # check for convergence if(max_sdprime - min_sdprime < 0.1^sdprime_decimals) { #convergence reached doneness = "true"; } else { doneness = ""; } # in case you're intetested... return doneness; } ######################################################################### # NextSDadd() # # # # Updates optimizing parameters based on observed output from # # the agrovata script. # # # # uses: RMSD_sigma # # # # modifies: sdadd # # # # contains: max_sdadd # # min_sdadd # # best_sdadd # # # # value[] # # # ######################################################################### function NextSDadd() { # this should be known first value[sdadd] = RMSD_sigma; # pick some reasonable limits if (max_sdadd == "restart") { min_sdadd = minimum_sdadd; value[max_sdadd] = 100000; best_sdadd = min_sdadd - min_sdadd_step/10; value[best_sdadd] = 100000; max_sdadd = maximum_sdadd; value[max_sdadd] = 100000; } # use the Golden Section method to find minimum sdadd = GoldStep(min_sdadd, sdadd, best_sdadd, max_sdadd); min_sdadd = Gold_min; max_sdadd = Gold_max; best_sdadd = Gold_best; # just for monitoring... sdadd_step = sdadd - best_sdadd; # update move counter ++move; # check for convergence if(max_sdadd - min_sdadd < 0.1^sdadd_decimals) { #convergence reached doneness = "true"; } else { doneness = ""; } # in case you're intetested... return doneness; } ######################################################################### # WriteScript(filename) # # # # Writes the script contained in the script[] array to the # # file given in "filename." # # The following cards, however, are rewriten with # # the existing trial values: WIDTH # # # # uses: script[] # # bin_scale # # width # # # ######################################################################### function WriteScript( filename ) { for(line = 1; line <= number_of_lines; ++line) { if(line == sdcorr_line) { script[line] = "SDCORR " sdfac \ " " round(sdprime, sdprime_decimals) \ " " round(sdadd, sdadd_decimals); sdcorr_pad = ""; for(i=0;i < 25 - length(script[line]); ++i) {sdcorr_pad = sdcorr_pad " "} } print script[line] > filename; } # close the file close(filename); #change it to an executable; system("chmod u+x " filename); } ######################################################################### # GetResults(filename) # # # # Runs the script given as "filename" and analyzes the output # # for the agrovata Sigma(scatter/SD) graph. # # The RMS deviation from an even distribution of # # observations per intensity bin is computed as RMSD_width. # # The RMS deviation from 1.0 of the scatter/SD is # # also computed and placed in RMSD_sigma. # # # # modifies: RMSD_width # # RMSD_width_p # # RMSD_sigma # # RMSD_sigma_p # # RMSD_sigma_w # # best_sdfac # # # ######################################################################### function GetResults ( filename ) { # report what we're doing # print "running " filename "..." # analyze stdout from "agrovata" script command = filename # if(test_log) command = filename " | tee " test_log while(command | getline > 0) { # look for the graph entry if ($0 ~ /^ \$GRAPHS: Sigma\(scatter\/SD\)/) { graph = 1 # initialize cumulative variables count_full["sum"]=count_part["sum"]=count["sum"]=0 bins = 0 } if ($1 == "TOTALS:") graph = 0 if (graph && ($0 !~ /[a-z]/) && NF>3) { # we are reading a line of graph data ++bins # gather stats # count_full[bins] = $5 # count_part[bins] = $9 count_full[bins] = substr($0, 38, 9)+0 count_part[bins] = substr($0, 77, 9)+0 count[bins] = count_full[bins]+count_part[bins] # sums count_full["sum"] += count_full[bins] count_part["sum"] += count_part[bins] count["sum"] += count[bins] # scatt_full[bins] = $7-1 # scatt_part[bins] = $NF-1 scatt_full[bins] = substr($0, 55, 8)-1 scatt_part[bins] = $NF-1 scatt[bins] = (scatt_full[bins]+scatt_part[bins])/2 } # look for refined SDfac if($0 ~ /Final assessment of SDcorrection multipliers/) { sdfac_list = 1 } if($0 ~ /Summary/) {sdfac_list = 0} if(sdfac_list) { if(($0 ~ /[0-9]/)&&($0 !~ /[a-z]/)) { if(NF>2) { ++sdfacs sdfac_sum+=(count_full["sum"]*$2+count_part["sum"]*$5)/(count["sum"]) } else { sdfac_list = 0 } } } } # close the pipe! close(filename); # calculate mean SDFAC if(sdfacs) best_sdfac = sdfac_sum/sdfacs # printf "%.2f %.2f %.2f %.2f \n", best_sdfac, count_full["sum"], count_part["sum"], count["sum"] # the scatt and count arrays should now contain the LAST graph in the log if(bins) { # calculate average values count_full["avg"] = count_full["sum"]/bins count_part["avg"] = count_part["sum"]/bins count["avg"] = count["sum"] /bins # and now rmsds count_full["rms"]=count_part["rms"]=count["rms"]=0 scatt_full["rms"]=scatt_part["rms"]=scatt["rms"]=scatt["wrms"]=0 for(i=1; i<=bins; ++i) { count_full["rms"] += (count_full[i]-count_full["avg"])*(count_full[i]-count_full["avg"]) count_part["rms"] += (count_part[i]-count_part["avg"])*(count_part[i]-count_part["avg"]) count["rms"] += (count[i] -count["avg"] )*(count[i] -count["avg"] ) scatt_full["rms"] += scatt_full[i]*scatt_full[i] scatt_part["rms"] += scatt_part[i]*scatt_part[i] scatt["rms"] += scatt[i]*scatt[i] scatt["wrms"] += count_full[i]*scatt_full[i]*scatt_full[i] scatt["wrms"] += count_part[i]*scatt_part[i]*scatt_part[i] } count_full["rms"] = sqrt(count_full["rms"]/bins) count_part["rms"] = sqrt(count_part["rms"]/bins) count["rms"] = sqrt(count["rms"]/bins) scatt_full["rms"] = sqrt(scatt_full["rms"]/bins) scatt_part["rms"] = sqrt(scatt_part["rms"]/bins) scatt["rms"] = sqrt(scatt["rms"]/bins) scatt["wrms"] = sqrt(scatt["wrms"]/count["sum"]) } else { print "ERROR: no stats generated by " filename print "please edit " FILENAME " and make sure it runs by itself." exit } # update global variables RMSD_width = count_full["rms"] RMSD_width_p = count_part["rms"] RMSD_sigma = scatt_full["rms"] RMSD_sigma_p = scatt_part["rms"] RMSD_sigma_w = scatt["wrms"] RMSD_sigma = RMSD_sigma_w } ######################################################################### # GoldStep(min, last, best, max) # # # # Computes the Golden (next) step to minimize a variable # # # # modifies: # # # ######################################################################### function GoldStep(min, last, best, max) { # debugging... if(debug) printf "\n" min " " best " " max " ==GoldStep==> " # eliminate sections that cannot contain the minimum if(last >= best) { # we tried a larger value last time if(value[last] <= value[best]) { # minimum must be somewhere between "best" and "max" min = best; best = last; } else { # minimum must be somewhere between "min" and "last" max = last; } } else { # we tried a smaller value last time if(value[last] <= value[best]) { # minimum must be somewhere between "min" and "best" max = best; best = last; } else { # minimum must be somewhere between "last" and "max" min = last; } } # now decide what width to try next if((max - best) > (best - min)) { # next value should be larger Gold_next = best + 0.38*(max - best) } else { # next value should be smaller Gold_next = best - 0.38*(best - min) } # save the parameters as Globals ( this is awful!) Gold_min = min; Gold_max = max; Gold_next = Gold_next; Gold_best = best; # debugging... if (debug) printf min " " best " " max "\t" return Gold_next; } function abs(number) { return sqrt(number^2); } function round(number, sigdigs) { return int(number*10^sigdigs)/10^sigdigs; }