#! /bin/awk -f NR==1 { # if((FILENAME == "")||(FILENAME ~ /^-/)) { print "------------------------------------------------------------------------" print "AutoScalePack: James Holton - 9-18-04" print "" print "Automatically adjusts estimated error and error scale factor" print "in a scalepack.com script until the chi^2s are all as close as" print "possible to 1. To use AutoScalePack, enter the name of your" print "_working_ scapepack script on the command line, i.e.:" print "" print "autoscalepack scalepack.com" print "" print "------------------------------------------------------------------------" print "To increase the precision of your estimated error and error" print "scale factor determination, type:" print "" print "autoscalepack decimals=n scalepack.com" print "" print "where n is the number of *additional* decimal places you want." print "The default is 1 decimal place for error scale factor and" print "2 decimal places for estimated error." print "" print "------------------------------------------------------------------------" print "If you want to change the names of the two scripts generated by" print "AutoScalePack, you can specify them on the command line as well:" print "" print "autoscalepack best_script=name test_script=anothername scalepack.com" print "" print "------------------------------------------------------------------------" print "Notes:" print "\tYour scalepack.com must begin with a line like:" print "" print "\t#! /usr/bin/csh -f" print "" print "\tor it may not run properly when called by AutoScalePack." print "" print "\tAlso, since your scalepack.com script will be run many times," print "\tbe sure to comment out your @reject line if your script does not" print "\tconverge by itself." print "" print "------------------------------------------------------------------------" exit } if(!test_script) test_script = "./test_" FILENAME if(!best_script) best_script = "./best_" FILENAME # # The latter will continuously be updated to the best error model # found so far. # # # # # # # # these are convergence criteria # specify the number of decimal places you want to refine # each variable to. if(!decimals) decimals = 0; if(digits) decimals = digits; if(!sdadd_decimals) sdadd_decimals = 2 + decimals; if(!scale_decimals) scale_decimals = 1 + decimals; # defaults for Golden Section search # go ahead and modify these if you're # SURE the best value is bracketed if(! maximum_scale) maximum_scale = 5 if(! maximum_sdadd) maximum_sdadd = 0.3 if(! minimum_scale) minimum_scale = 0.9 if(! minimum_sdadd) minimum_sdadd = 0 # alternately, you may specify "tune=factor" best_so_far = 1000000 line = 0 if(!debug) debug = 0 number_of_zones = 10; } ######################################################################## # Analyze the supplied script & copy it into memory # ######################################################################## # look for the scalepack command line /scalepack/ { if (!scalepack_line) scalepack_line = line; } # copy the script into an array { ++line; ++number_of_lines; script[line] = $0; # now make everything easier to search $0 = tolower($0); $0 = stripcomments($0); # read "estimated error" values beyond the first line if((!comment)&&(number_error > 0)) { # look for "estimated error" entries for(i = 1; (i <= NF)&&(number_error>0); ++i) { # build an array of "estimated error" values estimated_error[number_error] = $i+0; if(sdadd < estimated_error[number_error]) sdadd = estimated_error[number_error] --number_error; } # remember last line listing "estimated error" values last_estimated_error_line = line; if (debug) print; } } # look for the "number of zones" entry /^number of zones/ { # get the listed number of zones number_of_zones = $4; } # look for the "estimated error" entry /^estimated error/ { # get the current sd correction parameters number_error = number_of_zones+0; # look for "estimated error" entries for(i = 3; i < NF; ++i) { # build an array of "estimated error" values estimated_error[number_error] = $i+0; if(sdadd < estimated_error[number_error]) sdadd = estimated_error[number_error] --number_error; } estimated_error_line = line; last_estimated_error_line = line; if(debug) print; } # look for the "error scale factor" /^error scale factor/ { # this value should occur on same line scale = $4; # correct for stupid values if(scale <= 0) scale = 1.0; if(sdpart <= 0) sdpart = 1.0; # remember where this line is error_scale_factor_line = line; if (debug) print; } # shouldn't write out rejection files while optimizing! #/^write rejection file/ { # write_reject_line = line; # origional_write_reject_line = script[line]; # # # comment this out # #script[line] = "[write rejection file " $4 "\t[shouldn't write out rejections while optimizing!]"; #} END { # clear print buffer $0 = ""; if((FILENAME == "")||(FILENAME ~ /^-/)) { exit } ######################################################################## # Now begins the "real" program # ######################################################################## # turn off optimization for missing parameters if(error_scale_factor_line == "") { print "\042error scale factor\042 not found in " FILENAME print "\042error scale factor\042 will NOT be optimized" scale = 1 maximum_scale = scale minimum_scale = scale } if(estimated_error_line == "") { print "\042estimated error\042 not found in " FILENAME print "\042estimated error\042 will NOT be optimized" sdadd = 0.06 maximum_sdadd = sdadd minimum_sdadd = sdadd } # user may specify "tuning mode" if(tune > 1) { maximum_scale = scale * tune; minimum_scale = scale / tune; maximum_sdadd = sdadd * tune; minimum_sdadd = sdadd / tune; } maximum_scale = round(maximum_scale, scale_decimals); minimum_scale = round(minimum_scale, scale_decimals); maximum_sdadd = round(maximum_sdadd, sdadd_decimals); minimum_sdadd = round(minimum_sdadd, sdadd_decimals); print minimum_scale " < error scale factor < " maximum_scale print minimum_sdadd " < estimated error < " maximum_sdadd # signal to optimize ... max_scale = max_sdadd = "restart" done = 0; while(!done) { # write the script (and generate printed value strings) WriteScript(test_script); if(value[printed_scale printed_sdadd] != 0) { # we have already caclulated this value RMSD_sigma = value[printed_scale printed_sdadd] if(debug) print "recall: " printed_scale " and" printed_sdadd "\t\tRMSD(sigma): " RMSD_sigma*100 "\tbest so far: " best_so_far*100; } else { # we havn't run this value before # run it, and filter output printf "trying: " printed_scale " and" printed_sdadd printed_pad; GetResults(test_script); # remember all values obtained (to avoid repeats) value[printed_scale printed_sdadd] = RMSD_sigma; # print out rating of this run printf"RMSD(chi**2): " RMSD_sigma*100; # update absolute best result if(RMSD_sigma < best_so_far) { best_so_far = RMSD_sigma; best_sdcorr = printed_scale printed_sdadd; # write out best scripts immediately WriteScript(best_script); # print out the values used, and their effect print "\tbest so far: " best_so_far*100; } else { # finish the line print ""; } } # pick next values based on last run... if(NextSDscale()) # ...has converged { # "inner" loop done, so pick next sdadd value if(NextSDadd()) # ...has converged { # both have converged, so we are done done = "true"; } # sdadd not done, so reoptimize scale with new sdadd max_scale = "restart"; } if(debug > 1) done = 1; } # update scalepack's output file to best values GetResults(best_script); # update variables for output split(best_sdcorr, best); scale = best[1]; sdadd = best[2]; # write out unmolested script script[write_reject_line] = origional_write_reject_line; WriteScript(best_script); print "\n\nSUMMARY:\n" printf("error scale factor %s\n", printed_scale); printf("estimated error(s) %s\n", printed_sdadd); printf("RMSD of chi**2 %s\n", best_so_far); print "\n" # tell user what to do next print "***********************" print "*** ALL DONE!!! ***" print "***********************" print "Your new " FILENAME " can be found at: " best_script } ######################################################################## # Functions used in this script # ######################################################################## ######################################################################### # NextSDscale() # # # # Updates optimizing parameters based on observed output from # # the scalepack script. # # # # uses: RMSD_sigma # # # # modifies: scale # # # # contains: max_scale # # min_scale # # best_scale # # # # value[] # # # ######################################################################### function NextSDscale() { # this should be known first value[scale] = RMSD_sigma; # pick some reasonable limits if (max_scale == "restart") { min_scale = minimum_scale; value[max_scale] = 100000; max_scale = maximum_scale; value[max_scale] = 100000; best_scale = min_scale; value[best_scale] = 100000; } # use the Golden Section method to find minimum scale = GoldStep(min_scale, scale, best_scale, max_scale); min_scale = Gold_min; max_scale = Gold_max; best_scale = Gold_best; # if(round(scale, scale_decimals) == 0) scale = 1; # just for monitoring... scale_step = scale - best_scale; # update move counter ++move; # check for convergence if(max_scale - min_scale < 0.1^scale_decimals) { #convergence reached doneness = "true"; } else { doneness = ""; } # in case you're intetested... return doneness; } ######################################################################### # NextSDadd() # # # # Updates optimizing parameters based on observed output from # # the agrovata script. # # # # uses: RMSD_sigma # # # # modifies: sdadd # # # # contains: max_sdadd # # min_sdadd # # best_sdadd # # # # value[] # # # ######################################################################### function NextSDadd() { # this should be known first value[sdadd] = RMSD_sigma; # pick some reasonable limits if (max_sdadd == "restart") { min_sdadd = minimum_sdadd; value[max_sdadd] = 100000; best_sdadd = min_sdadd - min_sdadd_step/10; value[best_sdadd] = 100000; max_sdadd = maximum_sdadd; value[max_sdadd] = 100000; } # use the Golden Section method to find minimum sdadd = GoldStep(min_sdadd, sdadd, best_sdadd, max_sdadd); min_sdadd = Gold_min; max_sdadd = Gold_max; best_sdadd = Gold_best; # just for monitoring... sdadd_step = sdadd - best_sdadd; # update move counter ++move; # check for convergence if(max_sdadd - min_sdadd < 0.1^sdadd_decimals) { #convergence reached doneness = "true"; } else { doneness = ""; } # in case you're intetested... return doneness; } ######################################################################### # WriteScript(filename) # # # # Writes the script contained in the script[] array to the # # file given in "filename." # # The "error scale factor" and "estimated error" lines # # however, are rewriten with the existing scale and sdadd values. # # # # uses: script[] # # scale # # sdadd # # error_scale_factor_line # # estimated_error_line # # last_estimated_error_line # # number_estimated_error # # number_of_zones # # # ######################################################################### function WriteScript( filename ) { # round off values to proper significant figures printed_scale = " " round(scale, scale_decimals) printed_sdadd = " " round(sdadd, sdadd_decimals) # compute pad length for clean console printing printed_pad = ""; for(i=0;i < 20 - length(printed_scale printed_sdadd); ++i) {printed_pad = printed_pad " "} # make sure it gets executed properly if(script[1] !~ /^#! /) { print "#! /bin/csh -f" > filename; } # edit script lines and print them out for(line = 1; line <= number_of_lines; ++line) { # reset the line that contained "error scale factor" to new value if(line == error_scale_factor_line) { script[line] = "error scale factor" printed_scale; } # print out the estimated errors (all the same) if(line == estimated_error_line) { print "estimated error" > filename; # print 5 estimated errors on each line for(line = 1; line <= int(number_of_zones/5); ++line) { for(i = 1; i <= 5; ++i) { printf printed_sdadd > filename; } print "" > filename; } # in case there are any left for(i = 1; i <= number_of_zones%5; ++i) { printf printed_sdadd > filename; } #if(!(number_of_zones%5)) print "" > filename; # restore printing of rest of script line = last_estimated_error_line; } else { # print script[line] print script[line] > filename; } } # close the file close(filename); #change it to an executable; system("chmod u+x " filename); } ######################################################################### # GetResults(filename) # # # # Runs the script given as "filename" and analyzes the chi**2 # # values given at the end of the file # # The RMS deviation from 1.0 of the chi**2s is computed # # and placed in RMSD_sigma. # # # # modifies: RMSD_sigma # # # ######################################################################### function GetResults ( filename ) { # to start off on right foot... lines_to_go = -1; # report what we're doing # print "running " filename "..." # get rid of old rejection files system("rm -f ./reject "); # analyze stdout from the script while(filename | getline > 0) { # look for the LAST chi**2 table if ($0 ~ /R-factors by shells/) { # start counting down to graph data lines_to_go = 9; number_of_chisquares = 0; } if (lines_to_go > 0) { #count down --lines_to_go; } if (lines_to_go ==0) { # we are reading the first line of graph data # read until we hit the summary line for(i = 0; $0 !~ /All/; ++i) { # collect "scatter/SD" data... scatter[i] = substr($0, 43) + 0; ++number_of_chisquares; if(debug) print $0 " ==> " scatter[i]; # grab next line and loop again... filename | getline } # done with the graph lines_to_go = -1; } } # close the pipe! close(filename); # compute deviation from "evenness" for chi^2s deviation_in_scatt = 0; for(i = 0; i < number_of_chisquares; ++i) { # compute RMS deviation from ideal chi^2 (1.00) deviation_in_scatt += (scatter[i] - 1)^2; } # update global variable if(number_of_chisquares) { RMSD_sigma = sqrt(deviation_in_scatt/number_of_chisquares) } else { print "ERROR: No statistics generated by " filename; RMSD_sigma = "???"; } } ######################################################################### # GoldStep(min, last, best, max) # # # # Computes the Golden (next) step to minimize a variable # # # # modifies: # # # ######################################################################### function GoldStep(min, last, best, max) { # debugging... if(debug) printf "\n" min " " best " " max " ==GoldStep==> " # eliminate sections that cannot contain the minimum if(last >= best) { # we tried a larger value last time if(value[last] <= value[best]) { # minimum must be somewhere between "best" and "max" min = best; best = last; } else { # minimum must be somewhere between "min" and "last" max = last; } } else { # we tried a smaller value last time if(value[last] <= value[best]) { # minimum must be somewhere between "min" and "best" max = best; best = last; } else { # minimum must be somewhere between "last" and "max" min = last; } } # now decide what width to try next if((max - best) > (best - min)) { # next value should be larger Gold_next = best + 0.38*(max - best) } else { # next value should be smaller Gold_next = best - 0.38*(best - min) } # save the parameters as Globals (God! this is awful!) Gold_min = min; Gold_max = max; Gold_next = Gold_next; Gold_best = best; # debugging... if (debug) printf min " " best " " max "\t" return Gold_next; } function abs(number) { return sqrt(number^2); } function round(number, sigdigs) { # create a format string from sigdigs variable formatstring = "%." sigdigs "f" return sprintf(formatstring, number); } function stripcomments(string) { if(!comment) comment = 0; outstring = ""; for(i = 1; i <= length(string); ++i) { character = substr(string, i, 1); if("[" == character) comment = 1; if((comment)&&("]" == character)) comment = 0; if((!comment)&&("]" != character)) outstring = outstring character; } return outstring; }