#! /bin/awk -f
NR==1 {
#
if((FILENAME == "")||(FILENAME ~ /^-/))
{
print	"------------------------------------------------------------------------"
print	"AutoScalePack:				James Holton - 9-18-04"
print	""
print	"Automatically adjusts estimated error and error scale factor"
print	"in a scalepack.com script until the chi^2s are all as close as"
print	"possible to 1.  To use AutoScalePack, enter the name of your"
print	"_working_ scapepack script on the command line, i.e.:"
print	""
print	"autoscalepack scalepack.com"
print	""
print	"------------------------------------------------------------------------"
print	"To increase the precision of your estimated error and error"
print	"scale factor determination, type:"
print	""
print	"autoscalepack decimals=n scalepack.com"
print	""
print	"where n is the number of *additional* decimal places you want."
print	"The default is 1 decimal place for error scale factor and"
print	"2 decimal places for estimated error."
print	""
print	"------------------------------------------------------------------------"
print	"If you want to change the names of the two scripts generated by"
print	"AutoScalePack, you can specify them on the command line as well:"
print	""
print	"autoscalepack best_script=name test_script=anothername scalepack.com"
print	""
print	"------------------------------------------------------------------------"
print	"Notes:"
print	"\tYour scalepack.com must begin with a line like:"
print	""
print	"\t#! /usr/bin/csh -f"
print	""
print	"\tor it may not run properly when called by AutoScalePack."
print	""
print	"\tAlso, since your scalepack.com script will be run many times,"
print	"\tbe sure to comment out your @reject line if your script does not"
print	"\tconverge by itself."
print	""
print	"------------------------------------------------------------------------"
exit
}
	if(!test_script) test_script = "./test_" FILENAME
	if(!best_script) best_script = "./best_" FILENAME
#
#	The latter will continuously be updated to the best error model
#	found so far.
#
#
#
#
#
#
#
	# these are convergence criteria
	# specify the number of decimal places you want to refine
	# each variable to.
	if(!decimals) decimals = 0;
	if(digits) decimals = digits;
	if(!sdadd_decimals) sdadd_decimals = 2 + decimals;
	if(!scale_decimals) scale_decimals = 1 + decimals;

	# defaults for Golden Section search
	# go ahead and modify these if you're 
	# SURE the best value is bracketed
	if(! maximum_scale) maximum_scale   = 5
	if(! maximum_sdadd) maximum_sdadd   = 0.3

	if(! minimum_scale) minimum_scale   = 0.9
	if(! minimum_sdadd) minimum_sdadd   = 0
	
	# alternately, you may specify "tune=factor"

	best_so_far = 1000000
	line = 0
	if(!debug) debug = 0
	
	number_of_zones = 10;
}
########################################################################
#	Analyze the supplied script & copy it into memory              #
########################################################################

# look for the scalepack command line
/scalepack/ {

    if (!scalepack_line) scalepack_line = line;

}

# copy the script into an array
{
    ++line;
    ++number_of_lines;
    script[line] = $0;

    # now make everything easier to search
    $0 = tolower($0);
    $0 = stripcomments($0);
    
    # read "estimated error" values beyond the first line
    if((!comment)&&(number_error > 0))
    {
	# look for "estimated error" entries
	for(i = 1; (i <= NF)&&(number_error>0); ++i)
	{
	    # build an array of "estimated error" values
	    estimated_error[number_error] = $i+0;
	    if(sdadd < estimated_error[number_error]) sdadd = estimated_error[number_error]
	    --number_error;
	}
	
	# remember last line listing "estimated error" values
	last_estimated_error_line = line;

	if (debug) print;
    }
}

# look for the "number of zones" entry
/^number of zones/ {
    # get the listed number of zones
    number_of_zones = $4;
}

# look for the "estimated error" entry
/^estimated error/ {
    # get the current sd correction parameters
    number_error = number_of_zones+0;
    
    # look for "estimated error" entries
    for(i = 3; i < NF; ++i)
    {
	# build an array of "estimated error" values
	estimated_error[number_error] = $i+0;
	if(sdadd < estimated_error[number_error]) sdadd = estimated_error[number_error]
	--number_error;
    }

    estimated_error_line = line;
    last_estimated_error_line = line;

    if(debug) print;
}

# look for the "error scale factor" 
/^error scale factor/ {

    # this value should occur on same line
    scale = $4;

    # correct for stupid values
    if(scale <= 0) scale = 1.0;
    if(sdpart <= 0) sdpart = 1.0;

    # remember where this line is
    error_scale_factor_line = line;

    if (debug) print;
}

# shouldn't write out rejection files while optimizing!
#/^write rejection file/ {
#    write_reject_line = line;
#    origional_write_reject_line = script[line];
#
#    # comment this out
#    #script[line] = "[write rejection file " $4 "\t[shouldn't write out rejections while optimizing!]";
#}


END {
# clear print buffer
$0 = "";

if((FILENAME == "")||(FILENAME ~ /^-/))
{
    exit
}
########################################################################
#	Now begins the "real" program                                  #
########################################################################

    # turn off optimization for missing parameters
    if(error_scale_factor_line == "")
    {
	print "\042error scale factor\042 not found in " FILENAME
	print "\042error scale factor\042 will NOT be optimized"
	scale = 1
	maximum_scale = scale
	minimum_scale = scale
    }
    if(estimated_error_line == "")
    {
	print "\042estimated error\042 not found in " FILENAME
	print "\042estimated error\042 will NOT be optimized"
	sdadd = 0.06
	maximum_sdadd = sdadd
	minimum_sdadd = sdadd
    }

    # user may specify "tuning mode"
    if(tune > 1)
    {
	maximum_scale = scale * tune;
	minimum_scale = scale / tune;

	maximum_sdadd = sdadd * tune;
	minimum_sdadd = sdadd / tune;
    }
    maximum_scale = round(maximum_scale, scale_decimals);
    minimum_scale = round(minimum_scale, scale_decimals);

    maximum_sdadd = round(maximum_sdadd, sdadd_decimals);
    minimum_sdadd = round(minimum_sdadd, sdadd_decimals);

    print minimum_scale " < error scale factor < " maximum_scale
    print minimum_sdadd " <  estimated error < " maximum_sdadd

    # signal to optimize ...
    max_scale = max_sdadd = "restart"
    done = 0;
    
    while(!done)
    {
	# write the script (and generate printed value strings)
	WriteScript(test_script);

	if(value[printed_scale printed_sdadd] != 0)
	{
	    # we have already caclulated this value
	    
	    RMSD_sigma = value[printed_scale printed_sdadd]
	    if(debug) print  "recall: " printed_scale " and" printed_sdadd "\t\tRMSD(sigma): " RMSD_sigma*100 "\tbest so far: " best_so_far*100;
	}
	else
	{
	    # we havn't run this value before
	    
	    # run it, and filter output
	    printf  "trying: " printed_scale " and" printed_sdadd printed_pad;
	    GetResults(test_script);
	
	    # remember all values obtained (to avoid repeats)
	    value[printed_scale printed_sdadd] = RMSD_sigma;

	    # print out rating of this run
	    printf"RMSD(chi**2): " RMSD_sigma*100;

	    # update absolute best result
	    if(RMSD_sigma < best_so_far) 
	    {
		best_so_far = RMSD_sigma;
		best_sdcorr = printed_scale printed_sdadd;

		# write out best scripts immediately
		WriteScript(best_script);

		# print out the values used, and their effect
		print "\tbest so far: " best_so_far*100;
	    }
	    else
	    {
		# finish the line
		print "";
	    }
	}

	# pick next values based on last run...
	if(NextSDscale()) # ...has converged
	{
	    # "inner" loop done, so pick next sdadd value
	    if(NextSDadd()) # ...has converged
	    {
		# both have converged, so we are done
		done = "true";
	    }
	    # sdadd not done, so reoptimize scale with new sdadd
	    max_scale = "restart";
	}
	
	if(debug > 1) done = 1;
    }

    # update scalepack's output file to best values
    GetResults(best_script);

    
    # update variables for output
    split(best_sdcorr, best);
    scale   = best[1];
    sdadd   = best[2];    
    

    # write out unmolested script
    script[write_reject_line] = origional_write_reject_line;
    WriteScript(best_script);
    
    print  "\n\nSUMMARY:\n"
    printf("error scale factor %s\n", printed_scale);
    printf("estimated error(s) %s\n", printed_sdadd);
    printf("RMSD of chi**2     %s\n", best_so_far);
    print  "\n"

    # tell user what to do next
    print "***********************"
    print "***   ALL DONE!!!   ***"
    print "***********************"

    print "Your new " FILENAME " can be found at: " best_script

}
########################################################################
#	Functions used in this script                                  #
########################################################################

#########################################################################
#	NextSDscale()							#
#									#
#	Updates optimizing parameters based on observed output from	#
#	the scalepack script.						#
#									#
#	uses:		RMSD_sigma					#
#									#
#	modifies:	scale						#
#									#
#	contains:	max_scale					#
#			min_scale					#
#			best_scale					#
#									#
#			value[]						#
#									#
#########################################################################
function NextSDscale()
{
    # this should be known first
    value[scale] = RMSD_sigma;

    # pick some reasonable limits
    if (max_scale == "restart") 
    {
	min_scale = minimum_scale;
	value[max_scale] = 100000;

	max_scale = maximum_scale;
	value[max_scale] = 100000;

	best_scale = min_scale;
	value[best_scale] = 100000;
    }

    # use the Golden Section method to find minimum
    scale = GoldStep(min_scale, scale, best_scale, max_scale);
    min_scale = Gold_min;
    max_scale = Gold_max;
    best_scale = Gold_best;

#    if(round(scale, scale_decimals) == 0) scale = 1;


    # just for monitoring...
    scale_step = scale - best_scale;
	
    # update move counter
    ++move;

    # check for convergence
    if(max_scale - min_scale < 0.1^scale_decimals)
    {
	#convergence reached
       doneness = "true";
    }
    else
    {
	doneness = "";
    }
    
    # in case you're intetested...
    return doneness;
}

#########################################################################
#	NextSDadd()							#
#									#
#	Updates optimizing parameters based on observed output from	#
#	the agrovata script.						#
#									#
#	uses:		RMSD_sigma					#
#									#
#	modifies:	sdadd						#
#									#
#	contains:	max_sdadd					#
#			min_sdadd					#
#			best_sdadd					#
#									#
#			value[]						#
#									#
#########################################################################
function NextSDadd()
{
    # this should be known first
    value[sdadd] = RMSD_sigma;

    # pick some reasonable limits
    if (max_sdadd == "restart") 
    {
	min_sdadd = minimum_sdadd;
	value[max_sdadd] = 100000;

	best_sdadd = min_sdadd - min_sdadd_step/10;
	value[best_sdadd] = 100000;

	max_sdadd = maximum_sdadd;
	value[max_sdadd] = 100000;
    }

    # use the Golden Section method to find minimum
    sdadd = GoldStep(min_sdadd, sdadd, best_sdadd, max_sdadd);
    min_sdadd = Gold_min;
    max_sdadd = Gold_max;
    best_sdadd = Gold_best;

                
    # just for monitoring...
    sdadd_step = sdadd - best_sdadd;
	
    # update move counter
    ++move;

    # check for convergence
    if(max_sdadd - min_sdadd < 0.1^sdadd_decimals)
    {
	#convergence reached
	doneness = "true";
    }
    else
    {
	doneness = "";
    }

    # in case you're intetested...
    return doneness;
}


#########################################################################
#	WriteScript(filename)						#
#									#
#	Writes the script contained in the script[] array to the	#
#	file given in "filename."					#
#		The "error scale factor" and "estimated error" lines	#
#	however, are rewriten with the existing scale and sdadd values.	#
#									#
#	uses:		script[]					#
#			scale						#
#			sdadd						#
#			error_scale_factor_line				#
#			estimated_error_line				#
#			last_estimated_error_line			#
#			number_estimated_error				#
#			number_of_zones					#
#									#
#########################################################################
function WriteScript( filename )
{	
    # round off values to proper significant figures
    printed_scale = " " round(scale, scale_decimals)
    printed_sdadd = " " round(sdadd, sdadd_decimals)

    # compute pad length for clean console printing
    printed_pad = "";
    for(i=0;i < 20 - length(printed_scale printed_sdadd); ++i) {printed_pad = printed_pad " "}

    # make sure it gets executed properly
    if(script[1] !~ /^#! /)
    {
	print "#! /bin/csh -f" > filename;
    }

    # edit script lines and print them out
    for(line = 1; line <= number_of_lines; ++line)
    {
	# reset the line that contained "error scale factor" to new value
	if(line == error_scale_factor_line)
	{
	    script[line] = "error scale factor" printed_scale;
	}

	# print out the estimated errors (all the same)
	if(line == estimated_error_line)
	{
	    print "estimated error" > filename;
	    
	    # print 5 estimated errors on each line
	    for(line = 1; line <= int(number_of_zones/5); ++line)
	    {
		for(i = 1; i <= 5; ++i)
		{
		    printf printed_sdadd > filename;
		}
		print "" > filename;
	    }
	    
	    # in case there are any left
	    for(i = 1; i <= number_of_zones%5; ++i)
	    {
		printf printed_sdadd > filename;
	    }
	    #if(!(number_of_zones%5)) print "" > filename;
	    
	    # restore printing of rest of script
	    line = last_estimated_error_line;
	}
	else
	{
#	    print script[line]
	    print script[line] > filename;
	}
    }
    # close the file
    close(filename);

    #change it to an executable;
    system("chmod u+x " filename);
}

#########################################################################
#	GetResults(filename)						#
#									#
#	Runs the script given as "filename" and analyzes the chi**2	#
#	values given at the end of the file				#
#		The RMS deviation from 1.0 of the chi**2s is computed	#
#	and placed in RMSD_sigma.					#
#									#
#	modifies:	RMSD_sigma					#
#									#
#########################################################################
function GetResults ( filename )
{
    # to start off on right foot...
    lines_to_go = -1;

    # report what we're doing
#    print "running " filename "..."

    # get rid of old rejection files
    system("rm -f ./reject ");

    # analyze stdout from the script
    while(filename | getline > 0)
    {
	# look for the LAST chi**2 table
	if ($0 ~ /R-factors by shells/) 
	{
	    # start counting down to graph data
	    lines_to_go = 9;
	    number_of_chisquares = 0;
	}

	if (lines_to_go > 0)
	{
	    #count down
	    --lines_to_go;
	}

	if (lines_to_go ==0)
	{
	    # we are reading the first line of graph data

	    # read until we hit the summary line
	    for(i = 0; $0 !~ /All/; ++i)
	    {
		# collect "scatter/SD" data...
		scatter[i] = substr($0, 43) + 0;
		++number_of_chisquares;
		if(debug) print $0 " ==> " scatter[i];
		
		# grab next line and loop again...
		filename | getline
	    }

	    # done with the graph
	    lines_to_go = -1;
	}
    }
	
    # close the pipe!
    close(filename);


    # compute deviation from "evenness" for chi^2s
    deviation_in_scatt = 0;

    for(i = 0; i < number_of_chisquares; ++i)
    {
	# compute RMS deviation from ideal chi^2 (1.00)
	deviation_in_scatt += (scatter[i] - 1)^2;
    }

    # update global variable
    if(number_of_chisquares)
    {
	RMSD_sigma = sqrt(deviation_in_scatt/number_of_chisquares)
    }
    else
    {
	print "ERROR: No statistics generated by " filename;
	RMSD_sigma = "???";
    }
}

#########################################################################
#	GoldStep(min, last, best, max)					#
#									#
# Computes the Golden (next) step to minimize a variable		#
#									#
#	modifies:							#
#									#
#########################################################################

function GoldStep(min, last, best, max)
{
    # debugging...
    if(debug) printf "\n" min " " best " " max " ==GoldStep==> "
    
    # eliminate sections that cannot contain the minimum
    if(last >= best)
    {
	# we tried a larger value last time
	if(value[last] <= value[best])
	{
	    # minimum must be somewhere between "best" and "max"
	    min = best;
	    best = last;
	}
	else
	{
	    # minimum must be somewhere between "min" and "last"
	    max = last;
	}
    }
    else
    {
	# we tried a smaller value last time
	if(value[last] <= value[best])
	{
	    # minimum must be somewhere between "min" and "best"
	    max = best;
	    best = last;
	}
	else
	{
	    # minimum must be somewhere between "last" and "max"
	    min = last;	    
	}
    }

    # now decide what width to try next
    if((max - best) > (best - min))
    {
	# next value should be larger
	Gold_next = best + 0.38*(max - best)
    }
    else
    {
	# next value should be smaller
	Gold_next = best - 0.38*(best - min)
    }
    
    # save the parameters as Globals (God! this is awful!)
    Gold_min = min;
    Gold_max = max;
    Gold_next = Gold_next;
    Gold_best = best;
    
    # debugging...
    if (debug) printf min " " best " " max "\t"
    
    return Gold_next;
}


function abs(number)
{
	return sqrt(number^2);
}

function round(number, sigdigs)
{
    # create a format string from sigdigs variable
    formatstring = "%." sigdigs "f"
    
    return sprintf(formatstring, number);
}

function stripcomments(string)
{
    if(!comment) comment = 0;
    outstring = "";
    
    for(i = 1; i <= length(string); ++i)
    {
	character = substr(string, i, 1);

	if("[" == character) comment = 1;
	if((comment)&&("]" == character)) comment = 0;
	
	if((!comment)&&("]" != character)) outstring = outstring character;
    }
    
    return outstring;
}