#! /bin/gawk -f
BEGIN {
#
#
#	Automatically adjusts SDCORR card in SCALA for "optimum" statistics
#	as given by agrovata.
#
#	The provided script MUST generate mergeing statistics (agrovata or scala).
#	The following scripts must be creatable.
#
#	test_script = "./" FILENAME "_test"
#	best_script = "./" FILENAME "_best"
#
#	The latter will continuously be updated to the best SDCORR line
#	found so far.
#
#
#
#
#
#
#
	# these are convergence criteria
	# specify the number of decimal places you want to refine
	# each variable to.
	sdfac_decimals = 2
	sdprime_decimals = 0
	sdadd_decimals = 2

	# defaults for Golden Section search
	# go ahead and modify these if you're 
	# SURE the best value is bracketed
	maximum_sdprime = 15
	maximum_sdadd   = 0.1

	minimum_sdprime = 0
	minimum_sdadd   = 0
	
	# alternately, you may specify "tune=factor"

	best_so_far = 1000000
	line = 0
	if(!debug) debug = 0
}
# finish up initialization (for linux awk)
NR==1{
    if((!test_script)&&(FILENAME)) test_script = "./" FILENAME "_test"
    if((!best_script)&&(FILENAME)) best_script = "./" FILENAME "_best"
    if((!test_log)&&(FILENAME))    test_log    = "./" FILENAME ".log"

    if(!test_script) test_script = "./scala.com_test"
    if(!best_script) best_script = "./scala.com_best"
    if(!test_log)    test_log    = "./scala.com.log"
}

########################################################################
#       Analyze the supplied script & copy it into memory              #
########################################################################

# copy the script into an array
{
    ++line;
    ++number_of_lines;
    script[line] = $0;

    # now make everything easier to search
    $0 = tolower($0);
}

# look for the "SDCORR" card in scala
/^sdcorr/ {
    # get the current sd correction parameters
    sdfac = $2;
    if(NF == 4)
    {
	sdprime = $3;
	sdadd = $4;
    }
    else
    {
	# no sdprime specified
	sdprime = 0;
	sdadd = $3;
    }	

    sdcorr_line = line;
}

END {
########################################################################
#	Now begins the "real" program                                  #
########################################################################


    # user may specify "tuning mode"
    if(tune > 1)
    {
	if(sdprime)
	{
	    maximum_sdprime = sdprime * tune;
	    minimum_sdprime = sdprime / tune;
	}

	if(sdadd)
	{
	    maximum_sdadd = sdadd * tune;
	    minimum_sdadd = sdadd / tune;
	}
    }

    # signal to optimize ...
    max_sdfac = max_sdprime = max_sdadd = "restart"
    done = 0;
    
    while(!done)
    {
	# write the script
	WriteScript(test_script);

	if(value[script[sdcorr_line]] != 0)
	{
	    # we have already caclulated this value
	    
	    RMSD_sigma = value[script[sdcorr_line]]
	    if(debug>1) print  "recall: " script[sdcorr_line] "\t\tRMSD(sigma): " RMSD_sigma*100 "\tbest so far: " best_so_far*100;
	}
	else
	{
	    # we havn't run this pair before
	    
	    # run it, and filter output
	    printf  "trying: %s ", script[sdcorr_line] sdcorr_pad;
	    GetResults(test_script);
	
	    # remember all values obtained (to avoid repeats)
	    value[script[sdcorr_line]] = RMSD_sigma;

	    # print out rating of this run
	    printf "RMS(scatter/sigma -1): %8.5f    ", RMSD_sigma;

	    # update absolute best result
	    if(RMSD_sigma < best_so_far) 
	    {
		best_so_far = RMSD_sigma;
		best_sdcorr = script[sdcorr_line];

		# write out best scripts immediately
		WriteScript(best_script);

		# print out the values used, and their effect
		printf "best so far: %8.5f ", best_so_far;
	    }
	    # finish the line
	    print "";
	}

	# pick next values based on last run...
	if(NextSDadd()) 
	{
	    # ...has converged
	    
	    # "inner" loop done, so pick next sdprime value
	    if(NextSDprime()) 
	    {
		# ...has converged
		
		# both have converged, so we are done
		done = "true";
	    }
	    # reoptimize sdadd with new sdprime
	    max_sdadd = "restart";
	}
    }
    
    # Main loop has exited.
    # parameters have converged, so finish up


    # update scala's output files to best values
    #GetResults(best_script);

    
    # update variables for output
    split(best_sdcorr, best);
#    sdfac   = best[2];
    sdfac   = sprintf("%." sdfac_decimals "f", best_sdfac);
    sdprime = best[3];
    sdadd   = best[4];    
    
    # get rid of "test" script
    system("rm -f " test_script)
    # update the "best" script
    WriteScript(best_script);
    
    if(summary != "no")	    # option for cleaner output
    {
	print  "\n\nSUMMARY:\n"
	printf("%s ==> RMSD = %.4f\n", best_sdcorr, value[best_sdcorr]);
	print  "\n"
	print  "****************************************"
	print  "**      BEST CARD FOUND               **"
	printf "**      %21s         **\n", script[sdcorr_line];
	print  "****************************************"
	print  "\n"

	# tell user what to do next
	print "***********************"
	print "***   ALL DONE!!!   ***"
	print "***********************"

	print "Your new " FILENAME " can be found at: " best_script

    }

}
########################################################################
#	Functions used in this script                                  #
########################################################################


#########################################################################
#	NextSDprime()							#
#									#
#	Updates optimizing parameters based on observed output from	#
#	the agrovata script.						#
#									#
#	uses:		RMSD_sigma					#
#									#
#	modifies:	sdprime						#
#									#
#	contains:	max_sdprime					#
#			min_sdprime					#
#			best_sdprime					#
#									#
#			value[]						#
#									#
#########################################################################
function NextSDprime()
{
    # offset SDprime to keep from interfering with sdfac in value[]
    sdprime += 100;
    best_sdprime += 100;

    # this should be known first
    value[sdprime] = RMSD_sigma;

    # pick some reasonable limits
    if (max_sdprime == "restart") 
    {
	min_sdprime = minimum_sdprime +100;
	value[max_sdprime] = 100000;

	best_sdprime = min_sdprime;
	value[best_sdprime] = 100000;

	max_sdprime = maximum_sdprime +100;
	value[max_sdprime] = 100000;
    }

    # use the Golden Section method to find minimum
    sdprime = GoldStep(min_sdprime, sdprime, best_sdprime, max_sdprime);
    min_sdprime = Gold_min;
    max_sdprime = Gold_max;
    best_sdprime = Gold_best;

        
    # just for monitoring...
    sdprime_step = sdprime - best_sdprime;
	
    # update move counter
    ++move;

    # now move sdprime back to true value
    sdprime -= 100;
    best_sdprime -= 100;

    # check for convergence
    if(max_sdprime - min_sdprime < 0.1^sdprime_decimals)
    {
	#convergence reached
	doneness = "true";
    }
    else
    {
	doneness = "";
    }
    
    # in case you're intetested...
    return doneness;
}

#########################################################################
#	NextSDadd()							#
#									#
#	Updates optimizing parameters based on observed output from	#
#	the agrovata script.						#
#									#
#	uses:		RMSD_sigma					#
#									#
#	modifies:	sdadd						#
#									#
#	contains:	max_sdadd					#
#			min_sdadd					#
#			best_sdadd					#
#									#
#			value[]						#
#									#
#########################################################################
function NextSDadd()
{
    # this should be known first
    value[sdadd] = RMSD_sigma;

    # pick some reasonable limits
    if (max_sdadd == "restart") 
    {
	min_sdadd = minimum_sdadd;
	value[max_sdadd] = 100000;

	best_sdadd = min_sdadd - min_sdadd_step/10;
	value[best_sdadd] = 100000;

	max_sdadd = maximum_sdadd;
	value[max_sdadd] = 100000;
    }

    # use the Golden Section method to find minimum
    sdadd = GoldStep(min_sdadd, sdadd, best_sdadd, max_sdadd);
    min_sdadd = Gold_min;
    max_sdadd = Gold_max;
    best_sdadd = Gold_best;

                
    # just for monitoring...
    sdadd_step = sdadd - best_sdadd;
	
    # update move counter
    ++move;

    # check for convergence
    if(max_sdadd - min_sdadd < 0.1^sdadd_decimals)
    {
	#convergence reached
	doneness = "true";
    }
    else
    {
	doneness = "";
    }

    # in case you're intetested...
    return doneness;
}





#########################################################################
#	WriteScript(filename)						#
#									#
#	Writes the script contained in the script[] array to the	#
#	file given in "filename."					#
#		The following cards, however, are rewriten with		#
#	the existing trial values:	WIDTH				#
#									#
#	uses:		script[]					#
#			bin_scale					#
#			width						#
#									#
#########################################################################
function WriteScript( filename )
{	
    for(line = 1; line <= number_of_lines; ++line)
    {
	if(line == sdcorr_line)
	{
	    script[line] = "SDCORR " sdfac \
				 " " round(sdprime, sdprime_decimals) \
				 " " round(sdadd, sdadd_decimals);
	    sdcorr_pad = "";
    	    for(i=0;i < 25 - length(script[line]); ++i) {sdcorr_pad = sdcorr_pad " "}
	}

	print script[line] > filename;
    }
    # close the file
    close(filename);

    #change it to an executable;
    system("chmod u+x " filename);
}


#########################################################################
#       GetResults(filename)                                            #
#                                                                       #
#       Runs the script given as "filename" and analyzes the output     #
#       for the agrovata Sigma(scatter/SD) graph.                       #
#               The RMS deviation from an even distribution of          #
#       observations per intensity bin is computed as RMSD_width.       #
#               The RMS deviation from 1.0 of the scatter/SD is         #
#       also computed and placed in RMSD_sigma.                         #
#                                                                       #
#       modifies:       RMSD_width                                      #
#                       RMSD_width_p                                    #
#                       RMSD_sigma                                      #
#                       RMSD_sigma_p                                    #
#                       RMSD_sigma_w                                    #
#                       best_sdfac                                      #
#                                                                       #
#########################################################################
function GetResults ( filename )
{
    # report what we're doing
#    print "running " filename "..."

    # analyze stdout from "agrovata" script
    command = filename
#    if(test_log) command = filename " | tee " test_log
    while(command | getline > 0)
    {
        # look for the graph entry
        if ($0 ~ /^ \$GRAPHS: Sigma\(scatter\/SD\)/)
	{
	    graph = 1
	    
	    # initialize cumulative variables
	    count_full["sum"]=count_part["sum"]=count["sum"]=0
	    bins = 0  
	}
	if ($1 == "TOTALS:") graph = 0

        if (graph && ($0 !~ /[a-z]/) && NF>3)
        {
            # we are reading a line of graph data
	    ++bins
	    
	    # gather stats	    
#	    count_full[bins]  = $5
#	    count_part[bins]  = $9
	    count_full[bins]  = substr($0, 38, 9)+0
	    count_part[bins]  = substr($0, 77, 9)+0
	    count[bins]       = count_full[bins]+count_part[bins]

	    # sums
	    count_full["sum"] += count_full[bins]
	    count_part["sum"] += count_part[bins]
	         count["sum"] += count[bins]

#	    scatt_full[bins]  = $7-1
#	    scatt_part[bins]  = $NF-1
	    scatt_full[bins]  = substr($0, 55, 8)-1
	    scatt_part[bins]  = $NF-1
	    scatt[bins]       = (scatt_full[bins]+scatt_part[bins])/2
	}

        # look for refined SDfac
	if($0 ~ /Final assessment of SDcorrection multipliers/) { sdfac_list = 1 }
	if($0 ~ /Summary/) { sdfac_list = 0 }

	if(sdfac_list)
	{
	    if(($0 ~ /[0-9]/)&&($0 !~ /[a-z]/))
	    {
		if(NF>2)
		{
		    ++sdfacs
		    sdfac_sum+=(count_full["sum"]*$2+count_part["sum"]*$5)/(count["sum"])
		}
		else
		{
		    sdfac_list = 0
		}
	    }
	}
    }
    
    # close the pipe!
    close(filename);

    # calculate mean SDFAC
    if(sdfacs) best_sdfac = sdfac_sum/sdfacs
#    printf best_sdfac

    # the scatt and count arrays should now contain the LAST graph in the log
    if(bins)
    {
	# calculate average values
	count_full["avg"] = count_full["sum"]/bins
	count_part["avg"] = count_part["sum"]/bins
	count["avg"]      = count["sum"]     /bins

	# and now rmsds
	count_full["rms"]=count_part["rms"]=count["rms"]=0
	scatt_full["rms"]=scatt_part["rms"]=scatt["rms"]=scatt["wrms"]=0
	for(i=1; i<=bins; ++i)
	{
	    count_full["rms"] += (count_full[i]-count_full["avg"])*(count_full[i]-count_full["avg"])
	    count_part["rms"] += (count_part[i]-count_part["avg"])*(count_part[i]-count_part["avg"])
	    count["rms"]      += (count[i]     -count["avg"]     )*(count[i]     -count["avg"]     )

	    scatt_full["rms"] += scatt_full[i]*scatt_full[i]
	    scatt_part["rms"] += scatt_part[i]*scatt_part[i]
	    scatt["rms"]      += scatt[i]*scatt[i]
	    scatt["wrms"]     += count_full[i]*scatt_full[i]*scatt_full[i]
	    scatt["wrms"]     += count_part[i]*scatt_part[i]*scatt_part[i]
	}
	
	count_full["rms"] = sqrt(count_full["rms"]/bins)
	count_part["rms"] = sqrt(count_part["rms"]/bins)
	count["rms"]      = sqrt(count["rms"]/bins)
	
	scatt_full["rms"] = sqrt(scatt_full["rms"]/bins)
	scatt_part["rms"] = sqrt(scatt_part["rms"]/bins)
	scatt["rms"]      = sqrt(scatt["rms"]/bins)
	scatt["wrms"]     = sqrt(scatt["wrms"]/count["sum"])
    }
    else
    {
	print "ERROR: no stats generated by " filename
	print "please edit " FILENAME " and make sure it runs by itself."
	exit
    }

    # update global variables
    RMSD_width   = count_full["rms"]
    RMSD_width_p = count_part["rms"]
    RMSD_sigma   = scatt_full["rms"]
    RMSD_sigma_p = scatt_part["rms"]
    RMSD_sigma_w = scatt["wrms"]

    RMSD_sigma = RMSD_sigma_w

}

#########################################################################
#	GoldStep(min, last, best, max)					#
#									#
# Computes the Golden (next) step to minimize a variable		#
#									#
#	modifies:							#
#									#
#########################################################################

function GoldStep(min, last, best, max)
{
    # debugging...
    if(debug) printf "\n" min " " best " " max " ==GoldStep==> "
    
    # eliminate sections that cannot contain the minimum
    if(last >= best)
    {
	# we tried a larger value last time
	if(value[last] <= value[best])
	{
	    # minimum must be somewhere between "best" and "max"
	    min = best;
	    best = last;
	}
	else
	{
	    # minimum must be somewhere between "min" and "last"
	    max = last;
	}
    }
    else
    {
	# we tried a smaller value last time
	if(value[last] <= value[best])
	{
	    # minimum must be somewhere between "min" and "best"
	    max = best;
	    best = last;
	}
	else
	{
	    # minimum must be somewhere between "last" and "max"
	    min = last;	    
	}
    }

    # now decide what width to try next
    if((max - best) > (best - min))
    {
	# next value should be larger
	Gold_next = best + 0.38*(max - best)
    }
    else
    {
	# next value should be smaller
	Gold_next = best - 0.38*(best - min)
    }
    
    # save the parameters as Globals ( this is awful!)
    Gold_min = min;
    Gold_max = max;
    Gold_next = Gold_next;
    Gold_best = best;
    
    # debugging...
    if (debug) printf min " " best " " max "\t"
    
    return Gold_next;
}


function abs(number)
{
	return sqrt(number^2);
}

function round(number, sigdigs)
{
	return int(number*10^sigdigs)/10^sigdigs;
}

