#! /bin/tcsh -f
#
#
#	bestFH.com 				-James Holton 7-6-18
#
#	calculate a "best" estimate of F for the heavy atoms
#	alone (a la B. W. Matthews 1966 Acta Cryst 20, 230-239)
#	by combining ALL anomalous and isomorphous differences.  
#	(as many as you want)
#
#	FH should give cleaner Pattersons, difference Fouriers,
#	and direct methods.
#
#	Do NOT combine differences from data sets expected to have
#	different heavy atom locations.  That would be silly.
#
#
# set this to wherever your awk program is
alias nawk /usr/bin/nawk
nawk 'BEGIN{print}' >& /dev/null
if($status) alias nawk awk

set mtzfile   = "./mtz/all.mtz"		# MAD data set
set outfile   = "./FH.mtz"		# contains FH, SIGFH
set shelxfile = "./fh"		# same thing, shelx format
set fourfile  = "./FH_Four.map"		# Phased Fourier of FH (if phase is available)
set pattfile  = "./FH_Patt.map"		# Combined Patterson map
set wpattfile = "./wFH_Patt.map"	# Combined, weighted Patterson map
set logfile   = "./bestFH.log"		# all the CCP4 logs

set tempfile = ./bestFH_temp


# initialize internal variables
set loRES = 1000
set hiRES = ""
set order = ""		# "order" of increasing dispersive signal data sets

set DATA_cutoff = 1	# sigma cutoff      (not used)
set MAX_dano 		# upper Dano cutoff (not used)
set MAX_diso		# upper Diso cutoff (not used)
set scaling = scale	# apply one scale to each difference data set
#set scaling = isotropic # use a B-factor too (wise? )

if($#argv == 0) goto Help

# this procedure (re)sets most of the above variables
# from either the provided files, or the command line
goto Setup

Help:
cat << EOF

usage: $0 alldata.mtz

where:
alldata.mtz	- contains all the Fs and DANOs you want to combine

`basename $0` will calculate an (unscaled) estimate of FH, the scattering factor
of the heavy metal alone, by combining any and all anomalous and isomorphous
(dispersive) differences made available to it.  The procedure is derived
from Matthews et. al. 1966 Acta Cryst 20, 230.

FH is better than Dano or Diso alone, both because the averaging tends
to give better signal/noise, and because considering BOTH isomorphous
and anomalous simultaneously reduced the systematic errors arising
from cross-terms in difference intensity data.  FH usually gives 
cleaner Pattersons and difference Fouriers, as well as improved 
performace of direct methods programs.

Procedure:
The isomorphous difference data sets are computed from the provided Fs,
(assigned a sign), scaled together, and a sigma-weighed mean isomorphous 
difference is computed.  A similar procedure is applied to the anomalous 
differences.

The average Diso and Dano data sets are then scaled to each other, and
combined for the final estimate of FH = sqrt(Diso^2 + k * Dano^2).

REMEMBER: Do NOT combine differences from data sets expected to have
different heavy atom locations.  That would be silly.  If you don't
know why, you should read Matthews et. al. 1966 Acta Cryst 20, 230.

EOF

rm -f ${tempfile}Fs >& /dev/null
rm -f ${tempfile}Ds >& /dev/null
rm -f ${tempfile}Ps >& /dev/null
rm -f ${tempfile}Fpairs >& /dev/null
exit 2
#
#   This procedure (at the bottom of the script) does the following
#   1) scan the command line for the mtz file
#   2) set the CELL, SG, and other variables
#   3) generate dataset name lists: ${tempfile}Fs and ${tempfile}Ds
Return_from_Setup:


################################################################
#   initial report on intended program flow
################################################################
# start the logfile
echo "" >! $logfile
echo "$0 $*" >> $logfile

if(-e "$mtzfile") then
    echo "calculating FH from data in $mtzfile" | tee -a $logfile
endif

echo "" | tee -a $logfile
echo "resolution $loRES $hiRES" | tee -a $logfile
# get a "better" resolution limit for scaling?
set scaleRES = "$loRES $hiRES"

# count how many datasets we have
set Fs = `cat ${tempfile}Fs | wc -l`
set Ds = `cat ${tempfile}Ds | wc -l`

# trivial assignments of dispersive difference order
# (2 datasets -> doesn't matter)
# (1 dataset  -> unusable)
if($Fs < 3) set order = `nawk '{print $1}' ${tempfile}Fs`
if($Fs < 2) set order = ""

# if user doesn't care, pick Diso ordering automatically
if(("$order" == "")&&($Fs > 2)) then
    
    echo -n "Evaluating difference data "
    echo -n "" >! ${tempfile}diso_dano
    
    # make a list of essential pairs
    cat ${tempfile}Fs |\
    nawk '{++n; F[n]=$1} \
          END{for(i=1;i<=n;++i){for(j=i;j<=n;++j){if(i!=j){\
              print F[i] " - " F[j];}}}}' |\
    cat >! ${tempfile}Fpairs
    
    foreach pair ( `nawk '{print NR}' ${tempfile}Fpairs` )
	# retrieve the pair  
	set F1 = `nawk -v pair=$pair 'NR==pair{print $1}' ${tempfile}Fpairs`
	set F2 = `nawk -v pair=$pair 'NR==pair{print $3}' ${tempfile}Fpairs`
	
	# and get the sigmas too
	set SIGF1 = `nawk -v F=$F1 '$1==F{print $2}' ${tempfile}Fs`
	set SIGF2 = `nawk -v F=$F2 '$1==F{print $2}' ${tempfile}Fs`
	
	# also, get magnitudes of anomalous diffs
	if($Ds != 0) then
	    @ i = ( ( $pair  % $Ds ) + 1 )
	    set DANO    = `nawk -v n=$i 'NR==n{print $1}' ${tempfile}Ds`
	    set SIGDANO = `nawk -v n=$i 'NR==n{print $2}' ${tempfile}Ds`

	    set DANOcards = "DPH1=$DANO SIGDPH1=$SIGDANO"
	else
	    set DANOcards = ""
	endif
    
	# make some ordinary scaleit cards
	cat << EOF-scaleitin >! ${tempfile}scaleit.in
RESOLUTION $scaleRES
refine $scaling
weight
LABIN FP=$F1 SIGFP=$SIGF1 -
      FPH1=$F2 SIGFPH1=$SIGF2 $DANOcards
END
EOF-scaleitin
	
	# entertainment
	echo -n "."
	
	# put the labels in the log file
	echo -n "$F1 $F2 $DANO " >> ${tempfile}diso_dano
	
	# run scaleit to get Diso and Dano
	cat ${tempfile}scaleit.in |\
	scaleit HKLIN $mtzfile HKLOUT /dev/null |\
	nawk '/Sc_kraut SCALE/{iso=index($0,"diso")-4; ano=index($0,"<ano>")+3}\
	/THE TOTALS/{print substr($0,iso)+0, substr($0,ano)+0}' |\
	cat >> ${tempfile}diso_dano
	
	# ${tempfile}diso_dano has format: F1 F2 DANOF3  <Diso> <Dano>
    end
    rm -f ${tempfile}Fpairs >& /dev/null
    rm -f ${tempfile}scaleit.in >& /dev/null

    # get the single largest isomorphous difference
    set order = `sort -n -k4 ${tempfile}diso_dano | tail -1 | nawk '{print $2}'`
    if("$order" == "") then
	echo ""
	echo "no isomorphous differences"
	echo "estimating FH requires isomorphous/dispersive and anomalous differences! "
	echo "Therefore: $mtzfile needs to contain at least two columns of Fs"
	echo "sorry"
	goto Clean_up
    endif

    # now order the remaining datasets with increasing distance from this "native"
    cat ${tempfile}diso_dano |\
    nawk -v ref=$order 'BEGIN{print ref, 0, "order"} \
       $1==ref{print $2, $4} $2==ref{print $1, $4}' |\
    sort -n -k2 >! ${tempfile}order
    # ${tempfile}order has format: F diso(from Fref)
    
    # store the new order in a variable
    set order = `nawk '{print $1}' ${tempfile}order`

    # make sure nothing bad happened (this isn't exactly ergodic! )
    set Fs = `cat ${tempfile}Fs | wc -l`
    if($#order != $Fs) then
	# something has gone horribly wrong
	echo "ERROR: unable to determine the best order of"
	echo "       isomorphous/dispersive differences"
	echo "sorry! "
	echo ""
	goto Help
    endif
    
    # reorder the Dano list too (doesn't really matter)
    cat ${tempfile}diso_dano ${tempfile}Ds |\
    nawk 'NF>2{dano[$3]=$5} \
          NF==2{print $0, dano[$1]}' |\
    sort -nru -k3 |\
    nawk '{print $1, $2}' >! ${tempfile}
    mv ${tempfile} ${tempfile}Ds >& /dev/null

    # clean up
    rm -f ${tempfile}scaleit.in >& /dev/null
    rm -f ${tempfile}diso_dano >& /dev/null
endif

# whatever its source, put the f' order in a file
echo "$order" |\
nawk 'BEGIN{RS=" "} NF==1{++i; print $1, "order"}' |\
cat >! ${tempfile}order

# re-order the Fs list
cat ${tempfile}Fs ${tempfile}order |\
nawk '$NF!="order"{sig[$1]=$2} \
      $NF=="order"{print $1, sig[$1]}' |\
cat >! ${tempfile}
mv ${tempfile} ${tempfile}Fs >& /dev/null
set Fs = `cat ${tempfile}Fs | wc -l`


# print out final ordering results:
echo ""
echo -n " f' order: "
cat ${tempfile}Fs | nawk '{printf "%s ", $1} END{print ""}'

echo -n " f"\"" order: "
cat ${tempfile}Ds | nawk '{printf "%s ", $1} END{print ""}'

# clean up
rm -f ${tempfile}order >& /dev/null


# jump ahead if there are no anomalous datasets (why bother?)
if("$Ds" == 0) then
    # make sure this doesn't exist (this is a signal later on)
    rm -f ${tempfile}dano.mtz >& /dev/null
    goto calculate_iso
endif

# no need to weigh anomalous datasets if there is only one of them
if("$Ds" == 1) then
    # all we need to do is rename the dataset
    set temp = `cat ${tempfile}Ds`
    # better than crashing, (I guess)
    if($#temp != 2) set temp = ( $temp $temp )

    echo "extracting $temp[1] as Dano"
    cad hklin1 $mtzfile hklout ${tempfile}dano.mtz << EOF-cad >> $logfile
    LABIN FILE 1 E1=$temp[1] E2=$temp[2]
    CTYPO FILE 1 E1=F        E2=Q
    LABOU FILE 1 E1=Dano     E2=SIGDano
EOF-cad

    # go on to calculate isomorphous differences
    goto calculate_iso
endif

weigh_ano:
echo ""
echo "weighting anomalous differences"

# extract the anomalous differences, and treat them as Fs
cat ${tempfile}Ds |\
nawk '{printf "%s %s ", $1, $2} END{print ""}' |\
nawk 'BEGIN{printf "LABIN FILE 1 ";} \
{for(i=1;i<=NF;++i){printf "E%d=%s ", i, $i}; printf "\nCTYPIN FILE 1 "; \
 for(i=1;i<=NF;i+=2){printf "E%d=F E%d=Q ", i, i+1}; print ""}' |\
cad HKLIN1 $mtzfile HKLOUT ${tempfile}danos.mtz >> $logfile

# put anomalous diffs on the same scale
set i = 1
echo -n "" >! ${tempfile}scaleit.log
while( $i <= $Ds )
    cat << EOF-scaleitin >! ${tempfile}scaleit.in
RESOLUTION $scaleRES
#refine scale
refine $scaling
weight
EOF-scaleitin

    # make the LABIN card (not too many FPHs! )
    head -1 ${tempfile}Ds |\
    nawk '{printf "LABIN FP=%s SIGFP=%s ", $1, $2}' |\
    cat >> ${tempfile}scaleit.in
    
    # no more than 6 at a time
    cat ${tempfile}Ds |\
    nawk -v first=$i '{++n} n>=first && n<(first+6){++i;\
	printf "-\nFPH%d=%s SIGFPH%d=%s ", i, $1, i, $2} \
    END {print "\nEND"}' |\
    cat >> ${tempfile}scaleit.in
    
    # now actually run scaleit
    cat ${tempfile}scaleit.in |\
    scaleit HKLIN ${tempfile}danos.mtz \
            HKLOUT ${tempfile}danoscaled.mtz |\
     tee -a ${tempfile}scaleit.log >> $logfile
    
    # accumulate scaled datasets
    mv ${tempfile}danoscaled.mtz ${tempfile}danos.mtz >& /dev/null
    @ i = ( $i + 6 )
end
mv ${tempfile}danos.mtz ${tempfile}danoscaled.mtz >& /dev/null

# print out "weights" (effective weighting is 1/scale^2)
cat ${tempfile}scaleit.log |\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\
nawk '$1 == "Derivative"{++i; print $1, i, $3}' >! ${tempfile}scales

cat ${tempfile}Ds ${tempfile}scales |\
nawk 'NF==2{++n; label[n]=$1; if(length($1)>maxlen) maxlen=length($1)}\
    $1 == "Derivative"{w=0; if($3+0!=0) w=1/($3*$3)\
    printf "%-" maxlen "s : %.3f\n", label[$2], w}' |\
sort -nr -k3


rm -f ${tempfile}scaleit.log >& /dev/null
rm -f ${tempfile}scales      >& /dev/null


combine_ano:
echo "combining anomalous differences into Dano"

# now do a sigma-weighted average of all anomalous diffs
set Ds = `cat ${tempfile}Ds | wc -l`
echo $Ds | nawk '{print "NREF -1"; print "FORMAT \047(3i5,"$1*2"f15.7)\047"}' |\
mtzdump HKLIN ${tempfile}danoscaled.mtz |\
nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\
nawk '! /[A-Z]/ && NF>3{HKL=substr($0,1,15); sum=norm=n="";\
    # run down list of D, sigD \
    for(i=4;i<NF;i+=2){D=$i;sigD=$(i+1); w=0; \
	# sigma of 0 means zero, but no weight \
	if(sigD+0==0) norm+=0;\
	# add up weigted sum \
        if(sigD+0>0){w=1/(sigD*sigD); sum+=w*D; norm+=w;}}\
    # do not print anything for "all-missing" HKLs \
    # print zero for all-zero hkls \
    if(norm==0) print HKL, 0, 0; \
    # print sigma-weighted average (abs value? ) \
    if(norm+0 > 0) print HKL, sum/norm, 1/sqrt(norm)}' |\
cat >! ${tempfile}dano.hkl
rm -f ${tempfile}danoscaled.mtz >& /dev/null


f2mtz HKLIN ${tempfile}dano.hkl HKLOUT ${tempfile}sortme.mtz << EOF-f2mtz >> $logfile
CELL $CELL
SYMM $SGnum
LABOUT H K L Dano SIGDano
CTYPO  H H H F Q
EOF-f2mtz
rm -f ${tempfile}dano.hkl >& /dev/null

echo "H K L" |\
sortmtz HKLIN ${tempfile}sortme.mtz HKLOUT ${tempfile}dano.mtz >> $logfile
rm -f ${tempfile}sortme.mtz

# indicate finish
echo ""

# summed anomalous diffs should now be loaded into:
# ${tempfile}dano.mtz, labeled as Dano SIGDano


calculate_iso:
###########################################################
# now we need to calculate isomorphous diffs 
# before we can treat them as we did the anomalous diffs


# jump ahead if there are no isomorphous datasets (why bother?)
if("$Fs" < 2) then
    # make sure this doesn't exist (this is a signal later on)
    rm -f ${tempfile}diso.mtz >& /dev/null
    goto compute_k
endif

if($Fs == 2) then
    echo "calculating isomorphous (dispersive) difference as Diso"
else
    echo "subtracting isomorphous (dispersive) differences"
    echo "note: f' differences should all have the same sign! "
endif

# make sure this doesn't already exist
rm -f ${tempfile}disos.mtz >& /dev/null

# make list with largest difference first
cat ${tempfile}Fs |\
nawk '{++n; F[n]=$1} \
    END{for(i=1;i<=n;++i){for(j=n;j>i;--j){if(i!=j){\
    print F[i] " - " F[j];}}}}' |\
cat >! ${tempfile}Fpairs
set Fpairs = `cat ${tempfile}Fpairs | wc -l`
set pair = 0

while ( $pair < $Fpairs )
    @ pair = ( $pair + 1 )
    # retrieve the pair  
    set F1 = `nawk -v pair=$pair 'NR==pair{print $1}' ${tempfile}Fpairs`
    set F2 = `nawk -v pair=$pair 'NR==pair{print $3}' ${tempfile}Fpairs`
    
    # and get the sigmas too
    set SIGF1 = `nawk -v F=$F1 '$1==F{print $2}' ${tempfile}Fs`
    set SIGF2 = `nawk -v F=$F2 '$1==F{print $2}' ${tempfile}Fs`
    
    echo "${F1}-${F2}"
    
    # extract these columns from the file
    echo "LABIN FILE 1 E1=$F1 E2=$SIGF1 E3=$F2 E4=$SIGF2" |\
    cad HKLIN1 $mtzfile HKLOUT ${tempfile}dump.mtz >> $logfile

    set sign = 1    
    set test = `echo head | mtzdump hklin ${tempfile}dump.mtz | awk '/Column Labels/{getline;getline;print $4}'`
    if("$test" != "$F1") then
	# GOD DAMN YOUUUUU!!!
	set sign = -1
    endif

    # dump the Fs as text, calculate F1-F2 sqrt(SIG1^2+SIG2^2)
    echo "NREF -1" |\
    mtzdump HKLIN ${tempfile}dump.mtz |\
    nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\
    nawk -v s=$sign '! /[A-Z?]/ && NF>1{HKL=substr($0,1,13);\
        F1=substr($0,14,12); F2=substr($0,36,10);\
	SIGF1=substr($0,26,10); SIGF2=substr($0,46,10); \
     print HKL, s*(F1-F2), sqrt(SIGF1*SIGF1 + SIGF2*SIGF2)}' |\
    cat >! ${tempfile}diso.hkl
    rm -f ${tempfile}dump.mtz >& /dev/null
    
    # special case: only one difference set
    if($Fs == 2) set pair = ""

    # read back into mtz format
    f2mtz HKLIN ${tempfile}diso.hkl HKLOUT ${tempfile}sortme.mtz << EOF-f2mtz >> $logfile
    CELL $CELL
    SYMM $SGnum
    LABOUT H K L Diso${pair} SIGDiso${pair}
    CTYPO  H H H F Q
EOF-f2mtz
    rm -f ${tempfile}diso.hkl >& /dev/null
    
    if("$pair" == "") set pair = 999    
    
    # sort it (just in case)
    echo "H K L" |\
    sortmtz HKLIN ${tempfile}sortme.mtz HKLOUT ${tempfile}diso.mtz >> $logfile
    rm -f ${tempfile}sortme.mtz
    
    # add columns into an mtz
    if(-e ${tempfile}disos.mtz) then
	cad HKLIN1 ${tempfile}disos.mtz HKLIN2 ${tempfile}diso.mtz \
	    HKLOUT ${tempfile}cadded.mtz << EOF-cadadd >> $logfile
	LABIN FILE 1 ALL
	LABIN FILE 2 ALL
EOF-cadadd
	# update the cumulative mtz
	mv ${tempfile}cadded.mtz ${tempfile}disos.mtz
    else
	# create the cumulative mtz
	mv ${tempfile}diso.mtz ${tempfile}disos.mtz
    endif
end


# check for trivial case: one difference dataset
if($Fs <= 2) then
    # no need to calculate relative weights for one dataset
    mv ${tempfile}disos.mtz ${tempfile}diso.mtz
    
    goto compute_k
endif


weigh_iso:
echo "weighting isomorphous differences"

# now put all these differences on the same scale
set pair = 1
set Fpairs = `cat ${tempfile}Fpairs | wc -l`
echo -n "" >! ${tempfile}scaleit.log 
while ( $pair <= $Fpairs )
    cat << EOF-scaleitin >! ${tempfile}scaleit.in
RESOLUTION $scaleRES
#refine scale
refine $scaling
weight
EOF-scaleitin
    # first one is the reference
    echo -n "LABIN FP=Diso1 SIGFP=SIGDiso1 " >> ${tempfile}scaleit.in
    
    # scale 6 at a time
    cat ${tempfile}Fpairs |\
    nawk -v first=$pair '{++n} n>=first && n<(first+6){++i;\
	printf "-\nFPH%d=Diso%d SIGFPH%d=SIGDiso%d ", i, n, i, n;}\
	END{print "\nEND"}' |\
    cat >> ${tempfile}scaleit.in
    
    # run scaleit
    cat ${tempfile}scaleit.in |\
    scaleit HKLIN ${tempfile}disos.mtz \
            HKLOUT ${tempfile}disoscaled.mtz  |\
     tee -a ${tempfile}scaleit.log >> $logfile
    rm -f ${tempfile}scaleit.in >& /dev/null

    @ pair = ( $pair + 6 )
    
    # output is input for next round of scaling
    mv ${tempfile}disoscaled.mtz ${tempfile}disos.mtz >& /dev/null
end
mv ${tempfile}disos.mtz ${tempfile}disoscaled.mtz >& /dev/null


# print out "weights" (effective weighting is 1/scale^2)
cat ${tempfile}scaleit.log |\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\
nawk '$1 == "Derivative"{++n; print $1, n, $3}' >! ${tempfile}scales

cat ${tempfile}Fpairs ${tempfile}scales |\
nawk '$2 == "-"{++n; label[n]=$1 $2 $3; if(length(label[n])>maxlen) maxlen=length(label[n])}\
    $1 == "Derivative"{w=0; if($3+0!=0) w=1/($3*$3)\
    printf "%-" maxlen "s : %.3f\n", label[$2], w}' |\
sort -nr -k3

rm -f ${tempfile}scaleit.log >& /dev/null
rm -f ${tempfile}scales      >& /dev/null


combine_iso:
echo "combining isomorphous differences into Diso"

# add these scaled data sets together (sigma-weighted again)
# (hopefully, our "ordering" procedure has made sure all
#  these differences have the same sign)
set Fpairs = `cat ${tempfile}Fpairs | wc -l`
rm -f ${tempfile}Fpairs >& /dev/null

echo $Fpairs | nawk '{print "NREF -1"; print "FORMAT \047(3i5,"$1*2"f15.7)\047"}' |\
mtzdump HKLIN ${tempfile}disoscaled.mtz |\
nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\
nawk '! /[A-Z]/ && NF>3{HKL=substr($0,1,15); sum=norm=n="";\
    # run down list of D, sigD \
    for(i=4;i<NF;i+=2){D=$i;sigD=$(i+1); w=0; \
	# sigma of 0 means zero, but no weight \
	if(sigD+0==0) norm+=0;\
	# add up weigted sum \
        if(sigD+0>0){w=1/(sigD*sigD); sum+=w*D; norm+=w;}}\
    # do not print anything for "all-missing" HKLs \
    # print zero for all-zero hkls \
    if(norm==0) print HKL, 0, 0; \
    # print sigma-weighted average (abs value?) \
    if(norm+0 > 0) print HKL, sum/norm, 1/sqrt(norm)}' |\
cat >! ${tempfile}diso.hkl
rm -f ${tempfile}disoscaled.mtz >& /dev/null

# read the averaged isomorphous differences back into an mtz
f2mtz HKLIN ${tempfile}diso.hkl HKLOUT ${tempfile}sortme.mtz << EOF-f2mtz >> $logfile
CELL $CELL
SYMM $SGnum
LABOUT H K L Diso SIGDiso
CTYPO  H H H F Q
EOF-f2mtz
rm -f ${tempfile}diso.hkl >& /dev/null

echo "H K L" |\
sortmtz HKLIN ${tempfile}sortme.mtz HKLOUT ${tempfile}diso.mtz >> $logfile
rm -f ${tempfile}sortme.mtz

# indicate finish


compute_k:
##############################################################
# now we need the "k" that will put Diso and Dano on the same
# scale: k/2 = <|Diso|>/<|Dano|>

##############################################################
echo ""

# handle special cases
if(($Ds == 0)&&($Fs <= 1)) then
    # this should never happen, but...
    echo "ERROR: no difference data in ${mtzfile}! "
    goto Help
endif
if($Ds == 0) then
    # anomalous difference data is totally missing
    # just rename the isomorphous differences
    echo "WARNING: treating Diso data as FH (FH is better with Dano and Diso)"
    cad hklin1 ${tempfile}diso.mtz hklout ${tempfile}sortme.mtz << EOF >> $logfile
    labin file 1 E1=Diso E2=SIGDiso
    labou file 1 E1=FH   E2=SIGFH
EOF
    rm -f ${tempfile}diso.mtz >& /dev/null
    rm -f ${tempfile}dano.mtz >& /dev/null
#    set wpattfile = ""

    goto sort_final
endif
if($Fs <= 1) then
    # isomorphous difference data is totally missing
    # just re-name the anomalous differences
    echo "WARNING: treating Dano data as FH (FH is better with Dano and Diso)"
    cad hklin1 ${tempfile}dano.mtz hklout ${tempfile}sortme.mtz << EOF >> $logfile
    labin file 1 E1=Dano E2=SIGDano
    labou file 1 E1=FH   E2=SIGFH
EOF
    rm -f ${tempfile}diso.mtz >& /dev/null
    rm -f ${tempfile}dano.mtz >& /dev/null
#    set wpattfile = ""

    goto sort_final
endif

##############################################################
echo -n "scaling Diso to Dano  "

# use scaleit to put Diso and Dano on the same scale
cad HKLIN1 ${tempfile}diso.mtz HKLIN2 ${tempfile}dano.mtz \
HKLOUT ${tempfile}diso_dano.mtz << EOF-cad >> $logfile
LABIN FILE 1 ALL
LABIN FILE 2 ALL
EOF-cad
rm -f ${tempfile}diso.mtz >& /dev/null
rm -f ${tempfile}dano.mtz >& /dev/null


scaleit HKLIN ${tempfile}diso_dano.mtz \
 HKLOUT ${tempfile}scaled.mtz << EOF-scaleit | tee ${tempfile}scaleit.log >> $logfile
RESOLUTION $scaleRES
#refine scale
refine $scaling
weight
LABIN FP=Diso SIGFP=SIGDiso FPH1=Dano SIGFPH1=SIGDano
end
EOF-scaleit
rm -f ${tempfile}diso_dano.mtz >& /dev/null
rm -f ${tempfile}scaleit.in >& /dev/null


# print out "k" value from scaling
cat ${tempfile}scaleit.log |\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\
nawk '$1 == "Derivative"{printf "k= %.3f\n", 2*$3}'

rm -f ${tempfile}scaleit.log >& /dev/null


combine_diso_dano:
##############################################################
# now combine Diso and Dano as the "best" estimate of total FH

echo "calculating FH = sqrt( Diso^2 + (k/2)^2 * Dano^2 )"

# also do a "Patterson weight" of 1/(sigma(FH^2))^2
echo "NREF -1" |\
mtzdump HKLIN ${tempfile}scaled.mtz |\
nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\
nawk '! /[A-Z]/ && NF>1{ HKL=substr($0,1,13); \
    Diso=substr($0,14,12); SIGDiso=substr($0,26,10); \
    Dano=substr($0,36,10); SIGDano=substr($0,46,10); \
    FH=fh=sqrt(Diso*Diso + Dano*Dano); if(fh==0) fh=1;\
    varFH  = (Diso*SIGDiso/fh)^2 + (Dano*SIGDano/fh)^2;\
    varFH2 = 4 * FH^2 * varFH; \
    W=0; if(varFH) W = 1/sqrt(varFH); if(W>maxW) maxW=W;\
    print HKL, FH, sqrt(varFH), W} \
    END{if(maxW) print 1/maxW > "'${tempfile}'norm"}' |\
cat >! ${tempfile}FH.hkl

set norm = `tail -1 ${tempfile}norm`
rm -f ${tempfile}norm >& /dev/null
if($#norm != 1) set norm = 1

# read FH back into mtz (finally)
f2mtz HKLIN ${tempfile}FH.hkl HKLOUT ${tempfile}sortme.mtz << EOF-f2mtz >> $logfile
CELL $CELL
SYMM $SGnum
LABOUT H K L FH SIGFH W
CTYPO  H H H F Q W
SCALE  1 1 1 1 1 $norm
EOF-f2mtz
rm -f ${tempfile}FH.hkl >& /dev/null


sort_final:
sftools << EOF >> $logfile
read ${tempfile}sortme.mtz
calc W COL W = col SIGFH -0.5 **
write ${tempfile}FHW.mtz col FH SIGFH W
y
stop
y
EOF
mv ${tempfile}FHW.mtz ${tempfile}sortme.mtz

# sort it (for good measure)
echo "H K L" |\
sortmtz HKLIN ${tempfile}sortme.mtz HKLOUT $outfile >> $logfile
if((! $status)&&(-e "$outfile")) then
    echo "$outfile is ready."
else
    echo "ERROR! see $logfile for what happened..."
    exit 9
endif

rm -f ${tempfile}sortme.mtz
##############################################################


# use scaleit to get the recommended DIFF
scaleit hklin $outfile << EOF >! ${tempfile}scaleit
analyze
labin FP=FH SIGFP=SIGFH FPH1=FH SIGFPH1=SIGFH
SCALE FPH1 0.000001
END
EOF
if("$DIFF" == "") set DIFF = `nawk '/acceptable differences/{print $NF}' ${tempfile}scaleit`
rm -f ${tempfile}scaleit >& /dev/null
if("$DIFF" == "") set DIFF = 1000


Shelx_format:
if("$shelxfile" == "") goto Fourier

# dump FH out in shelx format too.
mtz2various HKLIN $outfile HKLOUT ${tempfile}.hkl << EOF-shelx >> $logfile
OUTPUT SHELXD
#FSQUARED
LABIN FP=FH SIGFP=SIGFH
END
EOF-shelx
nawk '! /^[A-Z]/ && $5+0!=0' ${tempfile}.hkl >! ${shelxfile}.hkl
nawk '/^[A-Z]/' ${tempfile}.hkl | grep -v HKLF >! ${shelxfile}.ins 
set test = `grep LATT ${shelxfile}.ins | wc -l`
if($test == 0) then
    # crap, now we have to do it ourselves!
    cat << EOF >! ${shelxfile}.ins
TITLE SHELX input file
CELL 1.2345 $CELL
ZERR 1.000  0.001 0.001 0.001 0 0 0
EOF
    # extract symmetry operators
    cat ${CLIBD}/symop.lib |\
    awk -v num=$SGnum '$1==num && NF>5{brav=substr($4,1,1);latt=-1;\
           nops=$3;\
	   if(brav=="P")latt=-1;\
	   if(brav=="I")latt=-2;\
	   if(brav=="R")latt=-3;\
	   if(brav=="F")latt=-4;\
	   if(brav=="A")latt=-5;\
	   if(brav=="B")latt=-6;\
	   if(brav=="C")latt=-7;\
	  print "LATT",latt;\
	  getline;getline;while(++n<nops){print "SYMM",$0;getline}}' |\
    cat >> ${shelxfile}.ins
endif
cat << EOF >> ${shelxfile}.ins 
SFAC Se
UNIT 5
TREF
EOF
# this has to be the last instruction
grep HKLF ${tempfile}.hkl >> ${shelxfile}.ins 
if("$status") then
    echo "HKLF 3" >> ${shelxfile}.ins 
endif
rm -f ${tempfile}.hkl
#rm -f ${shelxfile}.ins

if((! $status)&&(-e "${shelxfile}.hkl")) echo "${shelxfile}.hkl is the SHELX version of $outfile."


Fourier:
###########################################
if(("$PHASE" == "")||("$fourfile" == "")) goto Patterson
# calculate a phased Fourier of FH
# first, we need to retrieve the phase columns
set FOM_cad = ""
set W_cad = ""
set W1_cad = ""
# set up FOM weight
set W = ""
if("$FOM" != "") then
    set FOM_cad = "E2=$FOM"
    set W_cad = ""
    set W = "W=$FOM"
else
    echo "using 1/sigma^2 as FOM"
    # use 1/sigma^2?
    set FOM_cad = ""
    set W_cad = "E3=W"
    set W1_cad = "E1=W"
    set W = "W=W"
endif

# special case of Diso or Dano only
if(($Ds == 0)||($Fs <= 1)) then
    # no double-difference data
    set E1 = "E1=F"
    set fftF1="F1=FH"
    if($Fs <= 1) then
	# anomalous data only
	set E1="E1=D"
	set fftF1="DANO=FH"
    endif
    
    # but still want phased FH map
    cad hklin1 $outfile hklin2 $mtzfile \
	hklout ${tempfile}phased.mtz << EOF-cad >> $logfile
    LABIN FILE 1 E1=FH E2=SIGFH $W_cad
    LABIN FILE 2 E1=$PHASE $FOM_cad
    CTYPO FILE 1 $E1 E2=Q
EOF-cad

    # ordinary, boring difference Fourier
    fft HKLIN ${tempfile}phased.mtz MAPOUT ${tempfile}FH.map << EOF-fft >> $logfile
    TITLE ${hiRES}A map of FH @ $PHASE $FOM
    RESOLUTION $scaleRES
    LABIN $fftF1 SIG1=SIGFH PHI=$PHASE $W
    EXCLUDE SIG1 0
EOF-fft
    
    goto norm_Four
endif

# calculate a combined, phased difference Fourier
cad hklin1 ${tempfile}scaled.mtz hklin2 $mtzfile \
    hklin3 $outfile \
    hklout ${tempfile}phased.mtz << EOF-cad >> $logfile
LABIN FILE 1 E1=Diso E2=SIGDiso E3=Dano E4=SIGDano
LABIN FILE 2 E1=$PHASE $FOM_cad
LABIN FILE 3 $W1_cad
CTYPO FILE 1 E1=F E2=Q E3=D E4=Q
EOF-cad

# now we make the isomorphous difference Fourier
fft HKLIN ${tempfile}phased.mtz MAPOUT ${tempfile}Diso.map << EOF-fft >> $logfile
TITLE ${hiRES}A map of Diso @ $PHASE $FOM
RESOLUTION $scaleRES
LABIN F1=Diso SIG1=SIGDiso PHI=$PHASE $W
EXCLUDE SIG1 0
EOF-fft

# then make the anomalous difference Fourier (phases rotated 90 degrees)
fft HKLIN ${tempfile}phased.mtz MAPOUT ${tempfile}Dano.map << EOF-fft >> $logfile
TITLE ${hiRES}A map of Dano @ $PHASE $FOM
RESOLUTION $scaleRES
LABIN DANO=Dano SIG1=SIGDiso PHI=$PHASE $W
EXCLUDE SIG1 0
EOF-fft

# try to make sure we get the right sign
# assume map has at least one large peak (which should be positive)
echo "go" | mapdump mapin ${tempfile}Diso.map |\
nawk '/Minimum density/{print -$NF, -1} /Maximum density/{print $NF, 1}' |\
sort -nr |\
nawk 'NR==1{print $NF}' >! ${tempfile}sign
set diso_sign = `cat ${tempfile}sign`
rm -f ${tempfile}sign >& /dev/null

# invert the Diso map if it seems to be upside-down
#if(("$diso_sign" == "-1")) then
if(("$diso_sign" == "-1")&&(! $?USER_ORDER)) then
    echo "inverting sequence of Diso for difference Fourier"
    echo "SCALE FACTOR -1 0" |\
    mapmask mapin1 ${tempfile}Diso.map mapout ${tempfile}temp.map >> $logfile
    mv ${tempfile}temp.map ${tempfile}Diso.map >& /dev/null
endif

# add these two maps together (equivalent to vector sum to FH)
echo "MAPS ADD" |\
mapmask mapin1 ${tempfile}Diso.map mapin2 ${tempfile}Dano.map \
mapout ${tempfile}FH.map >> $logfile

norm_Four:
# normalize it for output
echo "SCALE SIGMA" |\
mapmask mapin ${tempfile}FH.map mapout $fourfile >> $logfile

if((! $status)&&(-e "$fourfile")) echo "$fourfile is the map of $PHASE applied to FH"

rm -f ${tempfile}phased.mtz >& /dev/null
rm -f ${tempfile}Diso.map >& /dev/null
rm -f ${tempfile}Dano.map >& /dev/null
rm -f ${tempfile}FH.map >& /dev/null


Patterson:
###########################################
if("$pattfile" == "") goto Clean_up

# calculate the unweighted Patterson map (if desired)
fft HKLIN $outfile MAPOUT ${tempfile}FH.map << EOF-fft >> $logfile
TITLE ${hiRES}A Patterson of FH
RESOLUTION $scaleRES
PATTERSON
LABIN F1=FH SIG1=SIGFH F2=FH SIG2=SIGFH
SCALE F2 0.000001 0
EXCLUDE DIFF $DIFF
EXCLUDE SIG1 0
EOF-fft
peakmax MAPIN ${tempfile}FH.map XYZOUT ${tempfile}.pdb << EOF-pick >> $logfile
THRESHOLD RMS 3
NUMPEAKS 50
EOF-pick
# extend it to whole unit cell?
mapmask mapin ${tempfile}FH.map mapout "$pattfile" << EOF >> $logfile
scale sigma
#xyzlim 0 1 0 1 0 1
EOF
if((! $status)&&(-e "$pattfile")) echo "$pattfile is the Patterson of FH."
rm -f ${tempfile}FH.map >& /dev/null


if("$wpattfile" == "") goto Clean_up
# calculate the weighted Patterson map (if desired)
fft HKLIN $outfile MAPOUT ${tempfile}FH.map << EOF-fft >> $logfile
TITLE ${hiRES}A Patterson of FH/SIGFH
RESOLUTION $scaleRES
PATTERSON
LABIN F1=FH SIG1=SIGFH W=W F2=FH SIG2=SIGFH
SCALE F2 0.000001 0
#EXCLUDE DIFF $DIFF
EXCLUDE SIG1 0
EOF-fft
peakmax MAPIN ${tempfile}FH.map XYZOUT ${tempfile}.pdb << EOF-pick >> $logfile
THRESHOLD RMS 3
NUMPEAKS 50
EOF-pick
# extend it to whole unit cell?
mapmask mapin ${tempfile}FH.map mapout "$wpattfile" << EOF >> $logfile
scale sigma
#xyzlim 0 1 0 1 0 1
EOF
if((! $status)&&(-e "$wpattfile")) echo "$wpattfile is the Patterson of FH / sig(FH)^2"
rm -f ${tempfile}FH.map >& /dev/null


# clean up
Clean_up:

rm -f ${tempfile}scaled.mtz >& /dev/null
rm -f ${tempfile}Fs     >& /dev/null
rm -f ${tempfile}Ds     >& /dev/null
rm -f ${tempfile}Fpairs >& /dev/null
rm -f ${tempfile}.pdb   >& /dev/null


exit


Setup:
#################################################

  ####   ######   #####  #    #  #####
 #       #          #    #    #  #    #
  ####   #####      #    #    #  #    #
      #  #          #    #    #  #####
 #    #  #          #    #    #  #
  ####   ######     #     ####   #

#################################################
#
#   gather information on:
#    mtz file
#    data sets
#    resolution limits
#    sigma cuttoff (for map generation)
#    difference cutoff
#
##################################################
if(! $?DIFF) set DIFF = ""

# scan the command line for files
foreach arg ( $* )
    # warn about probable mispellings
    if("$arg" =~ *.mtz) then
	if(-e "$arg") then
	    set mtzfile = "$arg"
	else
	    if(! $?useroutfile) then
		set useroutfile = "$arg"
		set outfile = "$arg"
	    endif
	endif
    endif
end


# now all filenames have been initialized

if(! -e "$mtzfile") goto Help

##################################################
# get crystal and dataset information from the mtz file
echo "go" | mtzdump HKLIN $mtzfile >! ${tempfile}mtzdump
set CELL  = `nawk '/Cell Dimensions/{getline;getline;print}' ${tempfile}mtzdump`
set SGnum = `nawk '/Space group =/{print $NF+0}' ${tempfile}mtzdump | tail -n 1`
set SG    = `nawk -v num=$SGnum '$1==num && NF>5{print $4}' ${CLIBD}/symop.lib `
set hiRES = `nawk '/Resolution Range/{getline;getline;print $6}' ${tempfile}mtzdump`

# get column label names from the mtz file
nawk 'NF>3' ${tempfile}mtzdump |\
nawk '$(NF-1)=="F"{print "F", $NF}\
      $(NF-1)=="D"{print "D", $NF}\
      $(NF-1)=="Q"{print "S", $NF}\
      $(NF-1)=="P"{print "P", $NF}\
      $(NF-1)=="W"{print "W", $NF}' |\
nawk '/^F/{++n} /^D/{++n} /^P/{++n} {printf "%s", $1; \
       if($1=="S") printf "%s", last;\
printf " %d %s \n",n, $2; last=$1}' |\
cat >! ${tempfile}datasets
rm -f ${tempfile}mtzdump >& /dev/null

# check extent of available data
set temp = `nawk '/^F/ || /^D/' ${tempfile}datasets |& wc -l`
if($temp < 1) then
    # this is useless, bail out now
    rm -f ${tempfile}datasets >& /dev/null
    echo "ERROR: no usable data in ${mtzfile}! "
    set mtzfile = ""
    goto Help
endif

# get complete, unique lists of DANO/SIGDANOs
cat ${tempfile}datasets |\
nawk '$1=="D"{D[$2]=$NF} $1=="SD"{S[$2]=$NF} \
      END{for(i in D) print i, D[i], S[i];}' |\
sort -un |\
nawk 'NF==3{print $2,$3}' |\
cat >! ${tempfile}Ds

# get complete, unique lists of F/SIGFs
cat ${tempfile}datasets |\
nawk '$1=="F"{F[$2]=$NF} $1=="SF"{S[$2]=$NF} \
      END{for(i in F) print i, F[i], S[i];}' |\
sort -un |\
nawk 'NF==3{print $2,$3}' |\
cat >! ${tempfile}Fs

# get complete, unique lists of Phases/FOMs
cat ${tempfile}datasets |\
nawk '$1=="P"{P[$2]=$NF} $1=="W"{W[$2]=$NF} \
      END{for(i in P) print i, P[i], W[i];}' |\
sort -un |\
nawk 'NF>=2{print $2,$3, " "}' |\
cat >! ${tempfile}Ps


# one last pass through command line
# allow user overrides of all internal variables
set i = 0
echo -n "" >! ${tempfile}userlabels
while( $i < $#argv )
    @ i = ( $i + 1 )
    @ nexti = ( $i + 1 )
    @ lasti = ( $i - 1 )
    if($nexti > $#argv) set nexti = $#argv
    if($lasti < 1) set lasti = 1
    set arg = "$argv[$i]"
    
    # see if a dataset label was given
    egrep " $arg " ${tempfile}datasets >& /dev/null
    if(! $status) then
	if($?NO) then
	    # user doesn't want this label
	    # filter it out of the input files
	    egrep -v "^$arg " ${tempfile}Fs >! ${tempfile}
	    mv ${tempfile} ${tempfile}Fs
	    egrep -v "^$arg " ${tempfile}Ds >! ${tempfile}
	    mv ${tempfile} ${tempfile}Ds
	    egrep -v "^$arg " ${tempfile}Ps >! ${tempfile}
	    mv ${tempfile} ${tempfile}Ps
	else
	    # must want only this label?
	    cat ${tempfile}datasets |\
	    nawk -v label=$arg 'NF>2 && $NF==label{print $NF}' |\
	    cat >> ${tempfile}userlabels
	endif
	# "NO" stays set for next word
	continue
    endif
    
    # only look at non-file words
    if(-e "$arg") then
	unset NO
	continue
    endif
    
    if("$arg" =~ [0-9]*) then
	# we have a number
	if(("$arg" =~ *A)||("$argv[$nexti]" == "A")) then
	    # user-preferred resolution limits
	    set temp = `echo "$arg" | nawk 'BEGIN{FS="-"} $1+0 > 0.1{print $1+0} $2+0 > 0.1{print $2+0}'`
	    if($#temp != 1) then
		set temp = `echo $temp | nawk '$1>$2{print $1, $2} $2>$1{print $2, $1}'`
		if($#temp == 2) then
		    set loRES = "$temp[1]"
		    set hiRES = "$temp[2]"
		endif
	    else
		if("$temp" != "") set hiRES = "$temp"
	    endif
	endif
	    
	if(("$arg" =~ *[Ss]igma)||("$argv[$nexti]" =~ [Ss]igma)) then
	    set DATA_cutoff = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'`
	endif
	
	if(("$arg" =~ *[Dd]iff)||("$argv[$lasti]" =~ [Dd]iff)) then
	    set MAX_diso = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'`
	    set MAX_dano = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'`
	endif
	if(("$arg" =~ *[Dd]iso)||("$argv[$lasti]" =~ [Dd]iso)) then
	    set MAX_diso = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'`
	endif
	if(("$arg" =~ *[Dd]ano)||("$argv[$lasti]" =~ [Dd]ano)) then
	    set MAX_dano = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'`
	endif
    endif
    
    # allow "NO" logic to carry through to next word(s)
    unset NO
    if(("$arg" == "no")||("$arg" == "not")) set NO
    if(("$arg" == "don't")||("$arg" == "ignore")) set NO
    if("$arg" == "except") set NO
end

rm -f ${tempfile}datasets >& /dev/null

# turn the "user" labels into real label files
cat ${tempfile}Ds ${tempfile}userlabels |\
nawk 'NF==2{set[$1]=$0}\
      NF==1{print set[$1]}' |\
nawk 'NF==2' >! ${tempfile}
set Ds = `cat ${tempfile} | wc -l`
if($Ds > 0) then
    # user mentioned which Ds to use
    mv ${tempfile} ${tempfile}Ds
endif
set Ds = `cat ${tempfile}Ds | wc -l`

# same for Fs
cat ${tempfile}Fs ${tempfile}userlabels |\
nawk 'NF==2{set[$1]=$0}\
      NF==1{print set[$1]}' |\
nawk 'NF==2' >! ${tempfile}
set Fs = `cat ${tempfile} | wc -l`
if($Fs > 1) then
    # user mentioned which Fs to use
    mv ${tempfile} ${tempfile}Fs
    
    # get user-specified F dataset order 
    set order = `nawk 'NF==2{print $1}' ${tempfile}Fs`
    set USER_ORDER
endif
set Fs = `cat ${tempfile}Fs | wc -l`

# same for Phases
cat ${tempfile}Ps ${tempfile}userlabels |\
nawk 'NF==1{print set[$1]}\
           {set[$1]=$0}' |\
nawk 'NF!=0' >! ${tempfile}
set Ps = `cat ${tempfile} | wc -l`
if($Ps > 0) then
    # user mentioned which Phase to use
    tail -1 ${tempfile} >! ${tempfile}Ps
endif
# only use one phase
tail -1 ${tempfile}Ps >! ${tempfile}
set PHASE = `nawk '{print $1}' ${tempfile}`
set FOM   = `nawk '{print $2}' ${tempfile}`


# clean up
rm -f ${tempfile} >& /dev/null
rm -f ${tempfile}Ps >& /dev/null
rm -f ${tempfile}userlabels >& /dev/null

goto Return_from_Setup


####################################################

The Future:
re-order Diso if scales come out screwy?
make the interface a little slicker, less wordy?
use correlation instead of scale as a weight?
implement use of max_DANO, DATA_cutoff, etc. ?