#! /bin/csh -f # # # bestFH.com -James Holton 8-2-04 # # calculate a "best" estimate of F for the heavy atoms # alone (a la B. W. Matthews 1966 Acta Cryst 20, 230-239) # by combining ALL anomalous and isomorphous differences. # (as many as you want) # # FH should give cleaner Pattersons, difference Fouriers, # and direct methods. # # Do NOT combine differences from data sets expected to have # different heavy atom locations. That would be silly. # # # set this to wherever your awk program is alias nawk /usr/bin/nawk nawk 'BEGIN{print}' >& /dev/null if($status) alias nawk awk set mtzfile = "./mtz/all.mtz" # MAD data set set outfile = "./FH.mtz" # contains FH, SIGFH set shelxfile = "./fh.hkl" # same thing, shelx format set fourfile = "./FH_Four.map" # Phased Fourier of FH (if phase is available) set pattfile = "./FH_Patt.map" # Combined Patterson map set wpattfile = "./wFH_Patt.map" # Combined, weighted Patterson map set logfile = "./bestFH.log" # all the CCP4 logs set tempfile = ./bestFH_temp # initialize internal variables set loRES = 1000 set hiRES = "" set order = "" # "order" of increasing dispersive signal data sets set DATA_cutoff = 1 # sigma cutoff (not used) set MAX_dano # upper Dano cutoff (not used) set MAX_diso # upper Diso cutoff (not used) set scaling = scale # apply one scale to each difference data set #set scaling = isotropic # use a B-factor too (wise? ) if($#argv == 0) goto Help # this procedure (re)sets most of the above variables # from either the provided files, or the command line goto Setup Help: cat << EOF usage: $0 alldata.mtz where: alldata.mtz - contains all the Fs and DANOs you want to combine `basename $0` will calculate an (unscaled) estimate of FH, the scattering factor of the heavy metal alone, by combining any and all anomalous and isomorphous (dispersive) differences made available to it. The procedure is derived from Matthews et. al. 1966 Acta Cryst 20, 230. FH is better than Dano or Diso alone, both because the averaging tends to give better signal/noise, and because considering BOTH isomorphous and anomalous simultaneously reduced the systematic errors arising from cross-terms in difference intensity data. FH usually gives cleaner Pattersons and difference Fouriers, as well as improved performace of direct methods programs. Procedure: The isomorphous difference data sets are computed from the provided Fs, (assigned a sign), scaled together, and a sigma-weighed mean isomorphous difference is computed. A similar procedure is applied to the anomalous differences. The average Diso and Dano data sets are then scaled to each other, and combined for the final estimate of FH = sqrt(Diso^2 + k * Dano^2). REMEMBER: Do NOT combine differences from data sets expected to have different heavy atom locations. That would be silly. If you don't know why, you should read Matthews et. al. 1966 Acta Cryst 20, 230. EOF rm -f ${tempfile}Fs >& /dev/null rm -f ${tempfile}Ds >& /dev/null rm -f ${tempfile}Ps >& /dev/null rm -f ${tempfile}Fpairs >& /dev/null exit 2 # # This procedure (at the bottom of the script) does the following # 1) scan the command line for the mtz file # 2) set the CELL, SG, and other variables # 3) generate dataset name lists: ${tempfile}Fs and ${tempfile}Ds Return_from_Setup: ################################################################ # initial report on intended program flow ################################################################ # start the logfile echo "" >! $logfile echo "$0 $*" >> $logfile if(-e "$mtzfile") then echo "calculating FH from data in $mtzfile" | tee -a $logfile endif echo "" | tee -a $logfile echo "resolution $loRES $hiRES" | tee -a $logfile # get a "better" resolution limit for scaling? set scaleRES = "$loRES $hiRES" # count how many datasets we have set Fs = `cat ${tempfile}Fs | wc -l` set Ds = `cat ${tempfile}Ds | wc -l` # trivial assignments of dispersive difference order # (2 datasets -> doesn't matter) # (1 dataset -> unusable) if($Fs < 3) set order = `nawk '{print $1}' ${tempfile}Fs` if($Fs < 2) set order = "" # if user doesn't care, pick Diso ordering automatically if(("$order" == "")&&($Fs > 2)) then echo -n "Evaluating difference data " echo -n "" >! ${tempfile}diso_dano # make a list of essential pairs cat ${tempfile}Fs |\ nawk '{++n; F[n]=$1} \ END{for(i=1;i<=n;++i){for(j=i;j<=n;++j){if(i!=j){\ print F[i] " - " F[j];}}}}' |\ cat >! ${tempfile}Fpairs foreach pair ( `nawk '{print NR}' ${tempfile}Fpairs` ) # retrieve the pair set F1 = `nawk -v pair=$pair 'NR==pair{print $1}' ${tempfile}Fpairs` set F2 = `nawk -v pair=$pair 'NR==pair{print $3}' ${tempfile}Fpairs` # and get the sigmas too set SIGF1 = `nawk -v F=$F1 '$1==F{print $2}' ${tempfile}Fs` set SIGF2 = `nawk -v F=$F2 '$1==F{print $2}' ${tempfile}Fs` # also, get magnitudes of anomalous diffs if($Ds != 0) then @ i = ( ( $pair % $Ds ) + 1 ) set DANO = `nawk -v n=$i 'NR==n{print $1}' ${tempfile}Ds` set SIGDANO = `nawk -v n=$i 'NR==n{print $2}' ${tempfile}Ds` set DANOcards = "DPH1=$DANO SIGDPH1=$SIGDANO" else set DANOcards = "" endif # make some ordinary scaleit cards cat << EOF-scaleitin >! ${tempfile}scaleit.in RESOLUTION $scaleRES refine $scaling weight LABIN FP=$F1 SIGFP=$SIGF1 - FPH1=$F2 SIGFPH1=$SIGF2 $DANOcards END EOF-scaleitin # entertainment echo -n "." # put the labels in the log file echo -n "$F1 $F2 $DANO " >> ${tempfile}diso_dano # run scaleit to get Diso and Dano cat ${tempfile}scaleit.in |\ scaleit HKLIN $mtzfile HKLOUT /dev/null |\ nawk '/Sc_kraut SCALE/{iso=index($0,"diso")-4; ano=index($0,"")+3}\ /THE TOTALS/{print substr($0,iso)+0, substr($0,ano)+0}' |\ cat >> ${tempfile}diso_dano # ${tempfile}diso_dano has format: F1 F2 DANOF3 end rm -f ${tempfile}Fpairs >& /dev/null rm -f ${tempfile}scaleit.in >& /dev/null # get the single largest isomorphous difference set order = `sort -n +3 ${tempfile}diso_dano | tail -1 | nawk '{print $2}'` if("$order" == "") then echo "" echo "no isomorphous differences" echo "estimating FH requires isomorphous/dispersive and anomalous differences! " echo "Therefore: $mtzfile needs to contain at least two columns of Fs" echo "sorry" goto Clean_up endif # now order the remaining datasets with increasing distance from this "native" cat ${tempfile}diso_dano |\ nawk -v ref=$order 'BEGIN{print ref, 0, "order"} \ $1==ref{print $2, $4} $2==ref{print $1, $4}' |\ sort -n +1 >! ${tempfile}order # ${tempfile}order has format: F diso(from Fref) # store the new order in a variable set order = `nawk '{print $1}' ${tempfile}order` # make sure nothing bad happened (this isn't exactly ergodic! ) set Fs = `cat ${tempfile}Fs | wc -l` if($#order != $Fs) then # something has gone horribly wrong echo "ERROR: unable to determine the best order of" echo " isomorphous/dispersive differences" echo "sorry! " echo "" goto Help endif # reorder the Dano list too (doesn't really matter) cat ${tempfile}diso_dano ${tempfile}Ds |\ nawk 'NF>2{dano[$3]=$5} \ NF==2{print $0, dano[$1]}' |\ sort -nru +2 |\ nawk '{print $1, $2}' >! ${tempfile} mv ${tempfile} ${tempfile}Ds >& /dev/null # clean up rm -f ${tempfile}scaleit.in >& /dev/null rm -f ${tempfile}diso_dano >& /dev/null endif # whatever its source, put the f' order in a file echo "$order" |\ nawk 'BEGIN{RS=" "} NF==1{++i; print $1, "order"}' |\ cat >! ${tempfile}order # re-order the Fs list cat ${tempfile}Fs ${tempfile}order |\ nawk '$NF!="order"{sig[$1]=$2} \ $NF=="order"{print $1, sig[$1]}' |\ cat >! ${tempfile} mv ${tempfile} ${tempfile}Fs >& /dev/null set Fs = `cat ${tempfile}Fs | wc -l` # print out final ordering results: echo "" echo -n " f' order: " cat ${tempfile}Fs | nawk '{printf "%s ", $1} END{print ""}' echo -n " f"\"" order: " cat ${tempfile}Ds | nawk '{printf "%s ", $1} END{print ""}' # clean up rm -f ${tempfile}order >& /dev/null # jump ahead if there are no anomalous datasets (why bother?) if("$Ds" == 0) then # make sure this doesn't exist (this is a signal later on) rm -f ${tempfile}dano.mtz >& /dev/null goto calculate_iso endif # no need to weigh anomalous datasets if there is only one of them if("$Ds" == 1) then # all we need to do is rename the dataset set temp = `cat ${tempfile}Ds` # better than crashing, (I guess) if($#temp != 2) set temp = ( $temp $temp ) echo "extracting $temp[1] as Dano" cad hklin1 $mtzfile hklout ${tempfile}dano.mtz << EOF-cad >> $logfile LABIN FILE 1 E1=$temp[1] E2=$temp[2] CTYPO FILE 1 E1=F E2=Q LABOU FILE 1 E1=Dano E2=SIGDano EOF-cad # go on to calculate isomorphous differences goto calculate_iso endif weigh_ano: echo "" echo "weighting anomalous differences" # extract the anomalous differences, and treat them as Fs cat ${tempfile}Ds |\ nawk '{printf "%s %s ", $1, $2} END{print ""}' |\ nawk 'BEGIN{printf "LABIN FILE 1 ";} \ {for(i=1;i<=NF;++i){printf "E%d=%s ", i, $i}; printf "\nCTYPIN FILE 1 "; \ for(i=1;i<=NF;i+=2){printf "E%d=F E%d=Q ", i, i+1}; print ""}' |\ cad HKLIN1 $mtzfile HKLOUT ${tempfile}danos.mtz >> $logfile # put anomalous diffs on the same scale set i = 1 echo -n "" >! ${tempfile}scaleit.log while( $i <= $Ds ) cat << EOF-scaleitin >! ${tempfile}scaleit.in RESOLUTION $scaleRES #refine scale refine $scaling weight EOF-scaleitin # make the LABIN card (not too many FPHs! ) head -1 ${tempfile}Ds |\ nawk '{printf "LABIN FP=%s SIGFP=%s ", $1, $2}' |\ cat >> ${tempfile}scaleit.in # no more than 6 at a time cat ${tempfile}Ds |\ nawk -v first=$i '{++n} n>=first && n<(first+6){++i;\ printf "-\nFPH%d=%s SIGFPH%d=%s ", i, $1, i, $2} \ END {print "\nEND"}' |\ cat >> ${tempfile}scaleit.in # now actually run scaleit cat ${tempfile}scaleit.in |\ scaleit HKLIN ${tempfile}danos.mtz \ HKLOUT ${tempfile}danoscaled.mtz |\ tee -a ${tempfile}scaleit.log >> $logfile # accumulate scaled datasets mv ${tempfile}danoscaled.mtz ${tempfile}danos.mtz >& /dev/null @ i = ( $i + 6 ) end mv ${tempfile}danos.mtz ${tempfile}danoscaled.mtz >& /dev/null # print out "weights" (effective weighting is 1/scale^2) cat ${tempfile}scaleit.log |\ nawk '/APPLICATION OF SCALES/,/--------------------------/' |\ nawk '$1 == "Derivative"{++i; print $1, i, $3}' >! ${tempfile}scales cat ${tempfile}Ds ${tempfile}scales |\ nawk 'NF==2{++n; label[n]=$1; if(length($1)>maxlen) maxlen=length($1)}\ $1 == "Derivative"{w=0; if($3+0!=0) w=1/($3*$3)\ printf "%-" maxlen "s : %.3f\n", label[$2], w}' |\ sort -nr +2 rm -f ${tempfile}scaleit.log >& /dev/null rm -f ${tempfile}scales >& /dev/null combine_ano: echo "combining anomalous differences into Dano" # now do a sigma-weighted average of all anomalous diffs set Ds = `cat ${tempfile}Ds | wc -l` echo $Ds | nawk '{print "NREF -1"; print "FORMAT \047(3i5,"$1*2"f15.7)\047"}' |\ mtzdump HKLIN ${tempfile}danoscaled.mtz |\ nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\ nawk '! /[A-Z]/ && NF>3{HKL=substr($0,1,15); sum=norm=n="";\ # run down list of D, sigD \ for(i=4;i0){w=1/(sigD*sigD); sum+=w*D; norm+=w;}}\ # do not print anything for "all-missing" HKLs \ # print zero for all-zero hkls \ if(norm==0) print HKL, 0, 0; \ # print sigma-weighted average (abs value? ) \ if(norm+0 > 0) print HKL, sum/norm, 1/sqrt(norm)}' |\ cat >! ${tempfile}dano.hkl rm -f ${tempfile}danoscaled.mtz >& /dev/null f2mtz HKLIN ${tempfile}dano.hkl HKLOUT ${tempfile}sortme.mtz << EOF-f2mtz >> $logfile CELL $CELL SYMM $SGnum LABOUT H K L Dano SIGDano CTYPO H H H F Q EOF-f2mtz rm -f ${tempfile}dano.hkl >& /dev/null echo "H K L" |\ sortmtz HKLIN ${tempfile}sortme.mtz HKLOUT ${tempfile}dano.mtz >> $logfile rm -f ${tempfile}sortme.mtz # indicate finish echo "" # summed anomalous diffs should now be loaded into: # ${tempfile}dano.mtz, labeled as Dano SIGDano calculate_iso: ########################################################### # now we need to calculate isomorphous diffs # before we can treat them as we did the anomalous diffs # jump ahead if there are no isomorphous datasets (why bother?) if("$Fs" < 2) then # make sure this doesn't exist (this is a signal later on) rm -f ${tempfile}diso.mtz >& /dev/null goto compute_k endif if($Fs == 2) then echo "calculating isomorphous (dispersive) difference as Diso" else echo "subtracting isomorphous (dispersive) differences" echo "note: f' differences should all have the same sign! " endif # make sure this doesn't already exist rm -f ${tempfile}disos.mtz >& /dev/null # make list with largest difference first cat ${tempfile}Fs |\ nawk '{++n; F[n]=$1} \ END{for(i=1;i<=n;++i){for(j=n;j>i;--j){if(i!=j){\ print F[i] " - " F[j];}}}}' |\ cat >! ${tempfile}Fpairs set Fpairs = `cat ${tempfile}Fpairs | wc -l` set pair = 0 while ( $pair < $Fpairs ) @ pair = ( $pair + 1 ) # retrieve the pair set F1 = `nawk -v pair=$pair 'NR==pair{print $1}' ${tempfile}Fpairs` set F2 = `nawk -v pair=$pair 'NR==pair{print $3}' ${tempfile}Fpairs` # and get the sigmas too set SIGF1 = `nawk -v F=$F1 '$1==F{print $2}' ${tempfile}Fs` set SIGF2 = `nawk -v F=$F2 '$1==F{print $2}' ${tempfile}Fs` echo "${F1}-${F2}" # extract these columns from the file echo "LABIN FILE 1 E1=$F1 E2=$SIGF1 E3=$F2 E4=$SIGF2" |\ cad HKLIN1 $mtzfile HKLOUT ${tempfile}dump.mtz >> $logfile # dump the Fs as text, calculate F1-F2 sqrt(SIG1^2+SIG2^2) echo "NREF -1" |\ mtzdump HKLIN ${tempfile}dump.mtz |\ nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\ nawk '! /[A-Z?]/ && NF>1{HKL=substr($0,1,13);\ F1=substr($0,14,12); F2=substr($0,36,10);\ SIGF1=substr($0,26,10); SIGF2=substr($0,46,10); \ print HKL, F1-F2, sqrt(SIGF1*SIGF1 + SIGF2*SIGF2)}' |\ cat >! ${tempfile}diso.hkl rm -f ${tempfile}dump.mtz >& /dev/null # special case: only one difference set if($Fs == 2) set pair = "" # read back into mtz format f2mtz HKLIN ${tempfile}diso.hkl HKLOUT ${tempfile}sortme.mtz << EOF-f2mtz >> $logfile CELL $CELL SYMM $SGnum LABOUT H K L Diso${pair} SIGDiso${pair} CTYPO H H H F Q EOF-f2mtz rm -f ${tempfile}diso.hkl >& /dev/null if("$pair" == "") set pair = 999 # sort it (just in case) echo "H K L" |\ sortmtz HKLIN ${tempfile}sortme.mtz HKLOUT ${tempfile}diso.mtz >> $logfile rm -f ${tempfile}sortme.mtz # add columns into an mtz if(-e ${tempfile}disos.mtz) then cad HKLIN1 ${tempfile}disos.mtz HKLIN2 ${tempfile}diso.mtz \ HKLOUT ${tempfile}cadded.mtz << EOF-cadadd >> $logfile LABIN FILE 1 ALL LABIN FILE 2 ALL EOF-cadadd # update the cumulative mtz mv ${tempfile}cadded.mtz ${tempfile}disos.mtz else # create the cumulative mtz mv ${tempfile}diso.mtz ${tempfile}disos.mtz endif end # check for trivial case: one difference dataset if($Fs <= 2) then # no need to calculate relative weights for one dataset mv ${tempfile}disos.mtz ${tempfile}diso.mtz goto compute_k endif weigh_iso: echo "weighting isomorphous differences" # now put all these differences on the same scale set pair = 1 set Fpairs = `cat ${tempfile}Fpairs | wc -l` echo -n "" >! ${tempfile}scaleit.log while ( $pair <= $Fpairs ) cat << EOF-scaleitin >! ${tempfile}scaleit.in RESOLUTION $scaleRES #refine scale refine $scaling weight EOF-scaleitin # first one is the reference echo -n "LABIN FP=Diso1 SIGFP=SIGDiso1 " >> ${tempfile}scaleit.in # scale 6 at a time cat ${tempfile}Fpairs |\ nawk -v first=$pair '{++n} n>=first && n<(first+6){++i;\ printf "-\nFPH%d=Diso%d SIGFPH%d=SIGDiso%d ", i, n, i, n;}\ END{print "\nEND"}' |\ cat >> ${tempfile}scaleit.in # run scaleit cat ${tempfile}scaleit.in |\ scaleit HKLIN ${tempfile}disos.mtz \ HKLOUT ${tempfile}disoscaled.mtz |\ tee -a ${tempfile}scaleit.log >> $logfile rm -f ${tempfile}scaleit.in >& /dev/null @ pair = ( $pair + 6 ) # output is input for next round of scaling mv ${tempfile}disoscaled.mtz ${tempfile}disos.mtz >& /dev/null end mv ${tempfile}disos.mtz ${tempfile}disoscaled.mtz >& /dev/null # print out "weights" (effective weighting is 1/scale^2) cat ${tempfile}scaleit.log |\ nawk '/APPLICATION OF SCALES/,/--------------------------/' |\ nawk '$1 == "Derivative"{++n; print $1, n, $3}' >! ${tempfile}scales cat ${tempfile}Fpairs ${tempfile}scales |\ nawk '$2 == "-"{++n; label[n]=$1 $2 $3; if(length(label[n])>maxlen) maxlen=length(label[n])}\ $1 == "Derivative"{w=0; if($3+0!=0) w=1/($3*$3)\ printf "%-" maxlen "s : %.3f\n", label[$2], w}' |\ sort -nr +2 rm -f ${tempfile}scaleit.log >& /dev/null rm -f ${tempfile}scales >& /dev/null combine_iso: echo "combining isomorphous differences into Diso" # add these scaled data sets together (sigma-weighted again) # (hopefully, our "ordering" procedure has made sure all # these differences have the same sign) set Fpairs = `cat ${tempfile}Fpairs | wc -l` rm -f ${tempfile}Fpairs >& /dev/null echo $Fpairs | nawk '{print "NREF -1"; print "FORMAT \047(3i5,"$1*2"f15.7)\047"}' |\ mtzdump HKLIN ${tempfile}disoscaled.mtz |\ nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\ nawk '! /[A-Z]/ && NF>3{HKL=substr($0,1,15); sum=norm=n="";\ # run down list of D, sigD \ for(i=4;i0){w=1/(sigD*sigD); sum+=w*D; norm+=w;}}\ # do not print anything for "all-missing" HKLs \ # print zero for all-zero hkls \ if(norm==0) print HKL, 0, 0; \ # print sigma-weighted average (abs value?) \ if(norm+0 > 0) print HKL, sum/norm, 1/sqrt(norm)}' |\ cat >! ${tempfile}diso.hkl rm -f ${tempfile}disoscaled.mtz >& /dev/null # read the averaged isomorphous differences back into an mtz f2mtz HKLIN ${tempfile}diso.hkl HKLOUT ${tempfile}sortme.mtz << EOF-f2mtz >> $logfile CELL $CELL SYMM $SGnum LABOUT H K L Diso SIGDiso CTYPO H H H F Q EOF-f2mtz rm -f ${tempfile}diso.hkl >& /dev/null echo "H K L" |\ sortmtz HKLIN ${tempfile}sortme.mtz HKLOUT ${tempfile}diso.mtz >> $logfile rm -f ${tempfile}sortme.mtz # indicate finish compute_k: ############################################################## # now we need the "k" that will put Diso and Dano on the same # scale: k/2 = <|Diso|>/<|Dano|> ############################################################## echo "" # handle special cases if(($Ds == 0)&&($Fs <= 1)) then # this should never happen, but... echo "ERROR: no difference data in ${mtzfile}! " goto Help endif if($Ds == 0) then # anomalous difference data is totally missing # just rename the isomorphous differences echo "WARNING: treating Diso data as FH (FH is better with Dano and Diso)" cad hklin1 ${tempfile}diso.mtz hklout ${tempfile}sortme.mtz << EOF >> $logfile labin file 1 E1=Diso E2=SIGDiso labou file 1 E1=FH E2=SIGFH EOF rm -f ${tempfile}diso.mtz >& /dev/null rm -f ${tempfile}dano.mtz >& /dev/null set wpattfile = "" goto sort_final endif if($Fs <= 1) then # isomorphous difference data is totally missing # just re-name the anomalous differences echo "WARNING: treating Dano data as FH (FH is better with Dano and Diso)" cad hklin1 ${tempfile}dano.mtz hklout ${tempfile}sortme.mtz << EOF >> $logfile labin file 1 E1=Dano E2=SIGDano labou file 1 E1=FH E2=SIGFH EOF rm -f ${tempfile}diso.mtz >& /dev/null rm -f ${tempfile}dano.mtz >& /dev/null set wpattfile = "" goto sort_final endif ############################################################## echo -n "scaling Diso to Dano " # use scaleit to put Diso and Dano on the same scale cad HKLIN1 ${tempfile}diso.mtz HKLIN2 ${tempfile}dano.mtz \ HKLOUT ${tempfile}diso_dano.mtz << EOF-cad >> $logfile LABIN FILE 1 ALL LABIN FILE 2 ALL EOF-cad rm -f ${tempfile}diso.mtz >& /dev/null rm -f ${tempfile}dano.mtz >& /dev/null scaleit HKLIN ${tempfile}diso_dano.mtz \ HKLOUT ${tempfile}scaled.mtz << EOF-scaleit | tee ${tempfile}scaleit.log >> $logfile RESOLUTION $scaleRES #refine scale refine $scaling weight LABIN FP=Diso SIGFP=SIGDiso FPH1=Dano SIGFPH1=SIGDano end EOF-scaleit rm -f ${tempfile}diso_dano.mtz >& /dev/null rm -f ${tempfile}scaleit.in >& /dev/null # print out "k" value from scaling cat ${tempfile}scaleit.log |\ nawk '/APPLICATION OF SCALES/,/--------------------------/' |\ nawk '$1 == "Derivative"{printf "k= %.3f\n", 2*$3}' rm -f ${tempfile}scaleit.log >& /dev/null combine_diso_dano: ############################################################## # now combine Diso and Dano as the "best" estimate of total FH echo "calculating FH = sqrt( Diso^2 + (k/2)^2 * Dano^2 )" # also do a "Patterson weight" of 1/(sigma(FH^2))^2 echo "NREF -1" |\ mtzdump HKLIN ${tempfile}scaled.mtz |\ nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\ nawk '! /[A-Z]/ && NF>1{ HKL=substr($0,1,13); \ Diso=substr($0,14,12); SIGDiso=substr($0,26,10); \ Dano=substr($0,36,10); SIGDano=substr($0,46,10); \ FH=fh=sqrt(Diso*Diso + Dano*Dano); if(fh==0) fh=1;\ varFH = (Diso*SIGDiso/fh)^2 + (Dano*SIGDano/fh)^2;\ varFH2 = 4 * FH^2 * varFH; \ W=0; if(varFH) W = 1/sqrt(varFH); if(W>maxW) maxW=W;\ print HKL, FH, sqrt(varFH), W} \ END{if(maxW) print 1/maxW > "'${tempfile}'norm"}' |\ cat >! ${tempfile}FH.hkl set norm = `tail -1 ${tempfile}norm` rm -f ${tempfile}norm >& /dev/null if($#norm != 1) set norm = 1 # read FH back into mtz (finally) f2mtz HKLIN ${tempfile}FH.hkl HKLOUT ${tempfile}sortme.mtz << EOF-f2mtz >> $logfile CELL $CELL SYMM $SGnum LABOUT H K L FH SIGFH W CTYPO H H H F Q W SCALE 1 1 1 1 1 $norm EOF-f2mtz rm -f ${tempfile}FH.hkl >& /dev/null sort_final: # sort it (for good measure) echo "H K L" |\ sortmtz HKLIN ${tempfile}sortme.mtz HKLOUT $outfile >> $logfile if((! $status)&&(-e "$outfile")) then echo "$outfile is ready." else echo "ERROR! see $logfile for what happened..." exit 9 endif rm -f ${tempfile}sortme.mtz ############################################################## # use scaleit to get the recommended DIFF scaleit hklin $outfile << EOF >! ${tempfile}scaleit analyze labin FP=FH SIGFP=SIGFH FPH1=FH SIGFPH1=SIGFH SCALE FPH1 0.000001 END EOF if("$DIFF" == "") set DIFF = `nawk '/acceptable differences/{print $NF}' ${tempfile}scaleit` rm -f ${tempfile}scaleit >& /dev/null if("$DIFF" == "") set DIFF = 1000 Shelx_format: if("$shelxfile" == "") goto Fourier # dump FH out in shelx format too. mtz2various HKLIN $outfile HKLOUT $shelxfile << EOF-shelx >> $logfile OUTPUT SHELX FSQUARED LABIN FP=FH SIGFP=SIGFH END EOF-shelx if((! $status)&&(-e "$shelxfile")) echo "$shelxfile is the SHELX version of $outfile." Fourier: ########################################### if(("$PHASE" == "")||("$fourfile" == "")) goto Patterson # calculate a phased Fourier of FH # first, we need to retrieve the phase columns set FOM_cad = "" if("$FOM" != "") set FOM_cad = "E2=$FOM" # set up FOM weight set W = "" if("$FOM" != "") set W = "W=$FOM" # special case of Diso or Dano only if(($Ds == 0)||($Fs <= 1)) then # no double-difference data set E1 = "E1=F" set fftF1="F1=FH" if($Fs <= 1) then # anomalous data only set E1="E1=D" set fftF1="DANO=FH" endif # but still want phased FH map cad hklin1 $outfile hklin2 $mtzfile \ hklout ${tempfile}phased.mtz << EOF-cad >> $logfile LABIN FILE 1 E1=FH E2=SIGFH LABIN FILE 2 E1=$PHASE $FOM_cad CTYPO FILE 1 $E1 E2=Q EOF-cad # ordinary, boring difference Fourier fft HKLIN ${tempfile}phased.mtz MAPOUT ${tempfile}FH.map << EOF-fft >> $logfile TITLE ${hiRES}A map of FH @ $PHASE $FOM RESOLUTION $scaleRES LABIN $fftF1 SIG1=SIGFH PHI=$PHASE $W EXCLUDE SIG1 0 EOF-fft goto norm_Four endif # calculate a combined, phased difference Fourier cad hklin1 ${tempfile}scaled.mtz hklin2 $mtzfile \ hklout ${tempfile}phased.mtz << EOF-cad >> $logfile LABIN FILE 1 E1=Diso E2=SIGDiso E3=Dano E4=SIGDano LABIN FILE 2 E1=$PHASE $FOM_cad CTYPO FILE 1 E1=F E2=Q E3=D E4=Q EOF-cad # now we make the isomorphous difference Fourier fft HKLIN ${tempfile}phased.mtz MAPOUT ${tempfile}Diso.map << EOF-fft >> $logfile TITLE ${hiRES}A map of Diso @ $PHASE $FOM RESOLUTION $scaleRES LABIN F1=Diso SIG1=SIGDiso PHI=$PHASE $W EXCLUDE SIG1 0 EOF-fft # then make the anomalous difference Fourier (phases rotated 90 degrees) fft HKLIN ${tempfile}phased.mtz MAPOUT ${tempfile}Dano.map << EOF-fft >> $logfile TITLE ${hiRES}A map of Dano @ $PHASE $FOM RESOLUTION $scaleRES LABIN DANO=Dano SIG1=SIGDiso PHI=$PHASE $W EXCLUDE SIG1 0 EOF-fft # try to make sure we get the right sign # assume map has at least one large peak (which should be positive) echo "go" | mapdump mapin ${tempfile}Diso.map |\ nawk '/Minimum density/{print -$NF, -1} /Maximum density/{print $NF, 1}' |\ sort -nr |\ nawk 'NR==1{print $NF}' >! ${tempfile}sign set diso_sign = `cat ${tempfile}sign` rm -f ${tempfile}sign >& /dev/null # invert the Diso map if it seems to be upside-down #if(("$diso_sign" == "-1")) then if(("$diso_sign" == "-1")&&(! $?USER_ORDER)) then echo "inverting sequence of Diso for difference Fourier" echo "SCALE FACTOR -1 0" |\ mapmask mapin1 ${tempfile}Diso.map mapout ${tempfile}temp.map >> $logfile mv ${tempfile}temp.map ${tempfile}Diso.map >& /dev/null endif # add these two maps together (equivalent to vector sum to FH) echo "MAPS ADD" |\ mapmask mapin1 ${tempfile}Diso.map mapin2 ${tempfile}Dano.map \ mapout ${tempfile}FH.map >> $logfile norm_Four: # normalize it for output echo "SCALE SIGMA" |\ mapmask mapin ${tempfile}FH.map mapout $fourfile >> $logfile if((! $status)&&(-e "$fourfile")) echo "$fourfile is the map of $PHASE applied to FH" rm -f ${tempfile}phased.mtz >& /dev/null rm -f ${tempfile}Diso.map >& /dev/null rm -f ${tempfile}Dano.map >& /dev/null rm -f ${tempfile}FH.map >& /dev/null Patterson: ########################################### if("$pattfile" == "") goto Clean_up # calculate the unweighted Patterson map (if desired) fft HKLIN $outfile MAPOUT ${tempfile}FH.map << EOF-fft >> $logfile TITLE ${hiRES}A Patterson of FH RESOLUTION $scaleRES PATTERSON LABIN F1=FH SIG1=SIGFH F2=FH SIG2=SIGFH SCALE F2 0.000001 0 EXCLUDE DIFF $DIFF EXCLUDE SIG1 0 EOF-fft peakmax MAPIN ${tempfile}FH.map XYZOUT ${tempfile}.pdb << EOF-pick >> $logfile THRESHOLD RMS 3 NUMPEAKS 50 EOF-pick # extend it to whole unit cell? mapmask mapin ${tempfile}FH.map mapout "$pattfile" << EOF >> $logfile scale sigma #xyzlim 0 1 0 1 0 1 EOF if((! $status)&&(-e "$pattfile")) echo "$pattfile is the Patterson of FH." rm -f ${tempfile}FH.map >& /dev/null if("$wpattfile" == "") goto Clean_up # calculate the weighted Patterson map (if desired) fft HKLIN $outfile MAPOUT ${tempfile}FH.map << EOF-fft >> $logfile TITLE ${hiRES}A Patterson of FH/SIGFH RESOLUTION $scaleRES PATTERSON LABIN F1=FH SIG1=SIGFH W=W F2=FH SIG2=SIGFH SCALE F2 0.000001 0 #EXCLUDE DIFF $DIFF EXCLUDE SIG1 0 EOF-fft peakmax MAPIN ${tempfile}FH.map XYZOUT ${tempfile}.pdb << EOF-pick >> $logfile THRESHOLD RMS 3 NUMPEAKS 50 EOF-pick # extend it to whole unit cell? mapmask mapin ${tempfile}FH.map mapout "$wpattfile" << EOF >> $logfile scale sigma #xyzlim 0 1 0 1 0 1 EOF if((! $status)&&(-e "$wpattfile")) echo "$wpattfile is the Patterson of FH / sig(FH)^2" rm -f ${tempfile}FH.map >& /dev/null # clean up Clean_up: rm -f ${tempfile}scaled.mtz >& /dev/null rm -f ${tempfile}Fs >& /dev/null rm -f ${tempfile}Ds >& /dev/null rm -f ${tempfile}Fpairs >& /dev/null rm -f ${tempfile}.pdb >& /dev/null exit Setup: ################################################# #### ###### ##### # # ##### # # # # # # # #### ##### # # # # # # # # # # ##### # # # # # # # #### ###### # #### # ################################################# # # gather information on: # mtz file # data sets # resolution limits # sigma cuttoff (for map generation) # difference cutoff # ################################################## if(! $?DIFF) set DIFF = "" # scan the command line for files foreach arg ( $* ) # warn about probable mispellings if("$arg" =~ *.mtz) then if(-e "$arg") then set mtzfile = "$arg" else if(! $?useroutfile) then set useroutfile = "$arg" set outfile = "$arg" endif endif endif end # now all filenames have been initialized if(! -e "$mtzfile") goto Help ################################################## # get crystal and dataset information from the mtz file echo "go" | mtzdump HKLIN $mtzfile >! ${tempfile}mtzdump set CELL = `nawk '/Cell Dimensions/{getline;getline;print}' ${tempfile}mtzdump` set SGnum = `nawk '/Space group/{print $NF+0}' ${tempfile}mtzdump` set SG = `nawk -v num=$SGnum '$1==num && NF>5{print $4}' ${CLIBD}/symop.lib ` set hiRES = `nawk '/Resolution Range/{getline;getline;print $6}' ${tempfile}mtzdump` # get column label names from the mtz file nawk 'NF>3' ${tempfile}mtzdump |\ nawk '$(NF-1)=="F"{print "F", $NF}\ $(NF-1)=="D"{print "D", $NF}\ $(NF-1)=="Q"{print "S", $NF}\ $(NF-1)=="P"{print "P", $NF}\ $(NF-1)=="W"{print "W", $NF}' |\ nawk '/^F/{++n} /^D/{++n} /^P/{++n} {printf "%s", $1; \ if($1=="S") printf "%s", last;\ printf " %d %s \n",n, $2; last=$1}' |\ cat >! ${tempfile}datasets rm -f ${tempfile}mtzdump >& /dev/null # check extent of available data set temp = `nawk '/^F/ || /^D/' ${tempfile}datasets |& wc -l` if($temp < 1) then # this is useless, bail out now rm -f ${tempfile}datasets >& /dev/null echo "ERROR: no usable data in ${mtzfile}! " set mtzfile = "" goto Help endif # get complete, unique lists of DANO/SIGDANOs cat ${tempfile}datasets |\ nawk '$1=="D"{D[$2]=$NF} $1=="SD"{S[$2]=$NF} \ END{for(i in D) print i, D[i], S[i];}' |\ sort -un |\ nawk 'NF==3{print $2,$3}' |\ cat >! ${tempfile}Ds # get complete, unique lists of F/SIGFs cat ${tempfile}datasets |\ nawk '$1=="F"{F[$2]=$NF} $1=="SF"{S[$2]=$NF} \ END{for(i in F) print i, F[i], S[i];}' |\ sort -un |\ nawk 'NF==3{print $2,$3}' |\ cat >! ${tempfile}Fs # get complete, unique lists of Phases/FOMs cat ${tempfile}datasets |\ nawk '$1=="P"{P[$2]=$NF} $1=="W"{W[$2]=$NF} \ END{for(i in P) print i, P[i], W[i];}' |\ sort -un |\ nawk 'NF>=2{print $2,$3, " "}' |\ cat >! ${tempfile}Ps # one last pass through command line # allow user overrides of all internal variables set i = 0 echo -n "" >! ${tempfile}userlabels while( $i < $#argv ) @ i = ( $i + 1 ) @ nexti = ( $i + 1 ) @ lasti = ( $i - 1 ) if($nexti > $#argv) set nexti = $#argv if($lasti < 1) set lasti = 1 set arg = "$argv[$i]" # see if a dataset label was given egrep " $arg " ${tempfile}datasets >& /dev/null if(! $status) then if($?NO) then # user doesn't want this label # filter it out of the input files egrep -v "^$arg " ${tempfile}Fs >! ${tempfile} mv ${tempfile} ${tempfile}Fs egrep -v "^$arg " ${tempfile}Ds >! ${tempfile} mv ${tempfile} ${tempfile}Ds egrep -v "^$arg " ${tempfile}Ps >! ${tempfile} mv ${tempfile} ${tempfile}Ps else # must want only this label? cat ${tempfile}datasets |\ nawk -v label=$arg 'NF>2 && $NF==label{print $NF}' |\ cat >> ${tempfile}userlabels endif # "NO" stays set for next word continue endif # only look at non-file words if(-e "$arg") then unset NO continue endif if("$arg" =~ [0-9]*) then # we have a number if(("$arg" =~ *A)||("$argv[$nexti]" == "A")) then # user-preferred resolution limits set temp = `echo "$arg" | nawk 'BEGIN{FS="-"} $1+0 > 0.1{print $1+0} $2+0 > 0.1{print $2+0}'` if($#temp != 1) then set temp = `echo $temp | nawk '$1>$2{print $1, $2} $2>$1{print $2, $1}'` if($#temp == 2) then set loRES = "$temp[1]" set hiRES = "$temp[2]" endif else if("$temp" != "") set hiRES = "$temp" endif endif if(("$arg" =~ *[Ss]igma)||("$argv[$nexti]" =~ [Ss]igma)) then set DATA_cutoff = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'` endif if(("$arg" =~ *[Dd]iff)||("$argv[$lasti]" =~ [Dd]iff)) then set MAX_diso = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'` set MAX_dano = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'` endif if(("$arg" =~ *[Dd]iso)||("$argv[$lasti]" =~ [Dd]iso)) then set MAX_diso = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'` endif if(("$arg" =~ *[Dd]ano)||("$argv[$lasti]" =~ [Dd]ano)) then set MAX_dano = `echo "$arg" | nawk '$1+0 > 0{print $1+0}'` endif endif # allow "NO" logic to carry through to next word(s) unset NO if(("$arg" == "no")||("$arg" == "not")) set NO if(("$arg" == "don't")||("$arg" == "ignore")) set NO if("$arg" == "except") set NO end rm -f ${tempfile}datasets >& /dev/null # turn the "user" labels into real label files cat ${tempfile}Ds ${tempfile}userlabels |\ nawk 'NF==2{set[$1]=$0}\ NF==1{print set[$1]}' |\ nawk 'NF==2' >! ${tempfile} set Ds = `cat ${tempfile} | wc -l` if($Ds > 0) then # user mentioned which Ds to use mv ${tempfile} ${tempfile}Ds endif set Ds = `cat ${tempfile}Ds | wc -l` # same for Fs cat ${tempfile}Fs ${tempfile}userlabels |\ nawk 'NF==2{set[$1]=$0}\ NF==1{print set[$1]}' |\ nawk 'NF==2' >! ${tempfile} set Fs = `cat ${tempfile} | wc -l` if($Fs > 1) then # user mentioned which Fs to use mv ${tempfile} ${tempfile}Fs # get user-specified F dataset order set order = `nawk 'NF==2{print $1}' ${tempfile}Fs` set USER_ORDER endif set Fs = `cat ${tempfile}Fs | wc -l` # same for Phases cat ${tempfile}Ps ${tempfile}userlabels |\ nawk 'NF==1{print set[$1]}\ {set[$1]=$0}' |\ nawk 'NF!=0' >! ${tempfile} set Ps = `cat ${tempfile} | wc -l` if($Ps > 0) then # user mentioned which Phase to use tail -1 ${tempfile} >! ${tempfile}Ps endif # only use one phase tail -1 ${tempfile}Ps >! ${tempfile} set PHASE = `nawk '{print $1}' ${tempfile}` set FOM = `nawk '{print $2}' ${tempfile}` # clean up rm -f ${tempfile} >& /dev/null rm -f ${tempfile}Ps >& /dev/null rm -f ${tempfile}userlabels >& /dev/null goto Return_from_Setup #################################################### The Future: re-order Diso if scales come out screwy? make the interface a little slicker, less wordy? use correlation instead of scale as a weight? implement use of max_DANO, DATA_cutoff, etc. ?