#! /bin/csh -f
#
#	Phaser Elves		Automatic heavy-atom phase determination
#
#
echo "Phaser Elves v 1.2.5     Because trying everything isn't so hard.(TM)   James Holton 2-2-05"
echo ""
#
########################################################################################################
# Legal stuff
# The notice below is printed only when both of these hold:
#   - find reports nothing for $0 with -mtime +30, i.e. the script file was
#     modified within the last 30 days
#   - the script was started without any arguments
set temp = `find $0 -mtime +30 -print`
if(("$temp" == "")&&($#argv == 0)) cat << EOF-lawyers
Copyright 1999. The Regents of the University of California (Regents). All Rights Reserved. 

     Permission to use, copy, modify, and distribute this software and its 
     documentation for educational, research, and not-for-profit purposes, 
     without fee and without a signed licensing agreement, is hereby granted, 
     provided that the above copyright notice, this paragraph and the following 
     two paragraphs appear in all copies, modifications, and distributions. 
     Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck 
     Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, for commercial 
     licensing opportunities. Created by James Holton, Department of Molecular 
     and Cell Biology, University of California, Berkeley. 
  
     IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, 
     SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, 
     ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 
     REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  
     REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED 
     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
     PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED 
     HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
     MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 

EOF-lawyers


###############################################################################

 #    #  ######  #       #####
 #    #  #       #       #    #
 ######  #####   #       #    #
 #    #  #       #       #####
 #    #  #       #       #
 #    #  ######  ######  #

###############################################################################
#
#   Help screen and program defaults
#
###############################################################################
# Dispatch: jump to the usage screen when the whole argument string matches
# "-h" or when no arguments were given; otherwise continue at the Config label.
if(("$argv" =~ "-h")||($#argv == 0)) goto Help
goto Config

# Usage screen.  The here-document delimiter is unquoted, so $0 is expanded
# to the name this script was invoked as.  The script exits afterwards.
Help:
cat << EOF

usage: $0 mtzfile.mtz sitefile Fnat n params Res_A

where:
    mtzfile.mtz  - CCP4 mtz file containing all your Fs

    sitefile
    mlphare.com  - previous mlphare input script
    mlphare.log  - previous mlphare output log
    sites.pdb    - Brookhaven-style PDB file containing your metal sites
    sites.res    - SHELX output file (.lst works too)
    solve.status - SOLVE status file (containing sites)
    sites.txt    - just any random list of sites
    
    Fnat         - F in mtzfile.mtz to use as native
    n		 - params/atom. 3=(XYZ) 6=(XYZ occ aocc B)
    Res_A	 - a resolution limit

all of these items are optional

examples:
Start refining sites from SOLVE
    $0 mtz/all.mtz solve.status
Pick up where you left off
    $0 scripts/mlphare.com
Run SHELX on anomalous differences and refine those sites
    $0 mtz/all.mtz

EOF
exit





Config:
###############################################################################
#
#   Evaluate unix system
#
###############################################################################
# Locate an awk implementation that handles the constructs used by this
# script and expose it under the name "nawk" (shell variable and alias).
#
# Every candidate is probed with the same one-liner, which must print 856
# (3147.7 * 1.54^-3.014 formatted with %3d).  The search widens in stages:
#   1. whatever "which" reports for nawk, awk and gawk
#   2. files whose name contains "awk" in the usual bin directories
#   3. a find(1) sweep, ending with the whole file system
# Each stage is skipped as soon as $program is an executable path.

# make sure nawk works
set program = "nawk"
foreach name ( nawk awk gawk )
    # stop as soon as an earlier candidate was accepted
    test -x "$program"
    if(! $status) break

    set possibilities = `which $name |& grep -v ' not in ' | tail -1`
    foreach file ( $possibilities )
        test -x "$file"
        if(! $status) then
            # test for desired functionality (change this?)
            set temp = `echo "1.54" | $file '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
            if("$temp" == 856) then
                set program = "$file"
                break
            endif
        endif
    end
    unset possibilities
end
test -x "$program"
if($status) then
    set program = "awk"
    foreach place ( /bin /usr/bin /usr/local/bin  )
        test -x "$program"
        if(($status)&&(-e $place)) then
            # keep looking
            # "sort -k 5" is the POSIX spelling of the obsolete "sort +4",
            # which current sort implementations reject
            set files = `ls -1L ${place} |& grep "$program" |& sort -nr -k 5 |& head -20 `
            foreach file ( $files )
                # ls printed bare names, so probe the file inside $place;
                # storing the full path lets the "test -x" guards succeed
                test -x "${place}/${file}"
                if(! $status) then
                    # test for desired functionality
                    set temp = `echo "1.54" | ${place}/${file} '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
                    if("$temp" == 856) then
                        set program = "${place}/${file}"
                        break
                    endif
                endif
            end
        endif
    end
endif

# aggressively search for nawk in likely places
test -x "$program"
if($status) then
    echo -n "Looking for $program "
    foreach place ( /bin /usr/bin /usr/local/bin /usr / )
        test -x "$program"
        if(($status)&&(-e $place)) then
            if("$place" == "/") echo -n "uhh"

            # use find to get candidate files
            set files = `find $place -name '*'$program \( -type l -o \( -type f -size +10000c \) \) -perm -1 -print |& egrep -v "^find:" |& head -20`
            foreach file ( $files )
                # test for desired functionality
                set temp = `echo "1.54" | $file '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
                if("$temp" == 856) then
                    set program = "$file"
                    break
                endif
            end
        endif

        # entertainment
        echo -n "."
    end
endif

# check that we found the right awk program
set temp = `echo "1.54" | $program '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
if("$temp" == 856) then
    # set up this awk program as nawk
    set nawk = "$program"
    alias nawk $nawk
else
    # nothing passed the probe: warn and fall back to plain awk
    echo "Dagnabbit!  We can't find a suitable awk program.  What kind of unix is this? "
    echo "Elves may not be able to work."
    set nawk = /bin/awk
    alias nawk awk
endif

# single-byte glyphs (octal 305 and 260) used for Angstrom and degree signs;
# the plain-text alternatives are kept below, commented out
set ANG = `echo "" | nawk 'BEGIN{printf "\305"}'`
set DEG = `echo "" | nawk 'BEGIN{printf "\260"}'`
#set ANG = "A"
#set DEG = "deg"

# OSF1 csh echo workaround: when the builtin echoes "-n" back as a word of
# its own, the probe yields two words; use /usr/bin/echo instead if present
set temp = `echo -n "test"`
if($#temp == 2) then
    if(-e /usr/bin/echo) alias echo /usr/bin/echo
endif

# Make sure the CCP4 environment is available.
#
# When $CCP4 is not set, look for a ccp4.setup script under a list of likely
# installation roots (most specific first) and source the first one found.
# The scratch variables are then pointed at the current directory.
# If $CCP4 is still unset afterwards, print installation hints and exit 9.
if(! $?CCP4) then
    echo -n "Attempting to set up CCP4 ... "

    set ccp4setup = ""
    foreach place ( /programs/xtal/ccp4_3.4/ /usr/xtal/CCP4_v3.4/ /programs/xtal /programs/ /usr/xtal /usr/local /usr/ )
        if((! -e "$ccp4setup")&&(-e "$place")) then
            # look for setup scripts here
            set ccp4setup = `find ${place} -name ccp4.setup |& nawk '/ccp4.setup$/{print $NF}' | tail -1`
        endif
        if((-e "$ccp4setup")&&(! $?CCP4)) then
            source $ccp4setup
            setenv CCP4_SCR    `pwd`
            setenv BINSORT_SCR `pwd`
            echo "using $ccp4setup"
        endif
    end
endif
if(! $?CCP4) then
    echo "failed."
    echo "Please ask your sysadmin how to set up CCP4, "
    echo "Or go to: netscape http://www.dl.ac.uk/CCP/CCP4/main.html"
    echo "about getting the CCP4 program suite."
    echo "and run $0 again."

    echo "If you have Red Hat Linux, you can get ccp4 by typing:"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-lib-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-progs-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-etc-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-examples-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-doc-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-manual-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-html-3.5.1-5.i386.rpm"
    # nothing can run without CCP4 (an unreachable statement after this
    # exit was removed)
    exit 9
endif
setenv CCP4_OPEN       UNKNOWN

# scratch areas default to the current directory when not configured
if(! $?CCP4_SCR)    setenv CCP4_SCR .
if(! $?BINSORT_SCR) setenv BINSORT_SCR .

# probe each scratch area with a throw-away file named after this process;
# an area that cannot be written is replaced by the current directory
touch ${CCP4_SCR}/this$$ >& /dev/null
if($status) setenv CCP4_SCR .
rm -f ${CCP4_SCR}/this$$ >& /dev/null

touch ${BINSORT_SCR}/this$$ >& /dev/null
if($status) setenv BINSORT_SCR .
rm -f ${BINSORT_SCR}/this$$ >& /dev/null


# the working directory itself has to be writable as well
touch ./this$$ >& /dev/null
if($status) then
    # first failure: try to grant ourselves write permission and probe again
    chmod u+w . >& /dev/null
    touch ./this$$ >& /dev/null
    if($status) then
        # still no luck, give up
        echo "ERROR! We can't write to this directory!"
        pwd
        echo "Please cd to the place you want to process your data, and"
        echo "then run $0 again."
        exit 9
    endif
    # the chmod worked, so tell the user what was changed
    echo "Had to make current directory writable:"
    echo "chmod u+w ."
endif
rm -f ./this$$ >& /dev/null

# warn when fewer than 100 MB are free; the available figure is taken from
# the third-from-last column of df -k and converted from kB to MB
set freespace = `df -k $CCP4_SCR |& nawk 'NR>=2&&NF>2{avl=NF-2;print $avl/1024}'`
set test = `echo $freespace 100 | nawk '{print ($1<$2)}'`
if($test) echo "WARNING: "'$'"CCP4_SCR is getting full!  ${freespace}MB left. "

set freespace = `df -k $BINSORT_SCR |& nawk 'NR>=2&&NF>2{avl=NF-2;print $avl/1024}'`
set test = `echo $freespace 100 | nawk '{print ($1<$2)}'`
if($test) echo "WARNING: "'$'"BINSORT_SCR is getting full!  ${freespace}MB left."

set freespace = `df -k . |& nawk 'NR>=2&&NF>2{avl=NF-2;print $avl/1024}'`
set test = `echo $freespace 100 | nawk '{print ($1<$2)}'`
if($test) echo "WARNING: disk space is getting low! ${freespace}MB left. "


# fix old Irix od shortcomings
# "ood" starts out as a plain synonym for od; it is pointed at /bin/od below
# if "od" itself has to be re-aliased.
alias ood od
# Probe: dump the file "test" with a 2-byte skip (-j 2).  When -j works, the
# second field of the first output line is the character "s".
echo "test" >! ./this$$
set test = `od -c -j 2 -N 4 this$$ |& nawk '{print $2}'`
if("$test" != "s") then
    # no -j option supported, compensate:
    # od -x -j offset ... --> od -x ... offset
    alias od 'od \!:1 \!:$ \!:3.'
    alias ood /bin/od
    # marker variable recording that the od workaround is active
    set WEAK_od
endif
rm -f ./this$$ >& /dev/null

# prompt string shown when the Elves ask a question
set PROMPT = "P. Elves-> "

# BEL character, used to ring the terminal bell
set BELL = `echo "" | awk 'BEGIN{printf "\07"}'`
set oldTTYerase = "^H"


# never leave core files behind
limit coredumpsize 0

# when stdout is not a terminal nobody can answer questions, so switch to
# automatic mode and log how, where and when this run was started
test -t 1
if($status) then
    set AUTO
    echo "output is not a terminal."
    # Q&A would stop process cold
    echo "Elves will answer their own questions."
    echo ""
    echo "$0 $*"
    echo "as pid=$$ on "`hostname -s`" at "`date +"%T %D"`
    echo "in "`pwd`
    echo ""
endif

# script-control settings
set MAXLINE = 500
set FIRSTIME

# general-purpose variables, created empty
set temp
set input
set info
set understood
set in


########################################################################################################
# Defaults
#
# Initial value of every setting used by the script.  Variables created
# without a value act as on/off switches that are tested with $?name.

# directory layout
set scriptDIR = scripts/
set logDIR = logs/
set mtzDIR = mtz/
set mapDIR = maps/
set oDIR = o/

# data files
set mtzfile = ${mtzDIR}all.mtz
set bestmtz = ${mtzDIR}best_phased.mtz

# generated mlphare script and its log
set scriptfile = ${scriptDIR}mlphare.com
set logfile = ${logDIR}mlphare.log

# scratch files; $tempfile also serves as a prefix for other temporaries
set coordfile = ./phaser_tempcoordfile
set tempfile = ./phaser_temp

# input files that may be named on the command line
set shelxfile = ""
set solvefile = ""
set pdbfile = ""
set sitefile = ""
set inscript = ""
set inlogfile = ""

# initial guesses for helper programs
set DM = dm
set SHELX = /programs/shelx/sgi/shelxs
set MAPMAN = /programs/o/rave/rave_irix6/6d_mapman
set BRIX = /usr/bin/brix
set BONES = /usr/bin/bones

# crystal parameters
set SG = ""
set CELL = ""
set hiRES = ""

# refinement settings
set native = ""
set fix = "W"
set params = 3
set mlphareCYCLES = 30

# mlphare convergence criteria
set converge_crit = 2
set Memory = 0

# atom rejection
set FILTER_ATOMS
set baddies = 0
set Bcap = 500
set wilsonB = ""
set sites = 0

# solvent flattening
set SOLVENT = ""
set default_trials = "25 30 35 40 45 50 55 60 65 70"

# adding difference-Fourier sites to the script
set MORE_SITES
set newSIG = 3

# sign/hand flipping
set FLIP_OCC
set FLIP_HAND
set FLIP_SG
set Cycle = 0
set FOM = ""
set bestFOM = ""
set bestDMtrial
set biggest_bone = 0
set FLIP_state = 1
set FLIP_label = ""
set FLIP_labels = ""

# memory of the original state
set firstmtz = ""
set firstSG = ""
set otherSG = ""

# miscellaneous empty initializations
set RESCARD = ""
set newSG = ""
set futureSG = ""
set site_cell = ""
set site_sg = ""
set mapmtz = ""
set ofmt = "dsn6"

# remove leftovers of an earlier run
rm -f ${tempfile}* ${coordfile} >& /dev/null



















# begin program with input from command line (hopefully, not too long)
# $input is the word list interpreted by the code after the Gather label;
# on this first pass it is simply the command-line arguments.
set input =  ( $argv )

# entry point of the input-interpretation section
Gather:
###############################################################################

  ####     ##     #####  #    #  ######  #####
 #    #   #  #      #    #    #  #       #    #
 #       #    #     #    ######  #####   #    #
 #  ###  ######     #    #    #  #       #####
 #    #  #    #     #    #    #  #       #   #
  ####   #    #     #    #    #  ######  #    #

###############################################################################
#
#   Interpret the command line
#
###############################################################################

# reset user-input variables
set SOLVENT = ""

# un-pack complex awk procedures
# The jump target is outside this part of the file; control comes back via
# the Return_Unwrap_Awk_Scripts label below.  The two files named in the
# comments are used later with "nawk -f".
goto Unwrap_Awk_Scripts
# ${tempfile}sitereader.awk
# ${tempfile}mtzstuff.awk
Return_Unwrap_Awk_Scripts:

# make sure input is a multiword variable
# (re-splitting on white space turns a single quoted string into words)
set input = ( $input )
if("$input" == "") then
    # replace empty input with command line?
    set input = ( $argv )
endif

# Pass 1: find site files, program settings (and third-party binaries)
#
# Every word of $input is examined once.
#
# Words that do not name an existing file are upper-cased and matched
# against the option vocabulary:
#   NO / NOT            negate the option word that follows
#   NEW, -NEW           set NEW
#   NEGATIVE            set NEGATIVE_OCC (after NO: unset FLIP_OCC)
#   <integer>           remembered for a following PARAM* or SITE* word
#   HURRY, ADD/MORE, FILTER/REJECT, FLIP/ALTERNAT*/EXPLOR* [OCC|SIGN|HAND|SG|SPACE]
#   <number>%           solvent content, appended to $SOLVENT
#   space group symbol  stored in newSG and USER_SG (after NO: unset FLIP_SG)
#
# Words that name an ordinary file are classified by file name into
# inscript, inlogfile, shelxfile, pdbfile, solvefile or sitefile.
# Executable files are additionally probed to see whether they are dm,
# shelx or mapman; files ending in brix or bones are accepted untested.
set i = 0
while ( $i < $#input )
    @ i = ( $i + 1 )
    set arg = "$input[$i]"

    # negative logic
    if(! -e "$arg") then
        # $arg is not a file
        # simplify interpretation of settings
        set arg = `echo "$arg" | nawk '{print toupper($0)}'`
        if(("$arg" == "NO")||("$arg" == "NOT")) then
            # this will remain set until we know what it means
            set NO
            continue
        endif

        # program options (not initializable anywhere else)
        if("$arg" == "NEW") set NEW
        if("$arg" == "-NEW") set NEW
        if("$arg" == "NEGATIVE") then
            if($?NO) then
                # must just not want to flip occupancies
                unset FLIP_OCC
                unset NO
                continue
            endif
            # user-option to override default of positive real occs
            set NEGATIVE_OCC
        endif

        # recognize general (unitless) numbers
        if(("$arg" =~ [0-9]*)&&("$arg" =~ *[0-9])&&("$arg" !~ *[a-zA-Z]*)) then
            if("$arg" !~ *.*) then
                # some non-negative integer, what does it mean?
                set integer = $arg
                continue
            endif
        endif

        # interpret "NO"s
        if($?NO) then
            # preemptive, will be re-set if nothing is recognized
            unset NO
            if(("$arg" == "DM")||("$arg" =~ FLAT*)) then
                set NO_DM
                continue
            endif
            if("$arg" == "MAPMAN") then
                set NO_MAPMAN
                continue
            endif
            if("$arg" == "SHELX") then
                set NO_SHELX
                continue
            endif
            if(("$arg" == "ADD")||("$arg" == "MORE")) then
                unset MORE_SITES
                continue
            endif
            if(("$arg" == "FILTER")||("$arg" == "REJECT")) then
                unset FILTER_ATOMS
                continue
            endif
            if(("$arg" == "FLIP")||("$arg" =~ ALTERNAT*)||("$arg" =~ EXPLOR*)) then
                # there is a little more to this: peek at the next word
                if($i < $#input) then
                    @ i = ( $i + 1 )
                    set narg = `echo "$input[$i]" | nawk '{print toupper($1)}'`
                    if(("$narg" =~ OCC*)||("$narg" == SIGN)) then
                        unset FLIP_OCC
                        continue
                    endif
                    if("$narg" == "HAND") then
                        unset FLIP_HAND
                        continue
                    endif
                    if(("$narg" == "SG")||("$narg" == "SPACE")) then
                        unset FLIP_SG
                        continue
                    endif
                    # next word was not a qualifier, leave it for the next pass
                    @ i = ( $i - 1 )
                endif
                # no qualifier, so just turn them all off
                unset FLIP_OCC
                unset FLIP_HAND
                unset FLIP_SG
                continue
            endif
            if("$arg" == "HURRY") then
                unset HURRY_UP
                continue
            endif
            # look to next word for meaningful "no x"
            set NO
        endif

        # optionally turn on some options
        if("$arg" == "HURRY") set HURRY_UP
        if(("$arg" == "ADD")||("$arg" == "MORE")) then
            set MORE_SITES
            continue
        endif
        if(("$arg" == "FILTER")||("$arg" == "REJECT")) then
            set FILTER_ATOMS
            continue
        endif
        if(("$arg" == "FLIP")||("$arg" =~ ALTERNAT*)||("$arg" =~ EXPLOR*)) then
            # there is a little more to this: peek at the next word
            if($i < $#input) then
                @ i = ( $i + 1 )
                set narg = `echo "$input[$i]" | nawk '{print toupper($1)}'`
                if(("$narg" =~ OCC*)||("$narg" == SIGN)) then
                    set FLIP_OCC
                    continue
                endif
                if("$narg" == "HAND") then
                    set FLIP_HAND
                    continue
                endif
                if(("$narg" == "SG")||("$narg" == "SPACE")) then
                    set FLIP_SG
                    continue
                endif
                # next word was not a qualifier, leave it for the next pass
                @ i = ( $i - 1 )
            endif
            # no qualifier, so just turn them all on
            set FLIP_OCC
            set FLIP_HAND
            set FLIP_SG
            continue
        endif


        # recognize post-number "unit" words
        if($?integer) then
            # units must come right after
            if(("$arg" =~ PARAM*)&&("$integer" =~ [0-6])) then
                set user_params = $integer
                continue
            endif
            if(("$arg" =~ SITE*)&&("$integer" =~ [1-9]*)) then
                set user_sites = $integer
                # user must be happy with given number of sites?
                unset MORE_SITES
            endif
            unset integer
        endif

        # recognize numbers with units attached
        if("$arg" =~ [1-9]*%) then
            # this must be a solvent content
            set temp = `echo $arg | nawk '$1+0>0 && $1+0 < 100{printf "%02d", $1+0}'`
            if("$temp" != "") then
                set SOLVENT = "$SOLVENT $temp"
            else
                echo "bad solvent content: $arg ?"
            endif
        endif

        # resolution is read below (along with scripts)

        # might as well read space group now
        if("$arg" =~ [PCIFR][1-6]*) then
            if($?NO) then
                unset NO
                # must just not want to flip SG
                unset FLIP_SG
                continue
            endif

            set temp = ""
            if($?CLIBD) then
                set temp = `nawk -v SG=$arg '$4 == SG && $1 < 500 {print $4}' $CLIBD/symop.lib | head -1`
            endif
            if("$temp" != "") then
                # valid space group, prepare to reindex to it
                set newSG = "$temp"
                set USER_SG = "$temp"
                unset FLIP_SG
                continue
            endif
            # check for "pseudo-spacegroup" language
            set temp = `echo "$arg" | nawk '/[PC]2212|[PC]2122|P21221|P22121/'`
            if("$temp" != "") then
                # these are okay too, ${scriptDIR}reindex.com will understand
                set newSG = "$temp"
                set USER_SG = "$temp"
                unset FLIP_SG

                continue
            endif
        endif

        # unrecognized non-file word
        continue
    endif

    # arg is a file that exists

    # make sure it is an ordinary file (ls -l line starting with "-")
    set temp = `ls -lnLd $arg | nawk '/^-/{print $NF}'`
    if(! -e "$temp") then
        # what the...
        echo "What is $arg for? "
        continue
    endif

    # $arg is an ordinary, readable file: classify it by name
    switch("$arg")
        case *.sh:
            set inscript = "$arg"
        breaksw
        case *.com:
            set inscript = "$arg"
        breaksw
        case *.log:
            set inlogfile = "$arg"
        breaksw
        # this label was missing its colon
        case *.log.old*:
            set inlogfile = "$arg"
        breaksw
        case *.res:
            set shelxfile = "$arg"
        breaksw
        case *.ins:
            set shelxfile = "$arg"
        breaksw
        case *.lst:
            set shelxfile = "$arg"
        breaksw
        case *.pdb:
            set pdbfile = "$arg"
        breaksw
        case *.brk:
            set pdbfile = "$arg"
        breaksw
        case *solve.status:
            set solvefile = "$arg"
        breaksw
        case *mlphare*:
            if("$arg" !~ *.log*) then
                grep "mlphare " $arg >& /dev/null
                if(! $status) then
                    set inscript = "$arg"
                    breaksw
                endif
            endif
            # not an mlphare script: fall through and treat it as a site list

        default:
            # just a random "site" file?
            # look for three, consecutive high-precision numbers between -1.1 and 1.1
            cat $arg |\
            nawk -f ${tempfile}sitereader.awk |&\
            nawk '/ATOM/' >! ${tempfile}.sites
            set temp = `cat ${tempfile}.sites | wc -l`
            rm -f ${tempfile}.sites
            if($temp > 0) then
                # re-find the sites later
                set sitefile = $arg
            endif
        breaksw
    endsw

    # check for program binaries
    test -x "$arg"
    if($status) then
        # maybe a data file, handle this later...
        continue
    endif

    # $file is blanked once a probe has claimed the executable
    set file = "$arg"

    # is it dm?
    if("$file" =~ *dm*) then
        echo "testing $arg"
        set temp = `setenv DISPLAY; echo "" | $file |& nawk '/K. Cowtan/{print ver} /dm [1-4].[0-9]/{for(i=1;i<NF;++i) if($i ~ /dm$/) ver= $(i+1)+0}'`
        if("$temp" != "") then
            set DM = "$arg"
            set file = ""
        endif
    endif
    # is it shelx?
    if("$file" =~ *shelx*) then
        echo "testing $arg"
        set temp = `setenv DISPLAY; $file $$ |& nawk '/CANNOT OPEN FILE/{print $NF}' | grep "$$.ins" | tail -1`
        if("$temp" != "") then
            set SHELX = "$arg"
            set file = ""
        endif
    endif

    # is it mapman?
    if("$file" =~ *mapman*) then
        echo "testing $arg"
        set temp = `setenv DISPLAY; echo "quit" | $file |& nawk '/Kleywegt/{print}' | tail -1`
        if("$temp" != "") then
            set MAPMAN = "$arg"
            set file = ""
        endif
    endif

    # is it brix?
    if("$file" =~ *brix) then
        # test it later...
        set BRIX = "$arg"
    endif

    # is it bones?
    if("$file" =~ *bones) then
        # test it later...
        set BONES = "$arg"
    endif
end

############################################
# revert to defaults, if need be
############################################
# Either the user pointed us at a source of sites (or asked for NEW), or we
# try to resume from the script or log of an earlier run.
if(($?NEW)||(-e "$inscript")||(-e "$inlogfile")||(-e "$shelxfile")||(-e "$solvefile")||(-e "$pdbfile")||(-e "$sitefile")) then
    # remember that user supplied sites
    set USER_SITES
else
    # nothing given: prefer the script of a previous run
    if((! -e "$inscript")&&(-e "$scriptfile")) set inscript = "$scriptfile"

    if((-e "$inscript")||(-e "$logfile")) echo "where were we?... "

    if(! -e "$inscript") then
        # no old script, so see whether a log holds ATOM lines
        if((! -e "$inlogfile")&&(-e "$logfile")) then
            set temp = `nawk '$1 ~ /^ATOM/' $logfile | wc -l`
            if($temp > 0) then
                set inlogfile = "$logfile"
            else if(-e "${logfile}.old") then
                # current log may stem from an aborted run, try its predecessor
                set temp = `nawk '$1 ~ /^ATOM/' ${logfile}.old | wc -l`
                if($temp > 0) set inlogfile = "${logfile}.old"
            endif
        endif
        if(-e "$inlogfile") echo "found $inlogfile"
    endif

    if(-e "$inscript") echo "found $inscript"
endif

# solvent contents to try: whatever the user listed, else the built-in list
if("$SOLVENT" == "") then
    set trials = "$default_trials"
else
    set trials = "$SOLVENT"
endif
# keep both as word lists from here on
set default_trials = ( $trials )
set trials = ( $trials )

# settings for impatient people
if($?HURRY_UP) then
    # declare convergence when 1st decimal place is the same
    set converge_crit = 100
    # only a few cycles each round
    set mlphareCYCLES = 5
    # only reasonably high difference-fourier peaks
    set newSIG = 4
endif
# MAD: few cycles, so parameters do not diverge too much between runs
if($?MAD) then
    set mlphareCYCLES = 5
endif


FindData:
################################################################################

 ######     #    #    #  #####           #    #   #####  ######
 #          #    ##   #  #    #          ##  ##     #        #
 #####      #    # #  #  #    #          # ## #     #       #
 #          #    #  # #  #    #          #    #     #      #
 #          #    #   ##  #    #          #    #     #     #
 #          #    #    #  #####           #    #     #    ######

################################################################################
#   decide on/find an input mtz file
################################################################################

# first thing first: find the data
#
# Sources for the mtz file name; a later match replaces an earlier one:
#   command line  (also sets USERmtz, which disables the other two)
#   input log file
#   input script

# an existing *.mtz word on the command line has the highest priority
foreach arg ( $input )
    if("$arg" !~ *.mtz) continue
    if(! -e "$arg") then
        echo "WARNING: $arg does not exist! "
        continue
    endif
    set mtzfile = "$arg"
    set USERmtz
    unset DEFAULT_PARAMS
end

# file name recorded in the log of a previous run
if((! $?USERmtz)&&(-e "$inlogfile")) then
    set temp = `nawk '/Filename:/{print $NF}' $inlogfile | head -1`

    if("$temp" != "") then
        if(! -e "$temp") then
            echo "WARNING: $temp from $inlogfile does not exist! "
        else
            set mtzfile = "$temp"
            unset DEFAULT_PARAMS
        endif
    endif
endif



# file name used in an old script (processed last, so it beats the log)
if((! $?USERmtz)&&(-e "$inscript")) then

    # split the script into words and keep those ending in "mtz" that
    # contain no shell variable
    cat $inscript |&\
    nawk 'BEGIN{RS=" "} {print}' |&\
    nawk '/mtz$/ && ! /\$/' |&\
    cat >! ${tempfile}
    set temp = `cat ${tempfile}`
    rm -f ${tempfile} >& /dev/null

    if("$temp" != "") then
        # only the first one counts
        set temp = $temp[1]
        if(! -e "$temp") then
            echo "WARNING: $temp from $inscript does not exist! "
        else
            set mtzfile = "$temp"
            unset DEFAULT_PARAMS
        endif
    endif
endif


read_mtz:
################################################################################

 #####   ######    ##    #####           #    #   #####  ######
 #    #  #        #  #   #    #          ##  ##     #        #
 #    #  #####   #    #  #    #          # ## #     #       #
 #####   #       ######  #    #          #    #     #      #
 #   #   #       #    #  #    #          #    #     #     #
 #    #  ######  #    #  #####           #    #     #    ######

################################################################################
#   Read in label assignments from mtz file
################################################################################

# Read the column layout of $mtzfile.
#
# Files written:
#   ${tempfile}mtzdmp  mtzdump output condensed by mtzstuff.awk
#   ${tempfile}Fpairs  one line per amplitude that has a sigma
# Variables set:
#   firstmtz  name of the mtz file this run started from
#   Fs        number of lines in Fpairs
#   native    only when the file has a single F plus anomalous differences
# Exits with status 9 when no F is found at all, and with status 8 when the
# only F comes without difference data.
set firstmtz = "$mtzfile"

# read info from MTZ file
echo "go" | mtzdump HKLIN $mtzfile |\
nawk -f ${tempfile}mtzstuff.awk >&! ${tempfile}mtzdmp
# format: [FQDP..]: Fname SIGFname <Fname>/<SIGFname>

# don't need this anymore
#rm -f ${tempfile}mtzstuff.awk >& /dev/null


# get the paired-up Fs and SIGFs
nawk '/^F: / && NF>2' ${tempfile}mtzdmp |\
cat >! ${tempfile}Fpairs
# format: [FQDP..]: Fname SIGFname <Fname>/<SIGFname>

# count them
set Fs = `cat ${tempfile}Fpairs | wc -l`
if("$Fs" == 0) then
    # back-up plan: pull the F and Q columns straight out of the mtzdump
    # column table and pair every F with the Q that follows it
    echo "go" | mtzdump HKLIN $mtzfile |\
    nawk '/OVERALL FILE STATISTICS/,/LIST OF REFLECTIONS/' |\
    nawk 'NF>8' |\
    nawk '$(NF-1)=="F"{print "F:", $NF}\
          $(NF-1)=="Q"{print "Q:", $NF}' |\
    cat >! ${tempfile}

    egrep "^F" ${tempfile} >& /dev/null
    if(! $status) echo "WARNING: main mtz reader failed! "
    cat ${tempfile} |\
    nawk '/^F:/{F=$2} /^Q:/{if(F)print"F:", F, $NF, 1; F=""}' |\
    cat >! ${tempfile}Fpairs
    rm -f ${tempfile} >& /dev/null

    # count again; without this the rebuilt Fpairs was never looked at and
    # the script gave up regardless of what the back-up plan found
    set Fs = `cat ${tempfile}Fpairs | wc -l`
endif
if("$Fs" == 0) then
    echo "no Fs in $mtzfile ... sorry "

    if(! $?debug) rm -f ${tempfile}* >& /dev/null
    exit 9
endif
if("$Fs" < 2) then
    echo -n "only $Fs F in $mtzfile "

    # see if there's anomalous
    set Ds = `nawk '/^D: / && NF>2' ${tempfile}mtzdmp | wc -l`
    if("$Ds" > 0) then
        # no-brainer picking native
        set native = `nawk '{print $2, $3}' ${tempfile}Fpairs`
    else
        echo "you need DIFFERENCE data to get phases! "
        # keep the temporary files when debugging, like the exit above
        if(! $?debug) rm -f ${tempfile}* >& /dev/null
        exit 8
    endif
endif

################################################################################
# initialize misc variables from MTZ
# cell, space group number and space group symbol (looked up in symop.lib)
set CELL    = `nawk '/^CELL/{print $2+0, $3+0, $4+0, $5+0, $6+0, $7+0}' ${tempfile}mtzdmp`
set SGnum   = `nawk '/^SYMM/{print $NF}' ${tempfile}mtzdmp`
set mtzSG   = `nawk -v SGnum=$SGnum '$1==SGnum{print $4}' ${CLIBD}/symop.lib`
# "current" SG is the one in the mtz file
set SG      = "$mtzSG"
# without a user-specified space group the target is the same one
if("$newSG" == "") set newSG = "$mtzSG"

# resolution limits as stored in the mtz file
set hiRES   = `nawk '/^RESO/{print $NF+0}' ${tempfile}mtzdmp`
set RESCARD = `nawk '/^RESO/' ${tempfile}mtzdmp`

# B-factor estimate scaled with the square of the resolution limit
set wilsonB = `echo "$hiRES" | nawk '{print 79*($1/3)^2}'`
#set Bcap    = `echo "$RESCARD" | nawk '{print 79*($2/4)^2}'`



################################################################################################################
#   initialize resolution limit
################################################################################################################
# Sources are consulted in increasing priority: previous log, previous
# script, command line.  Values outside 0.1 to 6 are not accepted from files.

# the last output log
if(-e "$inlogfile") then
    set temp = `nawk '$3~/^RESO/ && $NF+0 >0.1 && $NF+0 < 6 {print $NF;exit}' $inlogfile`
    if("$temp" != "") then
        set hiRES = "$temp"
        set RESCARD = `nawk '$3~/^RESO/ && $NF+0 >0.1 && $NF+0 < 6 {print $3, $4, $5}' $inlogfile`
    endif
endif

# an input script
if(-e "$inscript") then
    set temp = `nawk '/^RESO/ && $NF+0 >0.1 && $NF+0 < 6 {print $NF}' $inscript`
    if("$temp" != "") then
        set hiRES = "$temp"
        set RESCARD = `nawk '/^RESO/ && $NF+0 >0.1 && $NF+0 < 6 {print}' $inscript`
    endif
endif

# command-line words such as 2.5A that are not file names
foreach arg ( $input )
    if(-e "$arg") continue
    if("$arg" =~ [1-6]*A) then
        set hiRES = `echo $arg | nawk '{print $1+0}'`
    endif
end
# rebuild the RESOLUTION card: second word of the old card plus the new limit
set RESCARD = `echo "$RESCARD $hiRES" | nawk '{print "RESOLUTION", $2, $NF}'`






PickRef:
################################################################################

 #####      #     ####   #    #          #####   ######  ######
 #    #     #    #    #  #   #           #    #  #       #
 #    #     #    #       ####            #    #  #####   #####
 #####      #    #       #  #            #####   #       #
 #          #    #    #  #   #           #   #   #       #
 #          #     ####   #    #          #    #  ######  #

################################################################################
#   look to all sources for indications of "best" native set
################################################################################

# see if user specified native (now that we know the contents of the mtz)
#
# Candidate labels are the first two LABIN assignments of the input log and
# of the input script, followed by every word of the command line.  Any
# candidate that names an F column becomes the native; any that names a
# Q column becomes its sigma.

echo -n "" >! ${tempfile}filelabels

# labels from files are only wanted while no native is known yet
if("$native" == "") then
    # reference used in the input logfile
    if(-e "$inlogfile") then
        cat $inlogfile |\
        nawk '/LABIN/{for(i=1;i<=NF;++i)if($i == "LABIN") print $(i+1), $(i+2)}' |\
        head -1 |\
        nawk '{print $1; print $2}' |\
        nawk 'BEGIN{FS="="} {print $2}' |\
        cat >> ${tempfile}filelabels
    endif
    # reference used in the input script
    if(-e "$inscript") then
        cat $inscript |\
        nawk '/LABIN/{for(i=1;i<=NF;++i)if($i == "LABIN") print $(i+1), $(i+2)}' |\
        head -1 |\
        nawk '{print $1; print $2}' |\
        nawk 'BEGIN{FS="="} {print $2}' |\
        cat >> ${tempfile}filelabels
    endif
endif
set filelabels = `cat ${tempfile}filelabels`
rm -f ${tempfile}filelabels >& /dev/null

# look every candidate word up in the column list of the mtz file
foreach arg ( $filelabels $input )
    # amplitude column?
    set temp = `nawk -v arg=$arg '/^COL/ && $3=="F" && $2==arg' ${tempfile}mtzdmp`
    if("$temp" != "") then
        set USER_NATIVE
#        echo "found $arg in $mtzfile"

        # take the sigma paired with this F, or go without one
        set native = `nawk -v arg=$arg '$2==arg{print $2, $3}' ${tempfile}Fpairs`
        if("$native" == "") set native = "$arg"
    endif
    # sigma column?
    set temp = `nawk -v arg=$arg '/^COL/ && $3=="Q" && $2==arg' ${tempfile}mtzdmp`
    if("$temp" != "") then
#        echo "found $arg in $mtzfile"

        # take the F/sigma pair this sigma belongs to, if there is one
        set SIGnative = `nawk -v arg=$arg '$3==arg{print $2, $3}' ${tempfile}Fpairs`
        if("$SIGnative" == "") set SIGnative = "$arg"
    endif
end

# tell the user that the native assignment was understood
if($?USER_NATIVE) then
    echo "reference data set is $native"
    unset USER_NATIVE
endif


# if user doesn't seem to care, pick F with best Diso from all other Fs
#
# Every F/SIGF pair is scaled as FP against all F and D columns of the file
# with scaleit, six at a time.  The totals reported by scaleit are summed
# per reference and the reference with the highest sum becomes the native.
if("$native" == "") then
    set i = 0
    echo -n "Evaluating $Fs Fs in $mtzfile "

    # number of F lines in the dump; it does not change inside the loops
    set datasets = `nawk '/^F: /' ${tempfile}mtzdmp | wc -l`

    echo -n "" >! ${tempfile}diso_dano
    while ($i < $Fs)
        @ i = ( $i + 1 )

        echo -n "."

        # retrieve name of this "reference" F and SIGF
        set F    = `nawk -v line=$i 'NR==line {print $2}' ${tempfile}Fpairs`
        set SIGF = `nawk -v line=$i 'NR==line {print $3}' ${tempfile}Fpairs`

        set dataset = 1
        while ( $dataset <= $datasets )
            # make some ordinary scaleit cards
            echo "refine anisotropic" >! ${tempfile}scaleit.in
            echo "$RESCARD" >> ${tempfile}scaleit.in

            # make a LABIN card for scaleit
            echo -n "LABIN FP=$F SIGFP=$SIGF " >> ${tempfile}scaleit.in

            # scaleit might only do 6 datasets at a time
            cat ${tempfile}mtzdmp |\
            nawk -v first=$dataset '\
               /^F: /{++f; F[f]=$2; QF[f]=$3; } \
               /^D: /{++d; D[d]=$2; QD[d]=$3; } \
                  END{for(i=1;i<=6;++i){set=i+first-1;\
              if(F[set] != "")printf "-\nFPH%d=%s SIGFPH%d=%s ", i, F[set], i, QF[set];\
              if(D[set] != "")printf "DPH%d=%s SIGDPH%d=%s ",    i, D[set], i, QD[set]};\
              print ""; print "END"}' |\
            cat >> ${tempfile}scaleit.in

            # run scaleit
            cat ${tempfile}scaleit.in |\
            scaleit HKLIN $mtzfile HKLOUT /dev/null |\
            nawk '/TABLE:Analysis /{for(i=1;i<NF;++i){\
                if($i=="FP=")FP=$(i+1); if($i=="FPH=")FPH=$(i+1); }}\
                /Sc_kraut SCALE/{iso=index($0,"diso")-4; ano=index($0,"<ano>")+3}\
            /THE TOTALS/{print FP, FPH, substr($0,iso)+0, substr($0,ano)+0}' |\
            cat >> ${tempfile}diso_dano

            # advance to next batch of 6 datasets
            @ dataset = ( $dataset + 6 )
        end
    end
    #  ${tempfile}diso_dano has format:
    # Fnat Fderiv diso dano

    # compute a "native score" for each F (sum of all diso+dano) (maybe use product? )
    # sorted on the score column; "-k 2" is the POSIX spelling of the
    # obsolete "+1", which current sort implementations reject
    cat ${tempfile}diso_dano |\
    nawk '{if($3+0==0) score[$1]+=$4; score[$1] += $3} \
        END{for(ref in score) print ref, score[ref]}' |\
    sort -n -k 2 >! ${tempfile}Fscores

    # now pick the best native F (and SIGF): last line has the highest score
    set native = `tail -1 ${tempfile}Fscores | nawk '{print $1}'`
    set native = `nawk -v F=$native '$2==F{print $2, $3}' ${tempfile}Fpairs`

    if("$native" != "") then
        echo "picked $native[1] as the best reference."
    else
        echo "WARNING: could not read scaleit logs "
    endif

    # clean up
    rm -f ${tempfile}Fscores
    rm -f ${tempfile}scaleit.in
    rm -f ${tempfile}diso_dano
endif



# failing that, just pick F with best F/sigma
# (fourth column of Fpairs; "-k 4" is the POSIX spelling of the obsolete
# "+3", which current sort implementations reject)
if("$native" == "") then
    set native = `sort -n -k 4 ${tempfile}Fpairs | tail -1 | nawk '{print $2, $3}'`
endif

# last safety catch, if all else fails: first F column in the file
if("$native" == "") then
    set native = `nawk '/^COL/ && $3=="F"{print $2}' ${tempfile}mtzdmp | head -1`
endif

# finally, tack user-specified SIG to end of native
if($?SIGnative) then
    set native = `echo "$native $SIGnative" | nawk '{print $1, $NF}'`
endif






# make sure we have SOME kind of sigma for native
set native = ( $native )
if($#native < 2) then
    # pick first sigma in file
    set temp = `nawk '/^COL/ && $3=="Q"{print $2}' ${tempfile}mtzdmp | head -1`
    set native = `echo "$native $temp" | nawk '{print $1, $NF}'`
endif

# fail if mtz is just a bunch of crap
if($#native < 2) then
    echo "ERROR: no sigmas in $mtzfile"
    echo ""
    echo "Maximum Likelihood cannot be done without sigmas! "
    echo ""

    if(! $?debug) rm -f ${tempfile}* >& /dev/null

    # BAD is set before jumping to the "done" label (not in this part of the file)
    set BAD
    goto done
endif

# make F/SIG pair into two variables
set SIGnative = `echo $native | nawk '{print $NF}'`
set native = `echo $native | nawk '{print $1}'`


# don't need this anymore
rm -f ${tempfile} >& /dev/null
rm -f ${tempfile}Fpairs >& /dev/null
#rm -f ${tempfile}mtzdmp >& /dev/null















################################################################################
# decide on atoms to fix or not to fix in this space/point group
################################################################################
# Point-group symbol for $SG: column 5 of the first matching symop.lib line,
# minus its first two characters.
set PG = `nawk -v SG=$SG '$4==SG{print substr($5,3); exit}' $CLIBD/symop.lib`

# $fix lists the axes to hold fixed:
#   one-character point-group symbol -> Z
#   point group 1                    -> X, Y and Z
#   point group 2                    -> Y
set temp = `echo $PG | nawk '{print length($1)}'`
if("$temp" == 1) set fix = "Z"
if("$PG"   == 1) set fix = "X|Y|Z"
if("$PG"   == 2) set fix = "Y"


# Would an inverted-hand space group exist (chiral screw axis)?
# The "&&(0)" keeps this test switched off here; $otherSG stays empty.
set otherSG = ""
if(("$newSG" =~ P[346]*)&&(0)) then
    # build the candidate symbol: screw index n becomes (axis order - n)
    set otherSG = `echo $newSG | nawk '{print substr($1,1,2) substr($1,2,1)-substr($1,3,1) substr($1,4) }'`
    # keep it only if symop.lib lists it (entry number below 500)
    set otherSG = `nawk -v SG=$otherSG '$4 == SG && $1 < 500 {print $4; exit}' $CLIBD/symop.lib`
    if(("$otherSG" == "")||("$otherSG" == "$newSG")) then
	# nothing different to try: no reindexing needed
	set otherSG = ""
    else
	# we've got another SG to try!
	set otherSG = "$otherSG"
	# remember the initial SG too
	set firstSG = "$newSG"
    endif
endif

# without FLIP_SG there is never an alternative space group
#if("$otherSG" == "") unset FLIP_SG
if(! $?FLIP_SG) set otherSG = ""









Make_LABIN:
################################################################################

 #         ##    #####      #    #    #
 #        #  #   #    #     #    ##   #
 #       #    #  #####      #    # #  #
 #       ######  #    #     #    #  # #
 #       #    #  #    #     #    #   ##
 ######  #    #  #####      #    #    #

################################################################################
# build, borrow or modify the LABIN card
################################################################################

# Build the "maximum" LABIN card from the MTZ dump: the native pair first,
# then one FPHn/SIGFPHn (plus DPHn/SIGDPHn when present) group per "F: " line.
echo -n "LABIN FP=$native SIGFP=$SIGnative " >! ${tempfile}labin.mtz
nawk '/^F: /{++f; F[f]=$2; QF[f]=$3; } \
      /^D: /{++d; D[d]=$2; QD[d]=$3; } \
      END{for(i=1;i<=f;++i){\
  if(F[i] != "")printf "-\nFPH%d=%s SIGFPH%d=%s ", i, F[i], i, QF[i];\
  if(D[i] != "")printf "DPH%d=%s SIGDPH%d=%s ",    i, D[i], i, QD[i]};\
  print ""}' ${tempfile}mtzdmp >> ${tempfile}labin.mtz


# A previous LABIN card, if one can be found: the input script is preferred,
# the input log is used only when there is no script.
if(-e "$inscript") then
    # copy the LABIN card (and its "-" continuation lines) from the script,
    # one token per record, then re-join with a line break before each FPH
    cat $inscript |\
    nawk '$1 ~ /^LABIN/{print; while($NF == "-"){getline; print}}' |\
    nawk 'BEGIN{RS=" "} NF != 0 && $1 != "-"' |\
    nawk 'NF!=0' |\
    nawk '/^FPH/{printf " -\n"} {printf "%s ", $0} END{print ""}' >! ${tempfile}labin.input
else if(-e "$inlogfile") then
    # reconstruct the LABIN card from the log echo (text up to the "Data" line)
    cat $inlogfile |\
    nawk '/LABIN/{printf "%s", substr($0, 15); while(getline){if($1 != "Data"){printf "%s", substr($0, 2)}else{break}}}' |\
    nawk 'BEGIN{RS=" "; ORS=" "} NF != 0' |\
    nawk 'BEGIN{RS=" "} /^FPH/{printf "-\n"} {printf "%s ", $0} END{print ""}' >! ${tempfile}labin.input
endif

# A borrowed card with fewer than two KEY=label assignments is useless.
if(-e ${tempfile}labin.input) then
    set temp = `nawk 'BEGIN{RS=" "} /[=]/{print $0}' ${tempfile}labin.input | wc -l`
    if($temp < 2) rm -f ${tempfile}labin.input
endif

if(! -e ${tempfile}labin.input) then
    # nothing to borrow: use the card generated from the mtz file
    cp ${tempfile}labin.mtz ${tempfile}labin.okay
else
    # Reconcile with the possible cards (from MTZ): records are split on "=",
    # the "----" marker separates the two cards, and after the marker only
    # records whose first word was seen at least twice are printed.
    echo "=----=" >! ${tempfile}
    cat ${tempfile}labin.mtz ${tempfile} ${tempfile}labin.input |\
    nawk 'BEGIN{RS="=";ORS="="} {++count[$1]} $0=="----"{p=1} \
     p==1{if(count[$1]>=2) print $0}' >! ${tempfile}labin.okay

    # ${tempfile}labin.okay now holds the intersection of labels from
    # the mtz file and the previous script/log's labin card
endif

# reformat into aligned columns, so it looks pretty
nawk '/^LABI/{printf "LABIN %-10s %s -\n", $2, $3} \
 $1~/^FPH/{printf "      %-10s %-15s %-15s %s %s\n", $1, $2, $3, $4, $5}' ${tempfile}labin.okay >! ${tempfile}.LABELS

# intermediate files are finished with
rm -f ${tempfile} ${tempfile}labin.okay ${tempfile}labin.mtz >& /dev/null
rm -f ${tempfile}labin.input ${tempfile}Fpairs >& /dev/null
#rm -f ${tempfile}mtzdmp >& /dev/null


# ${tempfile}.LABELS now contains the desired data labels.
# SAD check: fewer than two FPH words and exactly one DPH word.
set Fs = `nawk 'BEGIN{RS=" "} {print}' ${tempfile}.LABELS | nawk '/^FPH/' | wc -l`
if($Fs < 2) then
    set Ds = `nawk 'BEGIN{RS=" "} {print}' ${tempfile}.LABELS | nawk '/^DPH/' | wc -l`
    if($Ds == 1) then
	echo "You must be SAD :)"
	set SAD
	unset FLIP_OCC
    endif
endif

# on repeat passes the defaults already exist, so skip the mtz check
if($?DEFAULT_PARAMS) goto GetSites


###############################################################################
# analyze mtzfile
###############################################################################
# Run SCALEIT with the same LABIN we will be using for MLPHARE.
# Derivatives are handed to scaleit in batches of at most six; each batch reads
# the mtz written by the previous one.
# Produces: ${tempfile}scaleit.log (all scaleit output, concatenated)
#           ${tempfile}scaled.mtz  (output of the last batch)
echo "checking $mtzfile"
cp -f $mtzfile ${tempfile}scaleme.mtz >& /dev/null

# extract the datasets (again)
# one line per FPH row of the LABELS file: F SIGF [D SIGD]
nawk '$1 ~ /^FPH/' ${tempfile}.LABELS |\
nawk 'BEGIN{FS="="} {print $2, $3, $4, $5}' |\
nawk '{print $1, $3, $5, $7}' |\
cat >! ${tempfile}datasets

echo -n "" >! ${tempfile}scaleit.log
set i = 1
set datasets = `cat ${tempfile}datasets | wc -l`
while ($i <= $datasets)
    # scaleit input: native line of the LABIN card, then datasets i .. i+5
    # renumbered FPH1.. within this batch
    head -1 ${tempfile}.LABELS >! ${tempfile}scaleit.in
    cat ${tempfile}datasets |\
    nawk -v first=$i '{++n} n>=first && n<(first+6){++i;\
	printf "-\nFPH%d=%s SIGFPH%d=%s ", i, $1, i, $2;\
	if($3!="")\
	printf "DPH%d=%s SIGDPH%d=%s ", i, $3, i, $4;}\
	END{print ""}' |\
    cat >> ${tempfile}scaleit.in
    echo "refine anisotropic" >> ${tempfile}scaleit.in
    echo "$RESCARD"           >> ${tempfile}scaleit.in
    echo "END"		      >> ${tempfile}scaleit.in
    # Use plain redirection instead of "cat | scaleit | cat": after a pipeline
    # $status can be that of the trailing cat, which hides a scaleit failure.
    scaleit HKLIN ${tempfile}scaleme.mtz HKLOUT ${tempfile}scaled.mtz < ${tempfile}scaleit.in >> ${tempfile}scaleit.log
    if($status) then
	# why would this happen?
	echo "WARNING: $mtzfile failed to scale! "
    endif
    # get ready to continue scaling
    mv -f ${tempfile}scaled.mtz ${tempfile}scaleme.mtz >& /dev/null
    @ i = ( $i + 6 )
end
mv -f ${tempfile}scaleme.mtz ${tempfile}scaled.mtz >& /dev/null
rm -f ${tempfile}scaleit.in >& /dev/null
rm -f ${tempfile}datasets >& /dev/null







#####################################
# see if scales varied wildly, if so, use the scaled version
# ${tempfile}scales gets the r.m.s. deviation of the "Derivative" scale values
# found in the APPLICATION OF SCALES sections of the scaleit log; it stays
# empty when the log has no such lines.
cat ${tempfile}scaleit.log |\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\
nawk '$1 == "Derivative"{++n; scale[n]=$3+0; avg+=$3}\
END{if(n==0) exit; avg/=n; for(i=1;i<=n;++i){\
      rmsd+=(scale[i]-avg)^2};\
    rmsd = sqrt(rmsd/n); print rmsd}' |\
cat >! ${tempfile}scales

# rmsd in integer percent
set temp = `nawk '{printf "%d", $1*100}' ${tempfile}scales`
# no scales at all (scaleit failed or printed nothing usable): treat as "no
# variation" so the test below is not an expression syntax error, and the
# later "perhaps scaling failed?" fallback can still run
if("$temp" == "") set temp = 0
if($temp > 5) then
    # make up a new name: basename of $firstmtz with its last extension
    # replaced by "scaled.mtz", placed in $mtzDIR when that directory exists
    set newmtzfile = `echo "$firstmtz" | nawk 'BEGIN{FS="/"} {print $NF}' | nawk 'BEGIN{FS="."} {for(i=1;i<NF;++i) printf "%s.", $i;}'`
    set newmtzfile = "${newmtzfile}scaled.mtz"
    test -d ${mtzDIR}
    if(! $status) set newmtzfile = "${mtzDIR}${newmtzfile}"
    mv ${tempfile}scaled.mtz $newmtzfile
    
    # inform user we re-scaled their data
    echo "data in $mtzfile are not on the same scale."
    echo "we re-scaled $mtzfile into $newmtzfile"
    echo "and we will use $newmtzfile"
    
    # use this as "input" mtz file from now on
    set mtzfile = $newmtzfile
    # do all reindexing from this one too
    set firstmtz = $newmtzfile
else
    rm -f ${tempfile}scaled.mtz >& /dev/null
endif
rm -f ${tempfile}scales >& /dev/null


#####################################
# define default EXCLUDE flags for each derivative
# ${tempfile}EXCLUDE, first form: label DANO|DISO limit
# (label = 6th word of the "Derivative title:" line, limit = last word of the
#  "acceptable differences are less than" line)
cat ${tempfile}scaleit.log |\
nawk '/Derivative title:/{F=$6} \
      /Anomalous Differences/{KEY="DANO"} /Isomorphous Differences/{KEY="DISO"} \
      /acceptable differences are less than/{print F, KEY, $NF}' |\
cat >! ${tempfile}EXCLUDE

# quick-reference file for LABIN
# format: first letter of keyword, derivative number, label
# The derivative number is the whole trailing integer of the keyword
# (FPH10 -> 10, not just its last digit); keywords with no trailing digits
# (FP, SIGFP) get 0.
cat ${tempfile}.LABELS |\
nawk 'BEGIN{RS=" "} /[=]/{print $0}' |\
nawk 'BEGIN{FS="="} {num=0; if(match($1,/[0-9]+$/)) num=substr($1,RSTART)+0; \
      print substr($1,1,1), num, $2}' |\
sort -n +1 -2 >! ${tempfile}derivs

# re-name labels to derivative numbers
# ${tempfile}EXCLUDE, final form: deriv_num DANO|DISO limit
cat ${tempfile}derivs ${tempfile}EXCLUDE |\
nawk '$1=="F"{num[$3]=$2} $2~/O$/{print num[$1], $2, $3}' |\
cat >! ${tempfile}
mv ${tempfile} ${tempfile}EXCLUDE
rm -f ${tempfile}derivs

# just in case files get lost: remember 1.5x the largest limit of each kind
set max_dano = `nawk '$2=="DANO"{print 1.5*$3}' ${tempfile}EXCLUDE | sort -n | tail -1`
set max_diso = `nawk '$2=="DISO"{print 1.5*$3}' ${tempfile}EXCLUDE | sort -n | tail -1`

# since mlphare loses the EXCLUDE cards in each iteration,
# we will have to continuously re-generate them





#####################################
# Capture "default occupancies" from the scaleit log in a shell variable,
# so the log itself does not have to be kept around.
# ${tempfile}occs format: deriv_num Diso Dano WilsonB
# (words 10 and 14 of each "THE TOTALS" line, numbered in order of appearance)
nawk -v wilsonB=$wilsonB '/THE TOTALS/{++n; print n, $10, $14, wilsonB}' ${tempfile}scaleit.log >! ${tempfile}occs

# No complete (4-field) line at all?  Perhaps scaling failed: fall back to
# 1 and 1 for every FPH row of the LABELS file.
set temp = `nawk 'NF==4{++ok} END{print ok+0}' ${tempfile}occs`
if($temp == 0) then
    nawk -v wilsonB=$wilsonB '$1 ~ /^FPH/{++i; print i, 1, 1, wilsonB}' ${tempfile}.LABELS >! ${tempfile}occs
endif

# flatten the file into one variable; each line is followed by " n" as a
# record marker (Fill-in splits on "n" again)
set DEFAULT_PARAMS = `nawk '{printf "%s n", $0}' ${tempfile}occs`



# the log and the table are no longer needed
rm -f ${tempfile}scaleit.log ${tempfile}occs >& /dev/null






GetSites:
########################################################################################################

  ####   ######   #####           ####      #     #####  ######   ####
 #    #  #          #            #          #       #    #       #
 #       #####      #             ####      #       #    #####    ####
 #  ###  #          #                 #     #       #    #            #
 #    #  #          #            #    #     #       #    #       #    #
  ####   ######     #             ####      #       #    ######   ####

########################################################################################################
# make decision about where to go get sites
########################################################################################################

# Decide which reader to jump to for the heavy-atom sites.  The first source
# that exists wins, in the order tested below; $sitefile records the choice.

# skip this if we already have sites
if($sites) goto GotSites

# these kinds of files take precedence over the logfile and old scripts

if(-e "$pdbfile") then
    set sitefile = "$pdbfile"
    goto read_pdb
endif
if(-e "$solvefile") then
    set sitefile = "$solvefile"
    goto read_solve
endif
if(-e "$shelxfile") then
    set sitefile = "$shelxfile"
    goto read_shelx
endif
if(-e "$sitefile") then
    # random, text site file
    goto read_general
endif


# next best: sites embedded in a previous script, then a previous log
if(-e "$inscript") then
    set sitefile = "$inscript"
    goto read_inscript
endif
if(-e "$inlogfile") then
    set sitefile = "$inlogfile"
    goto read_inlog
endif

# no source of sites at all: flag that they must be found
# handle this later
#if($?FIRSTIME) echo "no sites? "
set FIND_SITES

# all the above routines return HERE (upon success, otherwise GetSites)
# the script/log names are cleared once this point is reached
GotSites:
set inscript = ""
set inlogfile = ""





################################################################################

 ######  #    #  #####     ##    #    #  #####
 #        #  #   #    #   #  #   ##   #  #    #
 #####     ##    #    #  #    #  # #  #  #    #
 #         ##    #####   ######  #  # #  #    #
 #        #  #   #       #    #  #   ##  #    #
 ######  #    #  #       #    #  #    #  #####

################################################################################
#
#   "expand" any sets of sites to cover all derivatives
#
#   Also, zero-out occupancies that are undefined   
#
################################################################################
# Only acts when $coordfile contains ATOM lines.  Rewrites $coordfile in place
# so that it has exactly one DERIV group per FPH data set, zeroes the real
# occupancy of a derivative that is really the reference, zeroes anomalous
# occupancies where there is no DPH column, and marks out-of-range occupancies
# and B factors as "?.???" for the Fill-in section.
grep ATOM $coordfile >& /dev/null
if(! $status) then
    # we have read in some sites
    
    # count the original number of sites
    if($sites == 0) set sites = `grep ATOM $coordfile | wc -l`
    
    # get names of "derivative" data sets we will be refining against
    # ${tempfile}derivs format: first letter of keyword, derivative number, label
    # The derivative number is the whole trailing integer of the keyword
    # (FPH10 -> 10, not just its last digit); FP/SIGFP get 0.
    cat ${tempfile}.LABELS |\
    nawk 'BEGIN{RS=" "} /[=]/{print $0}' |\
    nawk 'BEGIN{FS="="} {num=0; if(match($1,/[0-9]+$/)) num=substr($1,RSTART)+0; \
	print substr($1,1,1), num, $2}' |\
    sort -n +1 -2 >! ${tempfile}derivs
    set FPHs = `nawk '$1=="F" && $2!=0 {print $NF}' ${tempfile}derivs`
    
    # now make sure we actually have proper "DERIV" cards
    grep "DERIV" $coordfile >& /dev/null
    if($status) then
	# assume this is just a single bunch of atoms
	# so make a copy for each wavelength
	
	echo "" >! ${tempfile}refine
	foreach F ( $FPHs )
	
	    # bare minimum header here
	    echo "DERIV ${F}-$native, and $F anomalous"     >> ${tempfile}refine
	    echo "DCYCLE PHASE ALL REFCYC ALL KBOV ALL"     >> ${tempfile}refine
	    
	    cat $coordfile                                  >> ${tempfile}refine
	    
	    echo ""                                         >> ${tempfile}refine
	end
	mv ${tempfile}refine $coordfile
    endif
    
    # now reconcile # of atom groups vs # of data sets being refined
    set sets = `grep "DERIV" ${coordfile} | wc -l`
    if($sets > $#FPHs) then
	# there are more DERIV cards than there are Fs! 
	
	# lop off last ones: stop printing at the first surplus DERIV
	cat $coordfile |\
	nawk -v FPHs=$#FPHs '/DERIV/{++deriv;p=1} deriv>FPHs{p=0} \
	    p==1{print}' |\
	cat >! ${tempfile}
	mv ${tempfile} $coordfile
    endif
    if($sets < $#FPHs) then
	# there are more Fs than DERIV cards!
	
	# duplicate last one: line[] holds the lines of the last DERIV group,
	# which is appended until there are enough groups
	cat $coordfile |\
	nawk -v needed=$#FPHs '/DERIV/{++derivs;lines=0} {++lines; line[lines]=$0} \
	    {print} END{deriv=derivs; while(deriv < needed){ \
	     for(i=1;i<=lines;++i) print line[i]; ++deriv}}' |\
	cat >! ${tempfile}
	mv ${tempfile} $coordfile
    endif

    # see if one of these FPHs is the reference 
    # (same label as the number-0 "F" entry; prints that derivative's number)
    set temp = `nawk '/^F /' ${tempfile}derivs | nawk '$3==ref{print $2} $2==0{ref=$3}'`
    if("$temp" != "") then
	# make sure the real occupancy is zero for this one
	cat $coordfile |\
	nawk -v ref=$temp '/DERIV/{++deriv;p=0} deriv==ref{p=1} \
	    /ATOM/ && p==1 {$6="0.000"}  {print}' |\
	cat >! ${tempfile}
	mv ${tempfile} $coordfile
    endif
    
    # set anomalous occupancy to 0 for derivatives with no anomalous diffs
    # $noanom becomes a "|"-separated list of derivative numbers without a D label
    nawk '$2!=0' ${tempfile}derivs |\
    nawk '$1=="F"{F[$2]=$3} $1=="D"{D[$2]=$3} \
	END{for(num in F)if(D[num]=="") printf "|" num; print ""}' |\
    nawk '{print substr($0,2)}' >! ${tempfile}worry
    set noanom = `cat ${tempfile}worry`
    rm -f ${tempfile}worry >& /dev/null
    
    if("$noanom" != "") then
	# some anomalous occupancies need to be zeroed
	# the list is anchored so that e.g. "1" matches derivative 1 only,
	# not 10, 11, 21, ...
	cat $coordfile |\
	nawk -v noanom="$noanom" 'BEGIN{noanom="^(" noanom ")$"} \
	    /DERIV/{++deriv;noano=0} deriv~noanom{noano=1} \
	    /ATOM/ && noano {if($7+0 != 0) $7="0.000"}  {print}' |\
	cat >! ${tempfile}
	mv ${tempfile} $coordfile
    endif
    
    
    # don't need these anymore
    rm -f ${tempfile}derivs >& /dev/null

    # reset out-of-bounds input B factors and occupancies (avoid rejecting them later)
    #   last field (B) below wilsonB/4 or above Bcap   -> ?.???  (ATOM lines)
    #   field 6 non-zero with magnitude below 0.05     -> ?.???
    #   field 7 non-zero and below 0.05                -> ?.???
    cat $coordfile |\
    nawk -v Bcap=$Bcap -v wilsonB=$wilsonB '$1 ~/^ATOM/ && \
        ( $NF < wilsonB/4 || $NF > Bcap ){$NF="?.???"} \
        ( $6*$6 < 0.0025 && $6+0!=0){$6="?.???"} \
        ( $7 < 0.05 && $7+0!=0){$7="?.???"} \
        {print}' |\
    cat >! ${tempfile}
    mv ${tempfile} $coordfile
endif


Fill-in:
################################################################################

 ######     #    #       #                  #    #    #
 #          #    #       #                  #    ##   #
 #####      #    #       #       #####      #    # #  #
 #          #    #       #                  #    #  # #
 #          #    #       #                  #    #   ##
 #          #    ######  ######             #    #    #

################################################################################
#
#   fill-in missing occupancy/Bfactor data
#
################################################################################
# Replace every "?.???" placeholder in the ATOM lines of $coordfile:
#   1) with the per-DERIV average of the values that are defined, then
#   2) with the scaleit-derived defaults kept in $DEFAULT_PARAMS.
# Sets BAD and jumps to "done" if placeholders remain after both passes.
grep '?.???' $coordfile >& /dev/null
if(! $status) then
    # there are some undefined values in the atom list
    
    #set params = 2
    
    # retrieve average parameters from currently available sites
    # output format: deriv_num occ aocc Bfac  ("?.???" where nothing is defined)
    cat $coordfile |\
    nawk '/DERIV/{++derivs} \
        $1~/^ATOM/ && $6 !~ /[?]/{ occ[derivs]+=$6; ++nocc[derivs];} \
        $1~/^ATOM/ && $7 !~ /[?]/{aocc[derivs]+=$7; ++naocc[derivs];} \
        $1~/^ATOM/ && $NF!~ /[?]/{Bfac[derivs]+=$NF;++nBfac[derivs];} \
     END{for(i=1;i<=derivs;++i){\
        if( nocc[i]){ occ[i]= occ[i]/ nocc[i]}else{ occ[i]="?.???"};\
        if(naocc[i]){aocc[i]=aocc[i]/naocc[i]}else{aocc[i]="?.???"};\
        if(nBfac[i]){Bfac[i]=Bfac[i]/nBfac[i]}else{Bfac[i]="?.???"};\
	print i, occ[i], aocc[i], Bfac[i]}}' |\
    cat >! ${tempfile}occs
    
    # substitute missing values in the coordfile
    # (table lines start with a digit and are not echoed to the output)
    cat ${tempfile}occs $coordfile |\
    nawk '/DERIV/{++deriv} \
          /^[1-9]/{occ[$1]=$2; aocc[$1]=$3; Bfac[$1]=$4 } \
	  $1~/^ATOM/&&$6~/[?]/{$6= occ[deriv]} \
	  $1~/^ATOM/&&$7~/[?]/{$7=aocc[deriv]} \
	  $1~/^ATOM/&&$NF~/[?]/{$NF=Bfac[deriv]} \
	   ! /^[0-9]/{print}' |\
    cat >! ${tempfile}
    mv ${tempfile} $coordfile
    
    
    
    
    # use scaleit results to estimate remaining parameters
    # ($DEFAULT_PARAMS carries one record per derivative, separated by "n")
    echo "$DEFAULT_PARAMS" |\
    nawk 'BEGIN{RS="n"} {print}' |\
    cat >! ${tempfile}occs
    # format: deriv_num Diso Dano WilsonB
    
    # honor request for negative default occupancies
    if($?NEGATIVE_OCC) then
	# Negate field 2 only on real table lines.  The split above also leaves
	# blank lines; assigning $2 on those would turn them into a stray
	# " -0"/" 0" line that the substitution below could copy into $coordfile.
	nawk 'NF>=2{$2=-$2} {print}' ${tempfile}occs >! ${tempfile}
	mv ${tempfile} ${tempfile}occs >& /dev/null
    endif
    
    # substitute missing values in the coordfile
    cat ${tempfile}occs $coordfile |\
    nawk '/DERIV/{++deriv} \
          $1~/^[1-9]/{occ[$1]=$2; aocc[$1]=$3; Bfac[$1]=$4 } \
	  $1~/^ATOM/&&$6~/[?]/{$6= occ[deriv]} \
	  $1~/^ATOM/&&$7~/[?]/{$7=aocc[deriv]} \
	  $1~/^ATOM/&&$NF~/[?]/{$NF=Bfac[deriv]} \
	  $1!~/^[0-9]/{print}' |\
    cat >! ${tempfile}
    mv ${tempfile} $coordfile
    
    # check for anything that survived?
    set temp = `nawk '$1~/^ATOM/ && /[?]/' $coordfile | wc -l`
    if("$temp") then
	echo "ERROR: unable to determine starting occupancies! "
	set BAD
	goto done
    endif
    
    # don't need this anymore
    rm -f ${tempfile}occs >& /dev/null
endif


# don't need these anymore
rm -f ${tempfile}mtzdmp       >& /dev/null
rm -f ${tempfile}scaleit.log  >& /dev/null
rm -f ${tempfile}mtzstuff.awk >& /dev/null
rm -f ${tempfile}sitereader.awk >& /dev/null




# check for atoms again: FIND_SITES is set exactly when $coordfile has no
# ATOM lines
grep ATOM $coordfile >& /dev/null
if($status) then
    # see if we can find a few sites ourselves
    if($?FIRSTIME) echo "no heavy atom sites were provided..."
    set FIND_SITES
else
    # we definitely don't need to do this
    unset FIND_SITES
endif







permute:
################################################################################

 #####   ######  #####   #    #  #    #   #####  ######
 #    #  #       #    #  ##  ##  #    #     #    #
 #    #  #####   #    #  # ## #  #    #     #    #####
 #####   #       #####   #    #  #    #     #    #
 #       #       #   #   #    #  #    #     #    #
 #       ######  #    #  #    #   ####      #    ######

################################################################################
#
# figure out whether or not to permute axes (based on cell edges)
#
################################################################################
# first, we need to know what the mtz unit cell WILL be
# (not, necessarily what it is)
# $axes names, for the shortest/middle/longest cell edge in turn, which axis
# (a, b or c) that edge will become in the non-standard orthorhombic setting
set axes = ""
if("$newSG" == P2212)  set axes = "b c a"
if("$newSG" == P2122)  set axes = "c a b"
if("$newSG" == P21221) set axes = "b c a"
if("$newSG" == P22121) set axes = "c a b"
#if("$newSG" == C2212)  set axes = "b c a"
#if("$newSG" == C2122)  set axes = "c a b"
if("$axes" != "") then
    # planning to change orthorhombic cell axis ordering...
    # predict new unit cell (after reindex.com in $newSG):
    # sort the three edges numerically, tag them with $axes, then sort on the
    # tag so they come out in a, b, c order
    echo "$CELL" | nawk '{print $1; print $2; print $3}' |\
    sort -n |\
    nawk -v axes="$axes" 'BEGIN{split(axes, abc)} {\
	# write desired axis ordering in front of cannonical one \
	print abc[NR], $0}' |\
    sort |\
    nawk '{print $2}' >! ${tempfile}newcell
    echo "90 90 90"   >> ${tempfile}newcell
    set CELL = `cat ${tempfile}newcell`
    set CELL = `echo "$CELL" | nawk '{print $1+0, $2+0, $3+0, $4+0, $5+0, $6+0}'`

    rm -f ${tempfile}newcell >& /dev/null
    if($#CELL != 6) then
	# this should never happen...
	echo "ERROR! no unit cell is available! "	
	set BAD
	goto done
    endif
    
endif

# the "future" space group (for the report) will be $futureSG:
# the standard-setting symbol for the non-standard orthorhombic ones,
# otherwise $newSG itself
set futureSG = `echo "$newSG" | nawk '/^[PC]2212$/||/^[PC]2122$/{print substr($1,1,1) "2221"} /^P21221$/||/^P22121$/{print "P21212"}'`
if("$futureSG" == "") set futureSG = "$newSG"


# can we change space groups? (that is, do we have a chiral screw axis)
set otherSG = ""
if("$newSG" =~ P[346]*) then
    # construct inverted-hand spacegroup symbol
    # (screw index n is replaced by axis order minus n, e.g. P41 -> P43)
    set otherSG = `echo $newSG | nawk '{print substr($1,1,2) substr($1,2,1)-substr($1,3,1) substr($1,4) }'`
    # make sure it's a real spacegroup
    set otherSG = `nawk -v SG=$otherSG '$4 == SG && $1 < 500 {print $4}' $CLIBD/symop.lib | head -1`
    if(("$otherSG" != "")&&("$otherSG" != "$newSG")) then
        # we've got another SG to try!
        set otherSG = "$otherSG"
        # remember the initial SG too
        set firstSG = "$newSG"
    else
        # no reindexing needed
        set otherSG = ""
    endif
endif
# no way to FLIP_SG if there's nothing to flip to
if("$otherSG" == "") unset FLIP_SG
# enforce user-specified no flip SG option
if(! $?FLIP_SG) set otherSG = ""


# define list of all space groups we intend to try
set allSGs = ( $futureSG $otherSG )
if(($?FIND_SITES)&&($?FLIP_SG)) then
    # we have even more space groups to consider
    
    # get point-group of $SG
    set PG   = `nawk -v SG=$SG '$4==SG{print $5; exit}' $CLIBD/symop.lib`
    set latt = `echo $SG | nawk '{print substr($1, 1, 1)}'`
    
    # get all space groups in this point group
    set allSGs = `nawk -v PG=$PG -v latt=$latt '$5==PG && $4 ~ latt && ! /m/ && ! /bar/ && $1 < 500{print $4}' $CLIBD/symop.lib`

    # special case of asymmetric orhorhombics
    if("$PG" == "PG222") then
	# NOTE(review): the quoted string below makes allSGs a single word, so
	# the "$#allSGs == 1" test that follows unsets FLIP_SG in this case.
	# Confirm whether a word list ( P212121 P21212 ... ) was intended.
	if("$latt" == "P") set allSGs = "P212121 P21212 P21221 P22121 P2221 P2212 P2122 P222"
#	if("$latt" == "C") set allSGs = "C222 C2221 C2212 C2122"
    endif
endif
if($#allSGs == 1) unset FLIP_SG

# now decide on how to permute the input coordinates
# (start from "no permutation" on every pass, so the tests below never see an
#  undefined or stale value)
set permutation = ""
if($?site_cell) then
    # compare the target cell edges with each cyclic permutation of the cell
    # the sites came from and keep the closest match
    echo "$CELL $site_cell" |\
    nawk '{A=$1;B=$2;C=$3; x=$7;y=$8;z=$9;\
          print (A-x)^2+(B-y)^2+(C-z)^2, "XYZ";\
          print (A-y)^2+(B-z)^2+(C-x)^2, "YZX";\
          print (A-z)^2+(B-x)^2+(C-y)^2, "ZXY";}' |\
    sort -n | head -1 >! ${tempfile}permutation
    set permutation = `nawk '{print $NF}' ${tempfile}permutation`
    rm -f ${tempfile}permutation >& /dev/null
endif

if("$permutation" == "") then
    # must have been unable to get the cell?
    # now we have to "guess" as to what cell the input
    # atoms were found in!  
    # Assume that it was the standard cell, with symops redefined
    if(("$newSG" =~ [PpCc]2[12][12]*)&&($?USER_SITES)) then
	# we are reindexing to an orthorhombic space group,
	# so we need to re-do the sites too! 
	set temp = `echo "$newSG" | nawk '{print substr($1,2)}'`
	if(("$temp" == 21221)||("$temp" == 2212)) then
	    # the current "Y" axis needs to become "Z"
	    set permutation = "ZXY"
	endif
	if(("$temp" == 22121)||("$temp" == 2122)) then
	    # the current "X" axis needs to become "Z"
	    set permutation = "YZX"
	endif
	
	if("$permutation" != "") then
	    # next message may reflect a dubious choice
	    echo -n "WARNING!  "
	endif
    endif
endif

# apply the permutation to fields 3-5 (x, y, z) of every ATOM line
if("$permutation" == "YZX") then
    # apply this to sites
    echo "permuting sites in $sitefile"
    echo "by new XYZ = old $permutation (because unit cells were different)"
    cat $coordfile |\
    nawk '$1 ~ /^ATOM/{x=$4; y=$5; z=$3; $3=x; $4=y; $5=z} {print}' |\
    cat >! ${tempfile}
    mv ${tempfile} $coordfile
endif
if("$permutation" == "ZXY") then
    # apply this to sites
    echo "permuting sites in $sitefile"
    echo "by new XYZ = old $permutation (because unit cells were different)"
    cat $coordfile |\
    nawk '$1 ~ /^ATOM/{x=$5; y=$3; z=$4; $3=x; $4=y; $5=z} {print}' |\
    cat >! ${tempfile}
    mv ${tempfile} $coordfile
endif

# sites should now be in the right cell
set site_cell = "$CELL"



# only look for programs once
# (FOUND_PROGS is set right after the return label, so the jump below is
#  skipped on every later pass through here)
if(! $?FOUND_PROGS) then
    # find location of needed/wanted third-party programs
    goto FindProgs
    # dm 2.x or higher
    # mapman (or brix & bones)
    # shelx (if $?FIND_SITES)
endif
# NOTE(review): FindProgs is presumably expected to jump back to this label
# when it is finished -- confirm against that section
Return_FindProgs:
set FOUND_PROGS




# report here?
# Print a summary of what is about to be done and ask for confirmation.
# Runs on the first pass (FIRSTIME) or whenever REPORT is already set.
# With AUTO set the default answers are echoed instead of being read.
Report:
if($?FIRSTIME) set REPORT
if($?REPORT) then
    unset FIRSTIME
    echo ""
    echo -n "    Phaser Elves are ready to "
    if($?FIND_SITES) then
	echo -n "find "
	if($?user_sites) then
	    echo -n "$user_sites "
	endif

	# name the site finder: shelx when the $SHELX file exists, else rantan
	if(-e "$SHELX") then
	    set temp = "shelx"
	else
	    set temp = "rantan"
	endif
	echo "heavy atoms using ${temp},"
	echo -n "    and then refine them "
    else
	# plural "s" for more than one site
	set temp = ""
	if($sites > 1) set temp = "s"
	echo "refine the $sites heavy-atom position${temp} from $sitefile"
	echo -n "    "
    endif
    echo "against the data in $mtzfile with mlphare,"
    echo "    using a script called $scriptfile"
    echo ""
    if(! $?NO_DM) then
	echo "    Solvent-flattening by ${scriptDIR}dm.com will be done at:"
	# turn the words of $trials into "a% b% and c%"
	set temp = `echo "$trials" | nawk '{for(i=1;i<NF;++i) print $i"%"} NF>1{print "and"} {print $NF"%"}'`
	echo "    $temp solvent."
    else
	echo "    No solvent-flattening will be attempted."
    endif
    if($?HURRY_UP) then
	echo "    Refinement will be rushed."
    endif
    # list the alternatives that will be explored: sign, hand, space group
    # (a comma follows "sign" only when all three are active)
    set temp = ""
    if($?FLIP_OCC)  set temp = "$temp sign"
    if(($?FLIP_OCC)&&($?FLIP_HAND)&&($?FLIP_SG)) set temp = "${temp},"
    if($?FLIP_HAND) set temp = "$temp hand"
    if($?FLIP_SG)   set temp = "$temp space_group"
    if("$temp" != "") then
	# insert "and" before the last word, then turn "_" into a space
	set temp = `echo $temp | nawk '{for(i=1;i<NF;++i) print $i} NF>1{print "and"} {print $NF}'`
	set temp = `echo $temp | nawk '{gsub("_", " ", $0); print}'`
	echo "    Alternative $temp choices will be explored."
    endif
    if($?FILTER_ATOMS) then
	echo "    Atoms with unrealistic parameters will be rejected."
    endif
    if($?MORE_SITES) then
	echo "    Peaks found in difference-Fouriers will be added to refinement."
    endif
    echo ""
    echo "    Unit Cell:   $CELL"
    echo "    Space Group: $allSGs"
    echo ""
    
    # ask for confirmation; the default answer is "Yes"
    set temp = "Yes"
    echo "Everything look okay? [$temp]"
    echo -n "$PROMPT"
    if($?AUTO) then
	echo "$temp"
    else
	echo -n "$BELL"
	set in = ( $< )
	if("$in" != "") set temp = "$in"
    endif
    set temp = ( $temp )
    
    # catch unexpected replies
    # (anything other than a single word beginning with Y or y)
    if(("$temp" !~ [Yy]*)||($#temp != 1)) then
	if(($#temp == 1)&&("$temp" =~ [Nn]*)) then
	    # one word, began with "N"
	    set temp = "nothing"
	    echo "What's wrong? [$temp]?"
	    echo -n "$PROMPT"
	    if($?AUTO) then
		echo "$temp"
	    else
		set in = ( $< )
		if("$in" != "") set temp = ( $in )
	    endif
	    if(("$temp" == "quit")||("$temp" == "exit")||("$temp" == "stop")) then
		set QUIT
		goto done
	    endif
	    # any other complaint is handed to Gather as new input
	    if("$temp" != "nothing") then
		set input = "$temp"
		goto Gather
	    endif
	else
	    # the reply itself is handed to Gather as new input
	    set input = "$temp"
	    goto Gather
	endif
    endif

    # user said everything is okay!
    echo "Good."
    unset REPORT
endif




# only do this once
if(! $?MADE_DIRS) then
    # make required directories
    foreach dir ( $logDIR $mtzDIR $scriptDIR $mapDIR $oDIR )
	# see if it's already done
	# ($temp is the name printed by ls when $dir is a directory, else empty)
	set temp = `ls -lnd $dir |& nawk '/^d/{print $NF}'`
	if((-e "$dir")&&(! -e "$temp")) then
	    # a file is using this name
	    echo "WARNING: we are moving your $dir to ${dir}.old"
	    mv -f $dir ${dir}.old
	endif
	if(! -e "$dir") mkdir $dir
    end
    
    # generate the auxiliary scripts
    goto Setup_scripts
    # generate: (if they're not already there)
    #${scriptDIR}reindex.com
    #${scriptDIR}rrsps.com
    #${scriptDIR}bestFH.com
    #${scriptDIR}dm.com
    #${scriptDIR}fft.com
    #${scriptDIR}pick.com
    # NOTE(review): the goto above leaves this block, so the next line is not
    # reached from here; unless Setup_scripts sets MADE_DIRS itself, this
    # section is repeated on every pass -- confirm
    set MADE_DIRS
endif
Return_Setup_scripts:

















reindex:
################################################################################

 #####   ######     #    #    #  #####   ######  #    #
 #    #  #          #    ##   #  #    #  #        #  #
 #    #  #####      #    # #  #  #    #  #####     ##
 #####   #          #    #  # #  #    #  #         ##
 #   #   #          #    #   ##  #    #  #        #  #
 #    #  ######     #    #    #  #####   ######  #    #

################################################################################
# check to see if it's time for another space group.
################################################################################
# Reindex when a different, non-empty $newSG was requested, or when
# REINDEX_SG has been set.  On success $mtzfile, $SG and $CELL are
# updated from the new file; on failure $newSG reverts to $SG.
if((("$newSG" != "$SG")&&("$newSG" != ""))||($?REINDEX_SG)) then
    # maybe use this variable to communicate extra parameters someday? 
    if(! $?REINDEX_SG) set REINDEX_SG

    # make up a "new" input mtz name
    # (basename of $firstmtz with its last extension replaced by "$newSG.mtz",
    #  placed in $mtzDIR)
    set newmtzfile = `echo "$firstmtz" | nawk 'BEGIN{FS="/"} {print $NF}' | nawk 'BEGIN{FS="."} {for(i=1;i<NF;++i) printf "%s.", $i;}'`
    set newmtzfile = "${mtzDIR}${newmtzfile}${newSG}.mtz"
    
    
    # make sure we're not being toyed with
    if(! -e "${scriptDIR}reindex.com") then
	# this should NEVER happen! 
	echo "ERROR! our ${scriptDIR}reindex.com is missing! "
	echo "       what have you done with it? "
	goto Setup_scripts
    endif
    # reindex.com will display its own dialog
    ${scriptDIR}reindex.com $mtzfile $newmtzfile $newSG $REINDEX_SG
#    if($status) rm -f $newmtzfile >& /dev/null
    
    
    # check for success
    # (judged only by whether the output file exists afterwards)
    if(-e "$newmtzfile") then    
	# reset variables
	set mtzfile = $newmtzfile
	set SG = "$newSG"
	# remember the cell the sites came from before CELL is overwritten
	if(! $?site_cell) set site_cell = "$CELL"
	# re-read the cell from the header of the new file
	set CELL = `echo "head" | mtzdump hklin $mtzfile | nawk '/Cell Dimensions/{getline;getline;print}'`
	set CELL = `echo "$CELL" | nawk '{print $1+0, $2+0, $3+0, $4+0, $5+0, $6+0}'`
    else
	# something went wrong
	echo "FAILED!  sticking with $mtzfile in $SG ..."
	set newSG = $SG
    endif
    unset REINDEX_SG
endif











# With no starting atoms, branch off to the site locator now.
if($?FIND_SITES) goto FindSites

# an empty $otherSG means there is nothing to flip to
if("$otherSG" == "") unset FLIP_SG

# a user-specified degrees-of-freedom setting always wins
if($?user_params) then
    #if($user_params > $params) set params = $user_params
    set params = $user_params
endif

# announce the refinement type (shown where refinement begins)
switch ($params)
case 1:
    echo "refining B-factors only"
    breaksw
case 2:
    echo "refining occupancies only"
    breaksw
case 3:
    echo "refining XYZ only"
    breaksw
case 4:
    echo "refining XYZ and B-factors"
    breaksw
case 5:
    echo "refining XYZ and occupancy"
    breaksw
case 6:
    echo "refining XYZ, occupancy and B-factors"
    breaksw
endsw


Write_Script:
###############################################################################

  ####    ####   #####      #    #####    #####
 #       #    #  #    #     #    #    #     #
  ####   #       #    #     #    #    #     #
      #  #       #####      #    #####      #
 #    #  #    #  #   #      #    #          #
  ####    ####   #    #     #    #          #

###############################################################################
#
#	get ready to run mlphare
#
########################################################################################################



# impose Phaser's options on the atom list
# needs: 
# $coordfile 
# ${tempfile}.LABELS (or $scriptfile)
# can use:
# ${tempfile}EXCLUDE 
# ${tempfile}badatoms


# placeholder: nothing is done (yet) when $AUTO is not set
if(! $?AUTO) then
    # ask for approval?
endif

# regenerate the LABELS file, if it is missing somehow
# (rebuilt from the LABIN card found in the existing $scriptfile)
#   stage 1: print the LABIN line and every continuation line
#            (keeps reading while the last field is "-")
#   stage 2: split on spaces into one token per record, dropping "-" and empties
#   stage 3: drop empty lines
#   stage 4: re-join the tokens on one line, but start a new line (ending the
#            previous one with " -") in front of every token beginning with FPH
#   stage 5: print the LABIN line (fields 2 and 3) and each FPH... line
#            (fields 1-5) in aligned columns
if((! -e ${tempfile}.LABELS)&&(-e $scriptfile)) then
    # this shouldn't happen, but it might
    cat $scriptfile |\
    nawk '$1 ~ /^LABIN/{print; while($NF == "-"){getline; print}}' |\
    nawk 'BEGIN{RS=" "} NF != 0 && $1 != "-"' |\
    nawk 'NF!=0' |\
    nawk '/^FPH/{printf " -\n"} {printf "%s ", $0} END{print ""}' |\
    nawk '/^LABI/{printf "LABIN %-10s %s -\n", $2, $3} \
     $1~/^FPH/{printf "      %-10s %-15s %-15s %s %s\n", $1, $2, $3, $4, $5}' |\
    cat >! ${tempfile}.LABELS
endif


# regenerate sites (if they are missing somehow)
# grep fails (non-zero $status) when $coordfile has no line containing ATOM,
# or when the file cannot be read at all
grep ATOM "$coordfile" >& /dev/null
if($status) then
    echo "WARNING: all atoms are gone! "

    # no worries if we were just flipping...
    if($?TENTATIVE) then
	# loss of atoms occurred because of something
	# Phaser did, so go undo it
	echo "...guess that was a bad idea."
	goto Flip
    endif

    # only do this once
    # (RESTORED_ATOMS is set at the bottom of this block, so a second
    #  visit here means the restored atoms were lost again: give up)
    if($?RESTORED_ATOMS) then
	set BAD
	goto done
    endif
    
    # start from scratch; the two attempts below only run while
    # $coordfile does not exist, so the first source that has atoms wins
    rm -f $coordfile >& /dev/null
    # first choice: the ".best" copy of the script
    grep ATOM "${scriptfile}.best" >& /dev/null
    if((! $status)&&(! -e "$coordfile")) then
	echo "restoring atoms from ${scriptfile}.best"
	# keep the DERIV ... END section, and overwrite the last field
	# of every ATOM line that contains BFAC with 99
	cat ${scriptfile}.best |\
	nawk '$1~/^DERIV/,/END/' |\
	nawk '$1~/^ATOM/ && /BFAC/{$NF=99} {print}' |\
	cat >! $coordfile
    endif
    # second choice: the current script (same extraction as above)
    grep ATOM "$scriptfile" >& /dev/null
    if((! $status)&&(! -e "$coordfile")) then
	echo "restoring atoms from $scriptfile"
	cat $scriptfile |\
	nawk '$1~/^DERIV/,/END/' |\
	nawk '$1~/^ATOM/ && /BFAC/{$NF=99} {print}' |\
	cat >! $coordfile
    endif

    # neither source had atoms: nothing left to refine
    if(! -e "$coordfile") then
	echo "ERROR: we seem to have lost all the atoms! "
	set BAD
	goto done
    endif
    set RESTORED_ATOMS
endif


# maintain list of known-to-be-bad atoms
# (the history is carried from script to script as BADATOM/OLDATOM lines;
#  ${tempfile}badatoms holds the lines destined for the next script)
if(($Cycle > 0)&&(! $?FLIP_BEGIN)) then
    # don't mistake a Phaser-written script for input one on first round

    # check for old,bad atoms already listed in the last script
    egrep "BADATOM|OLDATOM" $scriptfile >& /dev/null
    if(! $status) then
	# the last script has "bad" atoms in it
	# (collect every line whose first word starts with BADATOM or OLDATOM)
	cat $scriptfile |\
	nawk '$1~/^BADATOM/ || $1~/^OLDATOM/' >! ${tempfile}
	
	# propagate these old/bad atoms to the next script
	if(-e ${tempfile}badatoms) then
	    # add "new bad" sites after these
	    cat ${tempfile}badatoms >> ${tempfile}
	endif
	# the merged list (old entries first, new ones last) replaces the pending list
	mv ${tempfile} ${tempfile}badatoms
    endif
endif
# FLIP_BEGIN only suppresses the history merge for a single pass
unset FLIP_BEGIN

# insert the EXCLUDE cards (if they are missing)
# ${tempfile}EXCLUDE holds records of the form "<deriv number> <keyword> <value>"
grep -i EXCLUDE $coordfile >& /dev/null
if(($status)&&(-e ${tempfile}EXCLUDE)) then
    # there are no EXCLUDE cards, so we should use ours
    # the EXCLUDE records are read first: lines starting with 1-9 are collected
    # per derivative number (and not printed); every line that does not start
    # with a digit is passed through; right after each line containing DERIV
    # an " EXCLUDE ..." card is printed for the derivative with that ordinal
    cat ${tempfile}EXCLUDE $coordfile |\
    nawk '! /^[0-9]/{print} \
            /^[1-9]/{card[$1] = card[$1] $2 " " $3 " " } \
	    /DERIV/{++deriv; print " EXCLUDE", card[deriv]}' |\
    cat >! ${tempfile}
    mv ${tempfile} $coordfile
endif
# done with this file now
rm -f ${tempfile}EXCLUDE >& /dev/null


# give every atom a fresh ATREF card: old ATREF lines are dropped, and each
# ATOM line is followed by a card chosen by comparing the magnitudes of its
# real ($6) and anomalous ($7) occupancies
cat $coordfile |\
nawk '$1~/^ATREF/{next} \
      {print} \
      $1~/^ATOM/{\
	# default: anomalous diffs are at least as strong as isomorphous ones \
	flags = " ATREF AX ALL AY ALL AZ ALL OCC ALL AOCC ALL AB ALL"; \
	# isomorphous diffs are stronger \
	if($6*$6 > $7*$7) flags = " ATREF X ALL Y ALL Z ALL OCC ALL AOCC ALL B ALL"; \
	print flags}' >! ${tempfile}refine
mv ${tempfile}refine $coordfile


# disable occupancy refinement for occupancies == 0;
# for each ATOM line the real ($6) and anomalous ($7) occupancies are noted,
# then the following line is pulled in with getline (it is treated as that
# atom's ATREF card) and re-printed flag by flag, leaving out
#   OCC  when the real occupancy is exactly 0
#   AOCC when the anomalous occupancy is 0 or negative
# all lines other than ATREF cards are passed through unchanged
cat $coordfile |\
nawk '$1!~/^ATREF/{print} \
      $1~/^ATOM/{occ = $6+0; aocc = $7+0; getline; \
    printf " ATREF "; for(i=2;i <= NF; i+=2){\
    # only print refinement flag if occupancy != 0 \
    if(! ((($i == "OCC")&&(occ == 0)) || (($i == "AOCC")&&(aocc <= 0)))) printf "%s ALL ", $i}; \
    print ""}' |\
cat >! ${tempfile}refine
mv ${tempfile}refine $coordfile

# anomalous-only data?
# count the atoms whose real occupancy ($6) is larger than 0.005 in magnitude
set temp = `cat $coordfile | nawk '$1~/^ATOM/ && ($6*$6 > 0.000025) {print}' | wc -l`
# no atoms have real occupancies: drop the FLIP_OCC flag
if("$temp" == "0") unset FLIP_OCC

# trim the ATREF cards down to the degrees of freedom selected by $params
# each branch below rewrites $coordfile into ${tempfile}refine, which is
# moved back over $coordfile at the end of this section
if($params == 1) then
    # params=1 means B-only refinement

    # disable XYZ and occupancy refinement: keep only flags containing "B"
    cat $coordfile |\
    nawk '$1!~/^ATREF/{print} $1~/^ATREF/{ \
	printf " ATREF "; for(i=2;i <= NF; i+=2){\
	# remove all but B refinement flags for this atom \
	if($i ~ "B") printf "%s ALL ", $i}; \
	print ""}' |\
    cat >! ${tempfile}refine
endif

if($params == 2) then
    # params=2 means occupancy-only refinement
#    set Memory = 0
    
    # disable all XYZ refinement, and do occupancy only
    # (every flag containing X, Y, Z or B is dropped)
    cat $coordfile |\
    nawk '$1!~/^ATREF/{print} $1~/^ATREF/{ \
	printf " ATREF "; for(i=2;i <= NF; i+=2){\
	# remove XYZ refinement flags for this atom \
	if($i !~ "X|Y|Z|B") printf "%s ALL ", $i}; \
	print ""}' |\
    cat >! ${tempfile}refine
endif

if($params >= 3) then
    # create ATREF line for desired degrees of freedom
    # $mask is a regular expression of flag fragments to REMOVE; it is built
    # by walking the list "X Y Z O AO B" backwards from item 6 down to (but not
    # including) item $params, so 3 -> "W|B|AO|O", 5 -> "W|B", 6 -> "W"
    # ("W" appears in no flag name, so it removes nothing)
    set mask = `echo "X Y Z O AO B $params" | nawk '{printf "W"; for(i=6;i>$NF;--i){printf "|%s", $i}}'`
#    if($params == 3) set mask = "O|AO|B"
    # params=4 (XYZ and B) does not fit the ordered list, so it is set by hand
    if($params == 4) set mask = "O|AO"
#    if($params == 5) set mask = "B"
#    if($params == 6) set mask = "W"
    cat $coordfile |\
    nawk -v mask=$mask '$1!~/^ATREF/{print} $1~/^ATREF/{\
	printf " ATREF "; for(i=2;i <= NF; i+=2){\
	# remove desired refinement keys \
	if($i !~ mask) printf "%s ALL ", $i}; print ""}' |\
    cat >! ${tempfile}refine
    
endif
# NOTE(review): no branch above runs for $params < 1, in which case
# ${tempfile}refine does not exist here and this mv fails - confirm
# that $params is always 1 or more
mv ${tempfile}refine $coordfile

# disable refinement of indeterminant axes (for first atom only)
# the line after the one whose first field is exactly "ATOM1" is pulled in
# with getline (it is treated as that atom's ATREF card) and re-printed
# without any flag that matches the regular expression in $fix
# ($fix is set elsewhere in this file; presumably the axes left free
#  by the space group - TODO confirm)
cat $coordfile |\
nawk -v fix=$fix '$1!="ATOM1"{print} $1=="ATOM1"{print; getline; \
    printf " ATREF "; for(i=2;i <= NF; i+=2){\
    # remove refinement card for this atom \
    if($i !~ fix) printf "%s ALL ", $i}; print ""}' |\
cat >! ${tempfile}refine
mv ${tempfile}refine $coordfile


# reformat atom list, so it's pretty
#   stage 1: when the 7th field of an ATOM line is already "BFAC" (i.e. one
#            numeric column is missing), insert "0.000" in front of it
#   stage 2: re-read the line and print every ATOM card in fixed columns:
#            atom number (the digits after "ATOM"), "ANO", fields 3-7,
#            "BFAC", field 9; non-ATOM lines pass through unchanged
cat $coordfile |\
nawk '$1~/^ATOM/ && $7=="BFAC"{$7="0.000 BFAC"} {print}' |\
nawk '$1!~/^ATOM/{print} $1~/^ATOM/{\
    printf " ATOM%-3d ANO  %6.3f %6.3f %6.3f %6.3f %6.3f BFAC %8.3f\n",\
    substr($1,5)+0, $3, $4, $5, $6, $7, $9}' >! ${tempfile}refine
mv ${tempfile}refine $coordfile



# $coordfile has now been completely filtered




# speed up when we know we're close?
#if($?TENTATIVE) set mlphareCYCLES = 5




# maintain decaying list of old scripts
# (rotation, oldest first so nothing is overwritten too early:
#  .older -> .oldest,  .old -> .older,  current -> .old)
if(-e ${scriptfile}.older) mv ${scriptfile}.older ${scriptfile}.oldest
if(-e ${scriptfile}.old) mv ${scriptfile}.old ${scriptfile}.older
if(-e ${scriptfile}) mv ${scriptfile} ${scriptfile}.old

# the new script is assembled in pieces:
#   1) header and fixed mlphare keywords (this here-document)
#   2) the LABIN card from ${tempfile}.LABELS
#   3) LABOUT/RUN keywords
#   4) the filtered atom list from $coordfile
#   5) END, the end of mlphare's input and the status check
#   6) after the generated script's "exit": the history of sites already tried
# un-escaped variables ($mtzfile, $SG, ...) are expanded now, while the
# script is written; escaped ones (\$1, \$mtzfile, ...) stay literal and are
# evaluated when the generated script runs
cat << END-script >! $scriptfile
#! /bin/csh -f
#
#   $scriptfile  - Automatically generated mlphare script
#
#   (courtesy of the Phaser Elves)
#
# intended for $SG
#         cell $CELL
#
set mtzfile = $mtzfile
set outfile = mlphared.mtz

if(("\$1" =~ *.mtz)&&(-e "\$1")) set mtzfile = "\$1"

mlphare \
        HKLIN \$mtzfile \
	HKLOUT \$outfile   \
 << eof-phare
TITLE Phaser Elves's mlphare script
HLOUT

$RESCARD
SCALE SIGFP 1.0
CYCLE $mlphareCYCLES
ANGLE 10
PRINT AVE AVF
END-script
# the LABELS and coordinate files are consumed here: both are deleted
# as soon as they have been appended to the script
cat ${tempfile}.LABELS >> $scriptfile
rm -f ${tempfile}.LABELS >& /dev/null
cat << END-script >> $scriptfile
LABOUT ALLIN
RUN

# note that occupancies here should be in "electron" units
# relative to the absolute scale of your data
END-script
cat $coordfile >> $scriptfile
rm -f $coordfile >& /dev/null
cat << END-script >> $scriptfile

END
eof-phare
if(! \$status) then
    echo \$mtzfile phased to \$outfile
endif

exit

END-script
# anything appended after the generated script's "exit" line is never
# executed; it only serves as a record that later passes read back
if(-e ${tempfile}badatoms) then
    echo "Sites we have already tried (history list):" >> $scriptfile
    cat ${tempfile}badatoms >> $scriptfile
    
    rm -f ${tempfile}badatoms >& /dev/null
endif
chmod a+x $scriptfile

make_pdb:
#######################################################
# convert the mlphare sites to other file formats
#######################################################
# pull every ATOM card out of the script that was just written
cat $scriptfile |\
nawk '$1 ~ /^ATOM/{print}' |\
cat >! ${tempfile}sites

# first, symmetry-expand all these sites out to a full unit cell
# (limits run slightly past the cell edges: -0.1 to 1.1 on every axis)
cat << EOF >! ${tempfile}gensym.in
SYMM $SGnum
CELL $CELL
XYZLIM -0.1 1.1 -0.1 1.1 -0.1 1.1 
EOF
# label by ordinal number in mlphare script
# (each site becomes its own RESIDUE n, holding one ATOM at fields 3-5)
cat ${tempfile}sites |\
nawk '{++n; print "RESIDUE",n; print "ATOM X", $3, $4, $5}' |\
cat >> ${tempfile}gensym.in
# run gensym and keep the part of its output between "List of sites" and
# "Normal termination"; from lines whose 2nd field looks like a fraction,
# keep fields 2-7 plus the next-to-last field, and tag the line with "sym"
# (presumably fields 2-4 are fractional and 5-7 orthogonal coordinates, and
#  the next-to-last field is the RESIDUE number - TODO confirm against gensym)
cat ${tempfile}gensym.in | gensym |&\
nawk '/List of sites/,/Normal termination/' |\
nawk '$2 ~ /[01].[0-9][0-9][0-9]/{print $2, $3, $4, $5, $6, $7, $(NF-1), "sym"}' |\
cat >! ${tempfile}symsites
rm -f ${tempfile}gensym.in >& /dev/null

# set cutoff for sites being "close enough"
# (one third of $hiRES, two decimals)
set CLOSE_peaks = `echo "$hiRES" | nawk '{printf "%.2f", $1/3}'`

# now filter the symmetry-expanded list of all sites to 
# assign a unique list of independent sites
# every expanded position is compared (fields 4-6) with all positions read
# so far, itself included; the first one closer than the cutoff supplies the
# group label, giving lines of the form "<site> same as <label>"
# sort -un then keeps a single line per site number
cat ${tempfile}symsites |\
nawk -v cut=$CLOSE_peaks '\
    $NF=="sym"{++n; X[n]=$4; Y[n]=$5; Z[n]=$6; group[n]=$(NF-1); \
	for(i=1;i<=n;++i){\
	    dist=sqrt(($4-X[i])^2 +($5-Y[i])^2 +($6-Z[i])^2);\
	    # see if this site has already been printed \
	    if(dist < cut){ label = group[i]; break}}; \
	print $(NF-1), "same as", label}' |\
sort -un >! ${tempfile}sites_unique
rm -f ${tempfile}symsites >& /dev/null

# now reduce equivalent sites to a common label
# the "same as" lines are read first and map site number -> group; then each
# ATOM card (the n-th one is site n) is accumulated into its group:
#   - the group's name is the ATOM label of the site that is its own group
#   - x, y, z and B are averaged over all members of the group
#   - occupancies are averaged over the members with a non-zero value only
#     (a zero counter is replaced by 999999 to avoid dividing by zero)
# the result is sorted by group number and printed in mlphare ATOM format
cat ${tempfile}sites_unique ${tempfile}sites |\
nawk '/same as/{g[$1]=$NF}\
    ! /same as/{++n; ++count[g[n]]; if(g[n]==n) ATOM[g[n]]=$1; \
      X[g[n]]+=$3; Y[g[n]]+=$4; Z[g[n]]+=$5; \
      if($6+0!=0)++ocount[g[n]]; if($7+0!=0)++acount[g[n]];\
      occ[g[n]]+=$6; aocc[g[n]]+=$7; B[g[n]]+=$9 }\
      END{for(i in ATOM){if(count[i]==0) count[i]=999999;\
      if(ocount[i]==0) ocount[i]=999999;if(acount[i]==0) acount[i]=999999;\
      print i, ATOM[i], X[i]/count[i], Y[i]/count[i], Z[i]/count[i],\
        occ[i]/ocount[i], aocc[i]/acount[i], B[i]/count[i];}}' |\
sort -n |\
nawk '{printf " %-7s ANO  %6.3f %6.3f %6.3f %6.3f %6.3f BFAC %8.3f\n",\
       $2, $3, $4, $5, $6, $7, $8}' |\
cat >! ${scriptDIR}sites.mlphare
rm -f ${tempfile}sites >& /dev/null
rm -f ${tempfile}sites_unique >& /dev/null


# make a PDB too
#   stage 1: per ATOM card print: atom number, x, y, z, |real occupancy|,
#            anomalous occupancy, B  (the BEGIN assignment is not used)
#   stage 2: write one fixed-column record per site for coordconv; the
#            occupancy column is the mean of the two occupancies, or simply
#            their sum when either of them is zero
cat ${scriptDIR}sites.mlphare |\
nawk 'BEGIN{deriv="0"} $1~/^ATOM/{print substr($1,5), $3, $4, $5, sqrt($6*$6),$7, $NF}' |\
nawk '{B=$NF; norm=2; if(($5+0==0)||($6+0==0)) norm=1; occ=($5+$6)/norm;\
       printf "%5d%10.5f%10.5f%10.5f%10.5f%5.2f%5d%10d%2s%-3s%3s %1s\n", \
       $1, $2, $3, $4, B, occ, "38", $1, "HA", "", "IUM", " "}' |\
cat >! ${tempfile}sites.frac

# use coordconv to convert fractional MLphare coordinates to Angstroms
# (all of coordconv's output is discarded; a failure only shows up as a
#  missing ${tempfile}sites.pdb below)
coordconv XYZIN ${tempfile}sites.frac \
         XYZOUT ${tempfile}sites.pdb << EOF-conv >& /dev/null
CELL $CELL
INPUT FRAC
OUTPUT PDB ORTH 1
END
EOF-conv

# explain what this is in the header
echo "REMARK symmetry-reduced sites from $scriptfile" |\
cat - ${tempfile}sites.pdb |\
cat >! ${oDIR}/sites.pdb

rm -f ${tempfile}sites.frac >& /dev/null
rm -f ${tempfile}sites.pdb  >& /dev/null

##################
# ${scriptDIR}sites.mlphare has the (reduced) sites in MLPHARE format
# ${oDIR}/sites.pdb has the (reduced) sites in PDB format
# these will be renamed by the "Flip" procedure anyway
##################


###############################################################################

# clean up ALL temporary files (everything can be reconstructed from the logfile)



# back up old logs
# a log is only moved one step down the chain (.older -> .oldest,
# .old -> .older, current -> .old) when "Normal termination" appears in its
# last 100 lines; logs of failed runs (and missing logs) stay where they are
# and will be overwritten
tail -100 ${logfile}.older |& grep "Normal termination" >& /dev/null
if(! $status) mv ${logfile}.older ${logfile}.oldest
tail -100 ${logfile}.old   |& grep "Normal termination" >& /dev/null
if(! $status) mv ${logfile}.old ${logfile}.older
tail -100 ${logfile}       |& grep "Normal termination" >& /dev/null
if(! $status) mv ${logfile} ${logfile}.old

########################################################

 #####   #    #  #    #
 #    #  #    #  ##   #
 #    #  #    #  # #  #
 #####   #    #  #  # #
 #   #   #    #  #   ##
 #    #   ####   #    #

########################################################
#  Actual MLPHARE run
########################################################
echo -n "running $scriptfile into $logfile "
# run the generated script; its output is cut off after 100000 lines, saved
# in $logfile, and a "." is printed for every third cycle as progress
./$scriptfile $mtzfile | nawk '{print} NR>100000{exit}' | tee $logfile | nawk '$1=="Cycle:"&&$2%3==0 || $2=="CYCLE"&&$3%3==0{printf "."}'
########################################################
# $temp is the running "did it fail?" flag: 0 = fine so far
# NOTE(review): $status here is presumably that of the pipeline's last stage
# rather than of the script itself - the checks below do the real work
set temp = $status
if(! $temp) then
    # make sure mlphare reported successful finish
    grep "Normal termination" $logfile >& /dev/null
    set temp = $status
endif
if(! $temp) then
    if(-e mlphared.mtz) then
	# move the output file to where Phaser wants it
	mv mlphared.mtz ${mtzDIR}mlphare.mtz
	set temp = $status
    else
	# output file does not exist! 
	set temp = 1
    endif
endif
if(! $temp) then    
    # this run was good, so reset any tentative things
    if($?SHORT_RUN) then
	# this run was shortened, and that fixed the problem
	# so, reset to the previous run length
	set mlphareCYCLES = $SHORT_RUN
	# forget that we tried this? 
	unset SHORT_RUN
    endif
else
    echo "mlphare crashed! "

    # maybe we let it run too long?
    if(! $?SHORT_RUN) then
        # we have not tried a short run
        # find the last cycle number logged before the first
        # "inconsistent phase" message (or before the end of the log)
        set temp = `nawk '/Cycle:/{cyc = $NF} /inconsistent phase/{exit} END{print cyc+0}' ${logfile}`
        if($temp > 2) then
            # allow this run to get as far as it did, and then try to eliminate sites
	    # store "old" cycle count in this variable
	    set SHORT_RUN = $mlphareCYCLES
	    # set new cycle count to one less than the cycle that was reached
            @ mlphareCYCLES = ( $temp - 1 )
	    echo "woops!  let's try just $mlphareCYCLES cycles."
	    # "read" scriptfile like an input script
	    # (the DERIV ... END section, without the END line itself)
	    nawk '$1 ~ /DERIV/,/END/{print}' $scriptfile |\
	    nawk '! /END/{print}' >! ${coordfile}

	    goto Write_Script
        endif
    else
	# we just tried a short run (and it failed), so undo the limited number of cycles
	set mlphareCYCLES = $SHORT_RUN
	unset SHORT_RUN
    endif


    # see if there's anything we can do...
    if($?TENTATIVE) then
	# this crash occurred because of something
	# Phaser did, so go undo it
	echo "...guess that was a bad idea."
	goto Flip
    endif
    
    
    # out of ideas: tell the user how to resume, remove all temporary
    # files and stop with exit status 9
    echo "examine ${logfile} to find out what went wrong. "
    echo "once you have made any changes to ${scriptfile}, you"
    echo "can continue automated refinement by typing:"
    echo "$0 $scriptfile "
    echo ""
    echo "Good Luck! "
    rm -f ${tempfile}* >& /dev/null
    exit 9
endif

# one more successful mlphare run:
# count it both in the overall total and in the convergence "memory"
@ Cycle++
@ Memory++

##############################
#if($params == 2) then
#    # eliminate stupid atoms
#    set FILTER_ATOMS
#endif





Analyze_Results:
################################################################################################################

 #####   ######   ####   #    #  #        #####   ####
 #    #  #       #       #    #  #          #    #
 #    #  #####    ####   #    #  #          #     ####
 #####   #            #  #    #  #          #         #
 #   #   #       #    #  #    #  #          #    #    #
 #    #  ######   ####    ####   ######     #     ####

################################################################################################################
#   Analyze results of the MLphare run
################################################################################################################

# nothing to analyze without a log
if(! -e "$logfile") then
    echo "ERROR: no log produced! "
    set BAD
    goto done
endif
    
# print out mean FOM
# for every "phased -ALL" / "phased -ACENTRIC" heading in the log, the third
# line below it is taken: it is printed, and then summarized as
# " FOM = <last field> (<smallest non-zero field> - <largest field>)"
# where smallest/largest are searched among all fields but the last;
# tail -1 keeps only the summary belonging to the last such heading
cat $logfile |\
nawk '/phased -ALL/ || /phased -ACENTRIC/{getline; getline; getline; min = 1; max=0; print; \
  for(i=1;i<NF;++i){if($i > max) max=$i; if(($i < min)&&($i+0!=0)) min = $i};\
  print " FOM = " $NF " ("min" - "max")"}' |\
tail -1 >! ${tempfile}FOM
# $FOM is the third word of that summary, i.e. the value after "FOM ="
set FOM = `nawk '{print $3}' ${tempfile}FOM`
# show the summary without a trailing newline (the echo "" below ends the line)
nawk '{printf "%s", $0}' ${tempfile}FOM
rm -f ${tempfile}FOM

# print out phasing powers
#echo "  best Phasing Power = " | nawk '{printf "%s", $0}'
#nawk '/PhP_a/{getline;print $6;getline;print $6;getline;print $6;getline;print $6;\
# getline;print $6;getline;print $6;getline;print $6;getline;print $6}'\
#$logfile | sort -n | tail -1 | nawk '{printf "%s (a), ", $1}'
#nawk '/PhP_a/{getline;print $11;getline;print $11;getline;print $11;getline;print $11;\
# getline;print $11;getline;print $11;getline;print $11;getline;print $11}'\
#$logfile | sort -n | tail -1 | nawk '{printf "%s (c)", $1}'

echo ""

####################################################################
# reconstruct LABIN card from the log
#   stage 1: starting at the LABIN line (from column 15 on), glue together
#            all following lines (from column 2 on) up to a line whose first
#            word is "Data"
#   stage 2: squeeze out the empty tokens left by runs of spaces
#   stage 3: start a new line (closing the previous one with "-") in front
#            of every token beginning with FPH
#   stage 4: print the LABIN line and each FPH... line in aligned columns
cat $logfile |\
nawk '/LABIN/{printf "%s", substr($0, 15); while(getline){if($1 != "Data"){printf "%s", substr($0, 2)}else{break}}}' |\
nawk 'BEGIN{RS=" "; ORS=" "} NF != 0' |\
nawk 'BEGIN{RS=" "} /^FPH/{printf "-\n"} {printf "%s ", $0} END{print ""}' |\
nawk '/^LABI/{printf "LABIN %-10s %s -\n", $2, $3} \
 $1~/^FPH/{printf "      %-10s %-15s %-15s %s %s\n", $1, $2, $3, $4, $5}' |\
cat >! ${tempfile}.LABELS

# reconstruct EXCLUDE cards from the log
# output records: "<deriv number> DISO|DANO <last field of the EXCLUDE line>"
# the derivative number comes from the most recent "Compound" line
cat $logfile |\
nawk '$1=="Compound"{deriv=$2} \
      $5=="Compound"{deriv=$6} \
      $1=="EXCLUDE" && /FPH -FP/ && />/{print deriv+0, "DISO", $NF}   \
      $1=="EXCLUDE" && /-FPH/ && />/{print deriv+0, "DANO", $NF}' |\
cat >! ${tempfile}EXCLUDE

# extract atom entries from the MLphare log 
# (with full refinement cards)
#   stage 1: keep the DERIV ... MLPHARE section(s) of the log
#   stage 2: split numbers that were printed without a separating space
#            (three decimals directly followed by a digit or a minus sign);
#            the "safe" counter caps the work at 20 splits per ATOM line and
#            is reset for every line, so a long atom list cannot use up the
#            allowance and leave later atoms with fused numbers
#   stage 3: insert "0.000" when the 7th field of an ATOM line is "BFAC"
#   stage 4: drop the MLPHARE and SUMMARY lines
cat $logfile |\
nawk '$1~/^DERIV/,$1~/^MLPHARE/{print}' |\
nawk '$1 ~ /^ATOM/ {safe=0; while(/\.[0-9][0-9][0-9][0-9-]/ && safe < 20){\
      # insert spaces between numbers that are stuck together \
      stuck=match($0,/\.[0-9][0-9][0-9][0-9-]/)+3; \
      $0 = substr($0,1,stuck) " " substr($0, stuck+1);++safe}}\
      {print}' |\
nawk '$1~/^ATOM/ && $7=="BFAC"{$7="0.000 BFAC"} {print}' |\
nawk '! /MLPHARE/ && ! /SUMMARY/{print}' >! ${coordfile}

# apply MAD constraints here? 
# (when $MAD is set the atom list is passed through ${scriptDIR}mad.awk,
#  and everything from a line reading exactly "Summary:" on is cut off)
if($?MAD) then
    nawk -f ${scriptDIR}mad.awk ${coordfile} |\
    nawk '$0=="Summary:"{exit} {print}' >! ${tempfile}
    mv ${tempfile} ${coordfile}
endif

# check for valid atoms
# (no ATOM line at all: flag that sites must be searched for, and skip
#  the rest of the analysis)
grep ATOM ${coordfile} >& /dev/null
if($status) then
    echo "ERROR: no atoms found in $logfile ... "
    set FIND_SITES
    goto Report
endif

####################################################################
# see if these parameters have converged

# Convergence test: collect the atomic parameters of the new atom list and
# of up to four earlier scripts, one file per parameter set, all named
# ${tempfile}<something>params.  The sets are later concatenated in the
# (alphabetical) order of the file-name glob, which is
#     ancientparams olderparams oldestparams oldparams params
# so the two most recent sets (oldparams, params) always come last, and the
# last column of ${tempfile}shifts is the shift made by the latest run.
# The big-XYZ-shift test further down relies on that last column.

# extract list of parameters from $coordfile (the "next" script)
cat $coordfile |\
nawk '$1~/^ATOM/ {++n; print n,"x",$3; print n,"y",$4; print n,"z",$5;\
                       print n,"o",$6; print n,"a",$7; print n,"b",$NF}' |\
cat >! ${tempfile}params
# format: atomnum [xyzoab] value

# get input coordinates from the "current" script
cat $scriptfile |\
nawk '$1~/^ATOM/ {++n; print n,"x",$3; print n,"y",$4; print n,"z",$5;\
                       print n,"o",$6; print n,"a",$7; print n,"b",$NF}' |\
cat >! ${tempfile}oldparams
# format: atomnum [xyzoab] value

# get coordinates from one or more old scripts
# (only as far back as $Memory, the number of runs since the last reset)
# each old script is cleaned the same way as the log: numbers that are stuck
# together are split (at most 20 splits per ATOM line; the "safe" counter is
# reset for every line) and a missing 7th column is filled with 0.000
if((-e "${scriptfile}.old")&&($Memory > 2)) then
    # (re)extract atoms from the "previous" script
    cat ${scriptfile}.old |\
    nawk '$1~/^DERIV/,$1~/^MLPHARE/{print}' |\
    nawk '$1 ~ /^ATOM/ {safe=0; while(/\.[0-9][0-9][0-9][0-9-]/ && safe < 20){\
          # insert spaces between numbers that are stuck together \
          stuck=match($0,/\.[0-9][0-9][0-9][0-9-]/)+3; \
          $0 = substr($0,1,stuck) " " substr($0, stuck+1);++safe}}\
        {print}' |\
    nawk '$1~/^ATOM/ && $7=="BFAC"{$7="0.000 BFAC"} {print}' |\
    nawk '$1~/^ATOM/ {++n; print n,"x",$3; print n,"y",$4; print n,"z",$5;\
			   print n,"o",$6; print n,"a",$7; print n,"b",$NF}' |\
    cat >! ${tempfile}olderparams
    # format: atomnum [xyzoab] value
endif

if((-e "${scriptfile}.older")&&($Memory > 3)) then
    # (re)extract atoms from 2 runs ago
    cat ${scriptfile}.older |\
    nawk '$1~/^DERIV/,$1~/^MLPHARE/{print}' |\
    nawk '$1 ~ /^ATOM/ {safe=0; while(/\.[0-9][0-9][0-9][0-9-]/ && safe < 20){\
	  # insert spaces between numbers that are stuck together \
	  stuck=match($0,/\.[0-9][0-9][0-9][0-9-]/)+3; \
	  $0 = substr($0,1,stuck) " " substr($0, stuck+1);++safe}}\
	  {print}' |\
    nawk '$1~/^ATOM/ && $7=="BFAC"{$7="0.000 BFAC"} {print}' |\
    nawk '$1~/^ATOM/ {++n; print n,"x",$3; print n,"y",$4; print n,"z",$5;\
			   print n,"o",$6; print n,"a",$7; print n,"b",$NF}' |\
    cat >! ${tempfile}oldestparams
    # format: atomnum [xyzoab] value
endif

if((-e "${scriptfile}.oldest")&&($Memory > 4)) then
    # (re)extract atoms from 3 runs ago
    # these go to their OWN file: writing them to ${tempfile}oldestparams
    # would throw away the "2 runs ago" set extracted just above.
    # the name must keep matching ${tempfile}*params and must sort before
    # "oldparams" (see the note at the top of this section)
    cat ${scriptfile}.oldest |\
    nawk '$1~/^DERIV/,$1~/^MLPHARE/{print}' |\
    nawk '$1 ~ /^ATOM/ {safe=0; while(/\.[0-9][0-9][0-9][0-9-]/ && safe < 20){\
	  # insert spaces between numbers that are stuck together \
	  stuck=match($0,/\.[0-9][0-9][0-9][0-9-]/)+3; \
	  $0 = substr($0,1,stuck) " " substr($0, stuck+1);++safe}}\
	  {print}' |\
    nawk '$1~/^ATOM/ && $7=="BFAC"{$7="0.000 BFAC"} {print}' |\
    nawk '$1~/^ATOM/ {++n; print n,"x",$3; print n,"y",$4; print n,"z",$5;\
			   print n,"o",$6; print n,"a",$7; print n,"b",$NF}' |\
    cat >! ${tempfile}ancientparams
    # format: atomnum [xyzoab] value
endif

# make sure we have at least two parameter sets to compare
set sets = `ls -1 ${tempfile}*params |& wc -l`
if($sets > 1) then
    # calculate the shifts between each parameter set
    # (values of one parameter are gathered on one line, then the absolute
    #  difference of every pair of values is printed)
    cat ${tempfile}*params |\
    nawk '{p=$1 " " $2; param[p]=param[p] " " $3} \
	  END{for(p in param) print p, param[p]}' | sort -n |\
    nawk '{printf "%s %s ",$1, $2; \
	   for(i=3;i<=NF;++i){for(j=i+1;j<=NF;++j){printf "%s ", sqrt(($i-$j)^2);}} print ""}' |\
    cat >! ${tempfile}shifts
    # format: atomnum [xyzoab] shift shift shift ...    	

    # find the largest, overall "minimum" shift
    # (per parameter: the smallest of its pairwise shifts; overall: the
    #  largest of those, times 1000 and truncated to an integer)
    cat ${tempfile}shifts |\
    nawk '{min=99999; for(i=3;i<=NF;++i){if(min>$i)min=$i} print $1, $2, min}' |\
    nawk '$NF>max{max=$NF} END{printf "%d\n", max*1000}' |\
    cat >! ${tempfile}max_of_mins
    
    # load convergence number into a variable
    set maxdrift = `cat ${tempfile}max_of_mins`
    rm -f ${tempfile}max_of_mins >& /dev/null
    
    # see if biggest intra-parameter drift meets convergence criteria
    if(($maxdrift <= $converge_crit)&&(1)) then
	# this round of MLpharing has converged
	set Converged
	# forget about runs leading up to this
	set Memory = 0
    endif
endif

# clean up
rm -f ${tempfile}*params >& /dev/null
# still need ${tempfile}shifts (below)

####################################################################
# filter out atoms that don't meet cutoffs

# first, find atoms with completely bad occ or Bfactor
#       |occ| > 0.05  OR aocc > 0.05 
#   and $Bcap > Bfac > $wilsonB/4
# (negative aocc will be handled below)
# output: one "BADATOM <ordinal> <three-word reason>" line per failed test,
# where <ordinal> counts the ATOM lines of $coordfile
nawk '$1~/^ATOM/' $coordfile |\
nawk -v Bcap=$Bcap -v wilsonB=$wilsonB '{++n}\
    $7 < 0.05 && $6*$6 < 0.0025{print "BADATOM", n, "occ too low"}\
                    $NF > Bcap {print "BADATOM", n, "B too high"}\
    $NF < wilsonB/4 || $NF < 1 {print "BADATOM", n, "B too low";}' |\
cat >! ${tempfile}badatom_numbers

# extract and mark atoms that had unreasonable occ and/or B values
# (the ATOM card is copied with "ATOM" turned into "BADATOM" and the reason
#  appended in parentheses; when an atom failed several tests the last
#  reason listed wins)
cat ${tempfile}badatom_numbers $coordfile |\
nawk '$1=="BADATOM" && NF==5 {bad[$2]=$3 " " $4 " " $5}\
      $1~/^ATOM/{++n; if(bad[n]) \
      print "BADATOM" substr($0, index($0, "ATOM")+4), "(" bad[n] ")";}' |\
cat >! ${tempfile}badatoms



# check for too-big-XYZ-shift atoms (from above):
if(! -e ${tempfile}shifts) then
    echo -n "" >! ${tempfile}shifts
endif
# find any XYZ shifts larger than 0.2 cells
# (only the last shift column is tested)
# duplicates are removed with a plain "sort -u": every line starts with the
# word BADATOM, so a numeric sort (-n) sees all lines as equal and "-nu"
# would keep just ONE of them, hiding every other atom that moved too far
cat ${tempfile}shifts |\
nawk '$NF+0 > 0.2 && $2 ~ /[xyz]/ {print "BADATOM", $1, $2, "moved", $NF}' |\
sort -u >! ${tempfile}bigshift_numbers

# extract and mark atoms that LED TO large XYZ shifts
# (the cards are taken from $scriptfile, i.e. the positions BEFORE the move,
#  and appended to the list started above)
cat ${tempfile}bigshift_numbers $scriptfile |\
nawk '$1=="BADATOM" && NF==5 {bad[$2]=$3 " " $4 " " $5}\
      $1~/^ATOM/{++n; if(bad[n]) \
      print "BADATOM" substr($0, index($0, "ATOM")+4), "(", bad[n] ")";}' |\
cat >> ${tempfile}badatoms


# with $MAD set, bad atoms are only reported below, never removed
if($?MAD) then
    # only eliminate sites with all-bad atoms? 
    unset FILTER_ATOMS
endif


# now actually eliminate the "bad" atoms from $coordfile
# (the result goes to ${tempfile}filtered; $coordfile itself is untouched
#  here.  ATOM lines whose ordinal number was flagged are left out, all
#  other lines are copied)
cat ${tempfile}badatom_numbers ${tempfile}bigshift_numbers $coordfile |\
nawk '$1=="BADATOM" && NF==5 {bad[$2]=1; next}\
      $1~/^ATOM/{++n; if(! bad[n]) print} \
      $1!~/^ATOM/{print}' |\
cat >! ${tempfile}filtered

# eliminate atoms in the vicinity of baddies?

# don't need these anymore
rm -f ${tempfile}badatom_numbers ${tempfile}bigshift_numbers >& /dev/null
rm -f ${tempfile}shifts >& /dev/null

# convert negative anomalous occupancies to zero
# (could just be a derivative with no anomalous signal)
# $neganoms = number of surviving ATOM lines with a negative 7th field
set neganoms = `nawk '$1~/^ATOM/ && $7+0<0' ${tempfile}filtered | wc -l`
if("$neganoms" != "0") then
    # some anomalous occupancies were negative
    # set them to zero (mlphare set-up engine will not refine these)
    cat ${tempfile}filtered |\
    nawk '$1~/^ATOM/ && $7+0<0 {$7=0} {print}' |\
    cat >! ${tempfile}zeroed
    mv ${tempfile}zeroed ${tempfile}filtered
endif

# number of atoms removed by the filter = ATOM cards before - ATOM cards after
set oldatoms = `cat $coordfile | nawk '$1~/^ATOM/' | wc -l`
set newatoms = `cat ${tempfile}filtered | nawk '$1~/^ATOM/' | wc -l`
@ baddies = ( $oldatoms - $newatoms )


# either adopt the filtered atom list (FILTER_ATOMS set) or just warn
if($?FILTER_ATOMS) then
    #unset FILTER_ATOMS
    
    # use filtered atom list
    mv ${tempfile}filtered $coordfile
    
    # the atom list changed, so any convergence reached with the old
    # list no longer counts, and the run history is forgotten
    if(("$baddies" != "0")||("$neganoms" != "0")) then
	unset Converged
	set Memory = 0
    endif
    
    # report on modifications of atoms
    if("$baddies" != "0") echo "eliminated $baddies bad atoms from $scriptfile"
    if("$neganoms" != "0") echo "$neganoms negative anomalous occupancies have been fixed to zero"    
else
    # atom list is left as it is: report only, and discard the filtered copy
    if("$baddies" != "0") echo "WARNING: $baddies atoms from $scriptfile look pretty bad! "
    if("$neganoms" != "0") echo "WARNING: $neganoms anomalous occupancies are negative! "    
    rm -f ${tempfile}filtered >& /dev/null
endif


####################################################################
# decide to increase degrees of freedom or not
# On convergence either finish (all parameters free, or the user fixed the
# parameter set) or move on to the next step of the schedule:
#     1 -> 3      2 -> 3      3 -> 5      4 -> 6      5 -> 6
# The tests run from the highest value down, so a value assigned by one
# test is never picked up again by a later one.
if($?Converged) then
    unset Converged
    if(($params >= 6)||($?user_params)) then
	echo "refinement has converged! "
	
	# clean up
	rm -f ${tempfile}EXCLUDE  >& /dev/null
	rm -f ${tempfile}.LABELS  >& /dev/null
	rm -f $coordfile          >& /dev/null
	rm -f ${tempfile}badatoms >& /dev/null
	goto flatten
    endif
    # implement a careful parameter refinement schedule
    if($params == 5) then
	# we've been refining xyz, occ & aocc
	echo "enabling B factor refinement"
	set params = 6
    endif
    if($params == 4) then
	# we've been refining xyz & B
	echo "enabling occupancy refinement"
	set params = 6
    endif
    # NOTE(review): this branch cannot fire as written - by the time it is
    # reached, $params == 5 has already been turned into 6 by the first test
    # above.  Whether 5 -> 4 (with $NO_B set) is still wanted needs confirming.
    if(($params == 5)&&($?NO_B)) then
	# we've been refining xyz, occ & aocc
	echo "switching to XYZ & B refinement"
	set params = 4
    endif
    if($params == 3) then
	# we've been refining xyz only
	echo "enabling occupancy refinement"
	set params = 5
    endif
    if($params == 2) then
	# we've been refining occ & aocc only
	echo "switching to XYZ refinement"
	set params = 3
	set Memory = 0
    endif
    if($params == 1) then
	# we've been refining B only
	echo "switching to XYZ refinement"
	set params = 3
	set Memory = 0
    endif
endif

####################################################################


# we are now ready to go back to refinement
# ($coordfile holds the atom list for the next script)


goto Write_Script


















FindProgs:
########################################################################################################

 ######     #    #    #  #####           #####   #####    ####    ####    ####
 #          #    ##   #  #    #          #    #  #    #  #    #  #    #  #
 #####      #    # #  #  #    #          #    #  #    #  #    #  #        ####
 #          #    #  # #  #    #          #####   #####   #    #  #  ###       #
 #          #    #   ##  #    #          #       #   #   #    #  #    #  #    #
 #          #    #    #  #####           #       #    #   ####    ####    ####

########################################################################################################
#	find needed third-party programs
########################################################################################################




# defaults for the DM program name and for its method keyword
set DM_method = PERT
set DM        = "dm"





find_mapman:
########################################################################################################
#	find the O/RAVE MAPMAN executable
#
#	in:   $MAPMAN (first guess), $MAXLINE (cap on the length of candidate lists),
#	      $NO_MAPMAN (if set, nothing is searched and default names are used)
#	out:  on success $MAPMAN, $MAPMAN_Version and $ofmt ("dsn6") are set;
#	      on failure $MAPMAN_Version is "0" and $NO_MAPMAN is set
#
#	A candidate is accepted when running it on empty input prints the
#	"MAPMAN" and "Kleywegt" signature lines.  The search widens in stages
#	(default, "which", directory listing, find) until a version of at
#	least $GoodVersion has been seen.
########################################################################################################
if($?NO_MAPMAN) then
    # user doesn't want o-format files
    set MAPMAN = mapman
    set BRIX   = brix
    set BONES  = bones
    goto find_shelx
endif
# no mapman for Linux
#if(`uname` == Linux) then
#    set MAPMAN = "mapman"
#    goto find_brix
#endif

set program  = "$MAPMAN"
set names    = "mapman MAPMAN 6d_mapman 4d_mapman lx_mapman"
set badname  = "xdlmapman"
set places = "~/bin /programs/o/bin /programs/bin /programs /xtal /usr/local/bin /usr/local /usr/xtal /usr"

# candidates smaller than this many bytes are ignored
set size        = 100000
set program_Version = "0"
set GoodVersion     = "4"

# make sure no xwindows program pops up
# (the old value is parked in $noDISPLAY and restored further down)
if($?DISPLAY) then
    set noDISPLAY = $DISPLAY
endif
setenv DISPLAY

# first, check to see if default will work
test -x "$program"
if(! $status) then
    # ls prints the name only if the file is bigger than $size
    set file = `ls -lnL $program |& nawk -v size=$size '$5>size{print $NF}'`
    set temp = ""
    # an empty $file would leave a null command in the pipeline below
    if("$file" != "") then
	# test for program signature (author and version number)
	set temp = `echo "" | $file |& nawk '/Kleywegt/ && name{print ver, 1;exit} /MAPMAN/{name=1} /Version/{split($NF,w,"/"); ver=w[2]}'`
    endif
    if("$temp" != "") then
	# get most recent version (if possible)
	# $temp is "ver 1" (or just "1"), so the best version so far is the LAST word
	set temp = `echo $temp $program_Version | nawk '$1+0 > $NF+0{print $1}'`
	if("$temp" != "") then
	    set program = "$file"
	    set program_Version = "$temp"
	endif
    endif
endif


# try using the "which" command
foreach name ( $names )
    # check for sufficiently high version number
    set temp = `echo "$program_Version $GoodVersion" | nawk '$1+0 >= $2+0{print $1}'`
    if("$temp" != "") break
    
    # "which" should handle processing of $path
    set possibilities = `which $name |& nawk 'NF==1{gsub(/[*?\042\047]/," "); print}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
    foreach file ( $possibilities )
	# skip filenames we know are wrong
	if("$file" =~ *$badname) continue
	
	# check to see if it's even executable
	test -x "$file"
	if(! $status) then
	    # test for program signature (author and version number)
	    set temp = `echo "" | $file |& nawk '/Kleywegt/ && name{print ver, 1;exit} /MAPMAN/{name=1} /Version/{split($NF,w,"/"); ver=w[2]}'`
	    if("$temp" != "") then
		# get most recent version (if possible)
		set temp = `echo $temp $program_Version | nawk '$1+0 > $NF+0{print $1}'`
		if("$temp" != "") then
		    set program = "$file"
		    set program_Version = "$temp"
		endif
	    endif
	endif
    end
end


# check again for sufficiently high version number
set temp = `echo "$program_Version $GoodVersion" | nawk '$1+0 >= $2+0{print $1}'`
if("$temp" == "") then    

    # simplify program name
    set program = `echo $names | nawk '{print $1}'`
    
    # search for $program in likely places
    echo -n "Looking for $program "
    set HARD_TO_FIND
    # let the user cut the search short with an interrupt
    onintr Skip_MAPMAN_search
    foreach place ( $places )
	if(-e $place) then
	    # keep looking: biggest executable regular files first
	    # (-k5 is the size column; the old "+4" key syntax is rejected by current sort)
	    set files = `ls -lnL ${place} |& grep "$program" |& sort -nr -k5 |& nawk -v size=$size ' /^-/ && $1 ~ /x/ && $5 > size {print $NF}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
	    foreach file ( $files )
		# skip filenames we know are wrong
		if("$file" =~ *$badname) continue
		
		set file = "${place}/${file}"
		# test for program signature (author and version number)
		set temp = `echo "" | $file |& nawk '/Kleywegt/ && name{print ver, 1;exit} /MAPMAN/{name=1} /Version/{split($NF,w,"/"); ver=w[2]}'`
		if("$temp" != "") then
		    # get most recent version (if possible)
		    set temp = `echo $temp $program_Version | nawk '$1+0 > $NF+0{print $1}'`
		    if("$temp" != "") then
			set program = "$file"
			set program_Version = "$temp"
		    endif
		endif
	    end
	endif
		
	# entertainment
	echo -n "."

	# check for sufficiently high version number
	set temp = `echo "$program_Version $GoodVersion" | nawk '$1+0 >= $2+0{print $1}'`
	if("$temp" != "") break
    end
endif


# check again for sufficiently high version number
set temp = `echo "$program_Version $GoodVersion" | nawk '$1+0 >= $2+0{print $1}'`
if("$temp" == "") then

    # get absolute time (seconds since 1/1/1)
    set THEN = `date "+%S %M %H %j %Y" |& nawk '{$5-=1; printf "%50.0f\n", $1+ 60*($2+ 60*($3+ 24*($4 + 365 *$5 + int($5/4) -int($5/100) +int($5/400) )))}'`

    echo -n " looking harder "
    
    foreach place ( $places )
	# check for sufficiently high version number
	set temp = `echo "$program_Version $GoodVersion" | nawk '$1+0 >= $2+0{print $1}'`
	if("$temp" != "") break
	
	if(-e $place) then
	    if("$place" == "/") echo -n "hmm."
	
	    # use find to get candidate files
	    set files = `find $place -name '*'$program'*' -follow -size +${size}c -perm -1 -print |& nawk '! /^find:/' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print} l>L{exit}'`
	    foreach file ( $files )
		# skip filenames we know are wrong
		if("$file" =~ *$badname) continue

		# test for program signature (author and version number)
		set temp = `echo "" | $file |& nawk '/Kleywegt/ && name{print ver, 1;exit} /MAPMAN/{name=1} /Version/{split($NF,w,"/"); ver=w[2]}'`
		if("$temp" != "") then
		    # get most recent version (if possible)
		    set temp = `echo $temp $program_Version | nawk '$1+0 > $NF+0{print $1}'`
		    if("$temp" != "") then
			set program = "$file"
			set program_Version = "$temp"
		    endif
		endif
	    end
	endif
	
	# entertainment
	echo -n "."

	# give up if this is taking too long (10 minutes)
	set NOW = `date "+%S %M %H %j %Y" |& nawk '{$5-=1; printf "%50.0f\n", $1+ 60*($2+ 60*($3+ 24*($4 + 365 *$5 + int($5/4) -int($5/100) +int($5/400) )))}'`
	set temp = `echo "$NOW $THEN" | nawk '{printf "%d", $1 - $2}'`
	if($temp > 600) then
	    echo " this is taking too long."
	    break;
	endif	
    end
endif

Skip_MAPMAN_search:
# restore default interrupt handling
onintr

# check if we found any program
test -x "$program"
if(! $status) then
    # got something, make sure it works
    set file = $program
    set temp = `echo "" | $file |& nawk '/Kleywegt/ && name{print ver, 1;exit} /MAPMAN/{name=1} /Version/{split($NF,w,"/"); ver=w[2]}'`
    if("$temp" != "") then
	set MAPMAN = $program
	set MAPMAN_Version = "$temp"
	set ofmt = "dsn6"
    else
	# this shouldn't happen, but...
#	set MAPMAN = ""
    endif
else
#    set MAPMAN = ""
endif

# turn display back on
if($?noDISPLAY) then
    setenv DISPLAY $noDISPLAY
endif
unset noDISPLAY


# out of ideas, help?
test -x "$MAPMAN"
if(! $status) then
    echo ""
    # only announce the result if the slow search had to be started
    if($?HARD_TO_FIND) echo "found $MAPMAN"
else
    echo ""
    echo "Couldn't find ${program}, looked everywhere."
    set MAPMAN_Version = "0"
    set NO_MAPMAN
endif
unset HARD_TO_FIND



find_brix:
########################################################################################################
#	find a working "brix" map converter (only needed when there is no MAPMAN)
#
#	in:   $BRIX (first guess), $MAPMAN, $tempfile, $MAXLINE
#	out:  on success $BRIX is the verified executable and $ofmt is "brix";
#	      on failure $NO_BRIX is set
#
#	A candidate is accepted when, given a tiny CCP4 test map, it writes an
#	output file that contains the string ":-)".
########################################################################################################
# no need for brix if we have mapman
if(-e "$MAPMAN") goto find_shelx

# don't have mapman, resort to brix and bones...
set program  = "$BRIX"
set names    = "brix BRIX 6d_brix 4d_brix"
set badname  = "bones2pdb"
# (an earlier, longer list used to be assigned here and was immediately overwritten)
set places = "/usr/bin ~/bin /programs/o/bin /programs /xtal /usr/local /usr/xtal /usr"


# make a test map
echo "1 1 1 1 1" >! ${tempfile}.hkl
f2mtz hklin ${tempfile}.hkl hklout ${tempfile}.mtz << EOF >& /dev/null
CELL 20 20 20 90 90 90
SYMM P1
LABOU H K L F PHI
CTYPI H H H F P
EOF
fft hklin ${tempfile}.mtz mapout ${tempfile}.map << EOF >& /dev/null
RESO 10
LABIN F1=F PHI=PHI
EOF
rm -f ${tempfile}.hkl ${tempfile}.mtz >& /dev/null
set testmap = ${tempfile}.map
set newmap = ${tempfile}.omap
rm -f "$newmap" >& /dev/null
if(! -e "$testmap") then
    # this is not going to work
    echo "test-map generation failed! "
    goto End_BRIX_search
endif


# first, check to see if default will work
# (nested tests instead of "continue": we are not inside a loop here)
test -x "$program"
if(! $status) then    
    set file = "$program"
    
    # test for program function
    rm -f "$newmap" >& /dev/null
    echo "" | $file $testmap $newmap >& /dev/null
    if(-e "$newmap") then
	# check output file
	grep -l ":-)" "$newmap" >& /dev/null
	if(! $status) then
	    # made it this far, $file must work...
	    set program = "$file"
	    goto End_BRIX_search
	endif
    endif
    # default did not work: fall through to the searches below
endif


# try using the "which" command
foreach name ( $names )
    # "which" should handle processing of $path
    set possibilities = `which $name |& nawk 'NF==1{gsub(/[*?\042\047]/," "); print}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
    foreach file ( $possibilities )
	# skip filenames we know are wrong
	if("$file" =~ *$badname) continue
	
	# check to see if it's even executable
	test -x "$file"
	if($status) continue
	
	# test for program function
	rm -f "$newmap" >& /dev/null
	echo "" | $file $testmap $newmap >& /dev/null
	if(! -e "$newmap") continue
	
	# check output file
	grep -l ":-)" "$newmap" >& /dev/null
	if($status) continue
	
	# made it this far, $file must work...
	set program = "$file"
	goto End_BRIX_search
    end
end


# simplify program name
set program = `echo $names | nawk '{print $1}'`

# search for $program in likely places
echo -n "Looking for $program program "
set HARD_TO_FIND
# get absolute time (seconds since 1/1/1)
set THEN = `date "+%S %M %H %j %Y" |& nawk '{$5-=1; printf "%50.0f\n", $1+ 60*($2+ 60*($3+ 24*($4 + 365 *$5 + int($5/4) -int($5/100) +int($5/400) )))}'`
# let the user cut the search short with an interrupt
onintr End_BRIX_search

foreach place ( $places )
    if(! -e $place) continue
    
    # keep looking: biggest executable regular files first
    # (-k5 is the size column; the old "+4" key syntax is rejected by current sort)
    set files = `ls -lnL ${place} |& grep "$program" |& sort -nr -k5 |& nawk ' /^-/ && $1 ~ /x/ {print $NF}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
    foreach file ( $files )
	# skip filenames we know are wrong
	if("$file" =~ *$badname) continue
	set file = "${place}/${file}"
	
	# check to see if it's even executable
	test -x "$file"
	if($status) continue
	
	# test for program function
	rm -f "$newmap" >& /dev/null
	echo "" | $file $testmap $newmap >& /dev/null
	if(! -e "$newmap") continue
	
	# check output file
	grep -l ":-)" "$newmap" >& /dev/null
	if($status) continue
	
	# made it this far, $file must work...
	set program = "$file"
	goto End_BRIX_search
    end

    # entertainment
    echo -n "."
end


echo -n " looking harder "
    
foreach place ( $places )
    if(! -e $place) continue

    if("$place" == "/") echo -n "hmm."
	
    # use find to get candidate files
    set files = `find $place -name '*'$program'*' -follow -perm -1 -print |& nawk '! /^find:/' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print} l>L{exit}'`
    foreach file ( $files )
	# skip filenames we know are wrong
	if("$file" =~ *$badname) continue
	
	# make sure it's executable
	test -x $file
	if($status) continue

	# test for program function
	rm -f "$newmap" >& /dev/null
	echo "" | $file $testmap $newmap >& /dev/null
	if(! -e "$newmap") continue
	
	# check output file
	grep -l ":-)" "$newmap" >& /dev/null
	if($status) continue
	
	# made it this far, $file must work...
	set program = "$file"
	goto End_BRIX_search
    end
    
    # entertainment
    echo -n "."
    
    # give up if this is taking too long (10 minutes)
    set NOW = `date "+%S %M %H %j %Y" |& nawk '{$5-=1; printf "%50.0f\n", $1+ 60*($2+ 60*($3+ 24*($4 + 365 *$5 + int($5/4) -int($5/100) +int($5/400) )))}'`
    set temp = `echo "$NOW $THEN" | nawk '{printf "%d", $1 - $2}'`
    if($temp > 600) then
	echo " this is taking too long."
	goto End_BRIX_search
    endif	
end

End_BRIX_search:
# restore default interrupt handling
onintr

# check if we found any program
test -x "$program"
if(! $status) then
    # got something, make sure it works (one last time, whatever path led here)
    set file = $program
    rm -f $newmap >& /dev/null
    echo "" | $file $testmap $newmap >& /dev/null
    if(-e "$newmap") then
	# check output file
	grep -l ":-)" "$newmap" >& /dev/null
	if(! $status) then
	    set BRIX = "$file"
	    set ofmt = "brix"
	endif
    endif
endif
# clean up
rm -f ${tempfile}.map >& /dev/null
rm -f ${tempfile}.omap >& /dev/null



# out of ideas, help?
test -x "$BRIX"
if(! $status) then
    echo ""
    # only announce the result if the slow search had to be started
    if($?HARD_TO_FIND) echo "found $BRIX"
else
    echo ""
    echo "Couldn't find ${program}, looked everywhere."
    set NO_BRIX
endif
unset HARD_TO_FIND



find_bones:
########################################################################################################
#	find a working "bones" skeletonization program (only needed when there is no MAPMAN)
#
#	in:   $BONES (first guess), $MAPMAN, $tempfile, $MAXLINE
#	out:  $BONES is the verified executable on success, or reset to "bones" on failure
#
#	A candidate is accepted when, fed the answers in ${tempfile}.inp, it
#	writes ${tempfile}.o containing the string "SKEL_ATOM_XYZ".
########################################################################################################
# no need for "bones" if we have mapman
if(-e "$MAPMAN") goto find_shelx

set program  = "$BONES"
set names    = "bones BONES 6d_bones 4d_bones"
set badname  = "bones2pdb"
set places = "/usr/bin ~/bin /programs/o/bin /programs/bin /programs /xtal /usr/local/o/bin /usr/local/o /usr/local/bin /usr/local /usr/xtal /usr"

# make a test map
echo "1 1 1 1 1" >! ${tempfile}.hkl
f2mtz hklin ${tempfile}.hkl hklout ${tempfile}.mtz << EOF >& /dev/null
CELL 10 10 10 90 90 90
SYMM P1
LABOU H K L F PHI
CTYPI H H H F P
EOF
fft hklin ${tempfile}.mtz mapout ${tempfile}.map << EOF >& /dev/null
LABIN F1=F PHI=PHI
EOF
rm -f ${tempfile}.hkl ${tempfile}.mtz >& /dev/null
set testmap = ${tempfile}.map
if(! -e "$testmap") set testmap = ""

# make a test input file (the answers piped into each candidate)
cat << EOF >! ${tempfile}.inp
$testmap
1000 1000
1000
${tempfile}.o
skel
EOF

# first, check to see if default will work
# (nested tests instead of "continue": we are not inside a loop here)
test -x "$program"
if(! $status) then
    set file = "$program"
    
    # test for program function
    rm -f ${tempfile}.o >& /dev/null
    cat ${tempfile}.inp | $file >& /dev/null
    if(-e "${tempfile}.o") then
	# check output file
	grep -l "SKEL_ATOM_XYZ" ${tempfile}.o >& /dev/null
	if(! $status) then
	    # made it this far, $file must work...
	    set program = "$file"
	    goto End_BONES_search
	endif
    endif
    # default did not work: fall through to the searches below
endif


# try using the "which" command
foreach name ( $names )
    # "which" should handle processing of $path
    set possibilities = `which $name |& nawk 'NF==1{gsub(/[*?\042\047]/," "); print}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
    foreach file ( $possibilities )
	# skip filenames we know are wrong
	if("$file" =~ *$badname) continue
	
	# check to see if it's even executable
	test -x "$file"
	if($status) continue
	
	# test for program function
	rm -f ${tempfile}.o >& /dev/null
	cat ${tempfile}.inp | $file >& /dev/null
	if(! -e "${tempfile}.o") continue
    
	# check output file
	grep -l "SKEL_ATOM_XYZ" ${tempfile}.o >& /dev/null
	if($status) continue
	
	# made it this far, $file must work...
	set program = "$file"
	goto End_BONES_search
    end
end


# simplify program name
set program = `echo $names | nawk '{print $1}'`

# search for $program in likely places
echo -n "Looking for $program program "
set HARD_TO_FIND
# get absolute time (seconds since 1/1/1)
set THEN = `date "+%S %M %H %j %Y" |& nawk '{$5-=1; printf "%50.0f\n", $1+ 60*($2+ 60*($3+ 24*($4 + 365 *$5 + int($5/4) -int($5/100) +int($5/400) )))}'`
# let the user cut the search short with an interrupt
onintr End_BONES_search

foreach place ( $places )
    if(! -e $place) continue
    
    # keep looking: biggest executable regular files first
    # (-k5 is the size column; the old "+4" key syntax is rejected by current sort)
    set files = `ls -lnL ${place} |& grep "$program" |& sort -nr -k5 |& nawk ' /^-/ && $1 ~ /x/ {print $NF}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
    foreach file ( $files )
	# skip filenames we know are wrong
	if("$file" =~ *$badname) continue
	set file = "${place}/${file}"
	
	# check to see if it's even executable
	test -x "$file"
	if($status) continue
	
	# test for program function
	rm -f ${tempfile}.o >& /dev/null
	cat ${tempfile}.inp | $file >& /dev/null
	if(! -e "${tempfile}.o") continue
    
	# check output file
	grep -l "SKEL_ATOM_XYZ" ${tempfile}.o >& /dev/null
	if($status) continue
	
	# made it this far, $file must work...
	set program = "$file"
	goto End_BONES_search
    end

    # entertainment
    echo -n "."
end


echo -n " looking harder "
    
foreach place ( $places )
    if(! -e $place) continue

    if("$place" == "/") echo -n "hmm."
	
    # use find to get candidate files
    set files = `find $place -name '*'$program'*' -follow -perm -1 -print |& nawk '! /^find:/' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print} l>L{exit}'`
    foreach file ( $files )
	# skip filenames we know are wrong
	if("$file" =~ *$badname) continue
	
	# make sure it's executable
	test -x $file
	if($status) continue

	# test for program function
	rm -f ${tempfile}.o >& /dev/null
	cat ${tempfile}.inp | $file >& /dev/null
	if(! -e "${tempfile}.o") continue
    
	# check output file
	grep -l "SKEL_ATOM_XYZ" ${tempfile}.o >& /dev/null
	if($status) continue
	
	# made it this far, $file must work...
	set program = "$file"
	goto End_BONES_search
    end
    
    # entertainment
    echo -n "."
    
    # give up if this is taking too long (10 minutes)
    set NOW = `date "+%S %M %H %j %Y" |& nawk '{$5-=1; printf "%50.0f\n", $1+ 60*($2+ 60*($3+ 24*($4 + 365 *$5 + int($5/4) -int($5/100) +int($5/400) )))}'`
    set temp = `echo "$NOW $THEN" | nawk '{printf "%d", $1 - $2}'`
    if($temp > 600) then
	echo " this is taking too long."
	goto End_BONES_search
    endif	
end

End_BONES_search:
# restore default interrupt handling
onintr

# check if we found any program
test -x "$program"
if(! $status) then
    # got something, make sure it works (one last time, whatever path led here)
    set file = $program
    rm -f ${tempfile}.o >& /dev/null
    cat ${tempfile}.inp | $file >& /dev/null
    if(-e "${tempfile}.o") then
	# check output file
	grep -l "SKEL_ATOM_XYZ" ${tempfile}.o >& /dev/null
	if(! $status) then
	    set BONES = "$file"
	endif
    endif
endif
# clean up (regardless)
rm -f ${tempfile}.map >& /dev/null
rm -f ${tempfile}.inp >& /dev/null
rm -f ${tempfile}.o >& /dev/null



# out of ideas, help?
test -x "$BONES"
if(! $status) then
    echo ""
    # only announce the result if the slow search had to be started
    if($?HARD_TO_FIND) echo "found $BONES"
else
    echo ""
    echo "Couldn't find ${program}, looked everywhere."
    set BONES = "bones"
endif
unset HARD_TO_FIND





















find_shelx:
########################################################################################################
#	find the SHELXS executable (only when sites must be found and the user has not opted out)
#
#	in:   $SHELX (first guess), $FIND_SITES, $NO_SHELX, $MAXLINE
#	out:  $SHELX is the verified executable on success, otherwise left as it was
#
#	A candidate is accepted when, run with the (nonexistent) job name $$,
#	it prints a "CANNOT OPEN FILE" line whose last word contains "$$.ins".
#	Control always leaves through "goto Return_FindProgs".
########################################################################################################
# first, do we HAVE to find shelx?
if((! $?FIND_SITES)||($?NO_SHELX)) then
    set SHELX = shelxs
    goto Return_FindProgs
endif

set program = "$SHELX"
set names = "shelxs shelx"
set places = "/programs/shelx /programs/bin /programs /usr/local/shelx /usr/local/bin /usr/local /usr/xtal"
# candidates smaller than this many bytes are ignored
set size    = 100000

# first, check to see if default will work
test -x $program
if(! $status) then
    set file = `ls -lnL $program |& nawk -v size=$size '$5>size{print $NF}'`
    # test for program signature
    set temp = `$file $$ |& nawk '/CANNOT OPEN FILE/{print $NF}' | grep "$$.ins" | tail -1`
    if("$temp" != "") then
	set program = "$file"
    endif
endif


# try using the "which" command
foreach name ( $names )
    # check for success
    test -x $program
    if(! $status) break
    
    # which should handle processing of $path
    set possibilities = `which $name |& nawk 'NF==1' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
    foreach file ( $possibilities )
	test -x "$file"
	if(! $status) then
	    # test for program signature
	    set temp = `$file $$ |& nawk '/CANNOT OPEN FILE/{print $NF}' | grep "$$.ins" | tail -1`
	    if("$temp" != "") then
		set program = "$file"
	    endif
	endif
    end
end

# let the user cut the slow searches short with an interrupt
onintr Skip_SHELX_search

# check again for success
test -x $program
if($status) then
    
    # truncate to base name
    set program = `echo $program | nawk 'BEGIN{FS="/"} {print $NF}'`
    
    # search for $program in likely places
    echo -n "Looking for $program "
    foreach place ( $places )
	if(-e $place) then
	    # keep looking: biggest executable regular files first
	    # (-k5 is the size column; the old "+4" key syntax is rejected by current sort)
	    set files = `ls -lnL ${place} |& grep "$program" |& sort -nr -k5 |& nawk -v size=$size ' /^-/ && $1 ~ /x/ && $5 > size {print $NF}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
	    foreach file ( $files )
		set file = "${place}/${file}"
		# test for program signature (once is enough: there is no version to compare)
		set temp = `$file $$ |& nawk '/CANNOT OPEN FILE/{print $NF}' | grep "$$.ins" | tail -1`
		if("$temp" != "") then
		    set program = "$file"
		endif
	    end
	endif
		
	# entertainment
	echo -n "."

	# check for success
	test -x $program
	if(! $status) break
    end
endif


# check again for success
test -x $program
if($status) then

    # get absolute time (seconds since 1/1/1)
    set THEN = `date "+%S %M %H %j %Y" | nawk '{$5-=1; printf "%50.0f\n", $1+ 60*($2+ 60*($3+ 24*($4 + 365 *$5 + int($5/4) -int($5/100) +int($5/400) )))}'`

    echo -n " looking harder "
    foreach place ( $places /usr / )
	# check for success
	test -x $program
	if(! $status) break
	
	if(-e $place) then
	    if("$place" == "/") echo -n "hmm."
	
	    # use find to get candidate files
	    set files = `find $place -name '*'$program'*' -follow -size +${size}c -perm -1 -print |& nawk '! /^find:/' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
	    foreach file ( $files )

		# test for program signature (once is enough: there is no version to compare)
		set temp = `$file $$ |& nawk '/CANNOT OPEN FILE/{print $NF}' | grep "$$.ins" | tail -1`
		if("$temp" != "") then
		    set program = "$file"
		endif
	    end
	endif
	
	# entertainment
	echo -n "."

	# give up if this is taking too long (10 minutes)
	set NOW = `date "+%S %M %H %j %Y" | nawk '{$5-=1; printf "%50.0f\n", $1+ 60*($2+ 60*($3+ 24*($4 + 365 *$5 + int($5/4) -int($5/100) +int($5/400) )))}'`
	set temp = `echo "$NOW $THEN" | nawk '{printf "%d", $1 - $2}'`
	if($temp > 600) then
	    echo " this is taking too long."
	    break;
	endif	
    end
endif

Skip_SHELX_search:
# restore default interrupt handling
onintr

# check if we found any program
test -x "$program"
if(! $status) then
    # got something, make sure it works
    set file = $program
    set temp = `$file $$ |& nawk '/CANNOT OPEN FILE/{print $NF}' | grep "$$.ins" | tail -1`
    if("$temp" != "") then
	set SHELX = $program
    else
	# this shouldn't happen, but...
#	set SHELX = ""
    endif
else
#    set SHELX = ""
endif

#set SHELX = "shelxs"

# out of ideas, help?
test -x "$SHELX"
if(! $status) then
    echo "found $SHELX"
else
    echo ""
    echo "Couldn't find shelxs, looked everywhere."
endif


goto Return_FindProgs






















FindSites:
################################################################################

 ######     #    #    #  #####            ####      #     #####  ######   ####
 #          #    ##   #  #    #          #          #       #    #       #
 #####      #    # #  #  #    #           ####      #       #    #####    ####
 #          #    #  # #  #    #               #     #       #    #            #
 #          #    #   ##  #    #          #    #     #       #    #       #    #
 #          #    #    #  #####            ####      #       #    ######   ####

################################################################################
#   Run various site-finding programs
#
#   needs ${tempfile}mtzdmp
#
#   in:   $mtzfile, $hiRES, $tempfile, $scriptDIR, $mapDIR, $mtzDIR, $logDIR
#   out:  $pattRES, $max_dano, $max_diso; FH.mtz / FH_Patt.map / fh.hkl /
#         bestFH.log are moved into their directories; then jumps to run_shelx
#   A second visit (with $LOOKED_FOR_SITES already set) gives up: sets $BAD
#   and jumps to "done".
################################################################################

# don't loop forever
if($?LOOKED_FOR_SITES) then
    echo "Guess those sites we found were no good... ;("
    echo ""
    echo "We suggest solve as a good third-party program for doing this:"
    echo "http://www.solve.lanl.gov/"
    echo "Good luck! "
    set BAD
    goto done
endif
set LOOKED_FOR_SITES
unset FIND_SITES

# check for info on mtz file
if(! -e ${tempfile}mtzdmp) then
    # it is missing: do a minimal regeneration of this file from the
    # "D"-type columns in the mtzdump listing
    echo "go" | mtzdump HKLIN $mtzfile |&\
    nawk 'NF>6{if($(NF-1)=="D"){\
       D=$NF; m=$(NF-4); getline; s=$(NF-4); if(s+0==0) s=1; \
       print "D:", D, $NF, m/s;}}' >&! ${tempfile}mtzdmp
endif

# make the best Patterson we can:
# cutoffs are 1.5x the largest DANO / DISO value listed in the EXCLUDE file
if(-e ${tempfile}EXCLUDE) then
    set temp = `nawk '$2=="DANO"{print 1.5*$3}' ${tempfile}EXCLUDE | sort -n | tail -1`
    if("$temp" != "") set max_dano = "$temp"
    # re-use $temp for the DISO lookup (it used to keep the stale DANO value,
    # which then overwrote the DISO cutoff)
    set temp = `nawk '$2=="DISO"{print 1.5*$3}' ${tempfile}EXCLUDE | sort -n | tail -1`
    if("$temp" != "") set max_diso = "$temp"
endif
# no cutoff known: pass an empty value
if(! $?max_diso) set max_diso
if(! $?max_dano) set max_dano

# reduce resolution a little? (80% volume)
set pattRES = `echo "$hiRES" | nawk '$1+0>0.1{printf "%.2f", (0.8*1/($1^3))^(-1/3)}'`

if(! -e "${scriptDIR}bestFH.com") then
    # this should NEVER happen! 
    echo "ERROR! our ${scriptDIR}bestFH.com is missing! "
    echo "       what have you done with it? "
    goto Setup_scripts
endif
# run the packaged Matthews Patterson script
# (the nawk filter rewrites the reported "./" file names to their final directories)
${scriptDIR}bestFH.com $mtzfile ${pattRES}A dano $max_dano diso $max_diso |\
nawk '$1=="./FH.mtz"{$0="mtz" substr($0,2)} $1=="./FH_Patt.map"{$0="maps" substr($0,2)}  {print}'
rm -f wFH_Patt.map >& /dev/null
mv FH_Patt.map ${mapDIR} >& /dev/null
mv FH.mtz ${mtzDIR}      >& /dev/null
mv fh.hkl ${mtzDIR}      >& /dev/null
mv bestFH.log ${logDIR} >& /dev/null
echo ""


# try SHELX first, it's fastest, if it works

goto run_shelx

exit











run_shelx:
################################################################################

 #####   #    #  #    #           ####   #    #  ######  #       #    #
 #    #  #    #  ##   #          #       #    #  #       #        #  #
 #    #  #    #  # #  #           ####   ######  #####   #         ##
 #####   #    #  #  # #               #  #    #  #       #         ##
 #   #   #    #  #   ##          #    #  #    #  #       #        #  #
 #    #   ####   #    #           ####   #    #  ######  ######  #    #

################################################################################
#   Run SHELX
#
#   On success: writes $coordfile, sets $params, $newSG, $sites, $sitefile
#   and jumps to GotSites.  On any failure: falls back to run_rantan.
################################################################################


# without a usable executable there is nothing to do here
test -x "$SHELX"
if($status) then
    echo "shelxs is unavailable."
    echo ""
    
    # Plan B
    goto run_rantan
endif




# input reflections: the FH file if it was made, the original mtz otherwise
if(-e "${mtzDIR}FH.mtz") then
    set FHmtz = ${mtzDIR}FH.mtz
else
    set FHmtz = $mtzfile
endif

# number of sites to look for: the user's figure, or a spread of guesses
if($?user_sites) then
    set target_sites = "$user_sites sites"
else
    set target_sites = "1 sites 3 sites 5 sites 10 sites 15 sites 20 site"
endif

# resolution shells to try
#                        highest  frac_last   steps
if($?HURRY_UP) then
    # fewer shells when speed matters
    set shells = `echo "$pattRES 0.1 2" | nawk '$1+0>0{for(r=$2;r<=1;r+=(1-$2)/$3){printf "%.2fA ", (r*1/($1^3))^(-1/3)}}'`
else
    set shells = `echo "$pattRES 0.1 5" | nawk '$1+0>0{for(r=$2;r<=1;r+=(1-$2)/$3){printf "%.2fA ", (r*1/($1^3))^(-1/3)}}'`
endif

# the driver script is one of ours; regenerate the scripts if it vanished
if(! -e "${scriptDIR}shelx.com") then
    echo "ERROR! our ${scriptDIR}shelx.com is missing! "
    echo "       what have you done with it? "
    goto Setup_scripts
endif

# launch the combinatorial search; an interrupt skips straight to the clean-up
echo ""
echo "running ${scriptDIR}shelx.com ${FHmtz} $allSGs $shells $target_sites"
onintr After_shelx
${scriptDIR}shelx.com ${FHmtz} $allSGs $shells $target_sites
After_shelx:
onintr
mv shelx.log ${logDIR} >& /dev/null
# sweep up scratch files
# (kept as two commands: an unmatched glob cancels the whole rm it appears in)
rm -f shelx_temp* >& /dev/null
rm -f shelx.inp shelx.lst >& /dev/null

# keep non-ATOM records, plus ATOM records with "D" in field 5 and a
# positive number in columns 55-60
cat ./shelx.pdb |\
nawk '/^ATOM/ && $5 == "D" && substr($0, 55, 6)+0>0 {print}  ! /^ATOM/{print}' >! ${tempfile}.pdb

# anything left?
set sites = `nawk '/^ATOM/' ${tempfile}.pdb | wc -l`
if("$sites" == "0") then
    # nope
    rm -f ${tempfile}.pdb >& /dev/null
    echo "shelx failed..."
    goto run_rantan
endif

# convert the surviving sites to fractional coordinates
set site_cell = `nawk '$1 ~ /^CRYST/{printf "%.3f %.3f %.3f %.3f %.3f %.3f\n", $2,$3,$4,$5,$6,$7;exit}' ./shelx.pdb`
coordconv XYZIN ${tempfile}.pdb XYZOUT ${tempfile}.xyz << eof-pdb >& /dev/null
INPUT PDB
CELL $site_cell
OUTPUT FRAC
END
eof-pdb
# occupancy and B-factor are written as placeholders
# (the values in the pdb are sigma heights, not real occ/B)
cat ${tempfile}.xyz |\
nawk '{++i; printf " ATOM%-3d ANO  %6.3f %6.3f %6.3f ?.??? ?.??? BFAC ?.???\n", i, $2, $3, $4}' >! $coordfile
set params = 2

# remember which space group the reported solution came from
set newSG = `nawk '/results here are from/{getline; print $NF}' ./shelx.pdb`
# re-define otherSG later?


# scratch files are no longer needed
rm -f ${tempfile}.xyz ${tempfile}.pdb >& /dev/null

# hand the result back to the site handler
set sitefile = ./shelx.pdb
echo "using $sites sites in $sitefile"
goto GotSites
exit



run_rantan:
################################################################################

 #####   #    #  #    #          #####     ##    #    #   #####    ##    #    #
 #    #  #    #  ##   #          #    #   #  #   ##   #     #     #  #   ##   #
 #    #  #    #  # #  #          #    #  #    #  # #  #     #    #    #  # #  #
 #####   #    #  #  # #          #####   ######  #  # #     #    ######  #  # #
 #   #   #    #  #   ##          #   #   #    #  #   ##     #    #    #  #   ##
 #    #   ####   #    #          #    #  #    #  #    #     #    #    #  #    #

################################################################################
#   Run RANTAN,  CCP4's direct-methods program
#
#   On success: writes $coordfile, sets $params, $sites, $sitefile and jumps
#   to GotSites.  If no sites come out: falls back to run_rsps.
################################################################################

# input reflections: the FH file if it was made, the original mtz otherwise
if(-e "${mtzDIR}FH.mtz") then
    set FHmtz = ${mtzDIR}FH.mtz
else
    set FHmtz = $mtzfile
endif

# number of sites: only passed along when the user supplied one
if($?user_sites) then
    set target_sites = "$user_sites sites"
else
    set target_sites = ""
endif

# resolution shells to try
set shells = `echo "$pattRES 0.1 5" | nawk '$1+0>0{for(r=$2;r<=1;r+=(1-$2)/$3){printf "%.3fA ", (r*1/($1^3))^(-1/3)}}'`

# the driver script is one of ours; regenerate the scripts if it vanished
if(! -e "${scriptDIR}rantan.com") then
    echo "ERROR! our ${scriptDIR}rantan.com is missing! "
    echo "       what have you done with it? "
    goto Setup_scripts
endif

# launch the combinatorial search; an interrupt skips straight to the clean-up
echo ""
echo "running ${scriptDIR}rantan.com ${FHmtz} $allSGs $shells $target_sites"
onintr After_rantan
${scriptDIR}rantan.com ${FHmtz} $allSGs $shells $target_sites
After_rantan:
onintr
mv rantan.log ${logDIR} >& /dev/null
rm -f rantan_temp* >& /dev/null

# keep non-ATOM records, plus ATOM records whose field 5 is not "X" and
# that have a positive number in columns 55-60
cat ./rantan.pdb |\
nawk '/^ATOM/ && $5 != "X" && substr($0, 55, 6)+0>0 {print}  ! /^ATOM/{print}' >! ${tempfile}.pdb

# anything left?
set sites = `nawk '/^ATOM/' ${tempfile}.pdb | wc -l`
if("$sites" == "0") then
    echo "rantan failed..."
    rm -f ${tempfile}.pdb >& /dev/null
    goto run_rsps
endif


# convert the surviving sites to fractional coordinates
set site_cell = `nawk '$1 ~ /^CRYST/{printf "%.3f %.3f %.3f %.3f %.3f %.3f\n", $2,$3,$4,$5,$6,$7;exit}' ./rantan.pdb`
coordconv XYZIN ${tempfile}.pdb XYZOUT ${tempfile}.xyz << eof-pdb >& /dev/null
INPUT PDB
CELL $site_cell
OUTPUT FRAC
END
eof-pdb
# occupancy and B-factor are written as placeholders
# (the values in the pdb are sigma heights, not real occ/B)
cat ${tempfile}.xyz |\
nawk '{++i; printf " ATOM%-3d ANO  %6.3f %6.3f %6.3f ?.??? ?.??? BFAC ?.???\n", i, $2, $3, $4}' >! $coordfile
set params = 2

# scratch files are no longer needed
rm -f ${tempfile}.xyz ${tempfile}.pdb >& /dev/null

# hand the result back to the site handler
set sitefile = ./rantan.pdb
echo "using $sites sites in $sitefile"
goto GotSites
exit



run_rsps:
################################################################################

 #####   #    #  #    #          #####    ####   #####    ####
 #    #  #    #  ##   #          #    #  #       #    #  #
 #    #  #    #  # #  #          #    #   ####   #    #   ####
 #####   #    #  #  # #          #####        #  #####        #
 #   #   #    #  #   ##          #   #   #    #  #       #    #
 #    #   ####   #    #          #    #   ####   #        ####

################################################################################
#   Run Real-Space Patterson Search
#
#   in:   $scriptDIR, $mapDIR, $allSGs, $SG, $coordfile
#   out:  $coordfile is rewritten from rrsps.sites_$SG; $sitefile and $sites
#         are set; then jumps to GotSites
################################################################################

# same form of check as for the other driver scripts, and the same path that
# is executed below (a leading "./" would break an absolute $scriptDIR)
if(! -e "${scriptDIR}rrsps.com") then
    # this should NEVER happen! 
    echo "ERROR! our ${scriptDIR}rrsps.com is missing! "
    echo "       what have you done with it? "
    goto Setup_scripts
endif

echo ""
echo "running a recursive Real-Space Patterson Search. "

# do this in more than one SG? 

# use the old Patterson map
${scriptDIR}rrsps.com ${mapDIR}FH_Patt.map $allSGs

# now convert the output file to something Phaser can read
# (occupancy and B-factor are written as placeholders)
nawk 'NF==1,/roduct/' rrsps.sites_$SG |\
nawk '$2 ~ /[01].[0-9][0-9]/{++i; \
  printf(" ATOM%-3d ANO  %6.3f %6.3f %6.3f   ?.???  ?.??? BFAC   ?.???\n",\
  i, $2,$3,$4)}' >! $coordfile
# NOTE(review): the shelx and rantan branches also "set params = 2" at this
# point; this branch leaves $params untouched -- confirm that is intended

set sitefile = rrsps.sites_$SG

# count what was actually written; $sites would otherwise still hold the
# "0" left behind by the failed rantan attempt
set sites = `nawk '$1 ~ /^ATOM/' $coordfile | wc -l`

# incorporate these sites into the script
echo "using $sites sites in $sitefile"
goto GotSites
exit






















read_inscript:
################################################################################################################

 #####   ######    ##    #####          ####    ####   #####      #    #####    #####
 #    #  #        #  #   #    #        #       #    #  #    #     #    #    #     #
 #    #  #####   #    #  #    #         ####   #       #    #     #    #    #     #
 #####   #       ######  #    #             #  #       #####      #    #####      #
 #   #   #       #    #  #    #        #    #  #    #  #   #      #    #          #
 #    #  ######  #    #  #####          ####    ####   #    #     #    #          #

################################################################################################################
#   initialize variables from a pre-written mlphare script
################################################################################################################
# Reads   : $inscript (an existing mlphare script), $tempfile (scratch prefix)
# Writes  : $coordfile           - the DERIV..END cards (or bare ATOM cards) of
#                                  $inscript with malformed numbers repaired
#           ${tempfile}badatoms  - BADATOM/OLDATOM records, kept only if any exist
# Leaves  : via "goto GotSites" on success; via "goto GetSites" with $inscript
#           cleared when no ATOM cards can be recovered.

grep "ATOM" "$inscript" >& /dev/null
if($status) then
    echo "WARNING: no atoms in $inscript"
    set inscript = ""
    goto GetSites
endif
echo "imitating script: $inscript " 


# transform atoms into rereadable file
nawk '$1 ~ /DERIV/,/END/{print}' "$inscript" >! ${tempfile}atoms
set temp = `cat ${tempfile}atoms | wc -l`
if($temp == 0) then
    # maybe just no DERIV cards? 
    nawk '$1 ~ /^ATOM/' "$inscript" >! ${tempfile}atoms
endif

# Repair pipeline (each stage only touches ATOM cards):
#  1. put a space between a digit and a following "-" (numbers run together);
#     "safe" bounds the loop and is reset for EVERY card, so one badly
#     formatted card cannot use up the repair budget of the cards after it
#  2. split any field holding more than one decimal point, assuming 3 decimals
#  3. supply a 0.000 anomalous occupancy when BFAC appears one field early
#  4. drop the END card
cat ${tempfile}atoms |\
nawk '$1 ~ /^ATOM/ {safe=0; while(/[0-9]-/ && safe < 20){\
      # insert spaces between numbers that are obviously stuck together \
      stuck=match($0,/[0-9]-/); \
      $0 = substr($0,1,stuck) " " substr($0, stuck+1);++safe}}\
      {print}' |\
nawk '$1 ~ /^ATOM/ {for(i=1;i<=NF;++i){test=$i;\
      if(gsub("[\.]", ".", test)>1){\
      # more than one decimal in a number, assume 3 decimal places \
      stuck=match($i,/[\.]/)+3; \
      $i = substr($i,1,stuck) " " substr($i, stuck+1);++safe}}}\
      {print}' |\
nawk '$1~/^ATOM/ && $7=="BFAC"{$7="0.000 BFAC"} {print}' |\
nawk '! /END/{print}' >! ${coordfile}
rm -f ${tempfile}atoms >& /dev/null
    
# $coordfile now contains a filtered version of these sites
# so this script will be imitated
    
# retrieve "BAD" atom list (to ignore in atom finding)
nawk '$1 ~ /^BADATOM/ || $1 ~ /^OLDATOM/' "$inscript" >! ${tempfile}badatoms
set temp = `cat ${tempfile}badatoms | wc -l`
if($temp == 0) then
    rm -f ${tempfile}badatoms >& /dev/null
else
    echo "found $temp previously-tried atom records in $inscript"
    if($?MORE_SITES) echo "these sites will be avoided in atom-finding."
endif
    
# check for valid atoms
grep ATOM ${coordfile} >& /dev/null
if($status) then
    echo "WARNING: no mlphare atoms found in $inscript ... "
    set inscript = ""
    rm -f ${coordfile} >& /dev/null
    goto GetSites
endif
goto GotSites
exit
















read_inlog:
################################################################################################################

 #####   ######    ##    #####           #        ####    ####
 #    #  #        #  #   #    #          #       #    #  #    #
 #    #  #####   #    #  #    #          #       #    #  #
 #####   #       ######  #    #          #       #    #  #  ###
 #   #   #       #    #  #    #          #       #    #  #    #
 #    #  ######  #    #  #####           ######   ####    ####

################################################################################################################
#   Read in a user-specified MLphare logfile
################################################################################################################
# Reads   : $inlogfile (an MLphare log), $tempfile (scratch file name)
# Writes  : $coordfile - the DERIV..MLPHARE section of the log with ATOM cards
#           repaired so each reads  ATOM ANO x y z occ aocc BFAC b
# Leaves  : via "goto GotSites" on success; via "goto GetSites" with $inlogfile
#           cleared when the log holds no ATOM cards.

grep "ATOM" "$inlogfile" >& /dev/null
if($status) then
    echo "WARNING: no atoms in $inlogfile"
    set inlogfile = ""
    goto GetSites
endif

echo "imitating run that produced $inlogfile"

# transform log-file output into appropriate atom entries 
# (with full refinement cards)
#  - numbers printed without a separating blank are split after their third
#    decimal; "safe" bounds that loop and is reset for EVERY card so a long
#    atom list cannot exhaust the repair budget before the last cards
#  - a missing anomalous occupancy (BFAC in field 7) is filled in as 0.000
#  - the MLPHARE / SUMMARY lines that end the section are dropped
cat "$inlogfile" |\
nawk '$1~/^DERIV/,$1~/^MLPHARE/{print}'|\
nawk '$1 ~ /^ATOM/ {safe=0; while(/\.[0-9][0-9][0-9][0-9-]/ && safe < 20){\
      # insert spaces between numbers that are stuck together \
      stuck=match($0,/\.[0-9][0-9][0-9][0-9-]/)+3; \
      $0 = substr($0,1,stuck) " " substr($0, stuck+1);++safe}}\
      {print}' |\
nawk '$1~/^ATOM/ && $7=="BFAC"{$7="0.000 BFAC"} {print}' |\
nawk '! /MLPHARE/ && ! /SUMMARY/{print}' >! ${coordfile}

# check for valid atoms
grep ATOM ${coordfile} >& /dev/null
if($status) then
    echo "WARNING: no atoms found in ${inlogfile} ."
    rm -f ${coordfile}
    set inlogfile = ""
    goto GetSites
endif

# make sure coordfile goes atom ano x y z o ao BFAC b
# (same repair as in the pipeline above; a no-op on cards already fixed there)
cat ${coordfile} |\
nawk '$1~/^ATOM/ && $7=="BFAC"{$7="0.000 BFAC"} {print}' |\
cat >! ${tempfile}
mv $tempfile $coordfile

goto GotSites
exit














read_shelx:
################################################################################

 #####   ######    ##    #####            ####   #    #  ######  #       #    #
 #    #  #        #  #   #    #          #       #    #  #       #        #  #
 #    #  #####   #    #  #    #           ####   ######  #####   #         ##
 #####   #       ######  #    #               #  #    #  #       #         ##
 #   #   #       #    #  #    #          #    #  #    #  #       #        #  #
 #    #  ######  #    #  #####            ####   #    #  ######  ######  #    #

################################################################################
#   Read in a SHELX atom file
################################################################################
# Converts the atom records of $shelxfile into mlphare ATOM cards in $coordfile,
# counts them into $sites, records the file's unit cell in $site_cell, and
# continues at GotSites (or returns to GetSites when nothing usable is found).

# nothing to do without the file
if(! -e "$shelxfile") then
    echo "ERROR: $shelxfile does not exist! "
    set shelxfile = ""
    goto GetSites
endif

#echo "getting atoms from $shelxfile ..."

# Stage 1: pick atom-like lines (5+ fields, 2nd field ending in a digit) that
#          follow a PLAN line or an "Atom" header, stopping at "Reflections read".
# Stage 2: give lines that start with eight blanks a dummy first field, skip
#          anything labelled Q, nudge sites whose 6th field lacks ".0" off their
#          special position, and print the card.
cat $shelxfile |\
nawk 'p && NF>=5 && $2 ~ /[0-9]$/ {print} \
      /^PLAN/ || $2=="Atom"{p=1} /Reflections read/{p=0}' |\
nawk 'substr($0,1,8)=="        "{$0 = "1" $0} \
      $1 !~ /^Q/ && $2 !~ /^Q/ { \
          if($6 !~ /\.0/){$3+=0.001; $4+=0.001; $5+=0.001} \
          ++n; \
          printf(" ATOM%-3d ANO  %6.3f %6.3f %6.3f   ?.???  ?.??? BFAC   ?.???\n", n, $3, $4, $5) }' >! $coordfile

# one card per site, so the line count is the site count
set sites = `wc -l < $coordfile`
if("$sites" == 0) then
    echo "ERROR: no atoms found in ${shelxfile}! "
    
    rm -f $coordfile >& /dev/null
    set shelxfile = ""
    goto GetSites
endif

# occupancies are placeholders here, so refine them before anything else
set params = 2

# pull the cell off the CELL card (lets later code spot permuted axes)
set site_cell = `nawk '$1 == "CELL"{printf "%.3f %.3f %.3f %.3f %.3f %.3f\n", $3,$4,$5,$6,$7,$8;exit}' ${shelxfile}`
if($#site_cell != 6) then
    echo ""
    echo "WARNING: no unit cell in $shelxfile. "
    echo "we will use $CELL"
    echo ""
    set site_cell = "$CELL"
else
    if("$SG" =~ [pPcC][21][21][21]*) echo "SHELX Cell: $site_cell"
endif

# the SHELX sites are now in $coordfile, in mlphare format

echo "got $sites atoms from $shelxfile"
goto GotSites
exit






read_pdb:
################################################################################

 #####   ######    ##    #####           #####   #####   #####
 #    #  #        #  #   #    #          #    #  #    #  #    #
 #    #  #####   #    #  #    #          #    #  #    #  #####
 #####   #       ######  #    #          #####   #    #  #    #
 #   #   #       #    #  #    #          #       #    #  #    #
 #    #  ######  #    #  #####           #       #####   #####

################################################################################
#   Read in metal atoms given in standard PDB format
################################################################################
# Reads   : $pdbfile, $CELL, $SG, $tempfile (scratch prefix)
# Writes  : $coordfile (mlphare ATOM cards in fractional coordinates),
#           $sites, $site_cell, $sitefile, $params
# Leaves  : via "goto GotSites" on success; via "goto GetSites" with $pdbfile
#           cleared on any failure.
#echo "getting atom coordinates from $pdbfile"

# refuse to go on without a file (same guard as the SHELX and general
# readers); with an empty name the nawk below would wait on standard input
if(! -e "$pdbfile") then
    echo "ERROR: $pdbfile does not exist! "
    set pdbfile = ""
    goto GetSites
endif

# get unit cell (so we can tell if sites are permuted)
set site_cell = `nawk '$1 ~ /^CRYST/{printf "%.3f %.3f %.3f %.3f %.3f %.3f\n", $2,$3,$4,$5,$6,$7;exit}' $pdbfile`
if($#site_cell == 6) then
    if("$SG" =~ [pPcC][21][21][21]*) echo "pdb cell: $site_cell"
else
    echo ""
    echo "WARNING: no unit cell in $pdbfile. "
    echo "we will use $CELL"
    echo ""
    set site_cell = "$CELL"
endif

# try converting file
# first pdbset pass: only its CRYST/SCALE cards are kept; they are combined
# with the raw ATOM/HETATM records and run through pdbset a second time
echo "CELL $site_cell" | pdbset XYZIN $pdbfile XYZOUT ${tempfile}.pdb >&! ${tempfile}.log
egrep "^CRYS|^SCALE" ${tempfile}.pdb >! ${tempfile}cannon.pdb
egrep "^ATOM|^HETATM" $pdbfile >> ${tempfile}cannon.pdb
echo "END" >> ${tempfile}cannon.pdb
echo "CELL $site_cell" | pdbset XYZIN ${tempfile}cannon.pdb XYZOUT ${tempfile}.pdb >&! ${tempfile}.log
rm -f ${tempfile}cannon.pdb >& /dev/null
# orthogonal -> fractional coordinates
coordconv XYZIN ${tempfile}.pdb XYZOUT ${tempfile}.xyz << eof-pdb >&! ${tempfile}.log
INPUT PDB ORTH 1
CELL $site_cell
OUTPUT FRAC
END
eof-pdb
if($status) then
    # a failed conversion must not leave a partial output behind
    rm -f ${tempfile}.xyz >& /dev/null
endif
rm -f ${tempfile}.pdb >& /dev/null
if(! -e ${tempfile}.xyz) then
    echo ""
    echo "error from coordconv"
    mv ${tempfile}.log Phaser.coordconv.log >& /dev/null
    nawk 'toupper($0) ~ /ERROR/{print;getline;print;getline;print}' Phaser.coordconv.log
    echo ""
    echo "please examine Phaser.coordconv.log"
    echo "to see what went wrong, and"
    echo "check $pdbfile for errors."
    echo ""
    rm -f $coordfile >& /dev/null
    set pdbfile = ""
    goto GetSites
endif

# now convert the coordinates to MLphare format
# (value at column 46 is used for both occupancies, value at column 36 as B;
#  records whose column-46 value is not positive are dropped)
cat ${tempfile}.xyz |\
nawk '{print $2,$3,$4,substr($0,46)+0,substr($0,36)+0}' |\
nawk '$4+0>0{++i; printf " ATOM%-3d ANO  %6.3f %6.3f %6.3f %6.3f %6.3f BFAC %8.3f\n",\
i, $1, $2, $3, $4, $4, $5}' >! $coordfile

# clean up now, before the sanity checks, so that the failure exits below
# do not leave scratch files behind
rm -f ${tempfile}.xyz >& /dev/null
rm -f ${tempfile}.log >& /dev/null

set sites = `cat $coordfile | wc -l`
if("$sites" == 0) then
    echo "ERROR: no atoms found in ${pdbfile}! "
    rm -f $coordfile >& /dev/null
    set pdbfile = ""
    goto GetSites
endif
if("$sites" > 100) then
    echo "ERROR: way too many atoms found in ${pdbfile}! "
    echo "MLphare is for refining METAL sites! "
    rm -f $coordfile >& /dev/null
    set pdbfile = ""
    goto GetSites
endif

# probably best to refine these occupancies first
set params = 2

# $coordfile now contains the PDB sites in mlphare format

echo "got $sites atoms from $pdbfile"

set sitefile = "$pdbfile"
goto GotSites

exit


read_solve:
################################################################################

 #####   ######    ##    #####            ####    ####   #       #    #  ######
 #    #  #        #  #   #    #          #       #    #  #       #    #  #
 #    #  #####   #    #  #    #           ####   #    #  #       #    #  #####
 #####   #       ######  #    #               #  #    #  #       #    #  #
 #   #   #       #    #  #    #          #    #  #    #  #        #  #   #
 #    #  ######  #    #  #####            ####    ####   ######    ##    ######

################################################################################
#   Read in metal atoms given in solve.status file
################################################################################
# Reads   : $solvefile, $CELL
# Writes  : $coordfile (mlphare ATOM cards), $sites, $site_cell, $params
# Leaves  : via "goto GotSites" on success; via "goto GetSites" with $solvefile
#           cleared when no sites are found.
#echo "getting atom coordinates from $solvefile"

# solve occupancies are not on electron scale
set params = 2

# convert LAST set of coordinates to MLphare format
#  stage 1: lines after "TOP SOLUTION FOUND BY SOLVE" up to the TIME line
#  stage 2: restart the list at every OCC header so only the last table survives
#  stage 3: take x y z occ B counting back from the end of each line
cat $solvefile |\
nawk '/TOP SOLUTION FOUND BY SOLVE/{\
while($1 != "TIME")\
{status=getline; if((NF!=0)&&($1 != "TIME")) print; if(! status) break}}' |\
nawk '/OCC/{for(i in site){site[i]=""}; n=0}\
    /[0-9]/{++n; site[n]= $0}\
        END{for(i=1;i<=n;++i){print site[i]}}' |\
nawk '{++i; printf " ATOM%-3d ANO  %6.3f %6.3f %6.3f %6.3f %6.3f BFAC %8.3f\n",\
i, $(NF-5), $(NF-4), $(NF-3), $(NF-2), $(NF-2), $(NF-1)}' >! $coordfile

set sites = `cat $coordfile | wc -l`
if("$sites" == 0) then
    echo "ERROR: no atoms found in ${solvefile}! "
    rm -f $coordfile >& /dev/null
    set solvefile = ""
    goto GetSites
endif

# try to get unit cell (so we can tell if sites are permuted)
# start from a known-empty value: without this a failed search would either
# trip over an undefined variable or silently keep a cell left by another reader
set site_cell = ""
set dir = `dirname "$solvefile"`
# candidates: plain files with "solve" in their listing line, smallest first
# (field 5 of "ls -ln" is the size), limited in total size and name length.
# "-k 5" is the POSIX spelling of the obsolete "+4" key that newer sort
# implementations reject.
foreach file ( `ls -ln $dir | sort -n -k 5 | nawk '/^-/ && /solve/ && mass<5000000 && len<900{print $NF; mass+=$5; len +=length($NF)+1}'` )
    grep -l CELL ${dir}/$file >& /dev/null
    if(! $status) then
	set site_cell = `nawk '$1 == "CELL"{printf "%.3f %.3f %.3f %.3f %.3f %.3f\n", $2,$3,$4,$5,$6,$7;exit}' ${dir}/$file | nawk '! /[^0-9 \.]/ && NF'`
	
	# make sure we got it
	if($#site_cell == 6) then
	    echo "retrieved SOLVE cell: $site_cell"
	    echo "from: ${dir}/$file"
	    break
	endif
	# keep looking
    endif
end
if($#site_cell != 6) then
    echo ""
    echo "WARNING: unable to obtain unit cell for $solvefile. "
    echo "we will assume $CELL"
    echo ""
    set site_cell = "$CELL"
endif


# $coordfile now contains the SOLVE sites in mlphare format

echo "got $sites atoms from $solvefile"
goto GotSites
exit







read_general:
################################################################################

 #####   ######    ##    #####            ####   ######  #    #  ######  #####
 #    #  #        #  #   #    #          #    #  #       ##   #  #       #    #
 #    #  #####   #    #  #    #          #       #####   # #  #  #####   #    #
 #####   #       ######  #    #          #  ###  #       #  # #  #       #####
 #   #   #       #    #  #    #          #    #  #       #   ##  #       #   #
 #    #  ######  #    #  #####            ####   ######  #    #  ######  #    #

################################################################################
#   Read in metal atoms from a site file of no particular format
################################################################################
# Reads   : $sitefile, ${tempfile}sitereader.awk (the extraction program), $CELL, $SG
# Writes  : $coordfile (output of sitereader.awk), $sites, $site_cell, $params
# Leaves  : via "goto GotSites" on success; via "goto GetSites" with $sitefile
#           cleared when the file or the awk program is missing, or no sites
#           are found.
if(! -e "$sitefile") then
    echo "ERROR: $sitefile does not exist! "
    set sitefile = ""
    goto GetSites
endif
    
if(! -e "${tempfile}sitereader.awk") then
    echo "ERROR: ${tempfile}sitereader.awk is missing! "
    echo "       unable to read $sitefile"
    set sitefile = ""
    goto GetSites
endif

# search for consecutive triplets of 3-decimal numbers between -1.1 and 1.1
nawk -f ${tempfile}sitereader.awk "$sitefile" >! ${tempfile}sites

set sites = `cat ${tempfile}sites | wc -l`
if("$sites" == 0) then
    echo "ERROR: no atoms found in ${sitefile}! "
    rm -f ${tempfile}sites >& /dev/null
    set sitefile = ""
    goto GetSites
endif

echo "found $sites sites in $sitefile"
mv ${tempfile}sites $coordfile

# best refine occupancies first
set params = 2
	
# see if we can get a cell too
# On a CELL line, the first six plain numbers larger than 5 are taken as the
# cell.  Both the counter and the collected text start afresh on every CELL
# line, so an incomplete line cannot leak numbers into the next one.
cat "$sitefile" |\
nawk 'toupper($1) == "CELL" && NF>=7{n=0; cell=""\
    for(i=1;i<=NF;++i){\
      if($i+0>5 && $i+0==$i){cell = cell " " $i;++n};\
      if(n==6){print cell; exit}}}' |\
cat >! ${tempfile}cell
set site_cell = `cat ${tempfile}cell`
rm -f ${tempfile}cell >& /dev/null

# only fall back to (and warn about) $CELL for the space groups matched here
if(($#site_cell != 6)&&("$SG" =~ [pPcC]2[21][21]*)) then
    echo ""
    echo "WARNING: no unit cell in $sitefile. "
    echo "we will use $CELL"
    echo ""
    set site_cell = "$CELL"
endif
goto GotSites
exit

















flatten:
################################################################################################################

######  #     #
#     # ##   ##
#     # # # # #
#     # #  #  #
#     # #     #
#     # #     #
######  #     #

################################################################################################################
#	Solvent flatten with DM
################################################################################################################
# Reads   : ${mtzDIR}mlphare.mtz, $FOM, $bestFOM, $Cycle, $SG, $SOLVENT,
#           $default_trials, ${scriptDIR}dm.com
# Writes  : ${mtzDIR}dm_<trial>.mtz and ${logDIR}dm_<trial>.log per solvent trial,
#           $mapmtz (the MTZ the FFT section will use),
#           $bestDMtrial ("solvent% Cycle SG FOM mtz-file") and $BEST_YET when
#           this run beats every earlier one
# Leaves  : always via "goto FFT".

# clear "best yet" flag (we are about to find out)
unset BEST_YET

set flatten_this = ${mtzDIR}mlphare.mtz

# run OASIS here?
#if($?SAD) then
if(0) then
    echo "running ${scriptDIR}oasis.com on mlphare sites"
    ${scriptDIR}oasis.com $scriptfile >! ${logDIR}oasis.log
    if(! $status) then
	mv oasised.mtz ${mtzDIR} >& /dev/null
	set flatten_this = ${mtzDIR}oasised.mtz
    else
	echo "failed! "
	echo "we will use ${flatten_this} instead"
    endif
endif

if(! -e "${scriptDIR}dm.com") then
    echo "dm is not available."
    set NO_DM
endif
if($?NO_DM) then
    # see if this FOM is better than the best so far
    set temp = `echo "$FOM $bestFOM" | nawk '$1+0>$2+0{print $1} $1+0<$2+0{print $2}'`
    if("$temp" != "$bestFOM") then
	# new winner
	set bestFOM = "$FOM"

	# remember all the conditions used to make this MTZ
	# The file named here is copied to $bestmtz as soon as Best_not_Best
	# runs, so it must be the file that exists right now: mlphare.mtz.
	# (the flip-labelled copy, mlphare_<label>.mtz, is only made later)
	set bestDMtrial = "0% $Cycle $SG $FOM ${mtzDIR}mlphare.mtz"
	set BEST_YET	
    else
#	echo "$scriptfile seems inferior to ${scriptfile}.best"
    endif
    
    # skip over DM
    set mapmtz = ${mtzDIR}mlphare.mtz
    goto FFT
endif

################################################
if(! $?bestrial) then
    # initialize global scorekeepers
    set bestrial = "0"
    set bestDMscore = "999999"
endif
# "local" scorekeeper
set bestMAPscore = 999999
# default map source for this round: the unflattened phases.  Any successful
# DM trial below replaces it; without this, a round in which every trial
# crashes would send a stale (or undefined) $mapmtz to the FFT section.
set mapmtz = ${mtzDIR}mlphare.mtz

# make up a list of solvent contents to try
set trials = "$default_trials"

# user preference overrides default trials
if("$SOLVENT" != "") then
    set trials = "$SOLVENT"
endif

set trials = `echo "$trials"`
set temp = `echo "$trials" | nawk '{for(i=1;i<NF;++i) print $i"%,"} NF>1{print "and"} {print $NF"%"}'`
echo "running ${scriptDIR}dm.com with $temp solvent for auto cycles each"

foreach trial ( $trials )
    
    # should all be 2-characters wide
    echo -n "${trial}% : "
    
    # here is where we actually run the dm script
    ${scriptDIR}dm.com ${trial}% ${flatten_this} >! ${logDIR}dm_${trial}.log
    if(($status)||(! -e dmed.mtz)) then
	echo "DM crashed! "
	# keep going...
	continue
    endif
    
    # save the MTZ
    mv dmed.mtz ${mtzDIR}dm_${trial}.mtz
    rm -f solvent.map >& /dev/null
    
    # print out the free residual
    # (smallest positive value in the per-cycle table, the row it was found
    #  on, and the number of rows)
    cat ${logDIR}dm_${trial}.log |\
    nawk '/Cycle_number  Free_R_factor/{getline; getline; \
	  while(NF==3){print; getline}}'  |\
    nawk 'BEGIN{min=10} \
	  $3<min && ($3+0) > 0 {min = $3; bstep=NR} \
	  $2<min && ($2+0) > 0 {min = $2; bstep=NR} \
	  END{if(bstep+0!=0){print min, bstep, NR}}'  |\
    cat >! ${tempfile}R_dm
    
    # get FOMDM too
    echo "go" | mtzdump hklin ${mtzDIR}dm_${trial}.mtz | nawk 'NF>5' |\
     nawk '$NF=="FOMDM" && $(NF-1)=="W" {FOM=$(NF-4)} END{print FOM+0}' |\
    cat >! ${tempfile}FOMDM

    # now retrieve the "DM residual"
    set R_dm = `nawk '$1+0>0{printf "%d", $1*1000}' ${tempfile}R_dm`
    set FOMDM = `cat ${tempfile}FOMDM`
    rm -f ${tempfile}FOMDM >& /dev/null
    
    # decide on how to score these?
    set DMscore = ""
    # use real-space residual as a "score"
    #set DMscore = "R_dm"
    
    if("$DMscore" == "") then
	# user edited the script?
	
	# print it out for user to see
	echo "$FOMDM" | nawk '{printf "FOMDM = %5.3f\n", $1}'
	
	# lower score is better, so score on 1000/FOMDM
	set DMscore = `echo "$FOMDM" | nawk '$1+0>0{printf "%d", 1000/$1}'`
	if("$DMscore" == "") then
	    # must have just crashed
	    set DMscore = `echo $trial | nawk '{printf "%d", sqrt(($1-50)^2)*1000}'`
	endif	
    else
	# print it out for user to see
	nawk 'NF==3{printf "R_dm = %5.3f ( %d/%d )\n", $1, $2, $3}' ${tempfile}R_dm
    endif
    rm -f ${tempfile}R_dm >& /dev/null
    
    if($DMscore < $bestDMscore) then
	# we have a new, overall winner!
	set bestDMscore = "$DMscore"
	
	# remember all the conditions used to make this MTZ
	set bestDMtrial = "${trial}% $Cycle $SG $FOM ${mtzDIR}dm_${trial}.mtz"
	set BEST_YET
    endif
    # check for "local" winner
    if($DMscore < $bestMAPscore) then
	# update mtz file to use for the map this round
	set bestMAPscore = $DMscore
	set mapmtz = ${mtzDIR}dm_${trial}.mtz
    endif
end

goto FFT


FFT:
# Turn $mapmtz into a map with fft.com, note the size of the largest bones
# fragment, and file the outputs under their standard names.
${scriptDIR}fft.com $mapmtz >! ${logDIR}fftphase.log
if($status) then
    echo "WARNING: map generation failed! "
    rm -f ffted.map >& /dev/null
    goto Best_not_Best
endif

# the largest fragment of the bones trace is a fair indicator of correctness;
# keep a running maximum of it (it does not currently influence BEST_YET:
# both "set BEST_YET" on a new record and "unset BEST_YET" otherwise are
# disabled)
set big_bone = `nawk '/Maximum fragment/{print $NF}' ${logDIR}fftphase.log`
if("$big_bone" != "") then
    if("$big_bone" > "$biggest_bone") set biggest_bone = "$big_bone"
endif

# give everything fft.com produced its (probably temporary) standard name
if(-e fftpick.pdb) then
    mv fftpick.pdb ${oDIR}pick.pdb
endif
if(-e bones.o) then
    mv bones.o ${oDIR}bones.o
endif
if(-e ffted.map) then
    mv ffted.map ${mapDIR}phased.map
endif
if(-e ffted.omap) then
    mv ffted.omap ${oDIR}phased.omap
endif
rm -f ffted.omacro

# ${oDIR}sites.pdb and ${scriptDIR}sites.mlphare
# were made at the "make_pdb:" label


# now make an O macro for looking at this?


Best_not_Best:
################################################################################################################
                                 
 #####   ######   ####    #####   ##### 
 #    #  #       #          #    #     #
 #####   #####    ####      #        ##
 #    #  #            #     #       #
 #    #  #       #    #     #
 #####   ######   ####      #       #

################################################################################################################
# see if this round of flattening produced a superior map
################################################################################################################
# Reads   : $BEST_YET (set by the flatten section), $bestDMtrial, $bestmtz,
#           $scriptfile, $SG, $FLIP_state and the FLIP_* request flags
# Writes  : ${scriptfile}.best, $bestmtz and the best_* copies of the map,
#           bones, pick and sites files when this round is the best so far
# Leaves  : via "goto Flip" while flipping is requested and unfinished,
#           otherwise via "goto more_atoms".
if($?BEST_YET) then
    # map was judged to be the best one yet
    # regardless of "flip" state or atom finding
    
    # back up the script
    echo "best-looking solution yet! "
    echo "copying $scriptfile to ${scriptfile}.best"
    cp $scriptfile ${scriptfile}.best
    cp ${scriptDIR}sites.mlphare ${scriptDIR}best_sites.mlphare >& /dev/null
    
    # copy the MTZ from this new "best" run to a safe location
    # (the file name is the last word of $bestDMtrial)
    set temp = `echo $bestDMtrial | nawk '{print $NF}'`
    echo "copying $temp to $bestmtz"
    cp $temp $bestmtz

    # change SG in the o macros
    # Only macros that already exist are rewritten: filtering a missing file
    # would create an empty macro, which the report below would then
    # advertise as viewable.  nawk is used as everywhere else in this script.
    foreach omacro ( ${oDIR}best.omacro ${oDIR}latest.omacro )
	if(-e "$omacro") then
	    cat "$omacro" |\
	    nawk -v SG="$SG" '/^sym_set/{$NF=SG} {print}' |\
	    cat >! ${tempfile}
	    mv ${tempfile} "$omacro"
	endif
    end

    echo ""
    
    # report on files produced by last FFT
    if(-e ${mapDIR}phased.map) then
	echo "${mapDIR}best_phased.map is a normalized ccp4 map from $bestmtz"
	if(-e ${oDIR}phased.omap) then
	    echo "${oDIR}best_phased.omap is an o version (${ofmt}) of ${mapDIR}best_phased.map"
	    if(-e ${oDIR}best.omacro) echo "you can view it now in o using ${oDIR}best.omacro"
	endif
    endif

    # back-up "best" files in o directory
    cp -f ${mapDIR}phased.map ${mapDIR}best_phased.map >& /dev/null
    cp -f ${oDIR}phased.omap ${oDIR}best_phased.omap >& /dev/null
    cp -f ${oDIR}bones.o     ${oDIR}best_bones.o     >& /dev/null
    cp -f ${oDIR}pick.pdb    ${oDIR}best_pick.pdb    >& /dev/null
    cp -f ${oDIR}sites.pdb   ${oDIR}best_sites.pdb   >& /dev/null

else
    echo "none of these were better than $bestmtz"
    echo "${scriptfile}.best is still the best mlphare script."
    echo ""
    
    # no improvement this round, so maybe we should
    # chuck the changes from last time?
    
    # register new atoms as bad?
endif

# now decide where to go next
if(("$FLIP_state" < 999)&&(($?FLIP_OCC)||($?FLIP_HAND)||($?FLIP_SG))) then
    # flipping is either in progress, or needs to be done
    goto Flip
endif

# we are either done with flipping,
# or user requested to forgo this rigmarole
    
# report on files produced by last FFT
if((-e ${mapDIR}phased.map)&&(! $?BEST_YET)) then
    # only display this message if we haven't already 
    # reported on "best" maps above, and aren't going
    # to be moving these files to new "FLIP_label" names
    echo "${mapDIR}phased.map is a normalized ccp4 map from $mapmtz"
    if(-e ${oDIR}phased.omap) then
	echo "${oDIR}phased.omap is an o version (${ofmt}) of ${mapDIR}phased.map"
	if(-e ${oDIR}latest.omacro) echo "you can view it now in o using ${oDIR}latest.omacro"
    endif
endif
    
# go find more sites
goto more_atoms























Flip:
################################################################################################################

 ######  #          #    #####
 #       #          #    #    #
 #####   #          #    #    #
 #       #          #    #####
 #       #          #    #
 #       ######     #    #

################################################################################################################
#	Flip over axes, space group, etc.
################################################################################################################
# First visit only (fewer than two words in $FLIP_labels): build the list of
# flip states to try, create one sub-directory and one set of O macros per
# state under ${oDIR}, and pick the label for state 1.
# Reads   : $FLIP_SG / $FLIP_HAND / $FLIP_OCC (which flips were requested),
#           $SG, $otherSG, $CELL, $scriptfile, $oDIR, $tempfile
# Writes  : $FLIP_labels, $FLIP_state, $FLIP_label, $TENTATIVE,
#           ${scriptfile}.unflipped, ${oDIR}allmaps.omacro,
#           ${oDIR}<label>/load.omacro and ${oDIR}<label>/map


# prepare to flip out
if($#FLIP_labels < 2) then
    # flipping is not set up yet

    # set up flipping of variable values
    set FLIP_state = 1
    
    # this flag indicates that Phaser is now screwing around
    # so, if anything bad happens, we should come back here
    # and try something else!
    set TENTATIVE
    
    echo ""
    echo "now we will try other sign/hand conventions."
    
    # start "flips" from best script we have
    if(-e "${scriptfile}.best") then
	echo "copying ${scriptfile}.best to ${scriptfile}.unflipped"
	cp ${scriptfile}.best ${scriptfile}.unflipped >& /dev/null
    else
	# no best script exists, so there is nothing to flip
	echo "ERROR: we have no sites! "
	set BAD
	goto done
    endif
    
    
    # create list of things to try (in the form of filename labels)
    # each requested kind of flip doubles the list by appending a suffix pair
    set FLIP_labels = "flip"
    if(($?FLIP_SG)&&("$otherSG" != "")) then
	set FLIP_labels = "$SG $otherSG"
    endif
    if($?FLIP_HAND) then
	set temp = ""
	foreach label ( $FLIP_labels )
	    set temp = "$temp ${label}_+hand ${label}_-hand"
	end
	set FLIP_labels = "$temp"
    endif
    if($?FLIP_OCC) then
	set temp = ""
	foreach label ( $FLIP_labels )
	    set temp = "$temp ${label}_+occ ${label}_-occ"
	end
	set FLIP_labels = "$temp"
    endif
    
    # origin shift? 
    
    # make this a multiword variable (count number of flip-states)
    set FLIP_labels = ( $FLIP_labels )
    if($#FLIP_labels == 1) then
	# only the bare starting label is left: nothing to flip after all
	unset FLIP_SG
	set otherSG = ""
	set FLIP_state = 999
	goto more_atoms
    endif
    
    # now create an O macro for viewing all these maps!
    set odir = `cd ${oDIR}; pwd`
    set center = `echo $CELL | nawk '{print $1/2, $2/2, $3/2}'`
    
    # start the "master" o macro
    echo "! root working directory" >! ${oDIR}allmaps.omacro
    echo "symbol root $odir"        >> ${oDIR}allmaps.omacro
    echo ""                         >> ${oDIR}allmaps.omacro
    foreach FLIP_label ( $FLIP_labels )
	# create a directory under o directory for this flip state
	# (anything of that name that is not a directory is removed first)
	test -d ${oDIR}${FLIP_label}
	if($status) then
	    rm -f ${oDIR}${FLIP_label} >& /dev/null
	    mkdir ${oDIR}${FLIP_label}
	endif
	
	# make up a shorter label (for o objects)
	echo "$FLIP_label" | nawk 'BEGIN{FS="_"} \
	    $2 ~ /^[Pp]/{if(substr($2,3)<substr($2,2)/2){print "R"}else{print "L"}} \
	    /\+occ/{print "+"}\
	    /-occ/{print "-"}\
	    /\+hand/{print "R"}\
	    /-hand/{print "L"}' |\
	nawk '{printf "%s", $1}' >! ${tempfile}shortlabel
	set short_label = `cat ${tempfile}shortlabel`
	rm -f ${tempfile}shortlabel >& /dev/null
	
	# make up a color code
	echo "$FLIP_label" | nawk 'BEGIN{FS="_";R=0;G=0;B=1} \
	    $2 ~ /^[Pp]/{if(substr($2,3)<substr($2,2)/2){\
		R=0.5}else{R=1}} \
	    /\+occ/ {G+=0.5}\
	    /-occ/  {G-=0.5}\
	    /\+hand/{B+=0.5}\
	    /-hand/ {B-=0.5} END{print R+0 "," G+0 "," B+0}' |\
	nawk '{printf "%s", $1}' >! ${tempfile}rgb
	set RGB = `cat ${tempfile}rgb`
	rm -f ${tempfile}rgb >& /dev/null
	
	# cancel the above two (for now)
	set RGB = white
	set short_label = map
	set msg = `echo "$FLIP_label" | nawk 'BEGIN{FS="_"} {print $1, $2, $3, $4}'`
	
	# space group to put in macros
	# (labels that start with P carry their own space group as first part)
	set sg = "$SG"
	if("${FLIP_label}" =~ P*) set sg = `echo ${FLIP_label} | nawk 'BEGIN{FS="_"} {print $1}'`

	# make an "individual" macro for each flip state
	cat << EOF-omacro >! ${oDIR}${FLIP_label}/load.omacro
! current working directory
symbol cwd ${odir}/${FLIP_label}
symbol cwd .

! read in a peak-pick of phased.omap (for grabbing onto)
sam_atom_in \${cwd}/pick.pdb pick
mol pick
obj pick
zone ;
end

sym_set ; ; $sg
sym_cell
! cen_xyz $center

! read in pdb version of the metal sites
sam_atom_in \${cwd}/sites.pdb sites
mol sites
obj sites
zone ;
end
sym_set ; ; $sg
sym_cell

! display them as big spheres
sketch_cpk sites
sym_sphere sites sym 30
sketch_cpk sym1
clear_flags
sketch_cpk sym2
clear_flags
sketch_cpk sym3
clear_flags
sketch_cpk sym4
clear_flags
sketch_cpk sym5
clear_flags
sketch_cpk sym6
clear_flags
sketch_cpk sym7
clear_flags
sketch_cpk sym8
clear_flags
sketch_cpk sym9
clear_flags
sketch_cpk sym10
clear_flags

! read in and display the bones trace
read \${cwd}/bones.o
bone_setup skel bones 30 1 2 3 4 5
bone_draw

! use newer fastmap feature
fm_file \${cwd}/../maps/phased.map $short_label $sg
!          radius style n sig color
fm_set $short_label 25     solid 1 1.0 $RGB 


! use "old reliable" map commands
map_cache
map_active_center
map_file \${cwd}/phased.omap
map_object $short_label
!         dx dy dz sig color linestyle
map_param 25 25 25 1   $RGB  0.5 0 1
map_draw

! convenient @map macro
menu @map on

EOF-omacro

	# make an "individual" map macro for each flip state
	cat << EOF-omacro >! ${oDIR}${FLIP_label}/map
! use newer fastmap feature
fm_file \${cwd}/../maps/phased.map $short_label $sg
!          radius style n sig color
fm_set $short_label 25     solid 1 1.0 white 


! use "old reliable" map commands
map_cache
map_active_center
map_file \${cwd}/phased.omap
map_object $short_label
!         dx dy dz sig color linestyle
map_param 25 25 25 1 $RGB 0.5 0 1
map_draw

EOF-omacro

	# make a "master" o macro for viewing all maps in succession
	cat << EOF-omacro >> ${oDIR}allmaps.omacro
! --------------------------------------------
message "click on an atom to view $msg"
wait_id

! change the effective "working directory"
symbol cwd \${root}/${FLIP_label}
! --------------------------------------------
EOF-omacro
	# copy over the "individual" macro into this one, from its third line on.
	# nawk is used instead of the obsolete "tail +3": newer tail
	# implementations take "+3" for a file name and would append the wrong
	# text (plus an error) to the master macro.
	nawk 'NR >= 3' ${oDIR}${FLIP_label}/load.omacro |\
	cat >> ${oDIR}allmaps.omacro
    end
    
#    cat << EOF-omacro >> ${oDIR}allmaps.omacro
#message "click on an atom to view 'best' map"
#wait_id
#
#@${odir}/map
#EOF-omacro
    
    ###############################
    # now, preemptively set FLIP_label for "first" flipping round
    # output files are already generated, 
    # but we need the new label name
    # (nawk prints the field whose number is the last word, i.e. $FLIP_state)
    set FLIP_label = `echo "$FLIP_labels $FLIP_state" | nawk '{print $($NF)}'`
endif

# flipping is now set up.




##########################
#  per-state handling: everything from here on runs once per flip state
##########################

# forget the convergence history of the previous run
set Memory = 0
# proceed with caution? 
#set params = 3


###################
# file the products of the run that just finished under the current label

# script and logs
cp ${scriptfile} ${scriptfile}.${FLIP_label}
cp ${logfile} ${logfile}.${FLIP_label}
mv ${logDIR}fftphase.log ${logDIR}fftphase.log.${FLIP_label}


# flattened MTZs (dm_NN.mtz) and their logs get the label appended;
# with DM switched off there are none to rename
set files = `ls -1 ${mtzDIR} | nawk '/^dm_[0-9][0-9]\.mtz$/'`
if($?NO_DM) set files = ""
foreach dm_mtz ( $files )
    # the two digits after "dm_" are the solvent trial
    set trial = `echo $dm_mtz | nawk '{print substr($1,4,2)}'`
    set dm_mtz = ${mtzDIR}$dm_mtz
    mv $dm_mtz ${mtzDIR}dm_${trial}_${FLIP_label}.mtz
    mv ${logDIR}dm_${trial}.log ${logDIR}dm_${trial}_${FLIP_label}.log
    
    # keep $mapmtz pointing at the renamed file (only used in messages here)
    if("$mapmtz" == "$dm_mtz") then
        set mapmtz = "${mtzDIR}dm_${trial}_${FLIP_label}.mtz"
    endif
end

# the mlphare output is copied (not moved) to its labelled name
cp ${mtzDIR}mlphare.mtz ${mtzDIR}mlphare_${FLIP_label}.mtz
# and $mapmtz follows it, again for the sake of the messages
if("$mapmtz" == "${mtzDIR}mlphare.mtz") then
    set mapmtz = "${mtzDIR}mlphare_${FLIP_label}.mtz"
endif


# maps, bones, peak picks and site lists: copy whatever exists
if(-e ${mapDIR}phased.map) then
    cp ${mapDIR}phased.map ${mapDIR}${FLIP_label}_phased.map
endif
if(-e ${oDIR}phased.omap) then
    cp ${oDIR}phased.omap ${oDIR}${FLIP_label}/phased.omap
endif
if(-e ${oDIR}bones.o) then
    cp ${oDIR}bones.o ${oDIR}${FLIP_label}/bones.o
endif
if(-e ${oDIR}pick.pdb) then
    cp ${oDIR}pick.pdb ${oDIR}${FLIP_label}/pick.pdb
endif
if(-e ${oDIR}sites.pdb) then
    cp ${oDIR}sites.pdb ${oDIR}${FLIP_label}/sites.pdb
endif
if(-e ${scriptDIR}sites.mlphare) then
    cp ${scriptDIR}sites.mlphare ${scriptDIR}sites_${FLIP_label}.mlphare
endif


# tell the user about the map from the last FFT, unless the "best" report
# has already covered it
if((-e ${mapDIR}phased.map)&&(! $?BEST_YET)) then
    echo "${mapDIR}${FLIP_label}_phased.map is a normalized ccp4 map from $mapmtz"
    if(-e ${oDIR}phased.omap) then
        echo "${oDIR}${FLIP_label}/phased.omap is an o version (${ofmt}) of ${mapDIR}${FLIP_label}_phased.map"
        if(-e ${oDIR}latest.omacro) echo "you can view it now in o using ${oDIR}latest.omacro"
    endif
endif


# if this state was judged the best so far, remember its label for the end
# (original author's open question: does $bestDMtrial need changing too?)
if($?BEST_YET) then
    set best_FLIP = "$FLIP_label"
endif




############################
# advance to the next flip state ...
@ FLIP_state++
# ... and look up its label (nawk prints the field numbered by the last word)
set FLIP_label = `echo "$FLIP_labels $FLIP_state" | nawk '{print $($NF)}'`




############################
# if this was the last run, we need to close up shop
if( $FLIP_state > $#FLIP_labels ) then
    # we just finished the last "flip cycle"
    set FLIP_label = ""
    
    echo ""
    echo "finished flipping signs and axes."
    
    # we've tried everything, so go get the new winner
    if(-e "${scriptfile}.best") then
	echo "copying ${scriptfile}.best back to $scriptfile"
	cp ${scriptfile}.best $scriptfile >& /dev/null

	cp ${scriptDIR}best_sites.mlphare ${scriptDIR}sites.mlphare >& /dev/null
	cp ${mtzDIR}mlphare_${best_FLIP}.mtz ${mtzDIR}mlphare.mtz 
	if(-e "${scriptfile}.unflipped") mv ${scriptfile}.unflipped ${scriptfile}.old

	# restore "best" files in o directory as well
	cp -f ${mapDIR}best_phased.map ${mapDIR}phased.map >& /dev/null
	cp -f ${oDIR}best_phased.omap  ${oDIR}phased.omap  >& /dev/null
	cp -f ${oDIR}best_bones.o      ${oDIR}bones.o      >& /dev/null
	cp -f ${oDIR}best_pick.pdb     ${oDIR}pick.pdb     >& /dev/null
	cp -f ${oDIR}best_sites.pdb    ${oDIR}sites.pdb    >& /dev/null
	
	# recover the space group name that ended up being the "best"
	set newSG = `echo "$bestDMtrial" | nawk '{print $3}'`
	if("$?FLIP_SG") then
	    # we tried 2 space groups
	    set bestSG = "$newSG"
	    echo "$bestSG was the best space group."
	endif
    endif
    
    # no longer experimenting
    unset TENTATIVE
    # signal that filpping is done
    set FLIP_state = 999
    goto more_atoms
endif

#############################
# "ordinary" flip run

# every flip cycle starts from the DERIV ... END cards of the unflipped script
nawk '$1 ~ /^DERIV/,/END/{print}' ${scriptfile}.unflipped >! $coordfile
# and from the unflipped script itself (drops the last script and its "bad" atom list)
cp -f ${scriptfile}.unflipped ${scriptfile} >& /dev/null

# for separation
echo ""


# negate real occupancies when the flip label contains "-occ"
if( "$FLIP_label" =~ *-occ* ) then
    echo "negating real occupancies "

    # flip the sign of column 6 on every ATOM card
    # (occupancies can get stuck on the wrong side of 0)
    nawk '$1~/^ATOM/{$6 = -$6} {print}' $coordfile >! ${tempfile}
    mv ${tempfile} $coordfile >& /dev/null

    # join this message to the next one when the hand is flipped too
    if( "$FLIP_label" =~ *-hand* ) then
        echo -n "and "
    endif
endif


# invert atom coordinates when the flip label contains "-hand"
if( "$FLIP_label" =~ *-hand* ) then
    echo "flipping all atoms through the origin "

    # negate columns 3-5 of every ATOM card (sometimes the hand can be wrong too)
    nawk '$1~/^ATOM/{$3= -$3; $4= -$4; $5= -$5;} {print}' $coordfile >! ${tempfile}
    mv ${tempfile} $coordfile >& /dev/null
endif
#if( "$FLIP_label" =~ "$otherSG"* ) echo -n "and "



# change space group when the label starts with a space group name
if( "$FLIP_label" =~ P[1-6]* ) then
    # the space group is the part of the label before the first "_"
    set newSG = `echo $FLIP_label | nawk -F "_" '{print $1}'`

    # print a message every time the space group changes
    if("$SG" != "$newSG") then
        if(("$FLIP_label" =~ *-occ*)||("$FLIP_label" =~ *-hand*)) echo -n "and "
        echo "switching to $newSG "
    endif
endif
# (any other label leaves $newSG untouched; switching "back" is disabled:)
#if("$firstSG" != "") set newSG = "$firstSG"



# anything else?

# put this new $coordfile and $newSG into refinement
if("$newSG" != "$SG") then
    goto reindex
endif

goto Write_Script


















MinusOne:
########################################################################################################

 #    #     #    #    #  #    #   ####            ####   #    #  ######
 ##  ##     #    ##   #  #    #  #               #    #  ##   #  #
 # ## #     #    # #  #  #    #   ####   #####   #    #  # #  #  #####
 #    #     #    #  # #  #    #       #          #    #  #  # #  #
 #    #     #    #   ##  #    #  #    #          #    #  #   ##  #
 #    #     #    #    #   ####    ####            ####   #    #  ######

########################################################################################################
#
#	Manage "minus-one" analysis
#
#	that is, throw out a site, and see if it comes back in a difference Fourier
#
########################################################################################################

# placeholder only: no site is removed or re-tested yet
#
# planned steps:
#   - always eliminate atom #1
#   - obliterate anything within 1A of this site ?
#   - remember where it was?
set MINUS_ONE = "X Y Z"

echo "not implemented "

# carry on with the site list as it is
goto GotSites






















more_atoms:
################################################################################################################

 #    #   ####   #####   ######            ##     #####   ####   #    #   ####
 ##  ##  #    #  #    #  #                #  #      #    #    #  ##  ##  #
 # ## #  #    #  #    #  #####           #    #     #    #    #  # ## #   ####
 #    #  #    #  #####   #               ######     #    #    #  #    #       #
 #    #  #    #  #   #   #               #    #     #    #    #  #    #  #    #
 #    #   ####   #    #  ######          #    #     #     ####   #    #   ####

################################################################################################################
#   look for new sites using a difference Fourier
################################################################################################################


# get the best ordering of the Fs, based on refined occupancies
#
# step 1: for every derivative in $scriptfile add up column 6 and column 7
#         of its ATOM cards; output lines are
#             <deriv#> F <sum of column 6>
#             <deriv#> D <sum of column 7>
cat $scriptfile |\
nawk '$1~/^DERIV/{++deriv}\
      $1~/^ATOM/{++count[deriv]; \
	 fp[deriv]=fp[deriv]+$6; fpp[deriv]=fpp[deriv]+$7}\
      END{for(deriv in count) if(count[deriv]){\
	  print deriv, "F", fp[deriv];\
	  print deriv, "D", fpp[deriv]}}' |\
cat >! ${tempfile}fp_fpp

# step 2: get the dataset label names from the LABIN card (and its "-"
#         continuation lines); FPHn=label becomes "F n label",
#         DPHn=label becomes "D n label"
cat $scriptfile |\
nawk '$1 ~ /^LABIN/{print; while($NF == "-"){getline; print}}' |\
nawk 'BEGIN{RS=" "} NF != 0 && $1 != "-"' |\
nawk 'NF!=0' |\
nawk '/^FPH/{print "F", substr($0,4)+0, substr($0,index($0,"=")+1)}\
      /^DPH/{print "D", substr($0,4)+0, substr($0,index($0,"=")+1)}' |\
cat >> ${tempfile}fp_fpp

# step 3: combine labels with occupancies -> "F|D  label  sum"
# (the sums from step 1 are stored first, then looked up by derivative number)
cat ${tempfile}fp_fpp |\
nawk '$2=="F"{fp[$2 $1]=$3} $2=="D"{fpp[$2 $1]=$3}\
      $1=="F"{print "F", $NF, fp[$1 $2]} $1=="D"{print "D", $NF, fpp[$1 $2]}' |\
cat >! ${tempfile}Forder

# sort on the third column (the sum), numerically, largest value first.
# "-k3" replaces the obsolete "+2" key syntax, which current sort
# implementations take for a file name.
set Forder = `sort -nr -k3 ${tempfile}Forder | nawk '$1=="F"{print $2}'`
set Dorder = `sort -nr -k3 ${tempfile}Forder | nawk '$1=="D"{print $2}'`
rm -f ${tempfile}Forder >& /dev/null

# check for missing data sets
# ($Forder/$Dorder are empty when no label came out of the sort above;
#  ${tempfile}fp_fpp still holds the "F n label" / "D n label" lines)
if("$Forder" == "") then
    # no isomorphous differences?
    set temp = `nawk '$1=="F"' ${tempfile}fp_fpp | wc -l`
    if($temp) then
	# there WERE datasets, they just don't have any signal
	# so let's just leave them out of the analysis
	set Forder = "no "`nawk '$1=="F"{print $NF}' ${tempfile}fp_fpp`
    endif
endif
if("$Dorder" == "") then
    # no anomalous differences?
    set temp = `nawk '$1=="D"' ${tempfile}fp_fpp | wc -l`
    if($temp) then
	# there WERE datasets, they just don't have any signal
	# so let's just leave them out of the analysis
	set Dorder = "no "`nawk '$1=="D"{print $NF}' ${tempfile}fp_fpp`
    endif
endif
rm -f ${tempfile}fp_fpp >& /dev/null

# check for intentionally missing data sets
if(("$Forder" == "")||("$Dorder" == "")) then
    # there really are no (desired) differences of one kind or the other
    # make sure we don't accidentally include any from the mtz
    #
    # list "<type> <label>" for every F and D column of $bestmtz
    # (assumes the column type is the next-to-last and the label the last
    #  field of each mtzdump column line - TODO confirm against mtzdump output)
    # Both fields must be printed: the filters below select on the type in $1
    # and print the label in $2.  Printing the label alone made this whole
    # safeguard a no-op.
    echo "go" | mtzdump hklin $bestmtz |\
    nawk '/OVERALL FILE/,/LIST OF REF/' |\
    nawk 'NF>3 && $1~/^[0-9]/' |\
    nawk '$(NF-1) ~ /^[FD]$/{print $(NF-1), $NF}' |\
    cat >! ${tempfile}
    set temp = `nawk '$1=="F"{print $2}' ${tempfile}`
    if(("$Forder" == "")&&("$temp" != "")) then
	# make sure these are not included (without dangling "no"s)
	set Forder = "no $temp"
    endif
    set temp = `nawk '$1=="D"{print $2}' ${tempfile}`
    if(("$Dorder" == "")&&("$temp" != "")) then
	# make sure these are not included
	set Dorder = "no $temp"
    endif
    rm -f ${tempfile} >& /dev/null
endif

# get a good "exclude" value:
# collect the value that follows every DI... and DA... keyword on the EXCLUDE
# cards of the script, and keep the largest of each kind
nawk '$1~/^EXCLUDE/{for(i=1;i<=NF;++i){\
      if($i~/^DI/)print "DISO", $(i+1);\
      if($i~/^DA/)print "DANO", $(i+1);}}' $scriptfile |\
nawk '/^DI/{if($2>max_diso) max_diso=$2} \
      /^DA/{if($2>max_dano) max_dano=$2} \
END{print max_diso, max_dano}' >! ${tempfile}
# use a little wider spread (1.5x), but only for values above 1
set max_diso = `nawk '$1+0>1{print 1.5*$1}' ${tempfile}`
set max_dano = `nawk '$2+0>1{print 1.5*$2}' ${tempfile}`
rm -f ${tempfile}

# calculate the average, overall B-factor of these sites:
# the last column of each ATOM card, weighted by |column 6| + column 7
nawk '$1~/^ATOM/{weight = sqrt($6*$6)+$7; wsum += weight;\
    B += $NF*weight;}\
    END{if(wsum+0==0)wsum=1; print B/wsum}' $scriptfile >! ${tempfile}
# averages that are not above 1 are thrown away ...
set avgB = `nawk '$1+0>1' ${tempfile}`
rm -f ${tempfile}
# ... and replaced by the Wilson B
if("$avgB" == "") set avgB = $wilsonB



########
# calculate a phase-combined difference Fourier
#
# any FH.mtz/FH.hkl already in $mtzDIR are parked under ${tempfile} names
# while bestFH.com runs, and put back afterwards
mv ${mtzDIR}FH.mtz ${tempfile}FH.mtz  >& /dev/null
mv ${mtzDIR}FH.hkl ${tempfile}FH.hkl  >& /dev/null

# run the packaged difference-Fourier script
# (its messages about FH.hkl, FH.mtz and FH_* files are filtered out,
#  because those files are removed or moved right below)
${scriptDIR}bestFH.com $bestmtz ${hiRES}-1000A $Forder  $Dorder |\
egrep -v "FH.hkl|FH.mtz|FH_" 
# FH.hkl, FH.mtz and wFH_Patt.map are not kept
rm -f FH.hkl FH.mtz wFH_Patt.map >& /dev/null
if(-e FH_Patt.map) then
    echo "${mapDIR}FH_Patt.map is ready."
    mv FH_Patt.map ${mapDIR}FH_Patt.map  >& /dev/null
    # NOTE(review): a "mv wFH_Patt.map ${mapDIR}wFH_Patt.map" used to follow
    # here, but it could never succeed because wFH_Patt.map is removed just
    # above.  Drop wFH_Patt.map from that rm if the map is wanted in $mapDIR.
endif
if(-e FH_Four.map) then
    echo "${mapDIR}FH_Four.map is ready."
    mv FH_Four.map ${mapDIR}FH_Four.map  >& /dev/null
endif
mv bestFH.log ${logDIR} >& /dev/null

# restore the parked copies
mv ${tempfile}FH.mtz ${mtzDIR}FH.mtz  >& /dev/null
mv ${tempfile}FH.hkl ${mtzDIR}FH.hkl  >& /dev/null

# now we should have ${mapDIR}FH_Four.map and ${mapDIR}FH_Patt.map to look at.

# now we should have ${mapDIR}FH_Four.map and ${mapDIR}FH_Patt.map to look at.


########
# pick only peaks that we haven't already seen (listed in $scriptfile)
echo ""
echo "looking for new ${newSIG}-sigma peaks in ${mapDIR}FH_Four.map"
echo "that are not already in $scriptfile"
# the screen output of pick.com is kept: the peak table is parsed from it below
${scriptDIR}pick.com ${mapDIR}FH_Four.map $scriptfile $newSIG sigma >! ${tempfile}.log
mv pick.log ${logDIR} >& /dev/null
echo ""
# count the picked peaks before pick.pdb is filed away
set peaks = `nawk '/^ATOM/' pick.pdb | wc -l`
mv pick.pdb ${oDIR}/FH_Fourier.pdb >& /dev/null
if($peaks == 0) then
    # no difference features?
    echo "no new sites found."
    rm -f ${tempfile}.log >& /dev/null
    goto no_new_atoms
endif

# vecref can't handle more than 50 sites:
# leave room for the lines already in sites.mlphare
set max_new_sites = `cat ${scriptDIR}sites.mlphare | wc -l | nawk '$1+0<50{print 50-$1}'`
# nothing is printed when there are 50 lines or more; fall back to 50
if("$max_new_sites" == "") set max_new_sites = 50

# extract fractional coordinates from the pick.com screen output
# (lines with more than 5 fields whose first field looks like 0.xxx or 1.xxx)
nawk 'NF>5 && $1~/[01]\.[0-9][0-9][0-9]/{print}' ${tempfile}.log |\
head -$max_new_sites >! ${tempfile}peaks
rm -f ${tempfile}.log >& /dev/null
# format: xf yf zf mult height/sig  dist  neighbor...
# format: xf yf zf mult height/sig  dist  neighbor...



# check these against the Patterson map
set peaks = `wc -l < ${tempfile}peaks`
# column 5 (height/sigma) of the last listed peak
set lowsigma = `tail -1 ${tempfile}peaks | nawk '{print $5+0}'`
echo "checking $peaks new peaks > ${lowsigma}*sigma against ${mapDIR}FH_Patt.map"

# get "reference" sites from ${scriptDIR}sites.mlphare
# these will serve to check for cross-vectors with new peaks
# (they all get atom number 0; "occupancy" is |column 6| + column 7)
nawk '$1~/^ATOM/{print "ATOM H 0", sqrt($6*$6)+$7, $3, $4, $5, $NF}' ${scriptDIR}sites.mlphare >! ${tempfile}atoms.out
# vecref format: ATOM Ee atom# occ xf yf zf Bfac
# we are using hydrogen because these occupancies should be in "electrons"

# discard original sites if there are > 50 of them?
if("$max_new_sites" == "50") echo -n "" >! ${tempfile}atoms.out

# convert fractional coordinates to vecref format and append them:
# peaks are numbered 1,2,3... in file order, "occupancy" is the multiplicity column
nawk -v avgB=$avgB '{++n;\
    print "ATOM H", n, $4,$1,$2,$3,avgB}' ${tempfile}peaks >> ${tempfile}atoms.out
# vecref format: ATOM Ee atom# occ xf yf zf Bfac



# use vecref to check against the Patterson
# each word of $rcycles holds the three comma-separated arguments of one
# vecref CYCLES card.  A longer schedule used to be assigned here and was
# immediately overridden; it is kept for reference only:
#set rcycles = "3,0,0 3,0,0 3,10,0 3,10,0 3,0,10 3,0,10"
set rcycles = "3,0,0 3,0,0 3,0,0 3,0,0"

# reduce resolution a bit so vecref won't crash
set refRES = `echo $hiRES | nawk '{print (0.8*1/($1^3))^(-1/3)}'`
set refLOG = ${logDIR}/pick.log


# "dribble" peaks into refinement?

# do several cycles of rejection:
# the atoms written by one vecref run are the input of the next one
foreach rcycle ( $rcycles )
    # turn "a,b,c" into "a b c" for the CYCLES card
    set rcycle = `echo "$rcycle" | nawk 'BEGIN{FS=","} {print $1, $2, $3}'`
    
    # vecref can't handle more than 50 sites: keep the 50 lowest atom numbers.
    # "-k3,3" (sort on the third field only) replaces the obsolete "+2 -3"
    # key syntax, which current sort implementations take for a file name.
    sort -n -k3,3 ${tempfile}atoms.out |\
    head -50 |\
    cat >! ${tempfile}atoms.in
    
    # refine "occupancies" against the Patterson
    rm -f ${tempfile}atoms.out >& /dev/null
    # vecref is given R3/R32 in place of H3/H32
    set vecrefSG = $SG
    if("$vecrefSG" == "H3") set vecrefSG = R3
    if("$vecrefSG" == "H32") set vecrefSG = R32
    vecref mapin ${mapDIR}FH_Patt.map \
           ATOUT ${tempfile}atoms.out << EOF-vecref >>& $refLOG
    SPACEGROUP $vecrefSG
    RESOLUTION ${refRES}
    CYCLES $rcycle
    BREF
    @${tempfile}atoms.in
EOF-vecref
    if(($status)||(! -e ${tempfile}atoms.out)) then
	# vecref failed or wrote nothing: treat it as "all atoms rejected"
	echo -n "" >! ${tempfile}atoms.out
	break
    endif
end
# delete the vecref input file
rm -f ${tempfile}atoms.in >& /dev/null

# combine the peak list with the vecref occupancies:
# the ATOM lines are read first and stored by atom number, then the n-th peak
# line is prefixed with  occ[n]*5/mult ; lines are sorted on that score
# (largest first) and only scores above zero are kept
nawk '/^ATOM/{occ[$3]=$4; next} NF>5{++n;\
    printf "%12.4f %s\n", occ[n]*5/$4, $0}' ${tempfile}atoms.out ${tempfile}peaks |\
sort -nr | nawk '$1+0>0' >! ${tempfile}survivors
rm -f ${tempfile}atoms.out ${tempfile}peaks >& /dev/null
# ${tempfile}survivors contains a "combined" score for each peak
# format:  score xf yf zf mult height/sig dist neighbor...

echo ""
set survivors = `wc -l < ${tempfile}survivors`
echo -n "$survivors survived, "

# only show a handful:
# raise the score cutoff one unit at a time until no more than 8 peaks pass
set sigCUT = 0
while( $survivors > 8 )
    @ sigCUT++
    set survivors = `nawk -v sigCUT=$sigCUT '$1+0>sigCUT' ${tempfile}survivors | wc -l`
    # safety catch?
end

if(! $survivors) then
    # no suitable sites were found.
    echo "what a shame..."
    rm -f ${tempfile}survivors >& /dev/null
    goto no_new_atoms
endif

# print sites out for user to review (without the leading score column)
echo "top ${survivors}:"
echo "  x        y        z        mult  height/sigma   dist  from nearest neighbor"
head -${survivors} ${tempfile}survivors |\
nawk '{print substr($0,14)}'

# check for special positions (multiplicity column above 1)
set specials = `head -${survivors} ${tempfile}survivors | nawk '$5+0>1' | wc -l`
if($specials) then
    # ignore these?  jiggle them?
    echo "peaks with mult > 1 are on special positions."
    echo ""
endif
echo ""



# now we need to decide which of these to add to the refinement
set NEW_SITES_FOUND

# don't add more sites than we already have:
# take 70% (truncated) of the ATOM count of each derivative, smallest one wins
nawk '$1~/^DERIV/{++deriv} $1~/^ATOM/{++atom[deriv]}\
    END{for(deriv in atom) printf "%d\n", atom[deriv]*0.7}' $scriptfile |\
sort -n >! ${tempfile}
set max_new_sites = `head -1 ${tempfile}`
rm -f ${tempfile} >& /dev/null
# but always allow at least one
if(("$max_new_sites" == "")||("$max_new_sites" == "0")) then
    set max_new_sites = 1
endif

# if we're not in a hurry, don't add more than 2 sites at a time
if(! $?HURRY_UP) then
    if("$max_new_sites" > 2) set max_new_sites = 2
endif

# don't look stupid: never promise more sites than survived
if($max_new_sites > $survivors) then
    set max_new_sites = $survivors
endif












# retrieve maximum atom number used so far (the digits after "ATOM")
set atom = `nawk '$1~/^ATOM/{print substr($1,5)}' $scriptfile | sort -n | tail -1`

# write one mlphare ATOM card per accepted peak, numbered on from $atom;
# occupancies and B are left as "?" placeholders
head -$max_new_sites ${tempfile}survivors |\
nawk -v atom=$atom '{++atom; \
    printf " ATOM%-3d ANO  %6.3f %6.3f %6.3f  ?.???  ?.??? BFAC    ?.???\n",\
       atom, $2, $3, $4;}' >! ${tempfile}newATOMs
rm -f ${tempfile}survivors
# ${tempfile}newATOMs has the new mlphare atoms, mlphare format


# retrieve full coordinate list from the script (DERIV ... END, minus the END lines)
nawk '$1 ~ /DERIV/,/END/{print}' $scriptfile | nawk '! /END/{print}' >! ${coordfile}

# as well as the LABIN cards (with their "-" continuation lines)
nawk '$1 ~ /^LABIN/{print; while($NF == "-"){getline; print}}' $scriptfile >! ${tempfile}.LABELS

# append the new atoms to the end of each derivative:
# the "?" lines (read first) are saved; at the first blank line after each
# DERIV card they are printed, each followed by an ATREF line
nawk '/[?]/{++n; ATOM[n]=$0} \
      /DERIV/{deriv=1} \
      NF==0 && deriv!=""{\
        for(i=1;i<=n;++i){print ATOM[i]; print "ATREF"};\
	deriv=""}  ! /[?]/{print}' ${tempfile}newATOMs ${coordfile} >! ${tempfile}
mv ${tempfile} ${coordfile}

# add the new sites to the "previously tried" list (as OLDATOMs)
nawk '{print "OLD" substr($0,2)}' ${tempfile}newATOMs >! ${tempfile}badatoms
rm -f ${tempfile}newATOMs >& /dev/null
    
# either feed the new sites back into refinement, or just hand them to the user
if($?MORE_SITES) then
    # go back and clean up these new sites for further refinement
    echo ""
    echo "adding top $max_new_sites sites to $scriptfile"

    # occupancies will probably be okay, but not xyz!
    set params = 3

    # this procedure will use ${coordfile} and ${tempfile}badatoms
    set sites = `cat ${coordfile} | wc -l`
    goto GotSites
else
    # user asked us to stop now
    mv ${coordfile} newATOMS.mlphare
    rm -f ${tempfile}badatoms  >& /dev/null
    echo ""
    echo "new mlphare atoms written to newATOMS.mlphare"
endif

no_new_atoms:

# flip again?
# only when flipping was done before (state 999), more sites are wanted,
# new sites were found, and this is not the final flip.
# otherwise, that's all she wrote...
# (jump to the minus-one procedure instead?   #goto MinusOne)
if(($FLIP_state != 999)||(! $?MORE_SITES)||(! $?NEW_SITES_FOUND)||($?FINAL_FLIP)) goto done

# flipping was done before, and we have found new sites
unset NEW_SITES_FOUND
set FLIP_labels = ""
#set FINAL_FLIP

# combine bad,old atom lists in the "best" script:
# the "best" script without its OLDATOM/BADATOM lines comes first ...
egrep -v "^OLDATOM|^BADATOM" ${scriptfile}.best >! ${scriptfile}.unflipped
# ... followed by the OLDATOM/BADATOM lines of both scripts, duplicates removed
cat ${scriptfile}.best ${scriptfile} |\
egrep "^OLDATOM|^BADATOM" |\
nawk '! seen[$0]{print $0; seen[$0]=$0}' >> ${scriptfile}.unflipped
cp ${scriptfile}.unflipped ${scriptfile}.best >& /dev/null

# make maps again?
#goto FFT
touch ${logDIR}fftphase.log
goto Flip

exit

done:
################################################################################

 #####    ####   #    #  ######
 #    #  #    #  ##   #  #
 #    #  #    #  # #  #  #####
 #    #  #    #  #  # #  #
 #    #  #    #  #   ##  #
 #####    ####   #    #  ######

################################################################################
#   Clean up
################################################################################

# clean up the temporary files that are still around
#rm -f ${tempfile}* >& /dev/null
rm -f ${tempfile}EXCLUDE >& /dev/null
rm -f ${tempfile}.LABELS >& /dev/null
rm -f $coordfile         >& /dev/null

# user-requested exit
if($?QUIT) then
    echo "Okay, Bye! "
    exit
endif

# unhappy ending: no script, no "best" mtz, or something flagged BAD
if((! -e "$scriptfile")||(! -e "$bestmtz")||($?BAD)) then
    echo "Dang! "
    exit 9
endif

# copy the "best" script back over to the new one, keeping three generations
# of backups (.old, .older, .oldest).
# This is only done when a "best" script exists: rotating without it would
# move $scriptfile to .old and leave nothing in its place.
if(-e ${scriptfile}.best) then
    if(-e ${scriptfile}.older) mv ${scriptfile}.older ${scriptfile}.oldest
    if(-e ${scriptfile}.old) mv ${scriptfile}.old ${scriptfile}.older
    if(-e ${scriptfile}) mv ${scriptfile} ${scriptfile}.old
    cp ${scriptfile}.best ${scriptfile}
endif

echo ""
echo ""
echo "average values for sites used in this Phaser run: "
cat ${scriptDIR}sites.mlphare
if(-e ${oDIR}sites.pdb) echo "a PDB version of these sites is in: ${oDIR}sites.pdb"

echo ""
echo -n "phased version of $firstmtz is ${mtzDIR}mlphare.mtz"
# "$?bestSG" expands to 1 or 0; bestSG is set in the flip section when the
# FLIP_SG switch is on
if("$?bestSG") then
    echo -n "(in $bestSG)"
endif
echo ""
# first word of $bestDMtrial, if it is a number above zero
set temp = `echo $bestDMtrial | nawk '$1+0>0{print $1}'`
if("$temp" != "") then
    echo "${temp} solvent flattened ${mtzDIR}mlphare.mtz is $bestmtz"
else
    echo "and it has been backed up as $bestmtz"
endif

# explain the maps
if(-e ${mapDIR}best_phased.map) echo "${mapDIR}best_phased.map is a normalized ccp4 map from $bestmtz"
if(-e ${oDIR}best_phased.omap) then
    echo "${oDIR}best_phased.omap is an o version (${ofmt}) of ${mapDIR}best_phased.map"
    if(-e ${oDIR}best.omacro) echo "you can view it now in o using ${oDIR}best.omacro"
    if(-e ${oDIR}allmaps.omacro) echo "you can view all maps in o using ${oDIR}allmaps.omacro"
endif

echo ""
echo "Phaser Elves are done."
date +"%T %D"

exit






























































exit
Setup_scripts:
########################################################################################################

  ####    ####   #####      #    #####    #####   ####
 #       #    #  #    #     #    #    #     #    #
  ####   #       #    #     #    #    #     #     ####
      #  #       #####      #    #####      #         #
 #    #  #    #  #   #      #    #          #    #    #
  ####    ####   #    #     #    #          #     ####

########################################################################################################
#
#	Set up all auxillary scripts before we run mlphare
#
########################################################################################################
echo ""
echo "setting up scripts in $scriptDIR"








########################################################################################################
# write the README file first
cat << EOF >! README.Phaser

Phaser Elves guide to their scripts

1) In a rush?  Just type this:

$scriptfile >! logs/mlphare.log
${scriptDIR}dm.com 50% mlphared.mtz >! logs/dm_50.log
${scriptDIR}fft.com dmed.mtz >! logs/fft.log

# if you have "o" installed:
ono
  O > @ffted.omacro

That should give you something to look at in "o".

Phases calculated from the metal sites in $scriptfile will be placed in a file
called ./mlphared.mtz by the first command.  

The second command runs dm on these phases, using 50% as the solvent content.  
The solvent-flattening results are placed in ./dmed.mtz.

The fft.com script will calculate a phased electron density map from the 
solvent-flattening results.  If an appropriate conversion program is available 
(mapman, brix), an "o version" of this map will also be generated by fft.com, 
as well as a bones trace, and an o macro (called ffted.omacro) for viewing it.



2) Okay, how does it work?

    The details of how each script behaves are described below.  

    Phaser Elves do a lot of exploratory work to find the correct parameters
for your phasing run.  It is HIGHLY recommended that you allow Phaser Elves to
explore alternative sign/hand/spacegroup/solvent-content parameters before you
look at any maps.  Particularly for MAD data, you can get exactly the same
phasing statistics for a number of choices of sign/hand/spacegroup, but
only one combination of these will give you an interpretable map.  

    However, once you are certain you have the correct sign and hand choice,
run Phaser with the words "no flip" on the command line, to keep them from 
re-exploring all these alternatives.

    Phaser Elves will also calculate a phased difference Fourier, using all
your difference data, once they are done doing all of the above.  By default,
peaks in this difference Fourier are added to the mlphare script (one at a
time), and then the refinement continues.  This process repeats until all the
significant peaks in the difference Fourier have been tried.  To turn this off,
put the words "no add" on the command line.
	

In the ${scriptDIR} directory:

################################################################################
$scriptfile	- mlphare script
    reads: $mtzfile	- cad-ed data sets
    makes: mlphared.mtz	- phases calculated from sites, ready for dm
    usage: $scriptfile [other.mtz]
    
    example: $scriptfile

    description:
	The $scriptfile script has been carefully written to have the best 
	chance of generating a successful mlphare run.  Phaser Elves are 
	careful not to set up $scriptfile to refine indeterminate parameters, 
	such as anomalous occupancy when there are no anomalous diffs, and 
	things like one "y" position in P2.  For derivatives where the 
	anomalous differences seem stronger than the isomorphous ones 
	(typical for MAD), the atom refinement flags are set up to take 
	advantage of this.  
	
	Also, since Phaser Elves don't always refine all the atomic parameters, 
	$scriptfile might only be set up to refine a few.  So, don't believe
	a nice, high FOM if you haven't refined your occupancies yet! 
	
	$scriptfile also contains all the information Phaser Elves need to
	continue refining your atom sites.  So, once you have got it running, 
	you can run Phaser Elves with $scriptfile on their command line, and 
	let them tune things up for you.
	
	Note: if an occupancy is set to exactly zero, Phaser Elves will 
	interpret this as a "true" missing parameter and will never refine 
	it.  If you want to start refining from zero, try using "0.001" instead.
	
	See the mlphare manual for more details on how to edit and run mlphare 
	scripts.


################################################################################
${scriptDIR}dm.com	- "smart" dm script

    reads: mlphared.mtz	- an mtz with some kind of phase information in it
    makes: dmed.mtz	- the solvent-flattened results
    
    usage: ${scriptDIR}dm.com phased.mtz [Fset] [1.8A] [solc%]
    where: 
    phased.mtz contains the (phased) data to flatten   (default: ${mtzDIR}mlphare.mtz)
    Fset       is the set of Fs you want to flatten    (default: most complete F)
    1.8A       is the desired outer resolution limit   (default: ${hiRES}A)
    solc%      is the solvent content of your crystal  (default: 50%)
    
    examples: 
	${scriptDIR}dm.com ${mtzDIR}mlphare.mtz
	  - will flatten an automatically-chosen dataset in 
	    ${mtzDIR}mlphare.mtz at 50% solvent.  
	${scriptDIR}dm.com ${mtzDIR}mlphare.mtz $native 40%
	  - will flatten the $native dataset in ${mtzDIR}mlphare.mtz using 
	  a solvent content of 40%.  
    
    description:
	This script is meant to do a standard, highly-automatic dm run.  Most 
	of the "automatic" stuff it does is finding a suitable F and phase to 
	use in flattening.  There are a lot of things you can do to tweak dm 
	and make it give you better phases, not the least of which is averaging.
	For this reason, dm.com is designed to be easily edited by end-users to
	suit these needs.
	
	Although dm.com was meant for the phased data produced by the Phaser 
	Elves scripts, it should be applicable to almost any phased mtz data, 
	including the "solve.mtz" output from Terwilliger's SOLVE program.

    advanced:
	If you edit or replace this script, Phaser Elves will not overwrite
	your new version of it next time it is run.  You can, therefore, easily
	apply NCS operators just by adding them to dm.com

################################################################################
${scriptDIR}fft.com	- "smart" fft script

    reads: phased.mtz	- an mtz with some kind of phase information in it
           cover.pdb	- a PDB file to "cover" with the map
           phased.map   - (optional) an already-calculated CCP4 map you want to
			  convert to "o format"
    makes: ffted.map	- a CCP4 map of phased.mtz (one ASU only)
           ffted.omap	- a DSN6 or BRIX version of ffted.map 
			  (extended to cover either cover.pdb or 120% of the cell)
	   bones.o	- a "bones" trace of ffted.omap
	   fftpick.pdb  - a peak-pick of ffted.omap (unless cover.pdb is available)
	   ffted.omacro - an o macro for viewing all these files
	   
    usage: ${scriptDIR}fft.com phased.mtz [Fset] [PHI] [FOM] [1.8A]
    where: 
    phased.mtz contains the (phased) data to transform (default: $bestmtz)
    Fset       is the set of Fs you want to use        (default: most complete F)
    PHI        is the phase set you want to use	       (default: most recent phase)
    FOM        is the FOM weight to use		       (default: most recent FOM)
    1.8A       is the desired outer resolution limit   (default: ${hiRES}A)
    
    examples: 
	${scriptDIR}fft.com dmed.mtz $native FOM
	  - will transform the $native dataset in dmed.mtz using the most-
	    recently added phase in dmed.mtz (PHIDM), but weighted by 
	    mlphare's FOM (FOM in dmed.mtz).  
	${scriptDIR}fft.com maps/2fofc.map pdb/refmac123.pdb
	  - will calculate the ffted.omap and ffted.omacro to cover 
	    pdb/refmac123.pdb with maps/2fofc.map, (no actual fft will 
	    be done).  
    
    description:
	This script is meant to do a quick, "most reasonable" fft of the 
	"best" data in the mtz file provided to it, and make sure it's easy for 
	you to see in o as quickly as possible.  Most of the "automatic" stuff
	it does is finding a suitable F and phase to use (in exactly the same 
	way dm.com does).
	
	For your convenience, if a pdb file is included on the command line, 
	ffted.omap will be extended to "cover" everything within 10A of the 
	atoms in this file.  This comes in very handy if your building takes 
	you significantly out of the unit cell.
	Also for your convenience, the above o-map calculation can be done on 
	a pre-calculated CCP4 map (like one you obtained from refmac).
	
	If no pdb file is provided, fft.com produces fftpeak.pdb, which is 
	just a simple peak-pick on the region covered by the "o" map.  
	fftpeak.pdb is handy when exploring a new map in "o", giving you atoms 
	to "cen_id" on as you move around the map.
	The ffted.omacro also contains a section for reading in the "sites.pdb"
	file (produced by Phaser Elves) to allow you to see the heavy-atom 
	positions used to calculate the map you are looking at.  However, 
	fft.com does not generate this sites.pdb file itself.
	
	Although fft.com was meant for the phased data produced by the Phaser 
	Elves scripts, it should be applicable to almost any phased mtz data, 
	including the "solve.mtz" output from Terwilliger's SOLVE program.

################################################################################
${scriptDIR}bestFH.com	- Matthews "best" FH estimator

    input:  all.mtz	- a cad-ed mtz file with multiple data sets
    output: FH.mtz	- an mtz containing only the estimate of FH
            fh.hkl	- shelx version of FH.mtz
	    FH_Patt.map - a Patterson map of FH
	    FH_Four.map - phased map of FH (if a phase is in all.mtz)
	   
    usage: ${scriptDIR}bestFH.com all.mtz [Fset] [Dset] [1.8A]
    where: 
    all.mtz    contains same-site reflection data      (default: mtz/all.mtz)
    Fset       are the sets of Fs you want to use      (default: all of them)
    Dset       are the sets of Danos you want to use   (default: all of them)
    1.8A       is the desired outer resolution limit   (default: all data)
    PHI        is the phase set you want to use	       (default: most recent phase)
    
    FH_Patt.map is calculated with a 4*rms(FH) cutoff, as calculated by scaleit.
    
    example1: ${scriptDIR}bestFH.com mtz/all.mtz
	will calculate an estimate of FH from all the difference data in 
	mtz/all.mtz.
	
    example2: ${scriptDIR}bestFH.com dmed.mtz
	will calculate an estimate of FH from all the difference data in 
	mtz/all.mtz. (same as above), but will also calculate a phased map 
	of FH, using the most recently-added phase in dmed.mtz (PHIDM).  
	This is usually superior to ordinary difference Fouriers for finding 
	new heavy-atom sites.
    
    example3: ${scriptDIR}bestFH.com mtz/all.mtz no DANOFlo Flo
	same thing, but leave the "DANOFlo" difference data-set and "Flo" data
	set out of the calculation.
    
    description:
	This script offers the "new" functionality of computing a "Matthews FH" 
	estimate.  This analysis not only "averages" information from all your 
	difference data into a single data set, but reduces the systematic error
	produced by cross-terms in the subtraction of anomalous and 
	isomorphous difference data: |FH| == |FPH-FP| != |FPH|-|FP|
    
	In bestFH.com, all anomalous difference data are scaled together, 
	and then added (sigma-weighted) together.  Then, all the possible
	isomorphous differences between "F"s in the mtz are subtracted, 
	scaled, and also added together.  Finally, Dano is scaled against
	Diso, and FH is calculated by the Pythagorean theorem.
	
	Care must be taken in the ordering of the "F" datasets.  For example, 
	in a 3-wavelength MAD experiment: Finf Fpeak Fhi should be the order
	used.  Fhi Fpeak Finf is okay too, but not Finf Fhi Fpeak.  The latter
	would result in Finf-Fhi and Fhi-Fpeak "canceling" each other, because
	the f' differences will have opposite signs.  bestFH.com will try to
	get this ordering right, but you should check the difference dataset
	list to make sure none of them are opposing each other.

	Note also that all the data in mtz/all.mtz should be from 
	crystals with metal sites at the same positions, otherwise, FH will 
	be a mix of the two site constellations.  
    
################################################################################
${scriptDIR}reindex.com	- general-purpose re-indexing script

    input:  data.mtz	    - mtz file to re-index (merged or unmerged)
    output: reindexed.mtz   - mtz file with the new space group
    
    examples: 
	${scriptDIR}reindex.com data.P41212.mtz P43212
	  - will change the space group of "data.P41212.mtz" to P43212 
	    (assuming that is possible), and write the results to 
	    "reindexed.mtz"
	${scriptDIR}reindex.com data.P2221.mtz P2122
	  - will change the space group of "data.P2221.mtz" to the "pseudo" 
	    space group "P2122", which is P2221, but with "a" as the screw 
	    axis.  This is done by leaving the mtz file in P2221, but 
	    permuting the cell (and the data) so that the shortest cell edge 
	    (normally "a"), is moved to the third cell parameter (the one 
	    with screw symmetry).
	
    description:
	This is a general utility for changing the assigned space group of 
	mtz data using the CCP4 program "reindex".  It works on merged and 
	unmerged data.  Re-assignment of the screw/rotation axes of 
	anisotropic orthorhombic space groups is supported (see example 2).
	
	"Flipping" between alternative axis assignments is also easily done.
	Just include the word "flip" on the reindex.com command line to switch
	to the "other" axis assignment.  This may be necessary for any space
	group having two or more cell edges exactly the same length.  The only 
	tricky ones are R3 and P3x, which have four possible axis assignments.  
	To specify the remaining two, use the word "flip" two or three times 
	(respectively).  see ${CDOC}/reindexing.doc for details.
	
	Changing between space groups with different point group, or even
	lattice symmetry is allowed, but unadvisable!  These transformations
	involve merging or "un-merging" spots, which reindex can't do.
    
	Note: moving/removing screw axes will result in the "loss" of 
	some systematic absence reflections, so be careful.  It is probably
	advisable to always merge in P222, and reindex later.
    
################################################################################
${scriptDIR}shelx.com	- automatic, combinatorial shelx script

    input: data.mtz	- mtz file containing one or more data sets
    output: shelx.pdb	- PDB file containing shelx sites and peaks
    
    usage: ${scriptDIR}shelx.com data.mtz P212121 Fname 1s 4A 3 sites diff 200  no Fbad
    where:
	Fname - dataset or dataset pair you specifically want to use
	1s    - sigma cutoff
	4A    - resolution cutoff
	diff #- maximum difference cutoff (default: auto)
	sites - specify number of expected sites (3 above)
	Fbad  - dataset or dataset pair you do not want to use

    examples:
	shelx.com mtz/Fpp.mtz 
	  - use DANO from mtz/Fpp.mtz as the shelx data set
	shelx.com mtz/Fp.mtz mtz/Flo.mtz
	  - use both DANO data sets, as well as F-F between the two mtzs.
	shelx.com mtz/FH.mtz 
	  - use FH in mtz/FH.mtz (no DANO present)
	shelx.com mtz/FH.mtz 0s 1s 2s 4A 3A 2A 5 sites
	  - use sigma cutoffs of 0.0, 1.0, 2.0, and resolution cutoffs of
	    4, 3, and 2A in all possible combinations.  Looking for 5 sites.
	shelx.com mtz/all.mtz no DANOFlo Flo-Fhi
	  - use all difference data in mtz/all.mtz, except for DANOFlo and
	    the difference between Flo and Fhi
	shelx.com mtz/all.mtz P222 P212121 P21212 P21221 P22121 P21212
	  - try all the listed space groups, permuting axes appropriately
	shelx.com mtz/all.mtz /programs/bin/shelxs
	  - different location of shelxs executable
	
    description:
	shelx.com is designed to be an exhaustive, combinatorial interface
	to direct-methods heavy-atom finding with shelx.  Multiple space
	groups, sigma cutoffs, resolution cutoffs, maximum difference
	cutoffs, data files and target numbers of sites can all be specified, 
	and every combination of the specified parameters will be tried, 
	in turn.  This can, of course, take some time.  The terminal output
	from shelx.com is formatted so that you can cut-and-paste it as
	command-line arguments to a second run of shelx.com, if you want
	to re-run a particular combination.
	  The atom positions reported by shelx are filtered for non-special
	positions, and then checked against a Patterson map generated with
	the same data that was provided to shelx.  The CCP4 program "vecref"
	is used to reject atoms that do not agree with the Patterson.
	  The output shelx.pdb file contains the shelx peak-heights/10 as
	the B-factor column, and the "occupancy" (refined by vecref) in the
	occupancy column.  Atoms on special positions are listed, but with
	zero occupancy.  
	  shelx.pdb is written whenever the sum of the products of the shelx 
	peak-heights and the Patterson "occupancies" exceeds the "last" 
	shelx.pdb written.  It is not clear if this is the "best" scoring 
	function, but you can always re-run a single-pass shelx.com run to 
	reproduce any combinations that you are curious about.

################################################################################
${scriptDIR}rantan.com	- automatic, combinatorial rantan script

    input: data.mtz	- mtz file containing one or more data sets
    output: rantan.pdb	- PDB file containing possible heavy-atom sites
    
    usage: ${scriptDIR}rantan.com data.mtz P212121 Fname 1s 4A 3 sites diff 200  no Fbad
    where:
	Fname - dataset or dataset pair you specifically want to use
	1s    - sigma cutoff
	4A    - resolution cutoff
	diff #- maximum difference cutoff (default: auto)
	sites - specify number of expected sites (3 above)
	Fbad  - dataset or dataset pair you do not want to use

    examples:
	see shelx.com (above)
	
    description:
	rantan.com is essentially identical to shelx.com (above), except
	that it uses the CCP4 program "rantan" instead of shelx to calculate
	direct phases.  In this case peaks are picked from an E-Fourier of
	the deduced phases, and then subjected to the "Patterson filter"
	in vecref.  The output file: rantan.pdb contains the height/sigma
	peak heights in the E-Fourier as the B-factor column and the 
	"vecref occupancy" in the occupancy column. 
    
################################################################################
${scriptDIR}rrsps.com	- Recursive Real-Space Patterson Search

    input: Patt.map	- Patterson map file to be deconvoluted
	   boring.pdb	- PDB file containing "uninteresting" sites (to be ignored)
    output: pick.pdb	- PDB file containing picked peaks
    
    usage: ${scriptDIR}rrsps.com FH_Patt.map P212121 P222
    where:
    FH_Patt.map	- is a CCP4 Patterson map 
    P212121	- is the space group you want to try
    
    examples:
	rrsps.com FH_Patt.map P212121 P222
	  - will run a recursive rsps procedure on FH_Patt.map in P212121 and
	    in P222

    description:
	rrsps.com is a recursive shell script.  It actually calls itself.
    
################################################################################
${scriptDIR}pick.com	- sophisticated peak-picker

    input: pickme.map	- map file to be picked (must cover at least one asu)
	   boring.pdb	- PDB file containing "uninteresting" sites (to be ignored)
    output: pick.pdb	- PDB file containing picked peaks
    
    examples: 
	${scriptDIR}pick.com FH_Four.map o/sites.pdb
	  - will pick peaks in FH_Four.map that aren't already within a grid
	    unit or so of atoms in o/sites.pdb
	${scriptDIR}pick.com FH_Four.map
	  - just pick peaks in the map
	${scriptDIR}pick.com FH_Patt.map
	  - pick unique peaks in the Patterson map, displaying multiplicities
    
    description:
	pick.com does more than a simple "peakmax" run.  The actual pick is
	performed on a map of one asu, plus a 10% "buffer zone" padding the
	outside of the asu.  After the peakmax run, peaks picked outside the
	asu are thrown out.  After that, the remaining peaks are symmetry-
	expanded and checked against each other to make sure none of their
	symmetry mates overlap. 
	
	This procedure prevents false, "map-edge" peaks from being picked near 
	the edges of the asymmetric unit, and it also guarantees that none of 
	the output peaks are symmetry mates of each other.  
	
	Peaks on special positions are printed out with occupancies < 1.  The
	B-factor column of pick.pdb contains the height/sigma of the peak.
	
	Including the "boring.pdb" on the command line allows you to specify 
	atom locations that you aren't interested in seeing in the pick.pdb 
	output file (including all their symmetry mates).  This is useful in 
	picking difference Fouriers, which will already have large peaks for 
	the sites used to obtain the phases.
    
################################################################################
${scriptDIR}oasis.com	- experimental OASIS script

    input: sad.mtz	- mtz file containing anomalous differences
	   mlphare.com	- mlphare script containing refined sites
    output: oasised.mtz	- same as sad.mtz, but with improved phases
    
    examples: 
	${scriptDIR}oasis.com mtz/Fpp.mtz scripts/mlphare.com
	  - will convert the sites refined by scripts/mlphare.com and put
	    them into oasis.
	${scriptDIR}oasis.com mtz/Fpp.mtz scripts/mlphare.com 8.76e
	  - same thing, but use an f'' value of 8.76 (otherwise, use
	    the refined occupancy from mlphare)
    
    description:
	oasis is new in CCP4 4.0.  It uses a direct-phasing equation to try
	and resolve the bimodal phase distributions you get with Single-
	Anomalous Difference (SAD) data or SIR data.  oasis has demonstrated
	the ability to substantially improve SAD phases, but Phaser Elves do
	not have enough experience with it to make it the default for SAD
	data.  The results in oasised.mtz can then be solvent-flattened with
	dm.com (above).
    
################################################################################

3)  What about all these data files?

    Phaser Elves do produce a lot of different kinds of output.  This is mainly
    for your convenience, to make reviewing Phaser's explorations easy.  Once
    you have seen a map you can interpret, you can go ahead and delete most of
    these files.  Here is a quick review of what all the files are for:
    
    Heavy-atom positions:
	${scriptfile}
	- contains heavy-atom positions, as well as a record of all positions that
	  were added or removed from the starting script by Phaser Elves.  These
	  "BADATOM" and "OLDATOM" positions are avoided when picking the difference
	  Fourier.
	${scriptDIR}sites.mlphare
	- contains a symmetry-reduced version of the "good" sites in mlphare.com.
	  Atoms that are close enough to be indistinguishable (separation < d/3)
	  are "merged", giving you an "average" site constellation across all
	  derivatives.  This is useful for re-inputting a unique list of sites
	  back into Phaser.
	${oDIR}sites.pdb
	- same as ${scriptDIR}sites.mlphare, but in PDB format.

    Phased data:
	${mtzDIR}mlphare.mtz
	- the output file from the most recent mlphare run.
	${mtzDIR}dm_##.mtz	    
	- results from a ##%-solvent dm run on mlphare.mtz.
	${mtzDIR}best_phased.mtz
	- a copy of what Phaser Elves consider the "best" solution
    
    Maps:
	${mapDIR}best_phased.map
	- a copy of what Phaser Elves consider the "best" map (CCP4 format).
	${mapDIR}phased.map
	- the most-recently calculated map (usually from one of the dm_##.mtz files)
    
    O stuff:
	${oDIR}phased.omap
	- an "o version" of ${mapDIR}phased.map, (dsn6 or brix format) extended
	  to cover 1.7 unit cells (one cell, plus 10% in every direction). 
	${oDIR}bones.o
	- a simple bones trace of ${oDIR}phased.omap (molecule name: SKEL)
	${oDIR}pick.pdb
	- a peak-pick of the area covered by ${oDIR}phased.omap.  Useful for
	  providing atoms to cen_id on while you review the map in o.
	${oDIR}sites.pdb
	- a simple PDB file of where the mlphare-refined sites ended up.
	${oDIR}latest.omacro
	- an o macro for loading and displaying all of the above files.
	${oDIR}latest
	- sub-macro for just loading ${oDIR}phased.omap
	${oDIR}best_*
	- copies of the above files ultimately calculated from ${mtzDIR}best_phased.mtz
	${oDIR}best.omacro
	- an o macro for loading and displaying the best_* files.
	${oDIR}allmaps.omacro
	- an o "super-macro" for displaying all of the alternative "flip"
	  combinations (one at a time) with a single command.
    
    
    Sign/hand flipping results:
	Files like:
	mlphare.com.flip_-hand_+occ or flip_-hand_+occ_phased.map
	- are copies of mlphare.com or phased.map (respectively) that were 
	  calculated for a particular combination of hand, sign and/or space
	  group conventions.  All indications are relative to the initial site
	  constellation provided to/found by Phaser Elves (+hand_+occ).  If you
	  have a chiral space group (i.e. P41/P43), then the space group name
	  is used in place of "flip".
	  
	- under the ${oDIR} directory, however, files are not renamed, but moved
	  into a subdirectory called ${oDIR}flip_+hand_+occ/.  This makes it
	  easy to change "flip states" by just "cd"-ing to the various directories
	  
    
    Deleting files:
	  Once you are satisfied that you have found an interpretable map 
	  (let's say it's in flip_+hand_-occ/, at 45% solvent), 
	  then you can type this:
	  
	  # save the script
	cp ${scriptfile}.flip_+hand_-occ ${scriptfile}
	cp ${scriptDIR}sites_flip_+hand_-occ.mlphare ${scriptDIR}sites.mlphare 
	rm ${scriptfile}.*
	rm ${scriptDIR}sites_*
	
	  # save the data (this file contains the mlphare and dm results)
	cp ${mtzDIR}dm_45_flip_+hand_-occ.mtz ${mtzDIR}best_phased.mtz
	rm ${mtzDIR}dm_* ${mtzDIR}mlphare* 
	
	  # maybe save the CCP4 map
	cp ${mapDIR}flip_+hand_-occ_phased.map ${mapDIR}best_phased.map
	rm ${mapDIR}flip*
	
	  # then take care of the ${oDIR} directory
	cd ${oDIR}
	cp flip_+hand_-occ/* .
	rm -r flip*
	rm best*
	cd ..
	
	  this is easier than re-naming all the proper files by hand.
	  After all that, you should have reduced your disk consumption quite
	  a bit, and running ${oDIR}latest.omacro will display the results
	  of the flip_+hand_-occ run.
	  
################################################################################


4) What if something goes wrong?

    The Phaser elves have been trained to handle a number of common problems 
encountered in using mlphare.  However, if something happens that
is beyond their experience, they will quit, and display the error message they
couldn't understand.  It's up to you to edit $scriptfile, and get it working
again.  However, once you do, you can feed the (runnable) $scriptfile back to
Phaser Elves, and let them tune it up.


5) tips and tricks:

If you start Phaser Elves with "hurry" they will relax their convergence
criteria for mlphare, and finish the refinement earlier.

If you start Phaser Elves with "no flip" they won't try exploring all the
alternative possibilities of sign, hand and space group.

If you start Phaser Elves with "no add" they will exit after picking peaks
in the first difference Fourier they calculate.

If you start Phaser Elves with "6 params" on their command line, they will
begin by refining x, y, z, occ, aocc, and Bfac for all atoms, instead of 
starting with x, y, z only, and working their way up.

---
EOF












write_moreatoms:
########################################################################################################
# Writes ${scriptDIR}shelx.com from the here-document that follows, unless the
# guard below jumps ahead to the write_reindex label.

# don't overwrite user-modified script
# NOTE(review): the file tested here is moreatoms.com, while the script written
# below is shelx.com -- confirm which file this guard is meant to protect.
if(-e ${scriptDIR}moreatoms.com) goto write_reindex

#echo "writing ${scriptDIR}shelx.com "
cat << EOF-shelxscript >! ${scriptDIR}shelx.com
#! /bin/csh -f
#
#
#	shelx.com	- combinatorial shelx-based heavy-atom finder
#
#   will try all possible combinations of an arbitrary number of:
#   1) space groups
#   2) data sets (multiple mtz files are okay)
#   3) resolution cutoffs
#   4) F/sigma cutoffs
#   5) maximum delta-F cutoffs (default to a reasonable value)
#   6) expected site counts
#
#   the results of each shelx run are also passed through the CCP4 "vecref"
#   program, using a Patterson calculated with the same data presented to
#   shelx.  Sites that don't agree with the Patterson are rejected by vecref.
#
#   a pdb file ("outfile" below) is produced for each combinatorial run
#
#   "good" shelx sites are in chain:   D
#   "maybe" shelx sites are in chain:  Q
#   sites on special positions are in:  X
#
#   however, this pdb file will be overwritten if the peak heights look
#   "better" than the last "best" solution.
#
#   the output lines that this script prints to the terminal can each be
#   used as command-line input to re-run the same procedure, updating the
#   output file.
#
# location of shelxs executable
set SHELX = $SHELX
set prefix = "shelx"

# location of awk
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

set mtzfile  = ./mtz/all.mtz
set outfile  = shelx.pdb
set logfile  = ${logDIR}shelx.log
set tempfile = ./shelx_temp

# crystal variables
set hiRES = ""
set loRES = 1000
set CELL  = ""
set SG    = ""
set Ee    = "Se"

# trials
set SGs           = ""
set RESOs         = ""
set target_sites  = "1 3 5 10 15 20"
set sigma_cutoffs = "0 1 2 3"
set max_diffs     = "auto"

# no args? need help
if(\$#argv == 0) goto Help

# this procedure (re)sets most of the above variables
# from either the provided files, or the command line
goto Setup

Help:
cat << EOF

usage: \$0 data.mtz 6 sites P212121

where:
data.mtz	- contains the F, Fs, or Dano you want to run shelx on
6 sites		- your expected number of sites
2A		- your desired resolution cutoff
1sig		- your desired F/sigma cutoff
diff 50		- your desired maximum-difference cutoff
P212121		- space group (optional)

\$0 will run shelxs (direct phasing) on the data provided in the
space group(s) given.

EOF

exit
#
#   This procedure (at the bottom of the script) does the following
#   1) scan the command line for the mtz file(s)
#   2) set the CELL, SG, and other variables
#   3) generate dataset list file: \${tempfile}datasets
Return_from_Setup:

################################################################################
# back-up values (shouldn't be empty)
# Every list below feeds one foreach loop of the combinatorial search, so an
# empty list would make that loop (and everything nested in it) run zero times.

# space groups: fall back to the single SG variable
if("\$SGs" == "")           set SGs           = "\$SG"
# resolution cutoffs: 4 limits derived from hiRES, at 1.0, 0.7, 0.4 and 0.1 of
# 1/hiRES^3 (awk prints nothing when hiRES is empty, leaving RESOs empty)
if("\$RESOs" == "")         set RESOs         = \`echo "\$hiRES 0.1 4" | nawk '\$3-1>0{v=1;vstep=(1-\$2)/(\$3-1); for(i=1;i<=\$3;++i){printf "%.1f ", (v*1/(\$1^3))^(-1/3);v-=vstep;}}'\`
# if the series above came out empty, use hiRES on its own
if("\$RESOs" == "")         set RESOs         = "\$hiRES"
# must assign sigma_cutoffs itself: that is the list the cutSIG loop iterates over
if("\$sigma_cutoffs" == "") set sigma_cutoffs = "1"
if("\$target_sites" == "")  set target_sites  = "1"
if("\$max_diffs" == "")     set max_diffs     = "auto"

# print intended combinatorial
cat << EOF | tee \$logfile
          SGs: \$SGs
  reso limits: \$RESOs
sigma cutoffs: \$sigma_cutoffs
 target sites: \$target_sites
    max diffs: \$max_diffs
EOF
echo -n "    data sets: " | tee -a \$logfile
cat \${tempfile}datasets |\\
nawk 'NF==4{print \$2,"from",\$4} NF>4{print \$2 "-" \$5,"from",\$4,\$7} {printf "               "} END{print ""}' |\\
tee -a \$logfile

echo "running SHELX:            ---- cutoffs ------           shelx results   Patterson-check"
echo "spacegroup data set       reso  sigma maximum    expect found    CFOM  survivors  avg  max  mtz file(s)"


################################################################################
# explore all indicated possibilities
foreach SG ( \$SGs )
    # retrieve ASU count in this space group
    set ASU_per_CELL = \`nawk -v SG=\$SG '\$4==SG{print \$2}' \$CLIBD/symop.lib | head -1\`
    
    
foreach dataset ( \`nawk '{print NR}' \${tempfile}datasets\` )

    # extract labels from the "dataset" file
    set F1    = \`nawk -v dataset=\$dataset 'NR==dataset{print \$2}' \${tempfile}datasets\`
    set SIGF1 = \`nawk -v dataset=\$dataset 'NR==dataset{print \$3}' \${tempfile}datasets\`
    set mtz1  = \`nawk -v dataset=\$dataset 'NR==dataset{print \$4}' \${tempfile}datasets\`
    
    set F2    = \`nawk -v dataset=\$dataset 'NR==dataset{print \$5}' \${tempfile}datasets\`
    set SIGF2 = \`nawk -v dataset=\$dataset 'NR==dataset{print \$6}' \${tempfile}datasets\`
    set mtz2  = \`nawk -v dataset=\$dataset 'NR==dataset{print \$7}' \${tempfile}datasets\`
        
    # (F or D)
    set type  = \`nawk -v dataset=\$dataset 'NR==dataset{print \$1}' \${tempfile}datasets\`
    
    if("\$F2" == "") then
	set F2 = "\$F1"
	set SIGF2 = "\$SIGF1"
	set mtz2  = "\$mtz1"
	unset SUBTRACT
    else
	# need to do the subtraction
	set SUBTRACT
    endif
    
    # check for orthorhombic "pseudo-spacegroups"
    set axes = ""
    set realSG = "\$SG"
    if("\$SG" == P222)    set axes = "a b c"
    if("\$SG" == P212121) set axes = "a b c"
    if("\$SG" == P2221) then
	# P2221 with screw along longest axis
	set axes  = "a b c"
	set realSG = "P2221"
    endif
    if("\$SG" == P2212) then
	# P2221 with screw along mid-length axis
	set axes  = "b c a"
	set realSG = "P2221"
    endif
    if("\$SG" == P2122) then
	# P2221 with screw along shortest axis
	set axes  = "c a b"
	set realSG = "P2221"
    endif
    
    if("\$SG" == P21212) then
	# P21212 with non-screw along longest axis
	set axes  = "a b c"
	set realSG = "P21212"
    endif
    if("\$SG" == P21221) then
	# P21212 with non-screw along mid-length axis
	set axes  = "b c a"
	set realSG = "P21212"
    endif
    if("\$SG" == P22121) then
	# P21212 with non-screw along shortest axis
	set axes  = "c a b"
	set realSG = "P21212"
    endif
    set realSGnum = \`nawk -v SG=\$realSG '\$4==SG{print \$1;exit}' \${CLIBD}/symop.lib\`
    
    if("\$axes" != "") then
	# we have an asymmetric orthorhombic space group 
	set i = 0
	foreach mtz ( \$mtz1 \$mtz2 )
	    @ i = ( \$i + 1 )
	    # get current axis ordering, then
	    # find out what the cannonical one would be
	    # then decide how to go from current ordering to the desired one
	    echo "head" |\\
	    mtzdump hklin \$mtz |\\
	    nawk '/Cell Dimensions/{getline;getline;print}' |\\
	    nawk '{\\
		# print out current axis order \\
		print \$1, "h"; print \$2, "k"; print \$3, "l"}' |\\
	    sort -n |\\
	    nawk '\\
		# add cannonical axis names\\
		NR==1{print \$0, "a"} NR==2{print \$0, "b"} NR==3{print \$0, "c"}' |\\
	    nawk -v axes="\$axes" 'BEGIN{split(axes, abc)} {\\
		# write desired axis ordering in front of cannonical one \\
		print abc[NR], \$0}' |\\
	    sort |\\
	    nawk '# print out new hkl order \\
	          {printf "%s ", \$3} END{print ""}' |\\
	    nawk '\$1 \$2 \$3 \$1 \$2 \$3 !~ /hkl/{\$3 = "-" \$3} {print "reindex", \$1",",\$2",",\$3}' |\\
	    reindex HKLIN \$mtz HKLOUT \${tempfile}.mtz >> \$logfile

	    # this should give us a mapping between any two orthorhombics

	    # now simply re-name the space group in the header
	    echo "SYMM \$realSGnum" |\\
	    mtzutils HKLIN \${tempfile}.mtz HKLOUT \${tempfile}mtz\${i}.mtz >> \$logfile
	    if(\$status) then
		set BAD
		goto Clean_up
	    endif
	    rm -f \${tempfile}.mtz >& /dev/null
	end
    else
	# just re-name the space group in each mtz header
	echo "SYMM \$realSGnum" |\\
	mtzutils HKLIN \$mtz1 HKLOUT \${tempfile}mtz1.mtz >> \$logfile
	if(\$status) then
	    set BAD
	    goto Clean_up
	endif
	echo "SYMM \$realSGnum" |\\
	mtzutils HKLIN \$mtz2 HKLOUT \${tempfile}mtz2.mtz >> \$logfile
	if(\$status) then
	    set BAD
	    goto Clean_up
	endif
    endif
    

    # extract the two datasets into a single file
    cad hklin1 \${tempfile}mtz1.mtz hklin2 \${tempfile}mtz2.mtz \\
        hklout \${tempfile}cadded.mtz << EOF-cad >> \$logfile
    LABIN FILE 1 E1=\$F1  E2=\$SIGF1
    CTYPI FILE 1 E1=F    E2=Q
    LABOU FILE 1 E1=F1   E2=SIGF1
    LABIN FILE 2 E1=\$F2  E2=\$SIGF2
    CTYPI FILE 2 E1=F    E2=Q
    LABOU FILE 2 E1=F2   E2=SIGF2
EOF-cad
    
    # make sure we have the proper cell
    set CELL  = \`echo "head" | mtzdump hklin \${tempfile}cadded.mtz | nawk '/Cell Dimensions/{getline;getline;print}'\`
    
    # come up with a reasonable protein size (assume Vm=2.5)
    echo \$CELL |\\
    nawk 'NF==6{s=3.1415926535897899419/180; A=cos(s*\$4); B=cos(s*\$5); G=cos(s*\$6); \\
     skew = 1 + 2*A*B*G - A*A - B*B - G*G ; if(skew < 0) skew = -skew;\\
     printf "%.3f\\n", \$1*\$2*\$3*sqrt(skew)}' |\\
    cat >! \${tempfile}volume
    set CELLvolume = \`cat \${tempfile}volume\`
    rm -f \${tempfile}volume >> /dev/null
        
    # estimate no of protein atoms in cell assume Vm=2.5 and protein atom = 14 amu
    set protein = \`echo "\$CELLvolume \$ASU_per_CELL" | nawk '\$2+0>0{printf "%d", (((\$1+1)/2.5)/14)/\$2}'\`
    if("\$protein" == "") set protein = 2000
    #echo "we estimate \$protein protein atoms in ASU."


    # iterate over resolution cutoffs
    foreach cutRES ( \$RESOs )
    
    # apply scaleit (to scale isomorphous differences, and check DIFF recommendation)
    if(\$?SUBTRACT) then
	set refine = "refine anisotropic"
	set scale  = ""
    else
	# either Dano or FH data (squash F2)
	set refine = "analyze"
	set scale  = "SCALE FPH1 0.000001"
    endif
    scaleit hklin \${tempfile}cadded.mtz \\
            hklout \${tempfile}scaled.mtz << EOF-scale | tee \${tempfile}scaleit.log >> \$logfile
    RESOLUTION 1000 \$cutRES
    \$refine
    \$scale
    LABIN FP=F1 SIGFP=SIGF1 FPH1=F2 SIGFPH1=SIGF2
EOF-scale
    
    # iterate over sigma- and maximum-difference cutoffs
    foreach cutSIG ( \$sigma_cutoffs )
    foreach diff ( \$max_diffs )
    
    # use scaleit's recommendation by default
    if("\$diff" == "auto") set diff = \`nawk '/acceptable differences/{print \$NF}' \${tempfile}scaleit.log\`
    if("\$diff" == "") set diff = 99999999
    
    # make a Patterson map
    set labin = "F1=F1 SIG1=SIGF1 F2=F2 SIG2=SIGF2"
    if(\$?SUBTRACT) then
	set scale = 1
	set fftmtz = \${tempfile}scaled.mtz
    else
	# F2 is zero (for calculating DIFF)
	set scale = 0.0000001
	set fftmtz = "\${tempfile}cadded.mtz"
    endif
    fft HKLIN \$fftmtz MAPOUT \${tempfile}patt.map << EOF-fft >> \$logfile
    TITLE \${cutRES}A Patterson of \$F1-\${scale}\$F2 excluding diff>\$diff
    RESOLUTION \$cutRES
    PATTERSON
    LABIN \$labin
    SCALE F2 \$scale 0
    EXCLUDE DIFF \$diff
    EXCLUDE SIG1 \$cutSIG
EOF-fft
    # normalize the Patterson
    echo "scale sigma" |\\
    mapmask mapin  \${tempfile}patt.map \\
           mapout \${tempfile}.map >> \$logfile
    mv \${tempfile}.map \${tempfile}patt.map
    if(\$?user_patt) then
	# option to use an outside Patterson?
	cp \$user_patt \${tempfile}patt.map
    endif
    
    # extract the dataset in shelx format
    set exportmtz = "\$fftmtz"
    set labin = "FP=F1 SIGFP=SIGF1"
    if(\$?SUBTRACT) set labin = "\$labin FPH=F2 SIGFPH=SIGF2"
    if("\$type" == "D") then
	# make sure we treat Danos properly
	set labin = "DP=\$F1 SIGDP=\$SIGF1"
	set exportmtz = \${tempfile}mtz1.mtz
    endif

    
    mtz2various hklin \$exportmtz hklout \${tempfile}.hkl << EOF >> \$logfile
    OUTPUT SHELX
    FSQUARED
    RESOLUTION \$cutRES
    #EXCLUDE SIGP \$cutSIG
    #EXCLUDE SIGH \$cutSIG
    EXCLUDE DIFF \$diff
    LABIN \$labin
EOF
    # strip off the text header
    nawk '! /[A-Z]/' \${tempfile}.hkl >! \${prefix}.hkl

    # use the output to make a shelx .ins file as well
    nawk '! /[A-Z]/{exit} {print}' \${tempfile}.hkl |\\
    grep -v HKLF >! \${prefix}.ins
    rm -f \${tempfile}.hkl >& /dev/null
    
    # finish it up
    echo "SFAC N \$Ee"    >> \${prefix}.ins
    echo "\$protein \$ASU_per_CELL" | nawk '{print "UNIT", sqrt(\$1),\$2}' >> \${prefix}.ins
    echo "OMIT \$cutSIG" >> \${prefix}.ins
    echo "TREF"         >> \${prefix}.ins
    echo "HKLF 4"       >> \${prefix}.ins
    echo "END"          >> \${prefix}.ins
    
    foreach expect ( \$target_sites )
    
    # print mini-report (suitable for re-input on command line)
    if(\$?SUBTRACT) then
	set dataset = "\${F1}-\${F2}"
    else
	set dataset = "\$F1"
    endif
    echo "\$SG \$dataset \$cutRES \$cutSIG \$diff \$expect" |\\
	 nawk '{printf "%-10s %-13s %4.1fA %4.1fs  diff %-5.1f %3d sites ", \$1, \$2, \$3, \$4, \$5, \$6}'
    
    # shelx wants sites in CELL not ASU
    set sites_in_cell = \`echo "\$expect \$ASU_per_CELL" | nawk '{print \$1 * \$2}'\`
    
    # edit the shelx input file
    cat \${prefix}.ins |\\
    nawk -v sites=\$sites_in_cell '/^UNIT/{\$3=sites} {print}' |\\
    cat >! \${tempfile}.ins
    mv \${tempfile}.ins \${prefix}.ins >& /dev/null
    
    #######################################
    # run shelx
    \${SHELX} \${prefix} >> \$logfile
    #######################################
    
    # check the results
    cat \${prefix}.lst |\\
    nawk '/Heavy-atom assignments:/{slist=1} /optimization/{slist=0}\\
	  /Peak  Atom     x/{plist=1}\\
          slist && \$5~/[0-9]/{++s;ssum+=\$6;mins=\$6}\\
	  plist && \$2~/^Q/{++q;qsum+=\$1;if(maxq+0==0)maxq=\$1}\\
          END{print "SITES", s+0; \\
	      if(s+0==0) s=1; savg=ssum/s;\\
	      if(q+0==0) q=1; qavg=qsum/q;\\
	      if(qavg==0) qavg=100; if(maxq+0==0)maxq=100;\\
	      print "HEIGHT", ssum/s, mins;\\
	      print "QAVG", qsum/q, maxq;\\
	      print "RATIO", savg/qavg;\\
	      print "DISCR", mins/maxq}' |\\
    cat >! \${tempfile}.results
    cat \${prefix}.lst |\\
    nawk '/with CFOM/{print "CFOM", \$NF+0; exit}' |\\
    cat >> \${tempfile}.results
    cat \${prefix}.lst |\\
    nawk '/Try    Ralpha Nqual/,/CFOM Range/' |\\
    nawk 'NF>=6 && ! /[A-Z]/{++n;sum+=\$6} \\
          END{if(n) print "meanCFOM", sum/n}' |\\
    cat >> \${tempfile}.results
    
    set found  = \`nawk '/^SITES/{print \$2}' \${tempfile}.results\`
    set DISCR  = \`nawk '/^DISCR/{print \$2}' \${tempfile}.results\`
    set RATIO  = \`nawk '/^RATIO/{print \$2}' \${tempfile}.results\`
    set bCFOM  = \`nawk '/^CFOM/{print \$2}' \${tempfile}.results\`
    set mCFOM  = \`nawk '/^meanCFOM/{print \$2}' \${tempfile}.results\`
    rm -f \${tempfile}.results >& /dev/null
    
    # display for user
    echo "\$found \$bCFOM " | nawk '{printf "%-4d  %4.2f  ", \$1, \$2, \$3 " " \$4 " " \$5}'
    
    # retrieve shelx's atoms in a more managable format
    cat \${prefix}.lst |\\
    nawk '/Heavy-atom assignments:/{list=1} /optimization/{list=0}\\
	  /Peak  Atom     x/{plist=1}\\
          list {print \$NF, \$0}\\
	  plist && \$2 ~ /^Q/{print \$0, \$1}' |\\
    nawk 'NF==7{n=substr(\$2,match(\$2,"[1-9]"));\\
                Ee=substr(\$2,1,match(\$2,"[1-9]")-1);\\
		x=\$3;y=\$4;z=\$5;occ=\$6;height=\$NF;\\
		chain="D"; if(Ee=="Q")chain=Ee; if(occ<1)chain="X";\\
    print x, y, z, occ, height/10, Ee, n, chain;}' |\\
    cat >! \${tempfile}allatoms
    # format: xf yf zf 1/mult height Ee atmno chain
    
    # use vecref to check against the Patterson
    cat \${tempfile}allatoms |\\
    nawk -v cutRES=\$cutRES '{++n} \$4 !~ /\\.0/{next;\$1+=0.001; \$2+=0.001; \$3+=0.001}\\
    NF>6{print "ATOM H", n, 1, \$1, \$2, \$3, 79*(cutRES/3)^2}' |\\
    cat >! \${tempfile}atoms.out
    set orig = \`cat \${tempfile}atoms.out | wc -l\`
    
    # do some cycles of atom rejection
    set rcycles = "3,0,0 3,0,0 3,10,0 3,10,0 3,0,10 3,0,10"
    set rcycles = "3,0,0 3,0,0 3,0,0"
    if(\$?NO_PATT) set rcycles = ""
    foreach rcycle ( \$rcycles )
	set rcycle = \`echo "\$rcycle" | nawk 'BEGIN{FS=","} {print \$1, \$2, \$3}'\`
	
	# reduce resolution a bit so vecref won't crash
	set refRES = \`echo \$cutRES | nawk '{print (0.8*1/(\$1^3))^(-1/3)}'\`
	
	# make sure there aren't >50 sites
	# Sort numerically on the third field and keep the first 50 lines as the
	# vecref input.  The key is written as "-k 3": the obsolete zero-origin
	# "+2" form is taken as a file name by POSIX-2001 conforming sort.
	sort -n -k 3 \${tempfile}atoms.out |\\
	head -50 >! \${tempfile}atoms.in
	
	# refine "occupancies" against the Patterson
	rm -f \${tempfile}atoms.out >& /dev/null
	set vecrefSG = \$realSG
	if("\$vecrefSG" == "H3") set vecrefSG = R3
	if("\$vecrefSG" == "H32") set vecrefSG = R32
	vecref mapin \${tempfile}patt.map ATOUT \${tempfile}atoms.out << EOF-vecref >>& \$logfile
	SPACEGROUP \$vecrefSG
	RESOLUTION \${refRES}
	CYCLES \$rcycle
	BLIMIT 0 1000
	BREF
	@\${tempfile}atoms.in
EOF-vecref
	if((\$status)||(! -e \${tempfile}atoms.out)) then
	    # all atoms rejected
	    echo -n "" >! \${tempfile}atoms.out
	    break
	endif
    end
    rm -f \${tempfile}atoms.in >& /dev/null
    if(\$?NO_PATT) then
	# no data available from Patterson
	echo -n "" >! \${tempfile}atoms.out
    endif
    
    # append the vecref occupancy to every row of the site table
    # (ATOM records come first on the stream and are indexed by site number;
    #  a site without an ATOM record gets 0)
    cat \${tempfile}atoms.out \${tempfile}allatoms |\\
    nawk '/^ATOM/{pocc[\$3]=\$4; next} {++row; print \$0, pocc[row]+0}' >! \${tempfile}atoms.pocc
    # columns: xf yf zf 1/mult height Ee atmno chain pocc

    rm -f \${tempfile}atoms.out \${tempfile}allatoms >& /dev/null
    
    # summarize the Patterson check over the non-Q sites:
    #   count of sites with pocc above zero, mean pocc, largest pocc
    cat \${tempfile}atoms.pocc |\\
    nawk '\$6~/^Q/{next} \$NF+0>0{++good}\\
	  {++sites; total+=\$NF} \$NF>top{top=\$NF}\\
          END{printf "%d ", good; if(sites==0)sites=1; print total/sites, top+0}' >! \${tempfile}.results
    if(\$?NO_PATT) then
	# no Patterson was used: report a placeholder instead
	echo "? 0 0" >! \${tempfile}.results
    endif
    # pull the three numbers back into shell variables
    set patt = \`nawk '{print \$1}' \${tempfile}.results\`
    set avg  = \`nawk '{print \$2}' \${tempfile}.results\`
    set max  = \`nawk '{print \$3}' \${tempfile}.results\`
    rm -f \${tempfile}.results >& /dev/null

    # print out for user
    # (first column is patt/orig: sites with nonzero Patterson occupancy over
    #  sites submitted to vecref; then mean and maximum Patterson occupancy,
    #  one decimal each; no newline, the report line is finished later)
    echo "\$patt \$orig \$avg \$max" | nawk '{printf "%9s %4.1f %4.1f ", \$1"/"\$2, \$3, \$4}'
    
    # print out data files used
    # (the second mtz is listed only when it differs from the first)
    set mtzfiles = "\$mtz1"
    if("\$mtz2" != "\$mtz1") set mtzfiles = "\$mtz1 \$mtz2"
    echo -n " \$mtzfiles "
    
    
    # sum-of-sums score
    #
    # An earlier holistic score built from CFOM and the Patterson statistics
    # used to be written to the same score file just before this step.  It was
    # always overwritten here before anything read it (and it divided by CFOM,
    # which aborts awk when CFOM is zero), so that computation was removed.
    #
    # Only the first expect non-Q sites are considered.  For sites where both
    # the shelx height and the Patterson occupancy are positive, height and
    # occupancy are added into sum and the height alone into sum_s.  The
    # output is sum divided by the number of sites considered, then sum_s.
    # format xf yf zf 1/mult height Ee atmno chain pocc
    cat \${tempfile}atoms.pocc |\\
    nawk -v expect=\$expect '\$6~/^Q/ || n>=expect{next} {++n}\\
	  \$NF*\$5>0{sum+=\$NF+\$5; sum_s+=\$5}\\
          END{if(n==0)n=1; print sum/n, sum_s}' |\\
    cat >! \${tempfile}.score

    # integer score (1000 x the per-site mean) used to rank runs
    set score = \`nawk '{printf "%d", 1000*\$1}' \${tempfile}.score\`
    set sum_s = \`nawk '{print \$2}' \${tempfile}.score\`
    rm -f \${tempfile}.score >& /dev/null

    # create the output pdb
    # (one fixed-width fractional-coordinate record per site; the awk
    #  variable B carries the height column and occ the pocc column)
    cat \${tempfile}atoms.pocc |\\
    nawk '{x=\$1;y=\$2;z=\$3;occ=\$9;B=\$5;\\
	   Ee=\$6;n=\$7;chain=\$8;\\
        printf("%5d%10.5f%10.5f%10.5f%10.5f%5.2f   35%10d%2s   SLX %1s\\n",\\
    n, x,y,z, B, occ, n, Ee, chain)}' |\\
    cat >! \${tempfile}.frac

    # make sure we've got the right cell
    # (taken from the first CELL line of the shelx listing, fields 3-8)
    set CELL  = \`nawk '\$1 == "CELL"{print \$3, \$4, \$5, \$6, \$7, \$8; exit}' \${prefix}.lst\`
    # convert the fractional records to an orthogonal PDB file
    coordconv xyzin \${tempfile}.frac xyzout \${tempfile}.pdb << EOF >> \$logfile
    CELL \$CELL
    INPUT FRAC
    OUTPUT PDB ORTH 1
    END
EOF
    

    # write a descriptive header to the PDB file
    # (every line of the text below is prefixed with REMARK and starts a
    #  fresh full.pdb; the run parameters are substituted into the text)
    cat << EOF | nawk '{print "REMARK", \$0}' >! \${tempfile}full.pdb
results here are from:
\$dataset in \$mtzfiles at \${cutRES}A in \$SG
reflections were excluded if F/sigF < \$cutSIG or |delta-F| > \$diff
internal score: \$score

the B-factor column here contains the "peak height", as reported
by shelx.

the occupancy column contains the result of vector-space refinement of all
the shelx peaks against a Patterson calculated with the same data
provided to shelx.  The CCP4 program vecref will have rejected most of
the atoms, leaving non-zero "occupancy" only for sites that are consistent 
with the Patterson.

ATOMs in the "D" chain are the highest peaks in shelx's direct-phased
Fourier, and represent sites that shelx was "sure" about.

ATOMs in the "Q" chain represent peaks in shelx's direct-phased Fourier
map that may or may not be "real" atoms, but shelx printed them out anyway.

ATOMs in the "X" chain were on special positions.

the symmetry of these positions is \$realSG
                           X       Y       Z      occ height
EOF
    # copy over the atoms
    cat \${tempfile}.pdb >> \${tempfile}full.pdb
    rm -f \${tempfile}.pdb >& /dev/null
    rm -f \${tempfile}.frac >& /dev/null
    # NOTE(review): no file with this name is created in this part of the
    # script; the removal is harmless but may be a leftover
    rm -f \${tempfile}.atoms >& /dev/null


    # debug mode only: append a comparison against a reference site file and
    # the raw statistics of this run to scores.log
    if(\$?debug) then
	if(! -e scores.log) touch scores.log
	# truncate each ATOM record to 60 columns and append a fixed 20.00
	cat \${tempfile}full.pdb |\\
	nawk '/^ATOM/{\$0=substr(\$0, 1, 60) " 20.00"} {print}' |\\
	cat >! \${tempfile}test.pdb
	# NOTE(review): hard-coded developer path and reference file; the first
	# single-field output line of that script is logged without a newline
	/max/jamesh/Develop/patteq.com \$SG o/best_sites.pdb \${tempfile}test.pdb |\\
	nawk 'NF==1{printf " %s ", \$0; exit}' |\\
	cat >> scores.log

	echo "\$expect \$found \$bCFOM \$patt \$avg \$max \$sum_s" >> scores.log	
    endif
    

    # keep this result only if it beats the best score seen so far
    if(\$score > \$best_score) then
	set best_score = \$score
	# rotate up to three generations of earlier output files
	if(-e \${outfile}.older) mv \${outfile}.older \${outfile}.oldest
	if(-e \${outfile}.old) mv \${outfile}.old \${outfile}.older
	if(-e \${outfile}) mv \${outfile} \${outfile}.old
	mv \${tempfile}full.pdb \$outfile >& /dev/null
	# tell the user only when the move succeeded
	if(! \$status) echo -n " (saved as \$outfile)"
    endif
    
    
    
    # end the line
    echo ""
    
    end	# sites
    end	# diff
    end	# cutSIG
    end	# cutRES
    
end	# dataset
end	# SG

# remove scratch files (in debug mode everything is left in place)
Clean_up:
if(\$?debug) exit

# shelx input and output
rm -f \${prefix}.hkl \${prefix}.ins \${prefix}.res \${prefix}.lst >& /dev/null

# site table, Patterson map, dataset list and working mtz files
rm -f \${tempfile}atoms.pocc \${tempfile}patt.map \${tempfile}datasets >& /dev/null
rm -f \${tempfile}mtz1.mtz \${tempfile}mtz2.mtz \${tempfile}cadded.mtz >& /dev/null
rm -f \${tempfile}scaled.mtz \${tempfile}scaleit.log \${tempfile}full.pdb >& /dev/null


exit






Setup:
#################################################

  ####   ######   #####  #    #  #####
 #       #          #    #    #  #    #
  ####   #####      #    #    #  #    #
      #  #          #    #    #  #####
 #    #  #          #    #    #  #
  ####   ######     #     ####   #

#################################################
#
#   gather information on:
#    mtz file
#    data sets
#    resolution limits
#    sigma cutoff (for map generation)
#    difference cutoff
#
##################################################
# scan the command line for files
# (the user_ lists collect command-line overrides and stay empty when the
#  user gave none; best_score starts below any score a run can produce)
set user_SGs   = ""
set user_sigs  = ""
set user_resos = ""
set user_diffs = ""
set user_sites = ""
set mtzfiles   = ""
set best_score = -1

foreach arg ( \$* )
    # warn about probable misspellings
    # (any word ending in .mtz is taken as an input file if it exists)
    if("\$arg" =~ *.mtz) then
	if(-e "\$arg") then
	    set mtzfiles = "\$mtzfiles \$arg"
	else
	    echo "WARNING: \$arg does not exist! "
	endif
    endif
    if(("\$arg" =~ *shelxs)&&(-e "\$arg")) then
	# user-specified shelx executable
	set SHELX = "\$arg"
    endif
end

# use default if nothing specified
if("\$mtzfiles" == "") set mtzfiles = "\$mtzfile"

# gather data set labels, etc from the mtzs
# Every usable mtz contributes lines to the labels file, each tagged with
# the mtz file name as last field:
#   CELL / SYMM / RESO lines taken from the mtzdump header, and
#   one line per F, D or sigma column: type, set number, label, statistic
set i = 0
echo "" >! \${tempfile}labels
foreach mtz ( \$mtzfiles )
    @ i = ( \$i + 1 )
    
    # dump and characterize contents
    echo "go" | mtzdump HKLIN \$mtz >! \${tempfile}mtzdump

    cat \${tempfile}mtzdump |\\
    nawk '/Cell Dimensions/{getline;getline;print "CELL", \$0}\\
	  /Space group/{print "SYMM", \$NF+0}\\
	  /Resolution Range/{getline;getline;print "RESO", \$4, \$6}' |\\
    cat >! \${tempfile}info
    
    # read in dataset info
    # Lines with more than 6 fields whose next-to-last field is F, D or Q
    # are reduced to a type letter (Q becomes S), the last field and the
    # fifth field from the end.  The final nawk numbers the F and D columns
    # and prefixes each S line with the type of the line before it, giving
    # the SF and SD entries paired with their F or D by set number.
    cat \${tempfile}mtzdump |\\
    nawk 'NF>6' |\\
    nawk '\$(NF-1)=="F"{print "F", \$NF, \$(NF-4)}\\
          \$(NF-1)=="D"{print "D", \$NF, \$(NF-4)}\\
          \$(NF-1)=="Q"{print "S", \$NF, \$(NF-4)}' |\\
    nawk '/^F/{++n} /^D/{++n} {printf "%s", \$1; \\
           if(\$1=="S") printf "%s", last;\\
    printf " %d %s %s\\n",n, \$2, \$3; last=\$1}' |\\
    cat >> \${tempfile}info
    
    # check for empty mtzs
    set temp = \`nawk '/^[FD]/' \${tempfile}info | wc -l\`
    if(\$temp > 0) then
	# add it to the pile
	cat \${tempfile}info |\\
	nawk -v mtz=\$mtz '{print \$0, mtz}' |\\
	cat >> \${tempfile}labels
    else
	echo "WARNING: \$mtz contains no useful data!"
    endif
    if(! \$?debug) rm -f \${tempfile}mtzdump >& /dev/null
    if(! \$?debug) rm -f \${tempfile}info >& /dev/null
end

# get complete, unique lists of DANOs
# Each D column is paired with its SD column through the key formed from
# set number and mtz file.  Output lines have six fields:
#   D  label  sigma-label  signal/noise  set-number  mtz
# A zero noise value is replaced by 1 so the ratio can always be formed
# (this guard used to write to the wrong array element, leaving the zero in
#  place and aborting awk with a division by zero).
# NOTE(review): the sort key starts at the third field (the sigma label),
# not at the signal/noise ratio in the fourth; kept as it was, confirm the
# intended ordering.
cat \${tempfile}labels |\\
nawk '{l= \$2 " " \$NF}\\
      \$1=="D"{D[l]=\$3; signal[l]=\$4} \\
      \$1=="SD"{S[l]=\$3; noise[l]=\$4} \\
      END{for(l in S){if(noise[l]+0==0) noise[l]=1; \\
	print "D", D[l], S[l], signal[l]/noise[l], l}}' |\\
nawk 'NF==6' |\\
sort -nr -k 3 >! \${tempfile}mtzlabels

# get complete, unique lists of Fs
# (same procedure for the F and SF columns, appended to the same file)
cat \${tempfile}labels |\\
nawk '{l= \$2 " " \$NF}\\
      \$1=="F"{F[l]=\$3; signal[l]=\$4} \\
      \$1=="SF"{S[l]=\$3; noise[l]=\$4} \\
      END{for(l in S){if(noise[l]+0==0) noise[l]=1; \\
	print "F", F[l], S[l], signal[l]/noise[l], l}}' |\\
nawk 'NF==6' |\\
sort -nr -k 3 >> \${tempfile}mtzlabels

# reconcile different cells? SGs?
# (no reconciliation is attempted: the first CELL and SYMM entries found in
#  the labels file are used; the space group number is translated to its
#  name through symop.lib, and hiRES is the smallest high-resolution limit
#  found in any mtz)
set CELL  = \`nawk '/^CELL/{print \$2, \$3, \$4, \$5, \$6, \$7; exit}' \${tempfile}labels\`
set SGnum = \`nawk '/^SYMM/{print \$2; exit}' \${tempfile}labels\`
set SG    = \` nawk -v num=\$SGnum '\$1==num && NF>5{print \$4}' \${CLIBD}/symop.lib \`
set hiRES = \`nawk '/^RESO/{print \$3}' \${tempfile}labels | sort -n | head -1\`

if(! \$?debug) rm -f \${tempfile}labels >& /dev/null


# give up with the help text when no F or D column was found at all
set mtzlabels = \`cat \${tempfile}mtzlabels | wc -l\`
if("\$mtzlabels" == 0) then
    echo "no useful data in \$mtzfiles "
    goto Help
endif

##################################################
# get crystal and dataset information from the final mtz file


# enumerate every ordered pair of F data sets, in both directions, so that a
# difference label typed on the command line can be recognized
cat \${tempfile}mtzlabels |\\
nawk '\$1=="F"{++count; fset[count]= \$2 " " \$3 " " \$NF;\\
      for(k=1;k<count;++k){print "F", fset[k], fset[count]; print "F", fset[count], fset[k];}}' >! \${tempfile}Fpairs


# one last pass through command line
# allow user overrides of all internal variables
#
# Each word is examined in turn (with its neighbours available for
# two-word forms such as: 6 sites, diff 50, 2 A):
#   - space group names are added to user_SGs
#   - data set labels and F1-F2 difference labels select data, or exclude
#     it when the NO flag is set
#   - words starting with patt switch the Patterson check on or off
#   - numbers become resolution, sigma, site-count or difference cutoffs
# The NO flag is set by the words no, not, don't, ignore and except.  It is
# cleared when a later word reaches the bottom of the loop body; words that
# are consumed with continue leave it set, so one negation can cover several
# labels in a row.
set i = 0
echo -n "" >! \${tempfile}userlabels
echo -n "" >! \${tempfile}userpairs
echo -n "" >! \${tempfile}not_pairs
while( \$i < \$#argv )
    @ i = ( \$i + 1 )
    @ nexti = ( \$i + 1 )
    @ lasti = ( \$i - 1 )
    # clamp the neighbour indices to the argument list
    if(\$nexti > \$#argv) set nexti = \$#argv
    if(\$lasti < 1) set lasti = 1
    set arg = "\$argv[\$i]"
    
    # check for space groups
    if("\$arg" =~ [PpCcIiFfRrHh][1-6]*) then
	set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
	set temp = \`nawk -v SG=\$temp '\$4 == SG {print \$4}' \$CLIBD/symop.lib | head -1\`
	if("\$temp" == "") then
	    # check for "pseudo-spacegroup" language
	    # (returned in upper case, like the names looked up above, because
	    #  the rest of the script compares against upper-case names)
	    set temp = \`echo "\$arg" | nawk 'toupper(\$1) ~ /[PC]2212|[PC]2122|P21221|P22121/{print toupper(\$1)}'\`
	    # these are okay too, reindexing engine will understand
	endif
	if("\$temp" != "") then
	    # add this SG to the space group list
	    set user_SGs = "\$user_SGs \$temp"
	    continue
	endif
    endif
    
    # only look at non-file words
    if(! -e "\$arg") then
	# see if a dataset label was given
	set temp = \`nawk -v arg="\$arg" '\$2==arg' \${tempfile}mtzlabels | wc -l\`
	if(\$temp) then
	    # a dataset label was mentioned
	    if(\$?NO) then
		# user doesn't want this label
		# filter it out of the input files
		nawk -v arg="\$arg" '\$2!=arg' \${tempfile}mtzlabels >! \${tempfile}
		mv \${tempfile} \${tempfile}mtzlabels
	    else
		# must want only this label?
		nawk -v arg="\$arg" '\$2==arg' \${tempfile}mtzlabels |\\
		cat >> \${tempfile}userlabels
	    endif
	    continue
	endif
    	
	# see if a difference-dataset label was given
	set temp = \`nawk -v arg="\$arg" '\$2 "-" \$5 == arg{print \$2, \$5; exit}' \${tempfile}Fpairs\`
	if(\$#temp == 2) then
	    # a specific difference dataset was mentioned
	    if(\$?NO) then
		# user doesn't want this label pair
		# filter it out of the input files
		echo "NOT_A_PAIR \$temp" >> \${tempfile}not_pairs
	    else
		# must want only this pair (in this order)?
		cat \${tempfile}Fpairs |\\
		nawk -v F1=\$temp[1] -v F2=\$temp[2] '\$2==F1 && \$5==F2' |\\
		cat >> \${tempfile}userpairs
	    endif
	    continue	    
	endif
	
	# program options
	if("\$arg" =~ [Pp]att*) then
	    if(\$?NO) then
		set NO_PATT
	    else
		unset NO_PATT
	    endif
	endif
	
	# check for numbers
	if("\$arg" =~ [0-9]*) then
	    # we have a number
	    if(("\$arg" =~ *A)||("\$argv[\$nexti]" == "A")) then
		# user-preferred resolution limits
		# (a single number is a high-resolution cutoff; a lo-hi range
		#  also sets loRES)
		set temp = \`echo "\$arg" | nawk 'BEGIN{FS="-"} \$1+0 > 0.1{print \$1+0} \$2+0 > 0.1{print \$2+0}'\`
		if(\$#temp != 1) then
		    # order the two numbers: larger (low resolution) first
		    set temp = \`echo \$temp | nawk '\$1>\$2{print \$1, \$2} \$2>\$1{print \$2, \$1}'\`
		    if(\$#temp == 2) then
				set loRES = "\$temp[1]"
				set user_resos = "\$user_resos \$temp[2]"
				continue
		    endif
		else
		    if("\$temp" != "") set user_resos = "\$user_resos \$temp"
		    continue
		endif
	    endif
	    
	    if(("\$arg" =~ *[0-9][Ss]*)||("\$argv[\$nexti]" =~ [Ss]igma)) then
		set user_sigs = "\$user_sigs "\`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    
	    if(("\$argv[\$nexti]" =~ [Ss]ite)||("\$argv[\$nexti]" =~ [Ss]ites)) then
		set user_sites = "\$user_sites "\`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    
	    if(("\$arg" =~ *[Dd]iff)||("\$argv[\$lasti]" =~ [Dd]iff)) then
		set user_diffs = "\$user_diffs "\`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    if(("\$arg" =~ *[Dd]iso)||("\$argv[\$lasti]" =~ [Dd]iso)) then
		set MAX_diso = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    if(("\$arg" =~ *[Dd]ano)||("\$argv[\$lasti]" =~ [Dd]ano)) then
		set MAX_dano = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	endif
	
	# allow "NO" logic to carry through
	unset NO
	if(("\$arg" == "no")||("\$arg" == "not")) set NO
	if(("\$arg" == "don't")||("\$arg" == "ignore")) set NO
	if("\$arg" == "except") set NO
    endif
end

# honor user requests (when mentioned)
# (a list given on the command line replaces the built-in trial list for
#  that parameter; parameters the user did not mention keep their defaults)
if("\$user_SGs"   != "") set SGs           = "\$user_SGs"
if("\$user_resos" != "") set RESOs         = "\$user_resos"
if("\$user_sigs"  != "") set sigma_cutoffs = "\$user_sigs"
if("\$user_diffs" != "") set max_diffs     = "\$user_diffs"
if("\$user_sites" != "") set target_sites  = "\$user_sites"


# check the "user" labels
# Reduce both label lists to one line per mtz file (the sixth field) and
# stack them.  An mtz that shows up only once was not mentioned through any
# user-selected label.  The key is given with -k because the old +POS form
# is obsolescent and not accepted by every sort implementation.
sort -u -k 6 \${tempfile}userlabels >! \${tempfile}mtzs
sort -u -k 6 \${tempfile}mtzlabels >> \${tempfile}mtzs
cat \${tempfile}mtzs |\\
nawk '{++seen[\$6]} END{for(mtz in seen) if(seen[mtz]==1) print mtz}' |\\
cat >! \${tempfile}orphans

foreach mtz ( \`cat \${tempfile}orphans\` )
    # some mtzs were mentioned, but aren't in \${tempfile}userlabels
    # so, we'll just assume everything in them was wanted
    cat \${tempfile}mtzlabels |\\
    nawk -v mtz=\$mtz '\$6==mtz' |\\
    cat >> \${tempfile}userlabels
end
if(! \$?debug) rm -f \${tempfile}orphans   >& /dev/null
if(! \$?debug) rm -f \${tempfile}mtzlabels >& /dev/null
if(! \$?debug) rm -f \${tempfile}mtzs      >& /dev/null

# make sure no label is mentioned twice
# (a label is identified by its second field together with its mtz file;
#  only the first occurrence is kept)
cat \${tempfile}userlabels |\\
nawk '! seen[\$2 " " \$NF]{print} {++seen[\$2 " " \$NF]}' |\\
cat >! \${tempfile}
mv \${tempfile} \${tempfile}userlabels

# organize user's desired labels into difference-data sets
# (one-way pairs only: each F is paired with every F listed before it)
cat \${tempfile}userlabels |\\
nawk '\$1=="F"{++n; set[n]= \$2 " " \$3 " " \$NF;\\
      for(i=1;i<n;++i) print "F", set[i], set[n]}' |\\
cat >! \${tempfile}Fpairs

# see if user had particular pairing in mind
set temp = \`cat \${tempfile}userpairs | wc -l\`
if(\$temp) then
    # this overrides the Fpairs we just generated
    cat \${tempfile}userpairs >! \${tempfile}Fpairs
    
    # also any user-specified individual labels must be lone Fs? 
    cat \${tempfile}userlabels |\\
    nawk '\$1=="F"{print "F", \$2, \$3, \$NF}' |\\
    cat >> \${tempfile}Fpairs
endif
if(! \$?debug) rm -f \${tempfile}userpairs >& /dev/null

set Fpairs = \`nawk '\$1=="F"' \${tempfile}Fpairs | wc -l\`
if(! \$Fpairs) then
    # maybe there's just one, lone F?
    cat \${tempfile}userlabels |\\
    nawk '\$1=="F"{print "F", \$2, \$3, \$NF}' |\\
    cat >> \${tempfile}Fpairs
endif


# now filter out user-unwanted pairings
set temp = \`cat \${tempfile}not_pairs | wc -l\`
if(\$temp) then
    # user didn't want particular pairings
    # (a rejected pair is dropped in either order)
    cat \${tempfile}not_pairs \${tempfile}Fpairs |\\
    nawk '/^NOT_A_PAIR/{not_a_pair[\$2 " " \$3]=1; not_a_pair[\$3 " " \$2]=1;}\\
        ! /^NOT_A_PAIR/{if(! not_a_pair[\$2 " " \$5]) print}' |\\
    cat >! \${tempfile}
    mv \${tempfile} \${tempfile}Fpairs
endif

# assemble Diso and Dano data into the same "dataset" file
# (D lines have four fields: D, label, sigma label, mtz; pair lines carry a
#  second label/sigma/mtz triple after the first)
cat \${tempfile}userlabels |\\
nawk '\$1=="D"{print "D", \$2, \$3, \$NF}' |\\
cat >! \${tempfile}datasets
cat \${tempfile}Fpairs >> \${tempfile}datasets

if(! \$?debug) rm -f \${tempfile}Fpairs     >& /dev/null
if(! \$?debug) rm -f \${tempfile}userlabels >& /dev/null
if(! \$?debug) rm -f \${tempfile}userpairs  >& /dev/null
if(! \$?debug) rm -f \${tempfile}not_pairs  >& /dev/null

# hand control back to the main part of the script
goto Return_from_Setup

####################################################

The Future:
mtz or hkl input
support intensity data
EOF-shelxscript
chmod a+x ${scriptDIR}shelx.com









#echo "writing ${scriptDIR}rantan.com "
cat << EOF-rantanscript >! ${scriptDIR}rantan.com
#! /bin/csh -f
#
#
#	rantan.com	- combinatorial rantan-based heavy-atom finder
#
#	"Because it was there."
#
#   will try all possible combinations of an arbitrary number of:
#   1) space groups
#   2) data sets (multiple mtz files are okay)
#   3) resolution cutoffs
#   4) F/sigma cutoffs
#   5) maximum delta-F cutoffs (default to a reasonable value)
#   6) expected site counts
#
#   the results of each rantan run are also passed through the CCP4 "vecref"
#   program, using a Patterson calculated with the same data presented to
#   rantan.  Sites that don't agree with the Patterson are rejected by vecref.
#
#   a pdb file ("outfile" below) is produced for each combinatorial run
#
#   the B-factor column is the height/sigma of each peak in rantan's direct-
#   phased difference Fourier.  The occupancy column is the refined atomic
#   occupancy from vecref (no particular scale).
#
#   however, this pdb file will be overwritten when each combination finishes, 
#   leaving only the results of the last combination when this script exits.
#
#   the output lines that this script prints to the terminal can each be
#   used as command-line input to re-run the same procedure, updating the
#   output file.
#

# location of awk
# (the awk path on the alias line and the log directory below are not
#  escaped, so they are filled in when this script file is written; if that
#  awk does not run here, plain awk is used instead)
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

# default input, output, log and scratch-file prefix
set mtzfile  = ./mtz/all.mtz
set outfile  = rantan.pdb
set logfile  = ${logDIR}rantan.log
set tempfile = ./rantan_temp

# crystal variables
# (filled in by the Setup procedure from the mtz files)
set hiRES = ""
set loRES = 1000
set CELL  = ""
set SG    = ""

# trials
# (empty lists are replaced by back-up values after Setup returns)
set SGs           = ""
set RESOs         = ""
set target_sites  = "2 4 6 8 10 20"
set sigma_cutoffs = "0 0.5 1 2 3"
set max_diffs     = "auto"

# no args? need help
if(\$#argv == 0) goto Help

# this procedure (re)sets most of the above variables
# from either the provided files, or the command line
goto Setup

Help:
# usage message; reached when the script is started without arguments.
# The example names an mtz file because only words ending in .mtz are picked
# up as input files.
cat << EOF

usage: \$0 data.mtz 6 sites P212121

where:
data.mtz	- contains the F, Fs, or Dano you want to run rantan on
6 sites		- your expected number of sites
2A		- your desired resolution cutoff
1sig		- your desired F/sigma cutoff
diff 50		- your desired maximum-difference cutoff
P212121		- space group (optional)

\$0 will run rantan (direct phasing) on the data provided in the
space group(s) given.

EOF

exit
#
#   This procedure (at the bottom of the script) does the following
#   1) scan the command line for the mtz file(s)
#   2) set the CELL, SG, and other variables
#   3) generate dataset list file: \${tempfile}datasets
Return_from_Setup:

################################################################################
# back-up values (shouldn't be empty)
# The default resolution list has four cutoffs derived from hiRES, chosen
# so that 1/d^3 is 1.0, 0.7, 0.4 and 0.1 times its value at hiRES.  If that
# yields nothing, hiRES alone is used.
# (the sigma fallback used to assign a misspelled variable name and so never
#  took effect)
if("\$SGs" == "")           set SGs           = "\$SG"
if("\$RESOs" == "")         set RESOs         = \`echo "\$hiRES 0.1 4" | nawk '\$3-1>0{v=1;vstep=(1-\$2)/(\$3-1); for(i=1;i<=\$3;++i){printf "%.1f ", (v*1/(\$1^3))^(-1/3);v-=vstep;}}'\`
if("\$RESOs" == "")         set RESOs         = "\$hiRES"
if("\$sigma_cutoffs" == "") set sigma_cutoffs = "1"
if("\$target_sites" == "")  set target_sites  = "1"
if("\$max_diffs" == "")     set max_diffs     = "auto"

# print intended combinatorial
# (shown on the terminal and used to start a fresh log file)
cat << EOF | tee \$logfile
          SGs: \$SGs
  reso limits: \$RESOs
sigma cutoffs: \$sigma_cutoffs
 target sites: \$target_sites
    max diffs: \$max_diffs
EOF
echo -n "    data sets: " | tee -a \$logfile
# single data sets (4 fields) are listed as: label from mtz
# difference pairs as: label1-label2 from mtz1 mtz2
cat \${tempfile}datasets |\\
nawk 'NF==4{print \$2,"from",\$4} NF>4{print \$2 "-" \$5,"from",\$4,\$7} {printf "               "} END{print ""}' |\\
tee -a \$logfile

# column headings for the per-run report lines
echo "running rantan:           ------ cutoffs ------  rantan results -- height -  Patterson  check"
echo "spacegroup data set       reso  sigma   maximum  WFOM   peaks    mean   max  good  mean   max mtz file(s)"


################################################################################
# explore all indicated possibilities
foreach SG ( \$SGs )
    # retrieve ASU count in this space group
    # (second field of the first symop.lib line whose fourth field is SG)
    set ASU_per_CELL = \`nawk -v SG=\$SG '\$4==SG{print \$2}' \$CLIBD/symop.lib | head -1\`
    
    
# the loop variable is the line number of the entry in the dataset file
foreach dataset ( \`nawk '{print NR}' \${tempfile}datasets\` )

    # extract labels from the "dataset" file
    set F1    = \`nawk -v dataset=\$dataset 'NR==dataset{print \$2}' \${tempfile}datasets\`
    set SIGF1 = \`nawk -v dataset=\$dataset 'NR==dataset{print \$3}' \${tempfile}datasets\`
    set mtz1  = \`nawk -v dataset=\$dataset 'NR==dataset{print \$4}' \${tempfile}datasets\`
    
    # fields 5-7 exist only for difference pairs
    set F2    = \`nawk -v dataset=\$dataset 'NR==dataset{print \$5}' \${tempfile}datasets\`
    set SIGF2 = \`nawk -v dataset=\$dataset 'NR==dataset{print \$6}' \${tempfile}datasets\`
    set mtz2  = \`nawk -v dataset=\$dataset 'NR==dataset{print \$7}' \${tempfile}datasets\`
        
    # (F or D)
    set type  = \`nawk -v dataset=\$dataset 'NR==dataset{print \$1}' \${tempfile}datasets\`
    
    if("\$F2" == "") then
	# single data set: the second slot is a copy of the first
	set F2 = "\$F1"
	set SIGF2 = "\$SIGF1"
	set mtz2  = "\$mtz1"
	unset SUBTRACT
    else
	# need to do the subtraction
	set SUBTRACT
    endif
    
    # check for orthorhombic "pseudo-spacegroups"
    # axes stays empty for every space group not listed here; otherwise it
    # gives the axis order used for reindexing further down, and realSG is
    # the standard space group name the pseudo name stands for
    set axes = ""
    set realSG = "\$SG"
    if("\$SG" == P222)    set axes = "a b c"
    if("\$SG" == P212121) set axes = "a b c"
    if("\$SG" == P2221) then
	# P2221 with screw along longest axis
	set axes  = "a b c"
	set realSG = "P2221"
    endif
    if("\$SG" == P2212) then
	# P2221 with screw along mid-length axis
	set axes  = "b c a"
	set realSG = "P2221"
    endif
    if("\$SG" == P2122) then
	# P2221 with screw along shortest axis
	set axes  = "c a b"
	set realSG = "P2221"
    endif
    
    if("\$SG" == P21212) then
	# P21212 with non-screw along longest axis
	set axes  = "a b c"
	set realSG = "P21212"
    endif
    if("\$SG" == P21221) then
	# P21212 with non-screw along mid-length axis
	set axes  = "b c a"
	set realSG = "P21212"
    endif
    if("\$SG" == P22121) then
	# P21212 with non-screw along shortest axis
	set axes  = "c a b"
	set realSG = "P21212"
    endif
    # space group number of realSG (first field of its symop.lib line)
    set realSGnum = \`nawk -v SG=\$realSG '\$4==SG{print \$1;exit}' \${CLIBD}/symop.lib\`
    
    # produce working copies of both input mtz files (named mtz1.mtz and
    # mtz2.mtz after the temp-file prefix) carrying the space group realSG,
    # reindexing first when an axis order was chosen above
    if("\$axes" != "") then
	# we have an asymmetric orthorhombic space group 
	set i = 0
	foreach mtz ( \$mtz1 \$mtz2 )
	    @ i = ( \$i + 1 )
	    # get current axis ordering, then
	    # find out what the canonical one would be
	    # then decide how to go from current ordering to the desired one
	    # (the cell edges are tagged h, k, l, sorted by length and named
	    #  a, b, c in that order; they are then put into the desired axis
	    #  order, and the sign of the last index is flipped unless the
	    #  result is a cyclic permutation of hkl)
	    echo "head" |\\
	    mtzdump hklin \$mtz |\\
	    nawk '/Cell Dimensions/{getline;getline;print}' |\\
	    nawk '{\\
		# print out current axis order \\
		print \$1, "h"; print \$2, "k"; print \$3, "l"}' |\\
	    sort -n |\\
	    nawk '\\
		# add cannonical axis names\\
		NR==1{print \$0, "a"} NR==2{print \$0, "b"} NR==3{print \$0, "c"}' |\\
	    nawk -v axes="\$axes" 'BEGIN{split(axes, abc)} {\\
		# write desired axis ordering in front of cannonical one \\
		print abc[NR], \$0}' |\\
	    sort |\\
	    nawk '# print out new hkl order \\
	          {printf "%s ", \$3} END{print ""}' |\\
	    nawk '\$1 \$2 \$3 \$1 \$2 \$3 !~ /hkl/{\$3 = "-" \$3} {print "reindex", \$1",",\$2",",\$3}' |\\
	    reindex HKLIN \$mtz HKLOUT \${tempfile}.mtz >> \$logfile

	    # this should give us a mapping between any two orthorhombics

	    # now simply re-name the space group in the header
	    echo "SYMM \$realSGnum" |\\
	    mtzutils HKLIN \${tempfile}.mtz HKLOUT \${tempfile}mtz\${i}.mtz >> \$logfile
	    if(\$status) then
		# give up on everything if the header could not be rewritten
		set BAD
		goto Clean_up
	    endif
	    rm -f \${tempfile}.mtz >& /dev/null
	end
    else
	# just re-name the space group in each mtz header
	echo "SYMM \$realSGnum" |\\
	mtzutils HKLIN \$mtz1 HKLOUT \${tempfile}mtz1.mtz >> \$logfile
	if(\$status) then
	    set BAD
	    goto Clean_up
	endif
	echo "SYMM \$realSGnum" |\\
	mtzutils HKLIN \$mtz2 HKLOUT \${tempfile}mtz2.mtz >> \$logfile
	if(\$status) then
	    set BAD
	    goto Clean_up
	endif
    endif
    

    # extract the two datasets into a single file
    # (whatever the input labels were, the merged file always uses the
    #  column names F1/SIGF1 and F2/SIGF2, typed as F and Q)
    cad hklin1 \${tempfile}mtz1.mtz hklin2 \${tempfile}mtz2.mtz \\
        hklout \${tempfile}cadded.mtz << EOF-cad >> \$logfile
    LABIN FILE 1 E1=\$F1  E2=\$SIGF1
    CTYPI FILE 1 E1=F    E2=Q
    LABOU FILE 1 E1=F1   E2=SIGF1
    LABIN FILE 2 E1=\$F2  E2=\$SIGF2
    CTYPI FILE 2 E1=F    E2=Q
    LABOU FILE 2 E1=F2   E2=SIGF2
EOF-cad
    
    # make sure we have the actual cell now
    # (read back from the merged file, which may have been reindexed)
    set CELL  = \`echo "head" | mtzdump hklin \${tempfile}cadded.mtz | nawk '/Cell Dimensions/{getline;getline;print}'\`
    
    # iterate over resolution cutoffs
    foreach cutRES ( \$RESOs )
    
    # apply scaleit (to scale isomorphous differences, and check DIFF recommendation)
    # (from here on the dataset variable holds the display name of the data
    #  set rather than its line number in the dataset file)
    if(\$?SUBTRACT) then
	set dataset = "\${F1}-\${F2}"
	set scale  = ""
    else
	set dataset = "\$F1"
	# either Dano or FH data (squash F2)
	set scale  = "SCALE FPH1 0.0000001"
    endif

    # the scaleit log is kept separately as well, because the recommended
    # difference cutoff is read from it later
    scaleit hklin \${tempfile}cadded.mtz \\
            hklout \${tempfile}scaled.mtz << EOF-scale | tee \${tempfile}scaleit.log >> \$logfile
    RESOLUTION 1000 \$cutRES
    refine anisotropic
    \$scale
    LABIN FP=F1 SIGFP=SIGF1 FPH1=F2 SIGFPH1=SIGF2
EOF-scale
    
    # iterate over sigma- and maximum-difference cutoffs
    foreach cutSIG ( \$sigma_cutoffs )
    foreach diff ( \$max_diffs )
    
    # use scaleit's recommendation by default
    # (last field of the acceptable-differences line in the scaleit log; if
    #  none was found, a limit large enough to exclude nothing is used)
    if("\$diff" == "auto") set diff = \`nawk '/acceptable differences/{print \$NF}' \${tempfile}scaleit.log\`
    if("\$diff" == "") set diff = 99999999
    
    # print mini-report (suitable for re-input on command line)
    # display for user
    # (no newline: the results of the run are appended to the same line)
    echo "\$SG \$dataset \$cutRES \$cutSIG \$diff" |\\
	 nawk '{printf "%-10s %-13s %4.1fA %4.1fs  diff %-5.1f ", \$1, \$2, \$3, \$4, \$5}'
    



    # make a Patterson map
    # (same resolution, difference and sigma cutoffs as the E calculation
    #  below, so that both see the same reflections)
    fft HKLIN \${tempfile}scaled.mtz MAPOUT \${tempfile}patt.map << EOF-fft >> \$logfile
    TITLE \${cutRES}A Patterson of \$dataset excluding diff>\$diff
    RESOLUTION \$cutRES
    PATTERSON
    LABIN F1=F1 SIG1=SIGF1 F2=F2 SIG2=SIGF2
    EXCLUDE DIFF \$diff
    EXCLUDE SIG1 \$cutSIG
EOF-fft
    # normalize the Patterson
    echo "scale sigma" |\\
    mapmask mapin  \${tempfile}patt.map \\
           mapout \${tempfile}.map >> \$logfile
    mv \${tempfile}.map \${tempfile}patt.map
    if(\$?user_patt) then
	# option to use an outside Patterson?
	# (replaces the map just computed; user_patt is not set anywhere in
	#  this part of the script)
	cp \$user_patt \${tempfile}patt.map
    endif
    
    # calculate Es on exactly the same data
    ecalc hklin \${tempfile}scaled.mtz  hklout \${tempfile}E.mtz << EOF-ecalc >> \$logfile
    RESOLUTION \$cutRES
    EXCLUDE DIFF \$diff
    EXCLUDE SIGP \$cutSIG
    LABIN FP=F1 SIGFP=SIGF1 FPH=F2 SIGFPH=SIGF2
EOF-ecalc
    
    #######################################
    # run rantan
    # (any stale output is removed first; the program output goes to the
    #  main log and to a separate rantan log that is parsed below)
    rm -f \${tempfile}rantaned.mtz >& /dev/null
    echo "LABI EVAL=E " |\\
    rantan hklin \${tempfile}E.mtz \\
          hklout \${tempfile}rantaned.mtz |& tee \${tempfile}rantan.log >>& \$logfile

    #######################################
    # NOTE(review): the status tested here is the one the shell reports for
    # the whole pipeline, which may be that of tee rather than rantan; confirm
    if(\$status) then
	echo "crashed"
	rm -f \${tempfile}E.mtz \${tempfile}rantan.log >& /dev/null
	continue
    endif
    
    # get the mean FOM of the best data set
    # (last field of the line following the last COMBINED FOM heading)
    set WFOM = \`nawk '/COMBINED FOM/{getline; print \$NF}' \${tempfile}rantan.log | tail -1\`
    rm -f \${tempfile}rantan.log >& /dev/null
    rm -f \${tempfile}E.mtz >& /dev/null

    # fourier and peak-pick
    # (map from the E values with the phases and weights written by rantan)
    rm -f \${tempfile}rantan.map
    fft HKLIN \${tempfile}rantaned.mtz MAPOUT \${tempfile}rantan.map << EOF-fft >> \$logfile
    resolution \$cutRES
    LABIN F1=E   PHI=PHI1 W=WT1 
EOF-fft
    # normalize it
    echo "SCALE SIGMA" |\\
    mapmask MAPIN \${tempfile}rantan.map \\
           MAPOUT \${tempfile}norm.map | tee \${tempfile}map.log >> \$logfile
    mv \${tempfile}norm.map \${tempfile}rantan.map
    # grid sampling and map limits as reported by mapmask; CRS is the
    # column/row/section range widened by one grid point on every side
    set GRID = \`nawk '/Grid sampling on x, y, z/{print \$(NF-2), \$(NF-1), \$NF;exit}' \${tempfile}map.log\`
    set ASU = \`nawk '/Start and stop points on x, y, z/{print \$(NF-5)+0, \$(NF-4)+0, \$(NF-3)+0, \$(NF-2)+0, \$(NF-1)+0, \$NF+0; exit}' \${tempfile}map.log\`
    set CRS = \`nawk '/Start and stop points on cols/{print \$(NF-5)-1, \$(NF-4)+1, \$(NF-3)-1, \$(NF-2)+1, \$(NF-1)-1, \$NF+1; exit}' \${tempfile}map.log\`
    rm -f \${tempfile}map.log >& /dev/null

    # now extend the map to a little more than one ASU
    # (ten grid points beyond the reported limits in every direction)
    echo "\$ASU" |\\
     nawk '{print "XYZLIM", \$1-10, \$2+10, \$3-10, \$4+10, \$5-10, \$6+10}' |\\
    mapmask MAPIN \${tempfile}rantan.map \\
           MAPOUT \${tempfile}xtend.map >> \$logfile
    mv \${tempfile}xtend.map \${tempfile}rantan.map
    

    
    # one report line per expected site count
    foreach expect ( \$target_sites )
    
    # re-print initial tag for multiple-target-site runs
    set temp = \`echo "\$target_sites" | nawk '{print \$1}'\`
    if("\$expect" != "\$temp") then
	# only do this 2nd time and beyond
	echo "\$SG \$dataset \$cutRES \$cutSIG \$diff" |\\
	     nawk '{printf "%-10s %-13s %4.1fA %4.1fs  diff %-5.1f ", \$1, \$2, \$3, \$4, \$5}'
    endif
    
    # pick peaks in the direct-phased map
    # (peaks above 3 within the widened column/row/section range)
    peakmax MAPIN \${tempfile}rantan.map \\
            XYZOUT \${tempfile}.pdb << EOF | tee \${tempfile}.log >> \$logfile
    xyzlimit \$CRS
    threshold 3
EOF
    # get only symmetry-unique peaks
    # (the peak table is cut out of the peakmax log by column position,
    #  sorted by height, reduced to the first line seen for each site number
    #  and truncated to the expected number of sites)
    cat \${tempfile}.log |\\
    nawk '/Count Site Height/,/threshold/' |\\
    nawk 'NF>3 && ! /[^0-9 -\\.]/{print substr(\$0,9)+0, substr(\$0,length(\$0)-47,25), substr(\$0,5)+0}' |\\
    sort -nr |\\
    nawk '! seen[\$NF] {print \$2, \$3, \$4, \$1} {seen[\$NF]=1}' |\\
    head -\$expect >! \${tempfile}peaks.pick
    # format: xf yf zf height/sigma
    rm -f \${tempfile}.pdb >& /dev/null
    rm -f \${tempfile}.log >& /dev/null
    
    ##########################################
    # filter out special positions
    # (each peak becomes its own residue so that gensym output can be
    #  traced back to the peak number)
    cat \${tempfile}peaks.pick |\\
    nawk '{++n; print "RES", n; print "ATOM X", \$1, \$2, \$3}' |\\
    cat >! \${tempfile}atoms
    # for gensym
    
    # get symmetry mates for (nearly) a whole cell
    gensym << EOF >! \${tempfile}sym.log
    SYMM \$realSG
    CELL \$CELL
    XYZLIM 0 0.999999 0 0.999999 0 0.999999
    @\${tempfile}atoms
EOF
    rm -f \${tempfile}atoms >& /dev/null
    
    # count the number of times each xyz position is "seen"
    # more than once implies a special position
    # (the result has one line per peak number: peak number and count; the
    #  sort key is given with -k because the old +POS form is obsolescent
    #  and not accepted by every sort implementation)
    cat \${tempfile}sym.log |\\
    nawk '/List of sites/,/Normal termination/' |\\
    nawk '\$2 ~ /[01].[0-9][0-9][0-9]/{print \$2, \$3, \$4, \$(NF-1), \$NF}' |\\
    nawk '{++seen[\$1 " " \$2 " " \$3 " " \$4]}\\
       END{for(site in seen) print site, seen[site]}' |\\
    sort -un -k 4 |\\
    nawk '\$5+0>0{print \$4, \$5}' >! \${tempfile}mults
    rm -f \${tempfile}sym.log >& /dev/null
    
    # add "multiplicity" to peaks file
    # (the two-field multiplicity lines come first on the stream and are
    #  looked up by peak number when the peak lines follow)
    cat \${tempfile}mults \${tempfile}peaks.pick |\\
    nawk 'NF==2{mult[\$1]=\$2} \\
           NF>2{++n; print \$0, mult[n]}' |\\
    cat >! \${tempfile}peaks.mult
    # format: xf yf zf height/sigma mult
    rm -f \${tempfile}mults \${tempfile}peaks.pick >& /dev/null
        
    #############################################
    # count number of peaks
    # (every picked peak counts, whatever its multiplicity)
    set peaks = \`cat \${tempfile}peaks.mult | wc -l\`
    
    # another partial print-out (before vecref run)
    # (rantan figure of merit and number of peaks; still no newline)
    echo "\$WFOM \$peaks" | nawk '{printf "%4.2f %3d sites ", \$1, \$2}'

    
    # use vecref to check against the Patterson
    # (the second assignment is the schedule actually used; the first is
    #  kept as an alternative; no cycles at all when the Patterson check was
    #  switched off)
    set rcycles = "3,0,0 3,0,0 3,10,0 3,10,0 3,0,10 3,0,10"
    set rcycles = "3,0,0 3,0,0 3,0,0 3,0,0"
    if(\$?NO_PATT) set rcycles = ""
    # reduce resolution a bit so vecref won't crash
    set refRES = \`echo \$cutRES | nawk '{print (0.8*1/(\$1^3))^(-1/3)}'\`
    # starting B value for every atom: 79*(cutRES/3)^2
    set Bstart = \`echo \$cutRES | nawk '{print 79*(\$1/3)^2}'\`
    
    # initial atom list: only peaks with multiplicity 1 are submitted, with
    # the peak height as starting occupancy; n is the row number in the peak
    # file, so the results can be matched back to the peaks afterwards
    cat \${tempfile}peaks.mult |\\
    nawk -v Bstart=\$Bstart 'NF>3{++n}\\
      \$5+0==1{print "ATOM H", n, \$4, \$1, \$2, \$3, Bstart}' |\\
    cat >! \${tempfile}atoms.out

    # run a few rounds of vecref
    foreach rcycle ( \$rcycles )
	# turn the comma-separated triple into vecref CYCLES arguments
	set rcycle = \`echo "\$rcycle" | nawk 'BEGIN{FS=","} {print \$1, \$2, \$3}'\`
	
	# vecref can't handle more than 50 sites
	# (keep the 50 with the largest fourth column, the occupancy; the key
	#  is given with -k because the old +POS form is obsolescent and not
	#  accepted by every sort implementation)
	sort -nr -k 4 \${tempfile}atoms.out |\\
	head -50 >! \${tempfile}atoms.in
	
	# refine "occupancies" against the Patterson
	# (the previous output is removed first so that a missing file after
	#  the run means vecref produced nothing)
	rm -f \${tempfile}atoms.out >& /dev/null
        # vecref is given R3/R32 in place of the H3/H32 names
        set vecrefSG = \$realSG
        if("\$vecrefSG" == "H3") set vecrefSG = R3
        if("\$vecrefSG" == "H32") set vecrefSG = R32
	vecref mapin \${tempfile}patt.map ATOUT \${tempfile}atoms.out << EOF-vecref >>& \$logfile
	SPACEGROUP \$vecrefSG
	RESOLUTION \${refRES}
	CYCLES \$rcycle
	BLIM 0 1000
	@\${tempfile}atoms.in
EOF-vecref
	if((\$status)||(! -e \${tempfile}atoms.out)) then
	    # all atoms rejected
	    echo -n "" >! \${tempfile}atoms.out
	    break
	endif
    end
    rm -f \${tempfile}atoms.in >& /dev/null
    if(\$?NO_PATT) then
	# no data available from Patterson
	echo -n "" >! \${tempfile}atoms.out
    endif
    
    # append the vecref-refined occupancy to every peak record
    # (ATOM records carry the peak number in column 3 and the refined value
    #  in column 4; a peak with no ATOM record gets 0)
    cat \${tempfile}atoms.out \${tempfile}peaks.mult |\\
    nawk '/^ATOM/{pocc[\$3]=\$4; next} {++peak; print \$0, pocc[peak]+0}' >! \${tempfile}peaks.pocc
    # columns: xf yf zf height/sigma mult pocc
    rm -f \${tempfile}atoms.out \${tempfile}peaks.mult >& /dev/null

    
    # calculate scores
    #
    # The results file is a list of "KEY value" lines, starting with WFOM.
    # Only peaks whose multiplicity column is exactly 1 are scored:
    #   PEAKS  number of scored peaks
    #   MEAN   mean of column 4 (height/sigma) over the scored peaks
    #   MAX    largest column 4 value
    #   SURV   number of scored peaks with a positive column 6 (pocc)
    #   PMEAN  mean of column 6 over those peaks
    #   PMAX   largest column 6 value
    #   COMB   sum of (column 4 + column 6) over peaks where both are
    #          non-zero with the same sign, divided by PEAKS and then by 10
    # Zero counts are replaced by 1 before dividing.
    echo "WFOM \$WFOM" >! \${tempfile}.results
    cat \${tempfile}peaks.pocc |\\
    nawk '\$5+0!=1{next} {++n; sig+=\$4} \$6+0>0{++p;pat+=\$6}\\
	  \$4*\$6>0{mix+=\$4+\$6}\\
	  \$4>max{max=\$4} \$6>pmax{pmax=\$6}\\
          END{print "PEAKS", n; if(n==0)n=1;\\
	      print "MEAN", sig/n;\\
	      print "MAX", max+0;\\
	      print "SURV", p; if(p==0)p=1;\\
	      print "PMEAN", pat/p;\\
	      print "PMAX", pmax+0;\\
	      print "COMB", (mix* 1/n)/10}' |\\
    cat >> \${tempfile}.results
    if(\$?NO_PATT) then
	# fake it, but let user know nothing happened
	# (these lines are appended after the computed ones, so a reader that
	#  keeps the last value of each key sees the question marks)
	echo "SURV  ?" >> \${tempfile}.results
	echo "PMEAN ?" >> \${tempfile}.results
	echo "PMAX  ?" >> \${tempfile}.results
	echo "COMB  ?" >> \${tempfile}.results
    endif

    # print out for user
    # The first two lines (WFOM and PEAKS) are skipped and the remaining
    # values are joined onto one line.
    # NOTE(review): the format has five conversions but seven arguments, so
    # only MEAN MAX SURV PMEAN PMAX are shown; COMB and the appended question
    # marks are never printed - confirm that this is intended.
    cat \${tempfile}.results |\\
     nawk 'NR>2{printf "%s ", \$NF} END{print ""}' |\\
     nawk '{printf "%5.2f %5.2f %5d %5.2f %5.2f", \$1, \$2, \$3, \$4, \$5, \$6, \$7}'
    
    # report which mtz file(s) supplied the data for this trial
    if("\$mtz2" == "\$mtz1") then
        set mtzfiles = "\$mtz1"
    else
        set mtzfiles = "\$mtz1 \$mtz2"
    endif
    echo -n " \$mtzfiles "

    # boil the results table down to one number: the last COMB value,
    # or the last MAX value when COMB is the "?" placeholder
    cat \${tempfile}.results |\\
    nawk '/^MAX/ {peakmax=\$NF}\\
          /^COMB/{comb=\$NF}\\
          END{if(comb == "?"){print peakmax}else{print comb}}' >! \${tempfile}.score
    # stored as an integer (value times 1000) so the shell can compare it
    set score = \`nawk '{printf "%d", \$1*1000}' \${tempfile}.score\`
    rm -f \${tempfile}.score >& /dev/null
    

    # create the output pdb
    cat \${tempfile}peaks.pocc |\\
    nawk -v Bstart=\$Bstart 'NF>=3{++n;\\
                Ee="HA";\\
		x=\$1;y=\$2;z=\$3;occ=\$6;B=\$4;\\
		chain="D"; if(\$5+0>1) chain="X";\\
    printf("%5d%10.5f%10.5f%10.5f%10.5f%5.2f   35%10d%2s   RAN %1s\\n",\\
    n, x,y,z, B, occ, n, Ee, chain)}' |\\
    cat >! \${tempfile}.frac

    # make sure we've got the right cell
    coordconv xyzin \${tempfile}.frac xyzout \${tempfile}.pdb << EOF >> \$logfile
    CELL \$CELL
    INPUT FRAC
    OUTPUT PDB ORTH 1
    END
EOF
    
    # write a descriptive header to the PDB file
    cat << EOF | nawk '{print "REMARK", \$0}' >! \${tempfile}full.pdb
results here are from:
\$dataset in \$mtzfiles at \${cutRES}A in \$SG
reflections were excluded if F/sigF < \$cutSIG or |delta-F| > \$diff
internal score: \$score

the B-factor column here contains the peak height of each atom in 
a difference Fourier calculated using the "best" set of directly-
obtained phases from rantan.

the occupancy column contains the result of vector-space refinement of all
the located peaks against a Patterson calculated with the same data
provided to rantan.  The CCP4 program vecref will have rejected most of
the atoms, leaving non-zero occupancy only for sites that are consistent 
with the Patterson.

ATOMs in the "X" chain were on special positions.

the symmetry of these positions is \$realSG
                           X       Y       Z      occ height
EOF
    # copy over the atoms
    cat \${tempfile}.pdb >> \${tempfile}full.pdb
    rm -f \${tempfile}.pdb >& /dev/null
    rm -f \${tempfile}.frac >& /dev/null
    rm -f \${tempfile}.atoms >& /dev/null
    rm -f \${tempfile}peaks.pocc >& /dev/null

    if(\$?debug) echo "\$CELL" | nawk '{printf "%d %d %d ", \$1, \$2, \$3}'

    
    if(\$score > \$best_score) then
	set best_score = \$score
	if(-e \${outfile}.older) mv \${outfile}.older \${outfile}.oldest
	if(-e \${outfile}.old) mv \${outfile}.old \${outfile}.older
	if(-e \${outfile}) mv \${outfile} \${outfile}.old
	cp \${tempfile}full.pdb \$outfile >& /dev/null
	if(! \$status) echo -n " (saved as \$outfile)"
#	if(! \$status) echo -n " (saved)"
    endif
    
    
    # diagnostics
    if(\$?debug) then
	/max/jamesh/Develop/origins.com \$realSG right_sites.pdb \${tempfile}full.pdb |\\
	nawk 'NF==5{right=\$4} END{printf "%d", right}'
	
	nawk '\$10+0>0 || ! /^ATOM/' \${tempfile}full.pdb >! \${tempfile}.pdb 
	/max/jamesh/Develop/origins.com \$realSG right_sites.pdb \${tempfile}.pdb |\\
	nawk 'NF==5{right=\$4;fit=\$5} END{printf "%s", "/" right " correct " fit}'
    endif

    # end the line
    echo ""
    
    end	# sites
    end	# diff
    end	# cutSIG
    end	# cutRES
    
end	# dataset
end	# SG

# remove the scratch files of this run (all of them are kept when debugging)
Clean_up:
if(\$?debug) exit

rm -f \${tempfile}rantaned.mtz \${tempfile}rantan.map \\
      \${tempfile}.results \${tempfile}full.pdb >& /dev/null

rm -f \${tempfile}patt.map \${tempfile}datasets \\
      \${tempfile}mtz1.mtz \${tempfile}mtz2.mtz \\
      \${tempfile}cadded.mtz \${tempfile}scaled.mtz \\
      \${tempfile}scaleit.log >& /dev/null


exit






Setup:
#################################################

  ####   ######   #####  #    #  #####
 #       #          #    #    #  #    #
  ####   #####      #    #    #  #    #
      #  #          #    #    #  #####
 #    #  #          #    #    #  #
  ####   ######     #     ####   #

#################################################
#
#   gather information on:
#    mtz file
#    data sets
#    resolution limits
#    sigma cuttoff (for map generation)
#    difference cutoff
#
##################################################
# first pass over the command line: collect the mtz files
# (the user_* lists start out empty and are filled in by a later pass)
set best_score = -1
set mtzfiles   = ""
set user_SGs   = ""
set user_resos = ""
set user_sigs  = ""
set user_diffs = ""
set user_sites = ""

foreach arg ( \$* )
    # only words that look like mtz file names are of interest here
    if("\$arg" !~ *.mtz) continue

    # a missing file is probably a misspelling, so say so
    if(! -e "\$arg") then
        echo "WARNING: \$arg does not exist! "
        continue
    endif

    set mtzfiles = "\$mtzfiles \$arg"
end

# fall back on the default mtz when none was named
if("\$mtzfiles" == "") set mtzfiles = "\$mtzfile"

# gather data set labels, etc from the mtzs
#
# For every mtz file an "info" file is built and, when it holds at least one
# F or D record, appended to the cumulative labels file with the mtz file
# name added as the last field of every line.
set i = 0
echo "" >! \${tempfile}labels
foreach mtz ( \$mtzfiles )
    @ i = ( \$i + 1 )
    
    # dump and characterize contents
    echo "go" | mtzdump HKLIN \$mtz >! \${tempfile}mtzdump

    # header records: CELL (the line two below the "Cell Dimensions" title),
    # SYMM (numeric value of the last word of the "Space group" line) and
    # RESO (4th and 6th words of the line two below "Resolution Range")
    cat \${tempfile}mtzdump |\\
    nawk '/Cell Dimensions/{getline;getline;print "CELL", \$0}\\
	  /Space group/{print "SYMM", \$NF+0}\\
	  /Resolution Range/{getline;getline;print "RESO", \$4, \$6}' |\\
    cat >! \${tempfile}info
    
    # read in dataset info
    # Lines with more than six words whose next-to-last word is F, D or Q are
    # taken as column records (Q is written as S); the last word is used as
    # the label and the word four before the last as the column statistic.
    # A counter is advanced at every F or D record, and an S record gets the
    # type letter of the record just before it appended (giving SF or SD when
    # it follows an F or D), so a column and the sigma that follows it share
    # one counter value.  Resulting lines: type counter label statistic
    cat \${tempfile}mtzdump |\\
    nawk 'NF>6' |\\
    nawk '\$(NF-1)=="F"{print "F", \$NF, \$(NF-4)}\\
          \$(NF-1)=="D"{print "D", \$NF, \$(NF-4)}\\
          \$(NF-1)=="Q"{print "S", \$NF, \$(NF-4)}' |\\
    nawk '/^F/{++n} /^D/{++n} {printf "%s", \$1; \\
           if(\$1=="S") printf "%s", last;\\
    printf " %d %s %s\\n",n, \$2, \$3; last=\$1}' |\\
    cat >> \${tempfile}info
    
    # check for empty mtzs
    set temp = \`nawk '/^[FD]/' \${tempfile}info | wc -l\`
    if(\$temp > 0) then
	# add it to the pile
	cat \${tempfile}info |\\
	nawk -v mtz=\$mtz '{print \$0, mtz}' |\\
	cat >> \${tempfile}labels
    else
	echo "WARNING: \$mtz contains no useful data!"
    endif
    if(! \$?debug) rm -f \${tempfile}mtzdump >& /dev/null
    if(! \$?debug) rm -f \${tempfile}info >& /dev/null
end

# get complete, unique lists of DANOs
#
# A D record and the SD record that shares its counter and mtz file are
# joined into one six-field line:
#   D  label  sigma-label  signal/noise  counter  mtzfile
# where signal and noise are the statistics stored with the two records.
# A zero (or missing) noise value is replaced by 1 before dividing, because
# awk aborts on a division by zero.  (This guard used to assign to a wrong
# array index in this pass, so it never took effect.)
# Lines that do not come out with exactly six fields are dropped.
# NOTE(review): the sort key starts at the 3rd field, which is the sigma
# label and not the signal/noise ratio in the 4th field - confirm which was
# intended.  The key is kept where it was (-k3 is the portable spelling of
# the obsolete zero-origin "+2").
cat \${tempfile}labels |\\
nawk '{l= \$2 " " \$NF}\\
      \$1=="D"{D[l]=\$3; signal[l]=\$4} \\
      \$1=="SD"{S[l]=\$3; noise[l]=\$4} \\
      END{for(l in S){if(noise[l]+0==0) noise[l]=1; \\
	print "D", D[l], S[l], signal[l]/noise[l], l}}' |\\
nawk 'NF==6' |\\
sort -nr -k3 >! \${tempfile}mtzlabels

# get complete, unique lists of Fs
# (same treatment for F / SF records; the result is appended to the list)
cat \${tempfile}labels |\\
nawk '{l= \$2 " " \$NF}\\
      \$1=="F"{F[l]=\$3; signal[l]=\$4} \\
      \$1=="SF"{S[l]=\$3; noise[l]=\$4} \\
      END{for(l in S){if(noise[l]+0==0) noise[l]=1; \\
	print "F", F[l], S[l], signal[l]/noise[l], l}}' |\\
nawk 'NF==6' |\\
sort -nr -k3 >> \${tempfile}mtzlabels

# reconcile different cells? SGs?
# (nothing is reconciled yet: the first CELL and SYMM records in the labels
#  file are used, and the smallest value among the second RESO numbers)
set CELL  = \`nawk '/^CELL/{print \$2, \$3, \$4, \$5, \$6, \$7; exit}' \${tempfile}labels\`
set SGnum = \`nawk '/^SYMM/{print \$2; exit}' \${tempfile}labels\`
# translate the space group number into the name listed in symop.lib
set SG    = \` nawk -v num=\$SGnum '\$1==num && NF>5{print \$4}' \${CLIBD}/symop.lib \`
set hiRES = \`nawk '/^RESO/{print \$3}' \${tempfile}labels | sort -n | head -1\`

if(! \$?debug) rm -f \${tempfile}labels >& /dev/null


# give up (with the usage message) when no F or D column was found at all
set mtzlabels = \`cat \${tempfile}mtzlabels | wc -l\`
if("\$mtzlabels" == 0) then
    echo "no useful data in \$mtzfiles "
    goto Help
endif

##################################################
# get crystal and dataset information from the final mtz file


# make a two-way difference-dataset list (for recognition)
cat \${tempfile}mtzlabels |\\
nawk '\$1=="F"{++n; set[n]= \$2 " " \$3 " " \$NF;\\
      for(i=1;i<n;++i){print "F", set[i], set[n]; print "F", set[n], set[i];}}' |\\
cat >! \${tempfile}Fpairs


# one last pass through command line
# allow user overrides of all internal variables
set i = 0
echo -n "" >! \${tempfile}userlabels
echo -n "" >! \${tempfile}userpairs
echo -n "" >! \${tempfile}not_pairs
while( \$i < \$#argv )
    @ i = ( \$i + 1 )
    @ nexti = ( \$i + 1 )
    @ lasti = ( \$i - 1 )
    if(\$nexti > \$#argv) set nexti = \$#argv
    if(\$lasti < 1) set lasti = 1
    set arg = "\$argv[\$i]"
    
    # check for space groups
    if("\$arg" =~ [PpCcIiFfRrHh][1-6]*) then
	set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
	set temp = \`nawk -v SG=\$temp '\$4 == SG {print \$4}' \$CLIBD/symop.lib | head -1\`
	if("\$temp" == "") then
	    # check for "pseudo-spacegroup" language
	    set temp = \`echo "\$arg" | nawk 'toupper(\$1) ~ /[PC]2212|[PC]2122|P21221|P22121/'\`
	    # these are okay too, reindexing engine will understand
	endif
	if("\$temp" != "") then
	    # add this SG to the space group list
	    set user_SGs = "\$user_SGs \$temp"
	    continue
	endif
    endif
    
    # only look at non-file words
    if(! -e "\$arg") then
	# see if a dataset label was given
	set temp = \`nawk -v arg=\$arg '\$2==arg' \${tempfile}mtzlabels | wc -l\`
	if(\$temp) then
	    # a dataset label was mentioned
	    if(\$?NO) then
		# user doesn't want this label
		# filter it out of the input files
		nawk -v arg=\$arg '\$2!=arg' \${tempfile}mtzlabels >! \${tempfile}
		mv \${tempfile} \${tempfile}mtzlabels
	    else
		# must want only this label?
		nawk -v arg=\$arg '\$2==arg' \${tempfile}mtzlabels |\\
		cat >> \${tempfile}userlabels
	    endif
	    continue
	endif
    	
	# see if a difference-dataset label was given
	set temp = \`nawk -v arg=\$arg '\$2 "-" \$5 == arg{print \$2, \$5; exit}' \${tempfile}Fpairs\`
	if(\$#temp == 2) then
	    # a specific difference dataset was mentioned
	    if(\$?NO) then
		# user doesn't want this label pair
		# filter it out of the input files
		echo "NOT_A_PAIR \$temp" >> \${tempfile}not_pairs
	    else
		# must want only this pair (in this order)?
		cat \${tempfile}Fpairs |\\
		nawk -v F1=\$temp[1] -v F2=\$temp[2] '\$2==F1 && \$5==F2' |\\
		cat >> \${tempfile}userpairs
	    endif
	    continue	    
	endif
	
	# program options
	if("\$arg" =~ [Pp]att*) then
	    if(\$?NO) then
		set NO_PATT
	    else
		unset NO_PATT
	    endif
	endif
	
	# check for numbers
	if("\$arg" =~ [0-9]*) then
	    # we have a number
	    if(("\$arg" =~ *A)||("\$argv[\$nexti]" == "A")) then
		# user-preferred resolution limits
		set temp = \`echo "\$arg" | nawk 'BEGIN{FS="-"} \$1+0 > 0.1{print \$1+0} \$2+0 > 0.1{print \$2+0}'\`
		if(\$#temp != 1) then
		    set temp = \`echo \$temp | nawk '\$1>\$2{print \$1, \$2} \$2>\$1{print \$2, \$1}'\`
		    if(\$#temp == 2) then
				set loRES = "\$temp[1]"
				set user_resos = "\$user_resos \$temp[2]"
				continue
		    endif
		else
		    if("\$temp" != "") set user_resos = "\$user_resos \$temp"
		    continue
		endif
	    endif
	    
	    if(("\$arg" =~ *[0-9][Ss]*)||("\$argv[\$nexti]" =~ [Ss]igma)) then
		set user_sigs = "\$user_sigs "\`echo "\$arg" | nawk '\$1~/^[0-9\\.]/{print \$1+0}'\`
	    endif
	    
	    if(("\$argv[\$nexti]" =~ [Ss]ite)||("\$argv[\$nexti]" =~ [Ss]ites)) then
		set user_sites = "\$user_sites "\`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    
	    if(("\$arg" =~ *[Dd]iff)||("\$argv[\$lasti]" =~ [Dd]iff)) then
		set user_diffs = "\$user_diffs "\`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    if(("\$arg" =~ *[Dd]iso)||("\$argv[\$lasti]" =~ [Dd]iso)) then
		set MAX_diso = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    if(("\$arg" =~ *[Dd]ano)||("\$argv[\$lasti]" =~ [Dd]ano)) then
		set MAX_dano = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	endif
	
	# allow "NO" logic to carry through
	unset NO
	if(("\$arg" == "no")||("\$arg" == "not")) set NO
	if(("\$arg" == "don't")||("\$arg" == "ignore")) set NO
	if("\$arg" == "except") set NO
    endif
end

# honor user requests (when mentioned)
if("\$user_SGs"   != "") set SGs           = "\$user_SGs"
if("\$user_resos" != "") set RESOs         = "\$user_resos"
if("\$user_sigs"  != "") set sigma_cutoffs = "\$user_sigs"
if("\$user_diffs" != "") set max_diffs     = "\$user_diffs"
if("\$user_sites" != "") set target_sites  = "\$user_sites"


# check the "user" labels
#
# Reduce both label lists to one line per mtz file (the 6th field) and count
# how often every mtz file name shows up in the two reduced lists together.
# A file that shows up only once is an "orphan".
# -k6 is the portable spelling of the obsolete zero-origin "+5" sort key,
# which current sort implementations take for a file name.
sort -u -k6 \${tempfile}userlabels >! \${tempfile}mtzs
sort -u -k6 \${tempfile}mtzlabels >> \${tempfile}mtzs
cat \${tempfile}mtzs |\\
nawk '{++seen[\$6]} END{for(mtz in seen) if(seen[mtz]==1) print mtz}' |\\
cat >! \${tempfile}orphans

foreach mtz ( \`cat \${tempfile}orphans\` )
    # some mtzs were mentioned, but aren't in \${tempfile}userlabels
    # so, we'll just assume everything in them was wanted
    cat \${tempfile}mtzlabels |\\
    nawk -v mtz=\$mtz '\$6==mtz' |\\
    cat >> \${tempfile}userlabels
end
if(! \$?debug) rm -f \${tempfile}orphans   >& /dev/null
if(! \$?debug) rm -f \${tempfile}mtzlabels >& /dev/null
if(! \$?debug) rm -f \${tempfile}mtzs      >& /dev/null

# make sure no label is mentioned twice
cat \${tempfile}userlabels |\\
nawk '! seen[\$2 " " \$NF]{print} {++seen[\$2 " " \$NF]}' |\\
cat >! \${tempfile}
mv \${tempfile} \${tempfile}userlabels

# organize user's desired labels into difference-data sets
cat \${tempfile}userlabels |\\
nawk '\$1=="F"{++n; set[n]= \$2 " " \$3 " " \$NF;\\
      for(i=1;i<n;++i) print "F", set[i], set[n]}' |\\
cat >! \${tempfile}Fpairs

# see if user had particular pairing in mind
set temp = \`cat \${tempfile}userpairs | wc -l\`
if(\$temp) then
    # this overrides the Fpairs we just generated
    cat \${tempfile}userpairs >! \${tempfile}Fpairs
    
    # also any user-specified individual labels must be lone Fs? 
    cat \${tempfile}userlabels |\\
    nawk '\$1=="F"{print "F", \$2, \$3, \$NF}' |\\
    cat >> \${tempfile}Fpairs
endif
if(! \$?debug) rm -f \${tempfile}userpairs >& /dev/null

set Fpairs = \`nawk '\$1=="F"' \${tempfile}Fpairs | wc -l\`
if(! \$Fpairs) then
    # maybe there's just one, lone F?
    cat \${tempfile}userlabels |\\
    nawk '\$1=="F"{print "F", \$2, \$3, \$NF}' |\\
    cat >> \${tempfile}Fpairs
endif


# now filter out user-unwanted pairings
set temp = \`cat \${tempfile}not_pairs | wc -l\`
if(\$temp) then
    # user didn't want particular pairings
    cat \${tempfile}not_pairs \${tempfile}Fpairs |\\
    nawk '/^NOT_A_PAIR/{not_a_pair[\$2 " " \$3]=1; not_a_pair[\$3 " " \$2]=1;}\\
        ! /^NOT_A_PAIR/{if(! not_a_pair[\$2 " " \$5]) print}' |\\
    cat >! \${tempfile}
    mv \${tempfile} \${tempfile}Fpairs
endif

# assemble Diso and Dano data into the same "dataset" file
cat \${tempfile}userlabels |\\
nawk '\$1=="D"{print "D", \$2, \$3, \$NF}' |\\
cat >! \${tempfile}datasets
cat \${tempfile}Fpairs >> \${tempfile}datasets

if(! \$?debug) rm -f \${tempfile}Fpairs     >& /dev/null
if(! \$?debug) rm -f \${tempfile}userlabels >& /dev/null
if(! \$?debug) rm -f \${tempfile}userpairs  >& /dev/null
if(! \$?debug) rm -f \${tempfile}not_pairs  >& /dev/null

goto Return_from_Setup

####################################################

The Future:
EOF-rantanscript
chmod a+x ${scriptDIR}rantan.com



























write_reindex:

# don't overwrite user-modified script
if(-e ${scriptDIR}reindex.com) goto write_rrsps

#echo "writing ${scriptDIR}reindex.com"
cat << EOF-reindexscript >! ${scriptDIR}reindex.com
#! /bin/csh -f
#
#	${scriptDIR}reindex.com
#
#	simple interface to reindexing of merged or unmerged mtz data
#
#
# set this to wherever your awk program is
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

set mtzfile    = ""
set logfile    = "${logDIR}reindex.log"
set newmtzfile = "reindexed.mtz"
set tempfile   = ./reindex_temp

set message    = ""
set newSG      = ""

goto Setup
#
#	scan command line for mtz files, space groups, and preferences
#
Help:
cat << EOF-Help
usage: \$0 mtzfile.mtz [SG] [flip] [newmtz.mtz]

where:

mtzfile.mtz	- the mtz data you want to re-index. (merged or unmerged)
SG		- the new space group (P43 for example)
flip		- flip ambiguous axes of P4x P3x and P6x cells
newmtz.mtz	- name you want for the reindexed mtz (default: reindexed.mtz)

Example:    \$0 Fpp.mtz P43212 Fpp.P43212.mtz
Example:    \$0 Fpp.mtz flip

Note: to put the screw axis of P2221 on the shortest cell axis, say P2122
The space group in the new file will be P2221 again, but the cell
will be flipped around appropriately.  Same goes for P2212, or
P21221 and P22121 with P21212

EOF-Help
exit
#
Return_from_Setup:
# Main body of the reindexing script.  Setup has filled in mtzfile,
# newmtzfile, newSG, REINDEX, axes and message, and has set exactly one of
# the MERGED / UNMERGED flags.

# no usable input file: show the usage text and stop
if(! -e "\$mtzfile") goto Help

# okay to proceed
if("\$message" != "") set message = " \$message"
echo "reindexing \$mtzfile to \$newmtzfile (in \${newSG}\${message})" | tee \$logfile

# Merged data: three steps, each one appending to the log file and raising
# the BAD flag when the program returns a non-zero status.
if(\$?MERGED) then
    # apply any "flip" directives with "reindex"
    echo "\$REINDEX" |\\
    reindex HKLIN \$mtzfile HKLOUT \${tempfile}reindexed.mtz >> \$logfile
    if(\$status) set BAD
    
    # just use mtzutils to change SG record in header
    echo "SYMM \$newSG" |\\
    mtzutils HKLIN \${tempfile}reindexed.mtz HKLOUT \${tempfile}newSG.mtz >> \$logfile
    if(\$status) set BAD
    
    rm -f \${tempfile}reindexed.mtz >& /dev/null
    
    # sort it again, just to be safe
    echo "H K L" |\\
    sortmtz HKLIN \${tempfile}newSG.mtz HKLOUT \$newmtzfile >> \$logfile
    if(\$status) set BAD
    
    rm -f \${tempfile}newSG.mtz >& /dev/null
endif
    
# Unmerged data: the new symmetry and the reindexing operator are both given
# to reindex, and the result is sorted on the unmerged column set.
if(\$?UNMERGED) then
    reindex HKLIN \$mtzfile HKLOUT \${tempfile}reindexed.mtz << EOF-reindex >> \$logfile
    SYMM \$newSG
    \$REINDEX
EOF-reindex
    if(\$status) set BAD

    # sort again
    sortmtz HKLIN \${tempfile}reindexed.mtz HKLOUT \$newmtzfile << EOF-sort >> \$logfile
#VRSET -9E+38
H K L M/ISYM BATCH I SIGI
EOF-sort
    if(\$status) set BAD
    
endif
# remove intermediate file
rm -f \${tempfile}reindexed.mtz >& /dev/null


# success means: the output file exists and no step raised the BAD flag
if((-e "\$newmtzfile")&&(! \$?BAD)) then
    echo "\$newmtzfile is \$mtzfile in \${newSG}\${message}"
    # when an axis order was requested, also show the resulting cell
    if("\$axes" != "") then
	echo "head" | mtzdump hklin "\$newmtzfile" |\\
	nawk '/Cell Dimensions/{getline;getline;\\
	print "new cell:", \$1, \$2, \$3, \$4, \$5, \$6}'
    endif
else
    # something went wrong
    echo "FAILED!  examine \$logfile to see what went wrong."
endif

exit







Setup:
#############################################################
# check up on essentials
if(! \$?CLIBD) then
    echo "Please set up CCP4, and then run \$0 again"
    goto Help
endif
if(! -e \$CLIBD/symop.lib) then
    echo "ERROR: no \$CLIBD/symop.lib"
    echo "Please set up CCP4, and then run \$0 again"
    goto Help
endif

set REINDEX
set axes
set FLIP = 0

#first, get filenames from the command line
foreach arg ( \$* )
    if("\$arg" =~ *.mtz) then
	if("\$mtzfile" == "") then
	    # havn't initialized this yet
	    if(-e "\$arg") then
		# check to make sure it is readable
		echo "HEAD" | mtzdump HKLIN \$arg >&! \${tempfile}mtzdump
		grep "Space group" \${tempfile}mtzdump >& /dev/null
		if(\$status) then
		    echo "WARNING: \$arg is not an MTZ file! "
		    continue
		endif
		set mtzfile = "\$arg"
	    else
		echo "WARNING: \$arg does not exist! "
		continue
	    endif
	else
	    # already have an input mtz, so assume this is output
	    set newmtzfile = "\$arg"
	    
	    # look for "hidden" space group in name? 
	    echo \$newmtzfile |\\
	     nawk '{for(i=1;i<=length(\$0);++i){c=substr(\$0,i,1);\\
	       if(c ~ /[PpCcIiFfRrHh1-6]/){printf "%s", c}else{print ""}}}' |\\
	     nawk '\$1~/^[PpCcIiFfRrHh][1-6]/' >! \${tempfile}SGs
	    foreach sg ( \`tail -10 \${tempfile}SGs\` )
		set temp = \`nawk -v SG=\$sg '\$4 == SG && \$1 < 500 {print \$4}' \$CLIBD/symop.lib | head -1\`
		if("\$temp" != "") then
		    set newSG = "\$temp"
		endif
	    end
	    rm -f \${tempfile}SGs >& /dev/null
	endif
    endif

    # check for new space group
    if("\$arg" =~ [PpCcIiFfRrHh][1-6]*) then
	# check for SGs listed in library (but not the screwy ones)
	set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
	if(\$?CLIBD) then
	    set temp = \`nawk -v SG=\$temp '\$4 == SG && \$1 < 500 {print \$4}' \$CLIBD/symop.lib | head -1\`
	endif
	if("\$temp" != "") then
	    set newSG = "\$temp"
	endif
	
	set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
	# check for orthorhombic "pseudo-spacegroup" language
	if("\$temp" == P2221) then
	    # P2221 with screw along longest axis
	    set axes  = "a b c"
	    set newSG = "P2221"
	    continue
	endif
	if("\$temp" == P2212) then
	    # P2221 with screw along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P2221"
	    continue
	endif
	if("\$temp" == P2122) then
	    # P2221 with screw along shortest axis
	    set axes  = "c a b"
	    set newSG = "P2221"
	    continue
	endif
	
	if("\$temp" == P21212) then
	    # P21212 with non-screw along longest axis
	    set axes  = "a b c"
	    set newSG = "P21212"
	    continue
	endif
	if("\$temp" == P21221) then
	    # P21212 with non-screw along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P21212"
	    continue
	endif
	if("\$temp" == P22121) then
	    # P21212 with non-screw along shotest axis
	    set axes  = "c a b"
	    set newSG = "P21212"
	    continue
	endif
	    
	if("\$temp" == P112) then
	    # P2 with twofold along longest axis
	    #set axes  = "a b c"
	    set newSG = "P2"
	    continue
	endif
	if("\$temp" == P121) then
	    # P2 with twofold along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P2"
	    continue
	endif
	if("\$temp" == P211) then
	    # P2 with twofold along shortest axis
	    set axes  = "c a b"
	    set newSG = "P2"
	    continue
	endif
	    
	if("\$temp" == P1121) then
	    # P21 with twofold along longest axis
	    #set axes  = "a b c"
	    set newSG = "P21"
	    continue
	endif
	if("\$temp" == P1211) then
	    # P21 with twofold along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P21"
	    continue
	endif
	if("\$temp" == P2111) then
	    # P21 with twofold along shortest axis
	    set axes  = "c a b"
	    set newSG = "P21"
	    continue
	endif
    endif
	
    # check for flipping of ambiguous axes
    if("\$arg" == "flip") then
	# user requested "flip" of axes
	@ FLIP = ( \$FLIP + 1 )
    endif
    
    # what about cubic? 
end

if(! -e "\$mtzfile") then
    goto Help
endif

# get info from the MTZ (should be left over from command-line check)
set CELL  = \`nawk '/Cell Dimensions/{getline;getline;print}' \${tempfile}mtzdump\`
set SGnum = \` nawk '/Space group/{print \$NF+0}' \${tempfile}mtzdump \`
cat \$CLIBD/symop.lib |\\
nawk -v SGnum=\$SGnum '\$1==SGnum{type=substr(tolower(\$6),1,1); key=substr(\$6,4,1);\\
    if((type=="t")&&(key=="G")) type="h"; if((type=="t")&&(key=="C")) type="a";\\
    print \$1, \$5, type substr(\$4,1,1), \$6,\$4;}' |\\
head -1 >! \${tempfile}sgdata
set SGnum = \`nawk '{print \$1}' \${tempfile}sgdata\`
set SG    = \`nawk '{print \$5}' \${tempfile}sgdata\`
set PG    = \`nawk '{print \$2}' \${tempfile}sgdata\`
set BRAV  = \`nawk '{print \$3}' \${tempfile}sgdata\`
set LATT  = \`nawk '{print \$4}' \${tempfile}sgdata\`

if("\$newSG" == "") set newSG = "\$SG"
cat \$CLIBD/symop.lib |\\
nawk -v SG=\$newSG '\$4==SG{type=substr(tolower(\$6),1,1); key=substr(\$6,4,1);\\
    if((type=="t")&&(key=="G")) type="h"; if((type=="t")&&(key=="C")) type="a";\\
    print \$1, \$5, type substr(\$4,1,1), \$6;}' |\\
head -1 >! \${tempfile}sgdata
set newSGnum = \`nawk '{print \$1}' \${tempfile}sgdata\`
set newPG    = \`nawk '{print \$2}' \${tempfile}sgdata\`
set newBRAV  = \`nawk '{print \$3}' \${tempfile}sgdata\`
set newLATT  = \`nawk '{print \$4}' \${tempfile}sgdata\`
rm -f \${tempfile}sgdata >& /dev/null

# decide how/if to flip
#
# FLIP counts how many times the word "flip" appeared on the command line.
# The tests below look at the space group of the INPUT file (SG and PG) and
# run one after the other, so when more than one of them matches, the last
# match decides REINDEX and message.
if(\$FLIP) then
    if(("\$SG" =~ [IP][46]*)||("\$SG" =~ P3*12)||("\$SG" =~ [PIF]2*3)) then
	set REINDEX = "reindex k, h, -l"
	set message = "with a and b axes flipped"
    endif
    if(("\$SG" == R32)||("\$SG" =~ P3*21)) then
	set REINDEX = "reindex -h, -k, l"
	set message = "with a and b axes inverted"
    endif
    if(("\$SG" =~ [PR]3)||("\$SG" =~ P3[12])) then
	# four possibilities here...
	# (no flip at all, plus the three selected by the number of flips)
	if(\$FLIP == 1) then
	    set REINDEX = "reindex k, h, -l"
	    set message = "with a and b axes flipped"
	endif
	if(\$FLIP == 2) then
	    set REINDEX = "reindex -h, -k, l"
	    set message = "with a and b axes inverted"
	endif
	if(\$FLIP >  2) then
	    set REINDEX = "reindex -k, -h, -l"
	    set message = "with a and b axes flipped and inverted"
	endif
    endif
    if("\$SG" == P1) then
	# permute the axes (from their cannonical order)
	# only an axis order is chosen here; the operator itself is worked
	# out later from the axes variable (more than three flips leave
	# axes unchanged)
	if(\$FLIP == 1) set axes = "a b c"
	if(\$FLIP == 2) set axes = "b c a"
	if(\$FLIP == 3) set axes = "c a b"
    endif
    if("\$PG" == PG2) then
	# flip the a and c axes?
	set REINDEX = "reindex l, -k, h"
	set message = "with a and c axes flipped"
    endif
    # for other space groups, "flip" is simply ignored
endif

if(("\$newSG" == "")&&(! \$FLIP)) then
    # WTF?
    echo "nothing to do! "
    rm -f \${tempfile}mtzdump >& /dev/null
    goto Help
endif

# now that whole command-line has been read, make some holistic decisions


# check to see if it's merged or unmerged
grep "Number of Batches" \${tempfile}mtzdump >& /dev/null
if(\$status) then
    set MERGED
else
    set UNMERGED
endif
rm -f \${tempfile}mtzdump


# decide on new axis ordering (for asymmetric orthorhombics)
#
# axes holds three letters (a permutation of a, b, c).  The first letter is
# the name the shortest cell edge should get, the second the name for the
# middle edge and the third the name for the longest edge.  P222 and P212121
# always get the "a b c" order.
if("\$newSG" == "P222")    set axes = "a b c"
if("\$newSG" == "P212121") set axes = "a b c"
if("\$axes" != "") then
    # get current axis ordering
    # find out what the cannonical one would be
    # then decide how to go from current ordering to the desired one
    #
    # Pipeline stages:
    #  1. tag the three cell edge lengths with their current index (h, k, l)
    #  2. sort by length and add the cannonical name (a shortest, c longest)
    #  3. put the requested name from axes in front of each line
    #  4. sort on that requested name, so the lines are in new a, b, c order
    #  5. print the old index of each line: the new h, k, l in terms of the old
    #  6. when the result is not a cyclic permutation of h k l, negate the
    #     third index (presumably to keep the axis system right-handed -
    #     confirm)
    echo "\$CELL" | nawk '{\\
	# print out current axis order \\
	print \$1, "h"; print \$2, "k"; print \$3, "l"}' |\\
    sort -n |\\
    nawk '\\
	# add cannonical axis names\\
	NR==1{print \$0, "a"} NR==2{print \$0, "b"} NR==3{print \$0, "c"}' |\\
    nawk -v axes="\$axes" 'BEGIN{split(axes, abc)} {\\
	# write desired axis ordering in front of cannonical one \\
	print abc[NR], \$0}' |\\
    sort |\\
    nawk '# print out new hkl order \\
          {printf "%s ", \$3} END{print ""}' |\\
    nawk '\$1 \$2 \$3 \$1 \$2 \$3 !~ /hkl/{\$3 = "-" \$3} {print \$1, \$2, \$3}' |\\
    cat >! \${tempfile}order    
    set REINDEX = "reindex "\`nawk '{print \$1 ",", \$2 ",", \$3}'  \${tempfile}order\`

    # this should give us a mapping between any two orthorhombics
    
    # temp is the old index (with its sign, if negated) that becomes the new
    # third index; it is only used in the message shown to the user
    set temp = \`nawk '{print \$NF}' \${tempfile}order\`
    rm -f \${tempfile}order
    if("\$newSG" == "P2221") then
	set message = "with screw along \$temp axis"
    endif
    if("\$newSG" == "P21212") then
	set message = "with non-screw along \$temp axis"
    endif
    if("\$newSG" == "P2") then
	set message = "with twofold along \$temp axis"
    endif
    if("\$newSG" == "P21") then
	set message = "with twofold screw along \$temp axis"
    endif
    if("\$newSG" == "P1") then
	set message = "with axes in \$axes order."
    endif
endif


# known ways to convert between bravais lattices
if(("\$newBRAV" == "oC")&&("\$BRAV" =~ h[PR])) then
    # this lattice happens to fit, even if it's merged
    set REINDEX = "reindex h+k, k-h, l"
    set message = "with old hkl -> h+k,k-h,l"
    set OKAY
endif
set change_type = \`echo \$SG \$newSG | nawk 'substr(\$1,1,1) != substr(\$2,1,1)'\`
# cubic to just about anything should be okay
if(("\$change_type" == "")&&("\$BRAV" =~ c*)&&("\$newBRAV" !~ h*)) set OKAY
# tetratonal to anything below it should be okay
if(("\$change_type" == "")&&("\$BRAV" =~ t*)&&("\$newBRAV" !~ h*)&&("\$newBRAV" !~ c*)) set OKAY
# orthorhombic to anything below it is okay

# anything can be converted to P1
if("\$newBRAV" == aP) set OKAY

# now check for unhandlable situations


if((\$?MERGED)&&("\$newPG" != "\$PG")&&("\$PG" != "")&&(! \$?OKAY)) then
    # can't do this
    echo "WARNING: You Shouldn't reindex \$SG to \$newSG for a merged mtz! "
    echo ""
    if(\$SGnum < \$newSGnum) then
	cat << EOF
Since \$newSG has higher symmetry than \$SG, we would have
to merge some "unique" spots in \$mtzfile together.  Although
this is technically possible, it is definitely not advisable.
If your space group really is \$newSG, you will probably get
much better scaling and mergeing performance with the new
symmetry imposed during data processing, and probably in
data reduction as well.

EOF
    else
	cat << EOF
Since \$SG has higher symmetry than \$newSG, we would have
to "unmerge" the unique HKLs in \$mtzfile to form multiple, 
yet completely identical Fs.  So, all this will do is
make your mtz bigger, so we will just change the spacegroup
name.

If you think you "overmerged" your data in \$SG, then you
should go back to your mergeing step, and change \$SG to
\$newSG there.

EOF
    endif
#    exit
endif

if(\$?MERGED) goto Return_from_Setup
# now we are dealing with unmerged data

# what do we do about THIS! 
set temp = \`echo "\$newSG \$SG" | nawk 'substr(\$1,1,1) != substr(\$2,1,1)'\`
if("\$temp" != "") then
    # crystal system has changed!
    if(\$newSGnum < \$SGnum) then
	# we are going to loose some data
	
    else
	
    endif
    echo "WARNING: changing the lattice symmetry "
endif

goto Return_from_Setup
EOF-reindexscript
chmod a+x ${scriptDIR}reindex.com









write_rrsps:
# Write the rrsps.com helper script (a recursive driver around the rsps
# program) into the script directory, unless a copy is already there.

# don't overwrite user-modified script
if(-e ${scriptDIR}rrsps.com) goto write_bestFH


#echo "writing ${scriptDIR}rrsps.com"
# The here-document delimiter is not quoted, so this shell substitutes
# variables while the file is written.  Everything that must be evaluated
# when rrsps.com itself runs is backslash-escaped in the text below.
cat << EOF-rrspsscript >! ${scriptDIR}rrsps.com
#! /bin/csh -f
#
#	${scriptDIR}rrsps.com   - Recursive RSPS
#
#	For completely brute-force heavy atom searching :)
#
#   courtesy of the Phaser Elves
#
# set this to wherever your awk program is
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

# search defaults; the Setup section replaces the sigma cutoff and the
# site limit when "N sigma" / "N sites" appear on the command line
set SIGMA_CUTOFF = 3
set CLOSE_peaks  = 0.5
set MAX_SITES    = 30
set MAXPEAKS     = 300
set outfile      = "./rrsps.sites"

# filled in by the Setup section from the command line
set mapfiles     = ""
set SGs          = ""
set mapfile      = ""
set SG           = ""
set fixxyzfile   = ""
# prefix for all scratch files
set tempfile     = ./rrsps_tempfile
set bestscore    = 0

# extra argument handed to rsps; stays empty unless Setup builds a
# private space group library
set SYMOP
goto Setup
#############################################################
#
#   scan command line for map filename, space group(s)
#   and "fixed" site list file
#
Help:
# Print the usage message.  Setup jumps here when no usable map file or
# space group was found; control then leaves through the cleanup section.
cat << EOF-Help
usage: \$0 patt.map P212121 3 sigma 30 sites

where:

patt.map    - is the Patterson map you want to solve (CCP4 format)
P212121	    - is the space group you want to try
3 sigma	    - only consider harker peaks > 3*sigma
30 sites    - maximum number of sites you expect to find

Note: you CAN specify more than one space group, 
      and/or more than one map file.  Each will be
      considered in turn.

EOF-Help

goto Cleanup_RSPS
#############################################################
ReturnFrom_Setup:
# Entry point after command-line parsing.  The RRSPS_DEPTH environment
# variable tells the instances apart: it is unset in the process the user
# started, and every child process inherits and increments it.

# this is the tricky one, this file MUST NOT be
# overwritten by the child processes! 
if(! \$?RRSPS_DEPTH) then
    # this must be the first instance
    setenv RRSPS_DEPTH 0

    # do multi-spacegroup run
    foreach mapfile ( \$mapfiles )
	echo "using \$mapfile"
	foreach SG ( \$SGs )
	    # recurse
	    #echo "trying \$SG"
	    
	    # initialize the cumulative files
	    echo -n "" >! \${tempfile}_\${SG}_biglist
	    echo -n "" >! \${outfile}_\${SG}
	    
	    # recurse
	    # NOTE(review): a fixed-sites file given by the user is not
	    # handed on to this first child; confirm that is intended
	    \$0 \$SG \$mapfile \$PASSALONG
	end
    end
    # we are done
    goto Cleanup_RSPS
endif


# if we reach this point 
# there is only one SG and one map
set mapfile = "\$mapfiles"
set SG      = "\$SGs"


# this variable keeps rrsps child runs from overwriting
# their parent job's files
@ temp = ( \$RRSPS_DEPTH + 1 )
setenv RRSPS_DEPTH \$temp
# scratch-file prefix that is private to this recursion depth
set unique = "\${tempfile}_\$RRSPS_DEPTH"


# skip over harker procedure on all but first instance
# (a child that was handed an existing fixed-sites file goes straight
#  to the cross-vector search)
if(-e "\$fixxyzfile") goto cross_scan


harker_scan:
#############################################################
# kick-start heavy atom search with a harker scan
#
# Runs rsps in "harker" mode on the map, divides the score of every
# picked peak by the rms value reported in the rsps log, and keeps the
# peaks above the sigma cutoff as candidate sites for the Recurse section.
echo -n "\$SG harker scan... "
rsps \$SYMOP << EOF-harker >! \${tempfile}.log
spacegroup \$SG
patfile \$mapfile
scorfile \${tempfile}rsps.map

mode harker
reject 0
scan au
pick scoremap \$MAXPEAKS
EOF-harker
# grep exits with status 0 when it finds the word ERROR, hence the
# negated status test below
grep ERROR \${tempfile}.log
if(! \$status) then
    echo "RSPS failed! see rsps.error.log to find out why."
    mv \${tempfile}.log rsps.error.log
    goto Cleanup_RSPS
endif

# convert any peaks > sigma cutoff to new, potential sites
# (the list is sorted on its 7th column, the score in sigma units, best
#  first; the key is written with -k because the old zero-based "+6"
#  form is obsolescent and not accepted by every sort implementation)
cat \${tempfile}.log |\\
nawk 'BEGIN{rms=999999} \\
    /Rms deviation from mean/ && \$NF+0 != 0{rms=\$NF+0}\\
    /ANGSTROM COORDINATES/{table=1} \\
    NF>7 && ! /[A-Za-z]/{print \$2, \$3, \$4, \$5, \$6, \$7, \$8/rms}' |\\
nawk -v cutoff=\$SIGMA_CUTOFF '\$NF > cutoff' |\\
sort -nr -k 7 >! \${unique}_newsites
#cat >! \${unique}_newsites

rm -f \${tempfile}.log     >& /dev/null
rm -f \${tempfile}rsps.map >& /dev/null

# report the number of hits and the score of the weakest one kept
set hits = \`cat \${unique}_newsites | wc -l\`
set temp = \`tail -1 \${unique}_newsites | nawk '{printf "%.2f", \$NF}'\`
echo "\$hits hits > \$temp sigma"
if((\$hits == 0)||("\$SG" == "P1")) then
    # woops! no harker vectors!
    # fall back to a single site at the origin, scored at the cutoff
    echo "0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 \$SIGMA_CUTOFF" >! \${unique}_newsites
endif


# now act like we just found this in a "regular" (cross) scan
set fixxyzfile = \${tempfile}.empty
touch \$fixxyzfile

goto Recurse





cross_scan:
#############################################################
# look for new sites, given that we already have 
# the sites listed in \$fixxyzfile
#
# Steps: build an rsps input deck with one "fixxyz" card per known site,
# run the cross-vector scan, keep the peaks whose harker score beats the
# sigma cutoff, drop peaks that coincide with a known site or one of its
# symmetry mates, append every surviving site combination to the
# cumulative biglist file, and refresh the best-solution file.

# tell RSPS to scan ASU for sites with cross-vectors to \$fixxyzfile
cat << EOF-cross >! \${tempfile}rsps.in
spacegroup \$SG
patfile \$mapfile
scorfile \${tempfile}rsps.map

mode cross
#reject 0
EOF-cross
cat \$fixxyzfile |\\
nawk '{print "fixxyz", \$1, \$2, \$3}' >> \${tempfile}rsps.in

# also get the "harker score", and avoid crossvector search peaks
# that don't have Harker peaks
cat << EOF-cross2 >> \${tempfile}rsps.in
scan au
pick scoremap \$MAXPEAKS
mode harker
vlist site 1 \$MAXPEAKS
EOF-cross2

# run rsps and convert any peaks > sigma cutoff to new, potential sites
# (the second awk pairs the n-th peak line with the n-th SCORE line; the
#  result is sorted on its 7th column, the score, best first.  The key is
#  written with -k because the old zero-based "+6" form is obsolescent
#  and not accepted by every sort implementation)
cat \${tempfile}rsps.in | rsps |\\
nawk 'BEGIN{rms=999999} \\
    /Rms deviation from mean/ && \$NF+0 != 0{rms=\$NF+0}\\
    /ANGSTROM COORDINATES/{table=1} \\
    \$1=="SCORE"{++n; print n, \$3} \\
    NF>7 && ! /[A-Za-z]/{print \$2, \$3, \$4, \$5, \$6, \$7}' |\\
nawk -v cutoff=\$SIGMA_CUTOFF '\\
      NF!=2{++n;line[n]=\$0}\\
      NF==2 && \$2+0>cutoff{print line[\$1], \$2}' |\\
sort -nr -k 7 >! \${unique}_newsites
#cat >! \${unique}_newsites

rm -f \${tempfile}rsps.in  >& /dev/null
rm -f \${tempfile}rsps.map >& /dev/null

# filter out the fixxyz sites, and their symm mates

# generate all symmetry mates of fixed sites
cat \$fixxyzfile |\\
nawk -v SG=\$SG -v CELL="\$CELL" '\\
    BEGIN{print "SYMM", SG; print "CELL", CELL;\\
          print "XYZLIM -0.1 1.1 -0.1 1.1 -0.1 1.1"} \\
    {print "ATOM X", \$1, \$2, \$3}' |\\
gensym |\\
nawk '/List of sites/,/Normal termination/' |\\
nawk '\$2 ~ /[01].[0-9][0-9][0-9]/ {print \$5, \$6, \$7, "fixed"}' |\\
cat >! \${tempfile}fixxyz


# keep only the new sites that are further than CLOSE_peaks from every
# fixed site; columns 4-6 of a new site are compared with the gensym
# output columns (presumably both orthogonal coordinates - TODO confirm)
cat \${tempfile}fixxyz \${unique}_newsites |\\
nawk -v cut=\$CLOSE_peaks ' \\
      \$NF=="fixed"{++n; X[n]=\$1; Y[n]=\$2; Z[n]=\$3} \\
      \$NF!="fixed"{minD=999999; \\
         # find nearest "old" site \\
         for(i=1;i<=n;++i){\\
	     dist=sqrt((\$4-X[i])^2 +(\$5-Y[i])^2 +(\$6-Z[i])^2);\\
	     if(dist < minD){\\
	     minD=dist;}}\\
	 # now see if it is too close \\
	 if(minD > cut) {print}}' |\\
cat >! \${tempfile}
mv \${tempfile} \${unique}_newsites





# now represent each "new site" as it's true, full constellation
# of cross-vector related peaks

# label the "fixxyz" sites, and their scores
cat \$fixxyzfile |\\
nawk '{print \$1, \$2, \$3, \$NF, "fixed"}' >! \${tempfile}fixxyz

# explode the list
# (one output line per new site: product of all scores, the fixed sites,
#  the new site, then "|", the space group, the map file and the scores)
nawk 'NF>3' \${tempfile}fixxyz \${unique}_newsites |\\
nawk -v SG=\$SG -v file=\$mapfile 'BEGIN{ bigscore=1;} \\
      \$NF=="fixed"{# collect list of fixed sites \\
          ++n; X[n]=\$1; Y[n]=\$2; Z[n]=\$3; score[n]=\$(NF-1); bigscore*=\$(NF-1)} \\
      \$NF!="fixed"{ printf "%11.1f ", bigscore*\$NF \\
          # print out all fixed sites first \\
          for(i=1;i<=n;++i){printf "%s %s %s ", X[i], Y[i], Z[i];}\\
	  # each new site goes in the middle \\
          printf "%s %s %s | %s %s ", \$1, \$2, \$3, SG, file; \\
          # sigma scores follow in the same order \\
	  for(i=1;i<=n;++i){printf "%s ", score[i]}; \\
	  print \$NF }' |\\
cat >> \${tempfile}_\${SG}_biglist



# update "best" group of sites yet
# (remember the previous best product, then rewrite the solution file
#  from the highest-scoring line of the biglist)
set bestscore = \`tail -1 \${outfile}_\$SG |& nawk '{print \$NF+0}'\`
sort -n \${tempfile}_\${SG}_biglist | tail -1 |\\
nawk 'BEGIN{FS="|"} {print \$1; print \$2}' |\\
nawk 'NR==1{n=0; bigscore=\$1; for(i=2;i<=NF;i+=3){++n; \\
	X[n]=\$i; Y[n]=\$(i+1); Z[n]=\$(i+2);}} \\
      NR==2{print \$1, "in", \$2; for(i=1;i<=n;++i){\\
        printf "%4d %8.5f %8.5f %8.5f %8.1f*sigma\\n", i, X[i], Y[i], Z[i], \$(i+2);}\\
	printf "product: %31.1f\\n", bigscore}' |\\
cat >! \${outfile}_\$SG

# show the solution on the terminal only when the best product changed
set temp = \`tail -1 \${outfile}_\$SG | nawk '{print \$NF+0}'\`
if("\$bestscore" != "\$temp") then
    echo ""
    cat \${outfile}_\$SG
    set bestscore = "\$temp"
endif


# don't need this anymore
rm -f \${tempfile}fixxyz >& /dev/null

Recurse:
#############################################################
# Walk through every candidate site left in the new-site list and start
# a child copy of this script for each one, handing it the sites fixed
# so far plus that one candidate.  The walk stops early once the
# recursion depth reaches the site limit.

set npicks = \`cat \${unique}_newsites | wc -l\`
set pick   = 0
while ( (\$pick < \$npicks) && (\$RRSPS_DEPTH < \$MAX_SITES) )
    @ pick++

    # child input: the fixed sites this instance was given,
    cat \$fixxyzfile >! \${unique}_fixxyz
    # followed by candidate number "pick" from the new-site list
    nawk -v site=\$pick 'NR==site' \${unique}_newsites >> \${unique}_fixxyz

    # percentage progress meter, printed by the depth-1 instance only
    if(\$RRSPS_DEPTH == 1) then
	echo "\$pick \$npicks" | nawk '\$2+0>0 && \$1+0>1{printf "%d%%", 100*\$1/\$2}'
    endif
    echo -n "."

    # every child appends its own findings to the shared biglist file
    \$0 \$SG \$mapfile \$PASSALONG \${unique}_fixxyz
end

# the scratch lists of this instance are no longer needed
rm -f \${unique}_fixxyz \${unique}_newsites >& /dev/null

goto Cleanup_RSPS


exit







Setup:
# Scan the command line.  Every argument is classified as a space group
# name, an "N sigma" cutoff, an "N sites" limit, a CCP4 map file, or a
# file of fixed sites.  Results are left in the SGs, mapfiles,
# SIGMA_CUTOFF, MAX_SITES, CELL, fixxyzfile and PASSALONG variables.
# Control jumps to Help when no map or no space group was recognized.

set i = 0
while( \$i < \$#argv )
    @ i = ( \$i + 1 )
    @ nexti = ( \$i + 1 )
    @ lasti = ( \$i - 1 )
    if(\$nexti > \$#argv) set nexti = \$#argv
    if(\$lasti < 1) set lasti = 1
    set arg = "\$argv[\$i]"
    
    if(! -e "\$arg") then
	# space group
	if("\$arg" =~ [PpCcIiFfRrHh][1-6]*) then
	    set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
	    # when the CCP4 library is available, accept only names it lists
	    if(\$?CLIBD) then
		set temp = \`nawk -v SG=\$temp '\$4 == SG {print \$4}' \$CLIBD/symop.lib | head -1\`
	    endif
	    if("\$temp" != "") then
		# add this SG to the space group list
		set SGs = "\$SGs \$temp"
		continue
	    else
		# check for "pseudo-spacegroup" language
		set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
		set temp = \`echo \$temp | nawk '/P2212|P2122|P21221|P22121/'\`
		if("\$temp" != "") then
		    # PG222 with screw along non-standard axis
		    set SGs = "\$SGs \$temp"
		    # create new spacegroup library for RSPS
		    if(! -e "\${tempfile}rsps_spacegroups") mkdir \${tempfile}rsps_spacegroups
		    # create the sub-directory that is being tested for; the
		    # old line re-created the parent directory instead, which
		    # always failed with a "File exists" complaint
		    if(! -e "\${tempfile}rsps_spacegroups/data") mkdir \${tempfile}rsps_spacegroups/data
		    cat \$CLIBD/symop.lib >! \${tempfile}rsps_spacegroups/symop.lib
		    set SYMOP = "SYMOP \${tempfile}rsps_spacegroups/symop.lib"
		endif
	    endif
	endif
	
	if("\$arg" =~ [0-9]*) then
	    # we have a number
	    set temp = \`echo "\$arg \$argv[\$nexti]" | nawk 'tolower(\$0)~/sigma\$/ || tolower(\$1)~/sigma\$/{if(\$1+0>0) print \$1+0}'\`
	    if("\$temp" != "") then
		set SIGMA_CUTOFF = "\$temp"
		# swallow the next word only when the keyword was a separate
		# argument ("3 sigma"); for the one-word form ("3sigma") the
		# next argument is unrelated and must still be looked at
		if("\$arg" !~ *[Ss][Ii][Gg][Mm][Aa]) then
		    @ i = ( \$i + 1 )
		endif
		continue
	    endif
	    
	    # maximum number of sites to find (recursion depth)
	    # (the stored limit is the requested number plus one)
	    set temp = \`echo "\$arg \$argv[\$nexti]" | nawk 'tolower(\$0)~/sites\$/ || tolower(\$1)~/sites\$/{if((\$1+0>1)&&(int(\$1+0)==\$1+0)) print \$1+1}'\`
	    if("\$temp" != "") then
		set MAX_SITES = "\$temp"
		# same rule as for the sigma keyword above
		if("\$arg" !~ *[Ss][Ii][Tt][Ee][Ss]) then
		    @ i = ( \$i + 1 )
		endif
		continue
	    endif
	endif
    endif
    
    # files
    if(-e "\$arg") then
	if("\$arg" =~ *.map) then
	    # check to see if it's a ccp4 map
	    set temp = \`echo "" | mapdump MAPIN \$arg |& nawk '/Cell dimensions/{print \$4, \$5, \$6, \$7, \$8, \$9}'\`
	    if("\$temp" != "") then
		set mapfiles = "\$mapfiles \$arg"
		set CELL = "\$temp"
	    else
		echo "WARNING: \$arg is not a CCP4 map! "
	    endif
	else
	    # see if this is a "site" file
	    egrep "[01].[0-9][0-9][0-9]" \$arg >& /dev/null
	    if(! \$status) then
		set temp = \`nawk '\$1~/^[0-1].[0-9][0-9][0-9]/ || \$1~/^-[0-1].[0-9][0-9][0-9]/ ' \$arg | wc -l\`
		if(\$temp > 0) then
		    # use as the "sites" file
		    set fixxyzfile = "\$arg"
		endif
	    endif
	endif
    endif
end

# turn the accumulated strings into word lists so they can be counted
set SGs = \`echo "\$SGs"\`
set mapfiles = \`echo "\$mapfiles"\`
if((\$#mapfiles == 0) || (\$#SGs == 0)) goto Help

# Options handed to every child process.  The child runs this same
# parser, which adds one to the "N sites" number it reads, so one less
# than the current limit is passed on.  Handing MAX_SITES itself down
# made the limit grow by one per recursion level, in step with the
# depth counter, so the depth test in the Recurse loop could never stop
# the recursion.
@ temp = ( \$MAX_SITES - 1 )
set PASSALONG = "\$temp sites \$SIGMA_CUTOFF sigma"


# extend spacegroup library
#
# Appends non-standard orthorhombic settings to symop.lib, but only when
# CLIBD points at the private library directory of this script.
# NOTE(review): nothing visible in this script ever sets CLIBD to that
# directory, so this branch looks inactive; see "Future plans" below.

# CLIBD is optional elsewhere in this script; expanding it while it is
# undefined would abort the run, so leave early in that case
if(! \$?CLIBD) goto ReturnFrom_Setup

if("\$CLIBD" == "\${tempfile}rsps_spacegroups") then
    # add pseudo-spacegroups
    cat << EOF-new_sg_lib >> \${CLIBD}/symop.lib
1017 4 4 P2122 PG222 ORTHORHOMBIC 'P 21 2 2' !(unique axis a)
 X,Y,Z 
 -X,Y,-Z 
 1/2+X,-Y,-Z 
 1/2-X,-Y,Z
2017 4 4 P2212 PG222 ORTHORHOMBIC 'P 2 21 2' !(unique axis b)
 X,Y,Z 
 X,1/2-Y,-Z 
 -X,1/2+Y,-Z 
 -X,-Y,Z
1018 4 4 P21212a PG222 ORTHORHOMBIC 'P 21 21 2 (a)' ! origin on 21 21, shift (1/4,1/4,0)
 X,Y,Z 
 1/2-X,1/2-Y,Z 
 X+1/2,-Y,-Z 
 -X,Y+1/2,-Z
2018 4 4 P21221 PG222 ORTHORHOMBIC 'P 21 2 21'  !(unique axis b)
 X,Y,Z 
 -X,Y,-Z 
 1/2+X,-Y,1/2-Z 
 1/2-X,-Y,1/2+Z
3018 4 4 P22121 PG222 ORTHORHOMBIC 'P 2 21 21'  !(unique axis a)
 X,Y,Z 
 X,-Y,-Z 
 -X,1/2+Y,1/2-Z 
 -X,1/2-Y,1/2+Z
1020 2 1 C2221a PG222 ORTHORHOMBIC 'C 2 2 21a)' ! P212121 with C centring, shift(1/4,0,0)
 X,Y,Z 
 1/2-X,-Y,1/2+Z 
 1/2+X,1/2-Y,-Z 
 -X,1/2+Y,1/2-Z
 1/2+X,1/2+Y,Z 
 -X,1/2-Y,1/2+Z 
 X,-Y,-Z 
 1/2-X,Y,1/2-Z
1021 2 1 C222a PG222 ORTHORHOMBIC 'C 2 2 2a'  ! C21212a origin on 21 21
 X,Y,Z 
 1/2-X,1/2-Y,Z 
 X+1/2,-Y,-Z 
 -X,Y+1/2,-Z
 1/2+ X,1/2+Y,Z 
 -X,-Y,Z 
 X,1/2-Y,-Z 
 1/2-X,Y,-Z
1022 4 1 F222a PG222 ORTHORHOMBIC 'F 2 2 2a' ! same as 1018 with face centring ! shift (1/4,0,0)
 X,Y,Z         
 1/2-X,1/2-Y,Z 
 X+1/2,-Y,-Z 
 -X,Y+1/2,-Z
 X,Y+1/2,Z+1/2 
 1/2-X,-Y,Z+1/2 
 X+1/2,-Y+1/2,-Z+1/2 
 -X,Y,-Z+1/2
 X+1/2,Y,Z+1/2 
 -X,1/2-Y,Z+1/2 
 X,-Y,-Z+1/2 
 -X+1/2,Y+1/2,-Z+1/2
 X+1/2,Y+1/2,Z 
 -X,-Y,Z 
 X,-Y+1/2,-Z 
 -X+1/2,Y,-Z
EOF-new_sg_lib
endif


goto ReturnFrom_Setup


Cleanup_RSPS:
# Common exit path.  Only the instance whose depth counter is 0 (the one
# started by the user) prints the final report; every instance removes
# the private space group library directory.


# the depth is unset when we arrive here straight from the Help section
if(! \$?RRSPS_DEPTH) set RRSPS_DEPTH = -1

if(\$RRSPS_DEPTH == 0) then
    # the "real" finish
    echo ""
    echo "done! "
    
    foreach SG ( \$SGs )
	echo ""
	echo -n "best solution for "
	cat \${outfile}_\$SG

	# replace the best-solution file by a full listing of every site
	# combination in the biglist, highest product first
	sort -nr \${tempfile}_\${SG}_biglist |\\
	nawk 'BEGIN{FS="|"} NF==2{print \$1; print \$2}' |\\
	nawk 'NR%2==1{n=0; bigscore=\$1; for(i=2;i<=NF;i+=3){++n; \\
		X[n]=\$i; Y[n]=\$(i+1); Z[n]=\$(i+2);}} \\
	      NR%2==0{print ""; print \$1, "in", \$2; for(i=1;i<=n;++i){\\
	        printf "%4d %8.5f %8.5f %8.5f %8.1f*sigma\\n", i, X[i], Y[i], Z[i], \$(i+2);}\\
		printf "product: %31.1f\\n", bigscore}' |\\
	cat >! \${outfile}_\$SG
	echo "full listing for \$SG is in \${outfile}_\$SG"
	
	rm -f \${tempfile}_\${SG}_biglist >& /dev/null
    end
    
    rm -f \${tempfile}.empty >& /dev/null
endif

rm -rf \${tempfile}rsps_spacegroups/ >& /dev/null


exit

########################
# Future plans

- fix the non-standard orthorhombic implementation

EOF-rrspsscript
# make the freshly written script executable for everybody
chmod a+x ${scriptDIR}rrsps.com




















write_bestFH:
#################################################################################
# Write the bestFH.com helper script into the script directory, unless a
# copy is already there.  The here-document delimiter is not quoted, so
# un-escaped variables below are substituted while the file is written.

# don't overwrite user-modified script
if(-e ${scriptDIR}bestFH.com) goto write_dm

cat << EOF-bestFH_script >! ${scriptDIR}bestFH.com
#! /bin/csh -f
#
#
#	bestFH.com 				-James Holton 8-25-00
#
#	calculate a "best" estimate of F for the heavy atoms
#	alone (a la B. W. Matthews 1966 Acta Cryst 20, 230-239)
#	by combining ALL anomalous and isomorphous differences.  
#	(as many as you want)
#
#	FH should give cleaner Pattersons, difference Fouriers,
#	and direct methods.
#
#	Do NOT combine differences from data sets expected to have
#	different heavy atom locations.  That would be silly.
#
#
# set this to wherever your awk program is
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

# the mtz file name known at generation time becomes the default here
set mtzfile   = "$mtzfile"		# data sets
set outfile   = "./FH.mtz"		# contains FH, SIGFH
set shelxfile = "./fh.hkl"		# same thing, shelx format
set fourfile  = "./FH_Four.map"		# Phased Fourier of FH (if phase is available)
set pattfile  = "./FH_Patt.map"		# Combined Patterson map
set wpattfile = "./wFH_Patt.map"	# Combined, weighted Patterson map
set logfile   = "./bestFH.log"		# all the CCP4 logs

# prefix for all scratch files
set tempfile = ./bestFH_temp


# initialize internal variables
set loRES = 1000
set hiRES = ""
set order = ""		# "order" of increasing dispersive signal data sets

set DATA_cutoff = 1	# sigma cutoff      (not used)
set MAX_dano 		# upper Dano cutoff (not used)
set MAX_diso		# upper Diso cutoff (not used)
set scaling = scale	# apply one scale to each difference data set
#set scaling = isotropic # use a B-factor too (wise? )

# running without arguments only prints the usage text
if(\$#argv == 0) goto Help

# this procedure (re)sets most of the above variables
# from either the provided files, or the command line
goto Setup

Help:
# Print the usage text, remove the scratch lists and leave with exit
# status 2.  Reached when no arguments are given, and also from the
# ordering code when the dataset order cannot be determined.
cat << EOF

usage: \$0 alldata.mtz

where:
alldata.mtz	- contains all the Fs and DANOs you want to combine

\`basename \$0\` will calculate an (unscaled) estimate of FH, the scattering factor
of the heavy metal alone, by combining any and all anomalous and isomorphous
(dispersive) differences made available to it.  The procedure is derived
from Matthews et. al. 1966 Acta Cryst 20, 230.

FH is better than Dano or Diso alone, both because the averaging tends
to give better signal/noise, and because considering BOTH isomorphous
and anomalous simultaneously reduced the systematic errors arising
from cross-terms in difference intensity data.  FH usually gives 
cleaner Pattersons and difference Fouriers, as well as improved 
performace of direct methods programs.

Procedure:
The isomorphous difference data sets are computed from the provided Fs,
(assigned a sign), scaled together, and a sigma-weighed mean isomorphous 
difference is computed.  A similar procedure is applied to the anomalous 
differences.

The average Diso and Dano data sets are then scaled to each other, and
combined for the final estimate of FH = sqrt(Diso^2 + k * Dano^2).

REMEMBER: Do NOT combine differences from data sets expected to have
different heavy atom locations.  That would be silly.  If you don't
know why, you should read Matthews et. al. 1966 Acta Cryst 20, 230.

EOF

# remove the scratch lists before leaving
rm -f \${tempfile}Fs >& /dev/null
rm -f \${tempfile}Ds >& /dev/null
rm -f \${tempfile}Ps >& /dev/null
rm -f \${tempfile}Fpairs >& /dev/null
exit 2
#
#   This procedure (at the bottom of the script) does the following
#   1) scan the command line for the mtz file
#   2) set the CELL, SG, and other variables
#   3) generate dataset name lists: \${tempfile}Fs and \${tempfile}Ds
Return_from_Setup:
# Main flow starts here once Setup is done: open the log file, report
# the resolution range, count the amplitude (Fs) and anomalous (Ds)
# column lists, and settle the trivial dataset orderings.






################################################################
#   initial report on intended program flow
################################################################
# start the logfile
echo "" >! \$logfile
echo "\$0 \$*" >> \$logfile

if(-e "\$mtzfile") then
    echo "calculating FH from data in \$mtzfile" | tee -a \$logfile
endif

echo "" | tee -a \$logfile
echo "resolution \$loRES \$hiRES" | tee -a \$logfile
# get a "better" resolution limit for scaling?
set scaleRES = "\$loRES \$hiRES"

# count how many datasets we have
# (one line per dataset in each list file)
set Fs = \`cat \${tempfile}Fs | wc -l\`
set Ds = \`cat \${tempfile}Ds | wc -l\`

# trivial assignments of dispersive difference order
# (2 datasets -> doesn't matter)
# (1 dataset  -> unusable)
if(\$Fs < 3) set order = \`nawk '{print \$1}' \${tempfile}Fs\`
if(\$Fs < 2) set order = ""

# if user doesn't care, pick Diso ordering automatically
#
# Every pair of amplitude columns is run through scaleit to get its mean
# isomorphous difference.  The dataset in the pair with the largest
# difference becomes the reference, and the other datasets are ordered
# by increasing difference from it.
if(("\$order" == "")&&(\$Fs > 2)) then
    
    echo -n "Evaluating difference data "
    echo -n "" >! \${tempfile}diso_dano
    
    # make a list of essential pairs
    cat \${tempfile}Fs |\\
    nawk '{++n; F[n]=\$1} \\
          END{for(i=1;i<=n;++i){for(j=i;j<=n;++j){if(i!=j){\\
              print F[i] " - " F[j];}}}}' |\\
    cat >! \${tempfile}Fpairs
    
    foreach pair ( \`nawk '{print NR}' \${tempfile}Fpairs\` )
	# retrieve the pair  
	set F1 = \`nawk -v pair=\$pair 'NR==pair{print \$1}' \${tempfile}Fpairs\`
	set F2 = \`nawk -v pair=\$pair 'NR==pair{print \$3}' \${tempfile}Fpairs\`
	
	# and get the sigmas too
	set SIGF1 = \`nawk -v F=\$F1 '\$1==F{print \$2}' \${tempfile}Fs\`
	set SIGF2 = \`nawk -v F=\$F2 '\$1==F{print \$2}' \${tempfile}Fs\`
	
	# also, get magnitudes of anomalous diffs
	# (the anomalous columns are cycled through, one per pair)
	if(\$Ds != 0) then
	    @ i = ( ( \$pair  % \$Ds ) + 1 )
	    set DANO    = \`nawk -v n=\$i 'NR==n{print \$1}' \${tempfile}Ds\`
	    set SIGDANO = \`nawk -v n=\$i 'NR==n{print \$2}' \${tempfile}Ds\`

	    set DANOcards = "DPH1=\$DANO SIGDPH1=\$SIGDANO"
	else
	    set DANOcards = ""
	    # DANO is echoed into the table below.  Nothing visible in this
	    # part of the script sets it when there are no anomalous columns,
	    # which would stop the script with an undefined-variable error.
	    # A placeholder word also keeps the five-column layout of the
	    # table intact.
	    set DANO = "none"
	endif
    
	# make some ordinary scaleit cards
	cat << EOF-scaleitin >! \${tempfile}scaleit.in
RESOLUTION \$scaleRES
refine \$scaling
weight
LABIN FP=\$F1 SIGFP=\$SIGF1 -
      FPH1=\$F2 SIGFPH1=\$SIGF2 \$DANOcards
END
EOF-scaleitin
	
	# entertainment
	echo -n "."
	
	# put the labels in the log file
	echo -n "\$F1 \$F2 \$DANO " >> \${tempfile}diso_dano
	
	# run scaleit to get Diso and Dano
	cat \${tempfile}scaleit.in |\\
	scaleit HKLIN \$mtzfile HKLOUT /dev/null |\\
	nawk '/Sc_kraut SCALE/{iso=index(\$0,"diso")-4; ano=index(\$0,"<ano>")+3}\\
	/THE TOTALS/{print substr(\$0,iso)+0, substr(\$0,ano)+0}' |\\
	cat >> \${tempfile}diso_dano
	
	# \${tempfile}diso_dano has format: F1 F2 DANOF3  <Diso> <Dano>
    end
    rm -f \${tempfile}Fpairs >& /dev/null
    rm -f \${tempfile}scaleit.in >& /dev/null

    # get the single largest isomorphous difference
    # (sort keys are written with -k; the old zero-based "+N" form is
    #  obsolescent and not accepted by every sort implementation)
    set order = \`sort -n -k 4 \${tempfile}diso_dano | tail -1 | nawk '{print \$2}'\`
    if("\$order" == "") then
	echo ""
	echo "no isomorphous differences"
	echo "estimating FH requires isomorphous/dispersive and anomalous differences! "
	echo "Therefore: \$mtzfile needs to contain at least two columns of Fs"
	echo "sorry"
	goto Clean_up
    endif

    # now order the remaining datasets with increasing distance from this "native"
    cat \${tempfile}diso_dano |\\
    nawk -v ref=\$order 'BEGIN{print ref, 0, "order"} \\
       \$1==ref{print \$2, \$4} \$2==ref{print \$1, \$4}' |\\
    sort -n -k 2 >! \${tempfile}order
    # \${tempfile}order has format: F diso(from Fref)
    
    # store the new order in a variable
    set order = \`nawk '{print \$1}' \${tempfile}order\`

    # make sure nothing bad happened (this isn't exactly ergodic! )
    set Fs = \`cat \${tempfile}Fs | wc -l\`
    if(\$#order != \$Fs) then
	# something has gone horribly wrong
	echo "ERROR: unable to determine the best order of"
	echo "       isomorphous/dispersive differences"
	echo "sorry! "
	echo ""
	goto Help
    endif
    
    # reorder the Dano list too (doesn't really matter)
    cat \${tempfile}diso_dano \${tempfile}Ds |\\
    nawk 'NF>2{dano[\$3]=\$5} \\
          NF==2{print \$0, dano[\$1]}' |\\
    sort -nru -k 3 |\\
    nawk '{print \$1, \$2}' >! \${tempfile}
    mv \${tempfile} \${tempfile}Ds >& /dev/null

    # clean up
    rm -f \${tempfile}scaleit.in >& /dev/null
    rm -f \${tempfile}diso_dano >& /dev/null
endif

# whatever its source, put the f' order in a file
# (one "label order" line per word of the order variable)
echo "\$order" |\\
nawk 'BEGIN{RS=" "} NF==1{++i; print \$1, "order"}' |\\
cat >! \${tempfile}order

# re-order the Fs list
# (the sigma label of each dataset is looked up from the old list and
#  the lines are written out again in the new order)
cat \${tempfile}Fs \${tempfile}order |\\
nawk '\$NF!="order"{sig[\$1]=\$2} \\
      \$NF=="order"{print \$1, sig[\$1]}' |\\
cat >! \${tempfile}
mv \${tempfile} \${tempfile}Fs >& /dev/null
set Fs = \`cat \${tempfile}Fs | wc -l\`



# print out final ordering results:
echo ""
echo -n " f' order: "
cat \${tempfile}Fs | nawk '{printf "%s ", \$1} END{print ""}'

echo -n " f"\\"" order: "
cat \${tempfile}Ds | nawk '{printf "%s ", \$1} END{print ""}'

# clean up
rm -f \${tempfile}order >& /dev/null


# jump ahead if there are no anomalous datasets (why bother?)
if("\$Ds" == 0) then
    # make sure this doesn't exist (this is a signal later on)
    rm -f \${tempfile}dano.mtz >& /dev/null
    goto calculate_iso
endif

# no need to weigh anomalous datasets if there is only one of them
if("\$Ds" == 1) then
    # all we need to do is rename the dataset
    # (the single line of the list holds the difference label and its
    #  sigma label)
    set temp = \`cat \${tempfile}Ds\`
    # better than crashing, (I guess)
    if(\$#temp != 2) set temp = ( \$temp \$temp )

    echo "extracting \$temp[1] as Dano"
    # copy the two columns into their own mtz file under the names
    # Dano and SIGDano
    cad hklin1 \$mtzfile hklout \${tempfile}dano.mtz << EOF-cad >> \$logfile
    LABIN FILE 1 E1=\$temp[1] E2=\$temp[2]
    CTYPO FILE 1 E1=F        E2=Q
    LABOU FILE 1 E1=Dano     E2=SIGDano
EOF-cad

    # go on to calculate isomorphous differences
    goto calculate_iso
endif

weigh_ano:
# Put all anomalous difference columns on a common scale.  The columns
# are copied into one mtz file as if they were amplitudes, scaled with
# scaleit against the first column (at most six per run), and the
# resulting relative weights are printed.
echo ""
echo "weighting anomalous differences"

# extract the anomalous differences, and treat them as Fs
cat \${tempfile}Ds |\\
nawk '{printf "%s %s ", \$1, \$2} END{print ""}' |\\
nawk 'BEGIN{printf "LABIN FILE 1 ";} \\
{for(i=1;i<=NF;++i){printf "E%d=%s ", i, \$i}; printf "\\nCTYPIN FILE 1 "; \\
 for(i=1;i<=NF;i+=2){printf "E%d=F E%d=Q ", i, i+1}; print ""}' |\\
cad HKLIN1 \$mtzfile HKLOUT \${tempfile}danos.mtz >> \$logfile

# put anomalous diffs on the same scale
set i = 1
echo -n "" >! \${tempfile}scaleit.log
while( \$i <= \$Ds )
    cat << EOF-scaleitin >! \${tempfile}scaleit.in
RESOLUTION \$scaleRES
#refine scale
refine \$scaling
weight
EOF-scaleitin

    # make the LABIN card (not too many FPHs! )
    head -1 \${tempfile}Ds |\\
    nawk '{printf "LABIN FP=%s SIGFP=%s ", \$1, \$2}' |\\
    cat >> \${tempfile}scaleit.in
    
    # no more than 6 at a time
    cat \${tempfile}Ds |\\
    nawk -v first=\$i '{++n} n>=first && n<(first+6){++i;\\
	printf "-\\nFPH%d=%s SIGFPH%d=%s ", i, \$1, i, \$2} \\
    END {print "\\nEND"}' |\\
    cat >> \${tempfile}scaleit.in
    
    # now actually run scaleit
    cat \${tempfile}scaleit.in |\\
    scaleit HKLIN \${tempfile}danos.mtz \\
            HKLOUT \${tempfile}danoscaled.mtz |\\
     tee -a \${tempfile}scaleit.log >> \$logfile
    
    # accumulate scaled datasets
    # (the output of this pass is the input of the next one)
    mv \${tempfile}danoscaled.mtz \${tempfile}danos.mtz >& /dev/null
    @ i = ( \$i + 6 )
end
mv \${tempfile}danos.mtz \${tempfile}danoscaled.mtz >& /dev/null

# print out "weights" (effective weighting is 1/scale^2)
cat \${tempfile}scaleit.log |\\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\\
nawk '\$1 == "Derivative"{++i; print \$1, i, \$3}' >! \${tempfile}scales

# the table is sorted on its 3rd column (the weight), largest first; the
# key is written with -k because the old zero-based "+2" form is
# obsolescent and not accepted by every sort implementation
cat \${tempfile}Ds \${tempfile}scales |\\
nawk 'NF==2{++n; label[n]=\$1; if(length(\$1)>maxlen) maxlen=length(\$1)}\\
    \$1 == "Derivative"{w=0; if(\$3+0!=0) w=1/(\$3*\$3)\\
    printf "%-" maxlen "s : %.3f\\n", label[\$2], w}' |\\
sort -nr -k 3


rm -f \${tempfile}scaleit.log >& /dev/null
rm -f \${tempfile}scales      >& /dev/null


combine_ano:
# Merge the scaled anomalous difference columns into one Dano/SIGDano
# pair.  The scaled mtz file is dumped as text, a weighted mean with
# weights 1/sigma^2 is formed for every reflection, and the result is
# read back into mtz format and sorted.
echo "combining anomalous differences into Dano"

# now do a sigma-weighted average of all anomalous diffs
# NOTE: the awk program starts "norm" as an empty string, so a reflection
# with no usable column prints nothing, while one whose sigmas are all
# zero prints a zero line
set Ds = \`cat \${tempfile}Ds | wc -l\`
echo \$Ds | nawk '{print "NREF -1"; print "FORMAT \\047(3i5,"\$1*2"f15.7)\\047"}' |\\
mtzdump HKLIN \${tempfile}danoscaled.mtz |\\
nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\\
nawk '! /[A-Z]/ && NF>3{HKL=substr(\$0,1,15); sum=norm=n="";\\
    # run down list of D, sigD \\
    for(i=4;i<NF;i+=2){D=\$i;sigD=\$(i+1); w=0; \\
	# sigma of 0 means zero, but no weight \\
	if(sigD+0==0) norm+=0;\\
	# add up weigted sum \\
        if(sigD+0>0){w=1/(sigD*sigD); sum+=w*D; norm+=w;}}\\
    # do not print anything for "all-missing" HKLs \\
    # print zero for all-zero hkls \\
    if(norm==0) print HKL, 0, 0; \\
    # print sigma-weighted average (abs value? ) \\
    if(norm+0 > 0) print HKL, sum/norm, 1/sqrt(norm)}' |\\
cat >! \${tempfile}dano.hkl
rm -f \${tempfile}danoscaled.mtz >& /dev/null


# read the averaged list back into mtz format
f2mtz HKLIN \${tempfile}dano.hkl HKLOUT \${tempfile}sortme.mtz << EOF-f2mtz >> \$logfile
CELL \$CELL
SYMM \$SGnum
LABOUT H K L Dano SIGDano
CTYPO  H H H F Q
EOF-f2mtz
rm -f \${tempfile}dano.hkl >& /dev/null

# sort the reflections on H K L
echo "H K L" |\\
sortmtz HKLIN \${tempfile}sortme.mtz HKLOUT \${tempfile}dano.mtz >> \$logfile
rm -f \${tempfile}sortme.mtz

# indicate finish
echo ""

# summed anomalous diffs should now be loaded into:
# \${tempfile}dano.mtz, labeled as Dano SIGDano



calculate_iso:
###########################################################
# now we need to calculate isomorphous diffs 
# before we can treat them as we did the anomalous diffs
#
# For every pair of amplitude columns the difference F1-F2 and its sigma
# sqrt(SIG1^2+SIG2^2) are computed from a text dump and stored as a new
# column pair; all pairs are collected in one cumulative mtz file.


# jump ahead if there are no isomorphous datasets (why bother?)
if("\$Fs" < 2) then
    # make sure this doesn't exist (this is a signal later on)
    rm -f \${tempfile}diso.mtz >& /dev/null
    goto compute_k
endif

if(\$Fs == 2) then
    echo "calculating isomorphous (dispersive) difference as Diso"
else
    echo "subtracting isomorphous (dispersive) differences"
    echo "note: f' differences should all have the same sign! "
endif

# make sure this doesn't already exist
rm -f \${tempfile}disos.mtz >& /dev/null

# make list with largest difference first
# (for each dataset the partners are listed from the far end of the
#  ordered list inwards)
cat \${tempfile}Fs |\\
nawk '{++n; F[n]=\$1} \\
    END{for(i=1;i<=n;++i){for(j=n;j>i;--j){if(i!=j){\\
    print F[i] " - " F[j];}}}}' |\\
cat >! \${tempfile}Fpairs
set Fpairs = \`cat \${tempfile}Fpairs | wc -l\`
set pair = 0

while ( \$pair < \$Fpairs )
    @ pair = ( \$pair + 1 )
    # retrieve the pair  
    set F1 = \`nawk -v pair=\$pair 'NR==pair{print \$1}' \${tempfile}Fpairs\`
    set F2 = \`nawk -v pair=\$pair 'NR==pair{print \$3}' \${tempfile}Fpairs\`
    
    # and get the sigmas too
    set SIGF1 = \`nawk -v F=\$F1 '\$1==F{print \$2}' \${tempfile}Fs\`
    set SIGF2 = \`nawk -v F=\$F2 '\$1==F{print \$2}' \${tempfile}Fs\`
    
    echo "\${F1}-\${F2}"
    
    # extract these columns from the file
    echo "LABIN FILE 1 E1=\$F1 E2=\$SIGF1 E3=\$F2 E4=\$SIGF2" |\\
    cad HKLIN1 \$mtzfile HKLOUT \${tempfile}dump.mtz >> \$logfile
    
    # dump the Fs as text, calculate F1-F2 sqrt(SIG1^2+SIG2^2)
    # (the values are cut out of fixed character columns of the dump,
    #  and lines containing capital letters or "?" are skipped)
    echo "NREF -1" |\\
    mtzdump HKLIN \${tempfile}dump.mtz |\\
    nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\\
    nawk '! /[A-Z?]/ && NF>1{HKL=substr(\$0,1,13);\\
        F1=substr(\$0,14,12); F2=substr(\$0,36,10);\\
	SIGF1=substr(\$0,26,10); SIGF2=substr(\$0,46,10); \\
     print HKL, F1-F2, sqrt(SIGF1*SIGF1 + SIGF2*SIGF2)}' |\\
    cat >! \${tempfile}diso.hkl
    rm -f \${tempfile}dump.mtz >& /dev/null
    
    # special case: only one difference set
    # (an empty pair number gives the plain labels Diso and SIGDiso)
    if(\$Fs == 2) set pair = ""

    # read back into mtz format
    f2mtz HKLIN \${tempfile}diso.hkl HKLOUT \${tempfile}sortme.mtz << EOF-f2mtz >> \$logfile
    CELL \$CELL
    SYMM \$SGnum
    LABOUT H K L Diso\${pair} SIGDiso\${pair}
    CTYPO  H H H F Q
EOF-f2mtz
    rm -f \${tempfile}diso.hkl >& /dev/null
    
    # a large pair number makes the while test fail, which ends the
    # loop after this single difference set
    if("\$pair" == "") set pair = 999    
    
    # sort it (just in case)
    echo "H K L" |\\
    sortmtz HKLIN \${tempfile}sortme.mtz HKLOUT \${tempfile}diso.mtz >> \$logfile
    rm -f \${tempfile}sortme.mtz
    
    # add columns into an mtz
    if(-e \${tempfile}disos.mtz) then
	cad HKLIN1 \${tempfile}disos.mtz HKLIN2 \${tempfile}diso.mtz \\
	    HKLOUT \${tempfile}cadded.mtz << EOF-cadadd >> \$logfile
	LABIN FILE 1 ALL
	LABIN FILE 2 ALL
EOF-cadadd
	# update the cumulative mtz
	mv \${tempfile}cadded.mtz \${tempfile}disos.mtz
    else
	# create the cumulative mtz
	mv \${tempfile}diso.mtz \${tempfile}disos.mtz
    endif
end


# check for trivial case: one difference dataset
if(\$Fs <= 2) then
    # no need to calculate relative weights for one dataset
    mv \${tempfile}disos.mtz \${tempfile}diso.mtz
    
    goto compute_k
endif


weigh_iso:
# Put all isomorphous difference columns on a common scale.  Diso1 is
# the reference; the other columns are scaled against it with scaleit
# (at most six per run), and the relative weights are printed.
echo "weighting isomorphous differences"

# now put all these differences on the same scale
set pair = 1
set Fpairs = \`cat \${tempfile}Fpairs | wc -l\`
echo -n "" >! \${tempfile}scaleit.log 
while ( \$pair <= \$Fpairs )
    cat << EOF-scaleitin >! \${tempfile}scaleit.in
RESOLUTION \$scaleRES
#refine scale
refine \$scaling
weight
EOF-scaleitin
    # first one is the reference
    echo -n "LABIN FP=Diso1 SIGFP=SIGDiso1 " >> \${tempfile}scaleit.in
    
    # scale 6 at a time
    cat \${tempfile}Fpairs |\\
    nawk -v first=\$pair '{++n} n>=first && n<(first+6){++i;\\
	printf "-\\nFPH%d=Diso%d SIGFPH%d=SIGDiso%d ", i, n, i, n;}\\
	END{print "\\nEND"}' |\\
    cat >> \${tempfile}scaleit.in
    
    # run scaleit
    cat \${tempfile}scaleit.in |\\
    scaleit HKLIN \${tempfile}disos.mtz \\
            HKLOUT \${tempfile}disoscaled.mtz  |\\
     tee -a \${tempfile}scaleit.log >> \$logfile
    rm -f \${tempfile}scaleit.in >& /dev/null

    @ pair = ( \$pair + 6 )
    
    # output is input for next round of scaling
    mv \${tempfile}disoscaled.mtz \${tempfile}disos.mtz >& /dev/null
end
mv \${tempfile}disos.mtz \${tempfile}disoscaled.mtz >& /dev/null


# print out "weights" (effective weighting is 1/scale^2)
cat \${tempfile}scaleit.log |\\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\\
nawk '\$1 == "Derivative"{++n; print \$1, n, \$3}' >! \${tempfile}scales

# the table is sorted on its 3rd column (the weight), largest first; the
# key is written with -k because the old zero-based "+2" form is
# obsolescent and not accepted by every sort implementation
cat \${tempfile}Fpairs \${tempfile}scales |\\
nawk '\$2 == "-"{++n; label[n]=\$1 \$2 \$3; if(length(label[n])>maxlen) maxlen=length(label[n])}\\
    \$1 == "Derivative"{w=0; if(\$3+0!=0) w=1/(\$3*\$3)\\
    printf "%-" maxlen "s : %.3f\\n", label[\$2], w}' |\\
sort -nr -k 3

rm -f \${tempfile}scaleit.log >& /dev/null
rm -f \${tempfile}scales      >& /dev/null



combine_iso:
# Average all scaled DisoN columns into a single Diso/SIGDiso pair
# and write it to the diso.mtz temporary file.
echo "combining isomorphous differences into Diso"

# add these scaled data sets together (sigma-weighted again)
# (hopefully, our "ordering" procedure has made sure all
#  these differences have the same sign)
set Fpairs = \`cat \${tempfile}Fpairs | wc -l\`
rm -f \${tempfile}Fpairs >& /dev/null

# The first awk writes the mtzdump input: list all reflections with a
# format of 3 integers plus two floats per F pair.  The last awk walks
# the D/sigD pairs of each line with weights of 1/sigD^2.  In it,
# "norm" starts as an empty string: a pair whose sigD evaluates to 0
# turns it into a numeric 0 (line printed as "0 0"), a pair with
# sigD > 0 adds its weight, and if norm is still empty at the end
# neither print fires (this relies on awk comparing the empty string
# with 0 as strings), so the hkl is left out of the output.
echo \$Fpairs | nawk '{print "NREF -1"; print "FORMAT \\047(3i5,"\$1*2"f15.7)\\047"}' |\\
mtzdump HKLIN \${tempfile}disoscaled.mtz |\\
nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\\
nawk '! /[A-Z]/ && NF>3{HKL=substr(\$0,1,15); sum=norm=n="";\\
    # run down list of D, sigD \\
    for(i=4;i<NF;i+=2){D=\$i;sigD=\$(i+1); w=0; \\
	# sigma of 0 means zero, but no weight \\
	if(sigD+0==0) norm+=0;\\
	# add up weigted sum \\
        if(sigD+0>0){w=1/(sigD*sigD); sum+=w*D; norm+=w;}}\\
    # do not print anything for "all-missing" HKLs \\
    # print zero for all-zero hkls \\
    if(norm==0) print HKL, 0, 0; \\
    # print sigma-weighted average (abs value?) \\
    if(norm+0 > 0) print HKL, sum/norm, 1/sqrt(norm)}' |\\
cat >! \${tempfile}diso.hkl
rm -f \${tempfile}disoscaled.mtz >& /dev/null

# read the averaged isomorphous differences back into an mtz
f2mtz HKLIN \${tempfile}diso.hkl HKLOUT \${tempfile}sortme.mtz << EOF-f2mtz >> \$logfile
CELL \$CELL
SYMM \$SGnum
LABOUT H K L Diso SIGDiso
CTYPO  H H H F Q
EOF-f2mtz
rm -f \${tempfile}diso.hkl >& /dev/null

# sort on H K L; the result is the diso.mtz used from compute_k on
echo "H K L" |\\
sortmtz HKLIN \${tempfile}sortme.mtz HKLOUT \${tempfile}diso.mtz >> \$logfile
rm -f \${tempfile}sortme.mtz

# indicate finish



compute_k:
##############################################################
# now we need the "k" that will put Diso and Dano on the same
# scale: k/2 = <|Diso|>/<|Dano|>

##############################################################
# Inputs at this point: the diso.mtz and dano.mtz temporary files
# and the counts Ds (anomalous columns) and Fs (F columns).
# When only one kind of difference exists, it is renamed to FH and
# control jumps straight to sort_final.
echo ""

# handle special cases
if((\$Ds == 0)&&(\$Fs <= 1)) then
    # this should never happen, but...
    echo "ERROR: no difference data in \${mtzfile}! "
    goto Help
endif
if(\$Ds == 0) then
    # anomalous difference data is totally missing
    # just rename the isomorphous differences
    echo "WARNING: treating Diso data as FH (FH is better with Dano and Diso)"
    cad hklin1 \${tempfile}diso.mtz hklout \${tempfile}sortme.mtz << EOF >> \$logfile
    labin file 1 E1=Diso E2=SIGDiso
    labou file 1 E1=FH   E2=SIGFH
EOF
    rm -f \${tempfile}diso.mtz >& /dev/null
    rm -f \${tempfile}dano.mtz >& /dev/null
    # this branch writes no W column, so the weighted Patterson
    # (which reads W) is switched off
    set wpattfile = ""

    goto sort_final
endif
if(\$Fs <= 1) then
    # isomorphous difference data is totally missing
    # just re-name the anomalous differences
    echo "WARNING: treating Dano data as FH (FH is better with Dano and Diso)"
    cad hklin1 \${tempfile}dano.mtz hklout \${tempfile}sortme.mtz << EOF >> \$logfile
    labin file 1 E1=Dano E2=SIGDano
    labou file 1 E1=FH   E2=SIGFH
EOF
    rm -f \${tempfile}diso.mtz >& /dev/null
    rm -f \${tempfile}dano.mtz >& /dev/null
    # no W column here either: disable the weighted Patterson
    set wpattfile = ""

    goto sort_final
endif

##############################################################
echo -n "scaling Diso to Dano  "

# use scaleit to put Diso and Dano on the same scale
# (both column sets are first merged into one file with cad)
cad HKLIN1 \${tempfile}diso.mtz HKLIN2 \${tempfile}dano.mtz \\
HKLOUT \${tempfile}diso_dano.mtz << EOF-cad >> \$logfile
LABIN FILE 1 ALL
LABIN FILE 2 ALL
EOF-cad
rm -f \${tempfile}diso.mtz >& /dev/null
rm -f \${tempfile}dano.mtz >& /dev/null


# Diso is the reference (FP) and Dano the "derivative" (FPH1); the
# scaled.mtz written here is read again at combine_diso_dano and by
# the combined difference Fourier
scaleit HKLIN \${tempfile}diso_dano.mtz \\
 HKLOUT \${tempfile}scaled.mtz << EOF-scaleit | tee \${tempfile}scaleit.log >> \$logfile
RESOLUTION \$scaleRES
#refine scale
refine \$scaling
weight
LABIN FP=Diso SIGFP=SIGDiso FPH1=Dano SIGFPH1=SIGDano
end
EOF-scaleit
rm -f \${tempfile}diso_dano.mtz >& /dev/null
# scaleit was fed from a here-document above, so this only removes a
# stale scaleit.in if one happens to exist
rm -f \${tempfile}scaleit.in >& /dev/null


# print out "k" value from scaling
# (twice the third field of the "Derivative" line in the scaleit log)
cat \${tempfile}scaleit.log |\\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\\
nawk '\$1 == "Derivative"{printf "k= %.3f\\n", 2*\$3}'

rm -f \${tempfile}scaleit.log >& /dev/null


combine_diso_dano:
##############################################################
# now combine Diso and Dano as the "best" estimate of total FH
# (reads the scaled.mtz temporary file, writes sortme.mtz with the
#  columns FH, SIGFH and W)

echo "calculating FH = sqrt( Diso^2 + (k/2)^2 * Dano^2 )"

# also do a "Patterson weight" of 1/(sigma(FH^2))^2
# The awk program reads the four values from fixed character columns
# of the default mtzdump listing.  FH is sqrt(Diso^2 + Dano^2) of the
# already scaled columns; fh is the same value with 0 replaced by 1 so
# the divisions are safe.  The reciprocal of the largest W is written
# to the "norm" temporary file.
# NOTE(review): the W that is printed is 1/sqrt(varFH); varFH2 is
# computed but never used - confirm which weight is intended.
echo "NREF -1" |\\
mtzdump HKLIN \${tempfile}scaled.mtz |\\
nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\\
nawk '! /[A-Z]/ && NF>1{ HKL=substr(\$0,1,13); \\
    Diso=substr(\$0,14,12); SIGDiso=substr(\$0,26,10); \\
    Dano=substr(\$0,36,10); SIGDano=substr(\$0,46,10); \\
    FH=fh=sqrt(Diso*Diso + Dano*Dano); if(fh==0) fh=1;\\
    varFH  = (Diso*SIGDiso/fh)^2 + (Dano*SIGDano/fh)^2;\\
    varFH2 = 4 * FH^2 * varFH; \\
    W=0; if(varFH) W = 1/sqrt(varFH); if(W>maxW) maxW=W;\\
    print HKL, FH, sqrt(varFH), W} \\
    END{if(maxW) print 1/maxW > "'\${tempfile}'norm"}' |\\
cat >! \${tempfile}FH.hkl

# pick up the scale for the W column; fall back to 1 when the file
# did not yield exactly one word
set norm = \`tail -1 \${tempfile}norm\`
rm -f \${tempfile}norm >& /dev/null
if(\$#norm != 1) set norm = 1

# read FH back into mtz (finally)
# (the SCALE card applies norm to the W column only)
f2mtz HKLIN \${tempfile}FH.hkl HKLOUT \${tempfile}sortme.mtz << EOF-f2mtz >> \$logfile
CELL \$CELL
SYMM \$SGnum
LABOUT H K L FH SIGFH W
CTYPO  H H H F Q W
SCALE  1 1 1 1 1 \$norm
EOF-f2mtz
rm -f \${tempfile}FH.hkl >& /dev/null


sort_final:
# Write the final output mtz and choose a default for DIFF.
# Every path above leaves its result in the sortme.mtz temporary file.
# sort it (for good measure)
echo "H K L" |\\
sortmtz HKLIN \${tempfile}sortme.mtz HKLOUT \$outfile >> \$logfile
# exit with code 9 if sortmtz failed or the output file is missing
if((! \$status)&&(-e "\$outfile")) then
    echo "\$outfile is ready."
else
    echo "ERROR! see \$logfile for what happened..."
    exit 9
endif

rm -f \${tempfile}sortme.mtz
##############################################################


# use scaleit to get the recommended DIFF
# (FH is given as both native and derivative, the derivative scaled by
#  0.000001; the last word of the "acceptable differences" line in the
#  scaleit output is used unless DIFF was already set, and 1000 is the
#  fallback when nothing was found)
scaleit hklin \$outfile << EOF >! \${tempfile}scaleit
analyze
labin FP=FH SIGFP=SIGFH FPH1=FH SIGFPH1=SIGFH
SCALE FPH1 0.000001
END
EOF
if("\$DIFF" == "") set DIFF = \`nawk '/acceptable differences/{print \$NF}' \${tempfile}scaleit\`
rm -f \${tempfile}scaleit >& /dev/null
if("\$DIFF" == "") set DIFF = 1000




Shelx_format:
# Optional step: skipped entirely when no shelx file name was set.
if("\$shelxfile" == "") goto Fourier

# dump FH out in shelx format too.
# (mtz2various is asked for SHELX output of FH/SIGFH with the FSQUARED
#  keyword)
mtz2various HKLIN \$outfile HKLOUT \$shelxfile << EOF-shelx >> \$logfile
OUTPUT SHELX
FSQUARED
LABIN FP=FH SIGFP=SIGFH
END
EOF-shelx

# only announce the file if mtz2various succeeded and wrote it
if((! \$status)&&(-e "\$shelxfile")) echo "\$shelxfile is the SHELX version of \$outfile."


Fourier:
###########################################
# Optional step: phased difference Fourier of FH.
# Needs a phase column (PHASE) and an output map name (fourfile);
# otherwise control passes straight to the Patterson section.
if(("\$PHASE" == "")||("\$fourfile" == "")) goto Patterson
# calculate a phased Fourier of FH
# first, we need to retrieve the phase columns
set FOM_cad = ""
if("\$FOM" != "") set FOM_cad = "E2=\$FOM"
# set up FOM weight
set W = ""
if("\$FOM" != "") set W = "W=\$FOM"

# special case of Diso or Dano only
if((\$Ds == 0)||(\$Fs <= 1)) then
    # no double-difference data
    # (FH is treated as an F-type column by default)
    set E1 = "E1=F"
    set fftF1="F1=FH"
    if(\$Fs <= 1) then
	# anomalous data only
	# (FH is retyped as D and given to fft as DANO)
	set E1="E1=D"
	set fftF1="DANO=FH"
    endif
    
    # but still want phased FH map
    cad hklin1 \$outfile hklin2 \$mtzfile \\
	hklout \${tempfile}phased.mtz << EOF-cad >> \$logfile
    LABIN FILE 1 E1=FH E2=SIGFH
    LABIN FILE 2 E1=\$PHASE \$FOM_cad
    CTYPO FILE 1 \$E1 E2=Q
EOF-cad

    # ordinary, boring difference Fourier
    fft HKLIN \${tempfile}phased.mtz MAPOUT \${tempfile}FH.map << EOF-fft >> \$logfile
    TITLE \${hiRES}A map of FH @ \$PHASE \$FOM
    RESOLUTION \$scaleRES
    LABIN \$fftF1 SIG1=SIGFH PHI=\$PHASE \$W
    EXCLUDE SIG1 0
EOF-fft
    
    goto norm_Four
endif

# calculate a combined, phased difference Fourier
# (scaled.mtz holds Diso and Dano on a common scale)
cad hklin1 \${tempfile}scaled.mtz hklin2 \$mtzfile \\
    hklout \${tempfile}phased.mtz << EOF-cad >> \$logfile
LABIN FILE 1 E1=Diso E2=SIGDiso E3=Dano E4=SIGDano
LABIN FILE 2 E1=\$PHASE \$FOM_cad
CTYPO FILE 1 E1=F E2=Q E3=D E4=Q
EOF-cad

# now we make the isomorphous difference Fourier
fft HKLIN \${tempfile}phased.mtz MAPOUT \${tempfile}Diso.map << EOF-fft >> \$logfile
TITLE \${hiRES}A map of Diso @ \$PHASE \$FOM
RESOLUTION \$scaleRES
LABIN F1=Diso SIG1=SIGDiso PHI=\$PHASE \$W
EXCLUDE SIG1 0
EOF-fft

# then make the anomalous difference Fourier (phases rotated 90 degrees)
# SIG1 is the sigma of the Dano column, so that "EXCLUDE SIG1 0" drops
# reflections without an anomalous measurement (it used to be given
# SIGDiso, which filtered this map on the wrong sigma)
fft HKLIN \${tempfile}phased.mtz MAPOUT \${tempfile}Dano.map << EOF-fft >> \$logfile
TITLE \${hiRES}A map of Dano @ \$PHASE \$FOM
RESOLUTION \$scaleRES
LABIN DANO=Dano SIG1=SIGDano PHI=\$PHASE \$W
EXCLUDE SIG1 0
EOF-fft

# try to make sure we get the right sign
# assume map has at least one large peak (which should be positive)
# (prints 1 if the maximum density is larger in magnitude than the
#  minimum density, and -1 otherwise)
echo "go" | mapdump mapin \${tempfile}Diso.map |\\
nawk '/Minimum density/{print -\$NF, -1} /Maximum density/{print \$NF, 1}' |\\
sort -nr |\\
nawk 'NR==1{print \$NF}' >! \${tempfile}sign
set diso_sign = \`cat \${tempfile}sign\`
rm -f \${tempfile}sign >& /dev/null

# invert the Diso map if it seems to be upside-down
# (never done when the user gave the order of the F columns)
#if(("\$diso_sign" == "-1")) then
if(("\$diso_sign" == "-1")&&(! \$?USER_ORDER)) then
    echo "inverting sequence of Diso for difference Fourier"
    echo "SCALE FACTOR -1 0" |\\
    mapmask mapin1 \${tempfile}Diso.map mapout \${tempfile}temp.map >> \$logfile
    mv \${tempfile}temp.map \${tempfile}Diso.map >& /dev/null
endif

# add these two maps together (equivalent to vector sum to FH)
echo "MAPS ADD" |\\
mapmask mapin1 \${tempfile}Diso.map mapin2 \${tempfile}Dano.map \\
mapout \${tempfile}FH.map >> \$logfile

norm_Four:
# normalize it for output
echo "SCALE SIGMA" |\\
mapmask mapin \${tempfile}FH.map mapout \$fourfile >> \$logfile

if((! \$status)&&(-e "\$fourfile")) echo "\$fourfile is the map of \$PHASE applied to FH"

rm -f \${tempfile}phased.mtz >& /dev/null
rm -f \${tempfile}Diso.map >& /dev/null
rm -f \${tempfile}Dano.map >& /dev/null
rm -f \${tempfile}FH.map >& /dev/null


Patterson:
###########################################
# Optional step: Patterson maps of FH.  Nothing is done without an
# output name in pattfile; the weighted map additionally needs
# wpattfile.
if("\$pattfile" == "") goto Clean_up

# calculate the unweighted Patterson map (if desired)
# (FH is given as both F1 and F2, with F2 scaled by 0.000001; this
#  run applies the DIFF cutoff chosen after sort_final)
fft HKLIN \$outfile MAPOUT \${tempfile}FH.map << EOF-fft >> \$logfile
TITLE \${hiRES}A Patterson of FH
RESOLUTION \$scaleRES
PATTERSON
LABIN F1=FH SIG1=SIGFH F2=FH SIG2=SIGFH
SCALE F2 0.000001 0
EXCLUDE DIFF \$DIFF
EXCLUDE SIG1 0
EOF-fft
# peak list goes to a temporary pdb that Clean_up removes
peakmax MAPIN \${tempfile}FH.map XYZOUT \${tempfile}.pdb << EOF-pick >> \$logfile
THRESHOLD RMS 3
NUMPEAKS 50
EOF-pick
# extend it to whole unit cell?
mapmask mapin \${tempfile}FH.map mapout "\$pattfile" << EOF >> \$logfile
scale sigma
#xyzlim 0 1 0 1 0 1
EOF
if((! \$status)&&(-e "\$pattfile")) echo "\$pattfile is the Patterson of FH."
rm -f \${tempfile}FH.map >& /dev/null


if("\$wpattfile" == "") goto Clean_up
# calculate the weighted Patterson map (if desired)
# (same as above plus the W column as weight; the DIFF exclusion is
#  commented out in this run)
fft HKLIN \$outfile MAPOUT \${tempfile}FH.map << EOF-fft >> \$logfile
TITLE \${hiRES}A Patterson of FH/SIGFH
RESOLUTION \$scaleRES
PATTERSON
LABIN F1=FH SIG1=SIGFH W=W F2=FH SIG2=SIGFH
SCALE F2 0.000001 0
#EXCLUDE DIFF \$DIFF
EXCLUDE SIG1 0
EOF-fft
# overwrites the peak list of the unweighted map
peakmax MAPIN \${tempfile}FH.map XYZOUT \${tempfile}.pdb << EOF-pick >> \$logfile
THRESHOLD RMS 3
NUMPEAKS 50
EOF-pick
# extend it to whole unit cell?
mapmask mapin \${tempfile}FH.map mapout "\$wpattfile" << EOF >> \$logfile
scale sigma
#xyzlim 0 1 0 1 0 1
EOF
if((! \$status)&&(-e "\$wpattfile")) echo "\$wpattfile is the Patterson of FH / sig(FH)^2"
rm -f \${tempfile}FH.map >& /dev/null


# clean up
# (removes the temporary files that may still exist at this point and
#  ends the script with the default exit status)
Clean_up:

rm -f \${tempfile}scaled.mtz >& /dev/null
rm -f \${tempfile}Fs     >& /dev/null
rm -f \${tempfile}Ds     >& /dev/null
rm -f \${tempfile}Fpairs >& /dev/null
rm -f \${tempfile}.pdb   >& /dev/null


exit






Setup:
#################################################

  ####   ######   #####  #    #  #####
 #       #          #    #    #  #    #
  ####   #####      #    #    #  #    #
      #  #          #    #    #  #####
 #    #  #          #    #    #  #
  ####   ######     #     ####   #

#################################################
#
#   gather information on:
#    mtz file
#    data sets
#    resolution limits
#    sigma cutoff (for map generation)
#    difference cutoff
#
#   control returns to the main flow with "goto Return_from_Setup"
#
##################################################
# DIFF stays empty unless it was already set; an empty value is
# replaced later by a figure taken from scaleit output
if(! \$?DIFF) set DIFF = ""

# scan the command line for files
foreach arg ( \$* )
    # warn about probable misspellings
    # (as written: an existing *.mtz word becomes the input file; the
    #  first *.mtz word that does not exist becomes the output file)
    if("\$arg" =~ *.mtz) then
	if(-e "\$arg") then
	    set mtzfile = "\$arg"
	else
	    if(! \$?useroutfile) then
		set useroutfile = "\$arg"
		set outfile = "\$arg"
	    endif
	endif
    endif
end


# now all filenames have been initialized

if(! -e "\$mtzfile") goto Help

##################################################
# get crystal and dataset information from the mtz file
# (CELL, space group number and name, and the high-resolution limit
#  are all parsed from one saved mtzdump header listing)
echo "go" | mtzdump HKLIN \$mtzfile >! \${tempfile}mtzdump
set CELL  = \`nawk '/Cell Dimensions/{getline;getline;print}' \${tempfile}mtzdump\`
set SGnum = \`nawk '/Space group/{print \$NF+0}' \${tempfile}mtzdump\`
set SG    = \`nawk -v num=\$SGnum '\$1==num && NF>5{print \$4}' \${CLIBD}/symop.lib \`
set hiRES = \`nawk '/Resolution Range/{getline;getline;print \$6}' \${tempfile}mtzdump\`

# get column label names from the mtz file
# The "datasets" file gets one line per F, D, Q, P or W column:
#   <tag> <n> <label>
# where n is a counter that advances on every F, D or P column and the
# tag of a Q column is "S" followed by the tag of the preceding line
# (so a sigma directly after an F shows up as "SF", after a D as "SD").
nawk 'NF>3' \${tempfile}mtzdump |\\
nawk '\$(NF-1)=="F"{print "F", \$NF}\\
      \$(NF-1)=="D"{print "D", \$NF}\\
      \$(NF-1)=="Q"{print "S", \$NF}\\
      \$(NF-1)=="P"{print "P", \$NF}\\
      \$(NF-1)=="W"{print "W", \$NF}' |\\
nawk '/^F/{++n} /^D/{++n} /^P/{++n} {printf "%s", \$1; \\
       if(\$1=="S") printf "%s", last;\\
printf " %d %s \\n",n, \$2; last=\$1}' |\\
cat >! \${tempfile}datasets
rm -f \${tempfile}mtzdump >& /dev/null

# check extent of available data
set temp = \`nawk '/^F/ || /^D/' \${tempfile}datasets |& wc -l\`
if(\$temp < 1) then
    # this is useless, bail out now
    rm -f \${tempfile}datasets >& /dev/null
    echo "ERROR: no usable data in \${mtzfile}! "
    set mtzfile = ""
    goto Help
endif

# get complete, unique lists of DANO/SIGDANOs
# (only D columns that have a matching sigma survive the NF==3 filter;
#  output lines are "label sigma-label")
cat \${tempfile}datasets |\\
nawk '\$1=="D"{D[\$2]=\$NF} \$1=="SD"{S[\$2]=\$NF} \\
      END{for(i in D) print i, D[i], S[i];}' |\\
sort -un |\\
nawk 'NF==3{print \$2,\$3}' |\\
cat >! \${tempfile}Ds

# get complete, unique lists of F/SIGFs
# (same treatment as the D columns above)
cat \${tempfile}datasets |\\
nawk '\$1=="F"{F[\$2]=\$NF} \$1=="SF"{S[\$2]=\$NF} \\
      END{for(i in F) print i, F[i], S[i];}' |\\
sort -un |\\
nawk 'NF==3{print \$2,\$3}' |\\
cat >! \${tempfile}Fs

# get complete, unique lists of Phases/FOMs
# (a phase is kept even when no W column shares its counter; the FOM
#  field is then empty)
cat \${tempfile}datasets |\\
nawk '\$1=="P"{P[\$2]=\$NF} \$1=="W"{W[\$2]=\$NF} \\
      END{for(i in P) print i, P[i], W[i];}' |\\
sort -un |\\
nawk 'NF>=2{print \$2,\$3, " "}' |\\
cat >! \${tempfile}Ps


# one last pass through command line
# allow user overrides of all internal variables
# Each word is handled as one of:
#   - a column label found in the datasets file: dropped from the
#     F/D/P lists if a negating word came before it, otherwise
#     recorded in the userlabels file
#   - an existing file: ignored here
#   - a word starting with a digit: resolution, sigma or difference
#     cutoff, depending on its suffix or a neighbouring word
#   - a negating word (no, not, don't, ignore, except): sets NO for
#     the word(s) that follow
set i = 0
echo -n "" >! \${tempfile}userlabels
while( \$i < \$#argv )
    @ i = ( \$i + 1 )
    @ nexti = ( \$i + 1 )
    @ lasti = ( \$i - 1 )
    # clamp the neighbour indices so argv is never indexed out of range
    if(\$nexti > \$#argv) set nexti = \$#argv
    if(\$lasti < 1) set lasti = 1
    set arg = "\$argv[\$i]"
    
    # see if a dataset label was given
    egrep " \$arg " \${tempfile}datasets >& /dev/null
    if(! \$status) then
	if(\$?NO) then
	    # user doesn't want this label
	    # filter it out of the input files
	    egrep -v "^\$arg " \${tempfile}Fs >! \${tempfile}
	    mv \${tempfile} \${tempfile}Fs
	    egrep -v "^\$arg " \${tempfile}Ds >! \${tempfile}
	    mv \${tempfile} \${tempfile}Ds
	    egrep -v "^\$arg " \${tempfile}Ps >! \${tempfile}
	    mv \${tempfile} \${tempfile}Ps
	else
	    # must want only this label?
	    cat \${tempfile}datasets |\\
	    nawk -v label=\$arg 'NF>2 && \$NF==label{print \$NF}' |\\
	    cat >> \${tempfile}userlabels
	endif
	# "NO" stays set for next word
	continue
    endif
    
    # only look at non-file words
    if(-e "\$arg") then
	unset NO
	continue
    endif
    
    if("\$arg" =~ [0-9]*) then
	# we have a number
	if(("\$arg" =~ *A)||("\$argv[\$nexti]" == "A")) then
	    # user-preferred resolution limits
	    # (either a single number or a "lo-hi" range; only values
	    #  above 0.1 are accepted)
	    set temp = \`echo "\$arg" | nawk 'BEGIN{FS="-"} \$1+0 > 0.1{print \$1+0} \$2+0 > 0.1{print \$2+0}'\`
	    if(\$#temp != 1) then
		# two numbers: the larger one is the low-resolution limit
		set temp = \`echo \$temp | nawk '\$1>\$2{print \$1, \$2} \$2>\$1{print \$2, \$1}'\`
		if(\$#temp == 2) then
		    set loRES = "\$temp[1]"
		    set hiRES = "\$temp[2]"
		endif
	    else
		if("\$temp" != "") set hiRES = "\$temp"
	    endif
	endif
	    
	# NOTE(review): DATA_cutoff, MAX_diso and MAX_dano are only
	# recorded here; no use of them is visible in this part of
	# the script
	if(("\$arg" =~ *[Ss]igma)||("\$argv[\$nexti]" =~ [Ss]igma)) then
	    set DATA_cutoff = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	endif
	
	if(("\$arg" =~ *[Dd]iff)||("\$argv[\$lasti]" =~ [Dd]iff)) then
	    set MAX_diso = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    set MAX_dano = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	endif
	if(("\$arg" =~ *[Dd]iso)||("\$argv[\$lasti]" =~ [Dd]iso)) then
	    set MAX_diso = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	endif
	if(("\$arg" =~ *[Dd]ano)||("\$argv[\$lasti]" =~ [Dd]ano)) then
	    set MAX_dano = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	endif
    endif
    
    # allow "NO" logic to carry through to next word(s)
    unset NO
    if(("\$arg" == "no")||("\$arg" == "not")) set NO
    if(("\$arg" == "don't")||("\$arg" == "ignore")) set NO
    if("\$arg" == "except") set NO
end

rm -f \${tempfile}datasets >& /dev/null

# turn the "user" labels into real label files
# (the label lists come first in the awk input, so each two-word line
#  is remembered under its first word; the one-word lines of the
#  userlabels file then select the remembered lines, in the order the
#  user typed them)
cat \${tempfile}Ds \${tempfile}userlabels |\\
nawk 'NF==2{set[\$1]=\$0}\\
      NF==1{print set[\$1]}' |\\
nawk 'NF==2' >! \${tempfile}
set Ds = \`cat \${tempfile} | wc -l\`
if(\$Ds > 0) then
    # user mentioned which Ds to use
    mv \${tempfile} \${tempfile}Ds
endif
# Ds is from here on the number of anomalous difference columns in use
set Ds = \`cat \${tempfile}Ds | wc -l\`

# same for Fs
# (the user selection only replaces the default list when it names
#  more than one F)
cat \${tempfile}Fs \${tempfile}userlabels |\\
nawk 'NF==2{set[\$1]=\$0}\\
      NF==1{print set[\$1]}' |\\
nawk 'NF==2' >! \${tempfile}
set Fs = \`cat \${tempfile} | wc -l\`
if(\$Fs > 1) then
    # user mentioned which Fs to use
    mv \${tempfile} \${tempfile}Fs
    
    # get user-specified F dataset order 
    # (USER_ORDER later prevents the sign flip of the Diso map)
    set order = \`nawk 'NF==2{print \$1}' \${tempfile}Fs\`
    set USER_ORDER
endif
# Fs is from here on the number of F columns in use
set Fs = \`cat \${tempfile}Fs | wc -l\`

# same for Phases
# (every line is remembered under its first word; a one-word line
#  first prints what was remembered for that word, which is how a user
#  label picks its phase line)
cat \${tempfile}Ps \${tempfile}userlabels |\\
nawk 'NF==1{print set[\$1]}\\
           {set[\$1]=\$0}' |\\
nawk 'NF!=0' >! \${tempfile}
set Ps = \`cat \${tempfile} | wc -l\`
if(\$Ps > 0) then
    # user mentioned which Phase to use
    tail -1 \${tempfile} >! \${tempfile}Ps
endif
# only use one phase
# (the last line of the list; PHASE and FOM come out empty when the
#  list is empty)
tail -1 \${tempfile}Ps >! \${tempfile}
set PHASE = \`nawk '{print \$1}' \${tempfile}\`
set FOM   = \`nawk '{print \$2}' \${tempfile}\`


# clean up
rm -f \${tempfile} >& /dev/null
rm -f \${tempfile}Ps >& /dev/null
rm -f \${tempfile}userlabels >& /dev/null

goto Return_from_Setup


####################################################

The Future:
re-order Diso if scales come out screwy?
make the interface a little slicker, less wordy?
use correlation instead of scale as a weight?
implement use of max_DANO, DATA_cutoff, etc. ?

EOF-bestFH_script
chmod a+x ${scriptDIR}bestFH.com
























write_dm:
####################################################################
#
#	generate adaptive DM script
#
#	Writes ${scriptDIR}dm.com from the here-document below and makes
#	it executable.  Variables written without a backslash in the
#	here-document are substituted now, when the file is generated;
#	backslash-escaped ones are left for dm.com to evaluate at run time.
#
####################################################################

# don't overwrite user-modified script
if(-e ${scriptDIR}dm.com) goto write_fft

#echo "writing ${scriptDIR}dm.com"
cat << EOF-DMscript >! ${scriptDIR}dm.com
#! /bin/csh -f
#
#	Phaser Elves: Automatic DM Solvent flattening script
#
#
####################################################################
set mtzfile  = "${mtzDIR}mlphare.mtz"
set outfile  = "dmed.mtz"
set tempfile = "./dm_temp"

defaults:
set SCHEME  = "AUTO"
set method  = "$DM_method"
set steps   = "auto"
set Solvent = "50%"

# these are all reset from the command line
set F       = "FP"
set SIGF    = "SIGFP"
set PHI     = PHIB
set FOM     = FOM
set HL      = "HLA=HLA HLB=HLB HLC=HLC HLD=HLD"

# different systems have different nawks
# (fall back to plain awk if the configured one does not run)
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

# no arguments: print usage; otherwise Setup (at the bottom) parses
# the command line and jumps back to ReturnFromSetup
if("\$1" == "") goto Help
goto Setup
# scan the command line
Help:
####################################################################
cat << EOF

usage: \$0 [mlphare.mtz] [50%] [FP]

where:
    mlphare.mtz - output MTZ from MLPHARE
    50%		- solvent content (must end with %)
    FP		- F you want to flatten with (default to best F in mtz)

EOF
exit 9
ReturnFromSetup:
if(("\$F" == "")||(! -e \$mtzfile)) goto Help

# make solvent content fractional
set Solvent = \`echo \$Solvent | nawk '{print \$1/100}' \`

####################################################################

# run dm; the trailing awk copies its output and stops reading once a
# line containing "Cycle  500" has been printed
$DM \\
HKLIN \$mtzfile \\
HKLOUT \$outfile \\
 << eof-dm | nawk '{print} /Cycle  500/{exit}'
TITLE flattening in dm by \$Solvent solvent
# do flattening, histogram matching, and "multi-resolution" modification
MODE SOLV HIST MULT
# method of detecting bias in the flattened map
COMBINE \$method
# number of flattening cycles to execute
NCYCLE \$steps
# change last two values for buffer between solvent and protein
#SOLC 0.628 MASK 0.60 0.37
SOLC \$Solvent

# "SCHEME ALL" is all spots at once (could also be AUTO, RES, FOM, MAG, etc.)
SCHEME \$SCHEME
# print out realspace residuals
REALFREE

# defaults <radius>=-1.0 <mode>=2 <rhomin>=0.32 <rhomax>=2.0 e/A^3
#WANG -1.0 2 LIMITS 0.32 2.0
LABI FP=\$F SIGFP=\$SIGF PHIO=\$PHI FOMO=\$FOM \$HL
LABO PHIDM=PHIDM FOMDM=FOMDM HLADM=HLADM HLBDM=HLBDM HLCDM=HLCDM HLDDM=HLDDM
END

eof-dm
if(! \$status) then
    echo "\$outfile is ready."
else
    exit 2
endif

exit
####################################################################

Setup:
####################################################################
# scan the command line
#   *.mtz            input file
#   word ending in % solvent content
#   plain integer    number of dm cycles
#   omit             use the OMIT combination method
foreach arg ( \$* )
    if( "\$arg" =~ *.mtz ) set mtzfile  = "\$arg"
    if( "\$arg" =~ *% ) set Solvent     = "\$arg"
    if(( "\$arg" =~ *[0-9] )&&( "\$arg" =~ [1-9]* )) set steps = "\$arg"
    if( "\$arg" == omit ) set method    = "OMIT"
end

#let DM choose scheme for known # of steps
#if("\$steps" != "auto") set SCHEME = AUTO

#get variables from mtz file
# (keep only the per-column statistics lines of the mtzdump header)
echo "go" | mtzdump hklin \$mtzfile |\\
nawk '/OVERALL FILE STATISTICS/,/No. of reflections used/' |\\
nawk 'NF>10 && \$(NF-1) ~ /[FQPWADI]/' |\\
cat >! \${tempfile}mtzdmp

# use completeness, or F/sigF to pick default F
# Each output line is "F sigma-label resolution completeness ratio".
# Sort keys: third field ascending, then fourth and fifth descending,
# all numeric.  POSIX -k keys are used in place of the obsolete
# "+pos -pos" form, which not every sort accepts any more.
cat \${tempfile}mtzdmp |\\
nawk '\$(NF-1) == "F"{F=\$NF; meanF=\$8; reso=\$(NF-2); comp=substr(\$0,32)+0; \\
      getline; S=\$NF; if(\$8) meanF /= \$8; print F, S, reso, comp, meanF;}' |\\
sort -k3,3n -k4,4nr -k5nr >! \${tempfile}F

# and extract all dataset types/labels
# (lines are "type label" followed by blanks, so a pattern ending in a
#  blank matches a whole label)
cat \${tempfile}mtzdmp |\\
nawk 'NF>2{print \$(NF-1), \$NF, " "}' |\\
cat >! \${tempfile}cards

#clean up
rm -f \${tempfile}mtzdmp

# pick F with best resolution, or F/sigma
set F    = \`head -1 \${tempfile}F\`
if(\$#F > 2) then
    set SIGF = \$F[2]
    set F    = \$F[1]
endif

# pick most recent phase/FOM
# (the defaults are kept if the file has them; otherwise the last P
#  and W columns listed are used)
grep "P \$PHI " \${tempfile}cards >& /dev/null
if(\$status) then
    set temp = \`nawk '/^P/{print \$2}' \${tempfile}cards  | tail -1\`
    if("\$temp" != "") set PHI = "\$temp"
endif
grep "W \$FOM " \${tempfile}cards >& /dev/null
if(\$status) then
    set temp = \`nawk '/^W/{print \$2}' \${tempfile}cards | tail -1\`
    if("\$temp" != "") set FOM = "\$temp"
endif

# pick most recent HL coefficients
# (type A columns are grouped four at a time, in file order)
cat \${tempfile}cards |\\
nawk '\$1=="A"{++n;HL[n]=\$NF} END{for(i=1;i<=n;i+=4) \\
      print "HLA="HL[i],"HLB="HL[i+1],"HLC="HL[i+2],"HLD="HL[i+3]}' |\\
cat >! \${tempfile}HL
set HL = \`tail -1 \${tempfile}HL\`

# see if user specified an F, Phase, FOM, or HL set
foreach arg ( \$* )
    set temp = \`grep " \$arg " \${tempfile}cards\`
    if("\$temp" =~ F*) then
	set F = "\${arg}"
	set temp = \`nawk -v arg="\$arg" '\$1==arg{print \$2}' \${tempfile}F\`
	if(\$#temp == 1) set SIGF = "\$temp"
	continue
    endif
    if("\$temp" =~ P*) set PHI = "\${arg}"
    if("\$temp" =~ W*) set FOM = "\${arg}"
    if("\$temp" =~ A*) set HL = \`grep "=\${arg} " \${tempfile}HL | tail -1\`
end

rm -f \${tempfile}cards \${tempfile}F \${tempfile}HL >& /dev/null

goto ReturnFromSetup

EOF-DMscript
chmod a+x ${scriptDIR}dm.com






































write_fft:
####################################################################
#
#	write the fft.com map-making script
#
####################################################################

# leave an existing (possibly user-edited) fft.com alone
if(-e ${scriptDIR}fft.com) goto write_macros

#echo "writing ${scriptDIR}fft.com"

# choose the phase/FOM labels the generated script defaults to:
# PHIB/FOM when NO_DM is set, the PHIDM/FOMDM labels otherwise
if($?NO_DM) then
    set tempPHI = "PHIB"
    set tempFOM = "FOM"
else
    set tempPHI = "PHIDM"
    set tempFOM = "FOMDM"
endif
cat << EOF-fftscript >! ${scriptDIR}fft.com
#! /bin/csh -f
#
#	Phaser's fft script for making the phased map
#
#
##############################################################################
# Values written without a backslash in this here-document (program
# paths, the default mtz file and column labels) are filled in when
# fft.com is generated; everything else is evaluated when fft.com runs.
# set up awk
# (fall back to plain awk if the configured one does not run)
alias nawk $nawk
nawk 'BEGIN{print 1; exit}' >& /dev/null
if(\$status) alias nawk awk

# defaults
set MAPMAN   = $MAPMAN
set BRIX     = $BRIX
set BONES    = $BONES
set pdbfile  = ""
set mtzfile  = $bestmtz
set tempfile = fft_temp

# oversampling makes maps look prettier
# (an empty hiRES is replaced in Setup by a limit derived from the mtz)
set hiRES    = ""
set F        = $native
set SIGF     = $SIGnative
set PHI      = $tempPHI
set FOM      = $tempFOM

# output files
set ccp4map  = ffted.map
set omap     = ffted.omap
set pickpdb  = fftpick.pdb
set omacro   = ffted.omacro
set bonefile = bones.o

# name of the molecule/object the O macro loads the pdb file as
set pick = pick

# no arguments: print usage; otherwise Setup (at the bottom) parses
# the command line and jumps back to ReturnFromSetup
if("\$1" == "") goto Help
goto Setup
#  Procedure (at bottom) to read command-line args
#  mtz, Fs, or resolution
Help:
# print the usage message and exit with code 9
cat << EOF

usage: \$0 mtzfile.mtz [F] [PHI] [coverme.pdb]
   or: \$0 mapfile.map [coverme.pdb]

where:
mtzfile.mtz  - a (phased) mtz file you want a map from
mapfile.map  - a pre-calculated map you want to convert to o format
coverme.pdb  - a pdb file you want the output map to cover

EOF
exit 9
ReturnFromSetup:

if(\$?user_mapfile) then
    # jump ahead if user specified a map
    # (work on a copy, because the temporary map is deleted later)
    cp \$user_mapfile \${tempfile}.map
    goto normalize
endif


################################################################################
# calculate the map; SIG1 and WFOM are complete "SIG1=label" and
# "W=label" words prepared in Setup, or empty when there is no such
# column
fft HKLIN \$mtzfile MAPOUT \${tempfile}.map << EOF-fft
RESOLUTION 1000 \$hiRES
#EXCLUDE SIG1 1
title  \$hiRES A map of \$FOM * \$F @ \$PHI
LABIN F1=\$F \$SIG1 PHI=\$PHI \$WFOM
END
EOF-fft
# stop here if fft failed
if(\$status) exit

normalize:
# normalize the map
# (mapmask SCALE SIGMA; the result is the ccp4 output map)
echo "SCALE SIGMA" |\\
mapmask mapin \${tempfile}.map mapout \$ccp4map
rm -f \${tempfile}.map >& /dev/null

extend:
# extend the map

# quit if user wasn't interested in an O map
if("\$omap" == "") exit

# get space group/CELL from map header
# (SG is the fourth field of the symop.lib line whose first field is
#  the space group number)
set CELL = \`echo "go" | mapdump mapin \$ccp4map | nawk '/Cell dimensions/{print \$(NF-5), \$(NF-4), \$(NF-3), \$(NF-2), \$(NF-1), \$NF; exit}'\`
set SGnum = \`echo "go" | mapdump mapin \$ccp4map | nawk '/Space-group/{print \$NF}'\`
set SG = \`nawk -v SGnum=\$SGnum '\$1 == SGnum {print \$4;exit}' \$CLIBD/symop.lib\`

# decide on how to extend the map (cover or fill cell)
if(-e "\$pdbfile") then
    # a pdb file was given: the map will cover it, and the O macro
    # loads it under the name "build"
    set pickpdb = "\$pdbfile"
    set pick = "build"
    # centre of mass as reported by pdbset
    set center = \`echo "COM" | pdbset xyzin \$pdbfile XYZOUT \${tempfile}.pdb | nawk '\$1=="Center" && \$3=="Mass:"{print \$4, \$5, \$6}'\`
    rm -f \${tempfile}.pdb >& /dev/null
else
    # no pdb file: use half of each cell edge length
    set center  = \`echo \$CELL | nawk '{print \$1/2, \$2/2, \$3/2}'\`
endif


# make an O macro for looking at the results
# (temp ends up holding the absolute directory of the pdb file, but
#  its only user is the commented-out line below)
set temp = \`dirname \$pickpdb\`
set temp = \`cd \$temp ; pwd\`
#set pickpdb = \${temp}/\`basename \$pickpdb\`
# everything up to the closing EOF is written to the macro file
cat << EOF >! \$omacro

! read in a pdb file
sam_atom_in \$pickpdb \$pick
mol \$pick
obj \$pick
zone ;
end

sym_set ; ; \$SG
sym_cell
! cen_xyz \$center

! read in the sites
sam_atom_in sites.pdb sites
mol sites
obj sites
zone ;
end
sym_set ; ; \$SG
sym_cell

! display them as big spheres
sketch_cpk sites
sym_sphere sites sym 30
sketch_cpk sym1
clear_flags
sketch_cpk sym2
clear_flags
sketch_cpk sym3
clear_flags
sketch_cpk sym4
clear_flags
sketch_cpk sym5
clear_flags
sketch_cpk sym6
clear_flags
sketch_cpk sym7
clear_flags
sketch_cpk sym8
clear_flags

! do bones
read \$bonefile
bone_setup skel bones 30 1 2 3 4 5
bone_draw

! use newer fastmap feature
fm_file \${ccp4map} map \$SG
!          radius style n sig color
fm_set map 25     solid 1 1.0 white 


! use "old reliable" map commands
map_cache
map_active_center
map_file \$omap
map_object map
!         dx dy dz sig color linestyle
map_param 25 25 25 1   white 0.5 0 1
map_draw

menu @\$omacro on

EOF

# now actually do the map extension
if("\$pick" == "build") then
    # cover the pdb file given by the user, with a border of 10
    echo "border 10" |\\
    mapmask mapin \$ccp4map xyzin \$pdbfile mapout \${tempfile}.map
else
    # just extend to unit cell (and then some)
    echo "XYZLIM -0.1 1.1   -0.1 1.1   -0.1 1.1 " |\\
    mapmask mapin \$ccp4map mapout \${tempfile}.map

    # pick peaks in it (just to have something to grab onto)
    peakmax MAPIN \${tempfile}.map XYZOUT \$pickpdb << eof-pick
    THRESHOLD RMS 2
    OUTPUT BROOKHAVEN
    END
eof-pick
endif


mapman:
# convert to O format (and do a bones trace)
# (MAPSIZE is set to the byte size of the temporary map divided by 3.9)
setenv MAPSIZE \`ls -ln \${tempfile}.map | nawk '{printf "%d", \$5/3.9}'\`
\$MAPMAN << end-mapman |& grep -v Toodle
read map1 \${tempfile}.map CCP4
mappage map1 \$omap
bone skel map1 1.5 0.5 100
bone conn \$bonefile skel 5
quit
y
end-mapman
# a zero status of the pipeline above switches off both fallback
# converters below
if(! \$status) then
    # no need to do further conversions
    set BONES = ""
    set BRIX  = ""
endif

if(-e "\$BRIX") then
    # use the brix program to make an o-readable file
    \$BRIX \${tempfile}.map \$omap
endif

# the bones fallback only runs when no pdb file was given
if((-e "\$BONES")&&(! -e "\$pdbfile")) then
    \$BONES \${tempfile}.map << EOF
    1.5 0.5
    5
\$bonefile
skel
EOF
endif

rm -f \${tempfile}.map >& /dev/null



####################################################################
exit
####################################################################
####################################################################



Setup:
# scan the command line for files
foreach arg ( \$* )
    if( "\$arg" =~ *.mtz ) then
	if(! -e "\$arg") then
	    echo "WARNING: \$arg does not exist! "
	    continue
	endif
	set mtzfile  = "\$arg"
	continue
    endif
    if(("\$arg" =~ *.map)||("\$arg" =~ *.ext)) then
	if(! -e "\$arg") then
	    echo "WARNING: \$arg does not exist! "
	    continue
	endif
	set user_mapfile  = "\$arg"
	continue
    endif
    if(("\$arg" =~ *.pdb)||("\$arg" =~ *.brk)) then
	if(! -e "\$arg") then
	    echo "WARNING: \$arg does not exist! "
	    continue
	endif
	set pdbfile  = "\$arg"
	continue
    endif
    if( "\$arg" =~ [0-9]* ) then
	set temp = \`echo "\$arg" | nawk '\$1+0>0.1{print \$1+0}'\`
	if("\$temp" != "") set hiRES = "\$temp"
    endif
end

# return early if a map was specified (not actually going to do an fft! )
if(\$?user_mapfile) goto ReturnFromSetup

#get variables from mtz file
echo "go" | mtzdump hklin \$mtzfile |\\
nawk '/OVERALL FILE STATISTICS/,/No. of reflections used/' |\\
nawk 'NF>10 && \$(NF-1) ~ /[FQPWADI]/' |\\
cat >! \${tempfile}mtzdmp

# use completeness, or F/sigF to pick default F
cat \${tempfile}mtzdmp |\\
nawk '\$(NF-1) == "F"{F=\$NF; meanF=\$8; reso=\$(NF-2); comp=substr(\$0,32)+0; \\
      getline; S=\$NF; if(\$8) meanF /= \$8; print F, S, reso, comp, meanF;}' |\\
sort +2n -3 +3nr -4 +4nr >! \${tempfile}F

# and extract all dataset types/labels
cat \${tempfile}mtzdmp |\\
nawk 'NF>2{print \$(NF-1), \$NF, " "}' |\\
cat >! \${tempfile}cards

#clean up
rm -f \${tempfile}mtzdmp

# pick F with best resolution, or <F>/<sigma(F)>
set F    = \`head -1 \${tempfile}F\`
if(\$#F > 2) then
    set SIGF = \$F[2]
    set F    = \$F[1]
endif

# pick most recent phase/FOM
grep "P \$PHI" \${tempfile}cards >& /dev/null
if(\$status) then
    set temp = \`nawk '/^P/{print \$2}' \${tempfile}cards  | tail -1\`
    if("\$temp" != "") set PHI = "\$temp"
endif
grep "W \$FOM" \${tempfile}cards >& /dev/null
if(\$status) then
    set temp = \`nawk '/^W/{print \$2}' \${tempfile}cards | tail -1\`
    if("\$temp" != "") then
	set FOM = "\$temp"
    else
	# there are no FOMs in this mtz file
	set FOM = ""
    endif
endif

# see if user specified an F, Phase, or FOM
foreach arg ( \$* )
    set temp = \`grep " \$arg " \${tempfile}cards\`
    if("\$temp" =~ F*) then
	set F = "\${arg}"
	set temp = \`nawk -v arg="\$arg" '\$1==arg{print \$2}' \${tempfile}F\`
	if(\$#temp == 1) set SIGF = "\$temp"
	continue
    endif
    if("\$temp" =~ Q*) set SIGF = "\${arg}"
    if("\$temp" =~ P*) set PHI  = "\${arg}"
    if("\$temp" =~ W*) set FOM  = "\${arg}"
    
    if(\$?NO && ("\$arg" == FOM)) set FOM = ""
    
    unset NO
    if("\$arg" == "no") set NO
end

# now check and see if the sigma is really there
grep "Q \$SIGF" \${tempfile}cards >& /dev/null
if(\$status) then
    # no sigma available (doesn't really matter anyway)
    set SIGF = ""
endif

rm -f \${tempfile}cards \${tempfile}F >& /dev/null


# assign the actual fft cards here (and blank them if they don't exist)
set SIG1 = "SIG1=\$SIGF"
set WFOM = "W=\$FOM"

if("\$SIGF" == "") set SIG1 = ""
if("\$FOM"  == "") then
    set FOM  = 1
    set WFOM = ""
endif

if(("\$hiRES" == "")&&(-e "\$mtzfile")&&(! \$?user_mapfile)) then
    set hiRES = \`echo "head" | mtzdump hklin \$mtzfile | nawk '/Resolution Range/{getline;getline;print \$6}' \`
    # zero-fill for better-looking map
    set hiRES = \`echo \$hiRES |  nawk '\$1>0{print 1/(2.0*((1/\$1)^3))^(1/3)}'\`
endif

goto ReturnFromSetup


EOF-fftscript

chmod a+x ${scriptDIR}fft.com



















write_macros:
# Writes the "o" graphics macros into ${oDIR}.  A macro file that already
# exists is left untouched (each writer is guarded by an -e test).

# space group name used on the sym_set lines: $SG, or $newSG when $SG is empty
set sg = "$SG"
if ("$sg" == "") set sg = "$newSG"

# make an o macro for loading and viewing "best" map
# $pwd is the absolute path of the macro directory; it is substituted into
# the "symbol cwd" lines of the .omacro files written further down.
# NOTE(review): with an empty $oDIR the cd has no argument and goes to the
# home directory - confirm $oDIR is always set by the time this runs.
set pwd = `cd $oDIR ; pwd `
if(! -e ${oDIR}map) then
    # \${cwd} is escaped so that the literal text reaches the macro file;
    # cwd is an O symbol (defined by the .omacro files), not a csh variable
    cat << EOF-omac >! ${oDIR}map
! render what Phaser Elves consider the best map
map_cache
map_active_center
map_file \${cwd}/best_phased.omap
map_object map
!         dx dy dz sig color linestyle
map_param 25 25 25 1   white 0.5 0 1
map_draw

EOF-omac
endif
# make an o macro for loading and viewing best stuff
# (peak-pick, heavy-atom sites, bones trace and the "map" macro)
if(! -e ${oDIR}best.omacro) then
    # half of each of the first three values in $CELL; only referenced on the
    # cen_xyz line, which is written with a leading "!" (commented out)
    set center = `echo $CELL | nawk '{print $1/2, $2/2, $3/2}'`
    # $pwd, $sg and $center are substituted while the file is written;
    # \${cwd} is escaped and stays in the macro as a literal O symbol reference.
    # NOTE(review): "symbol cwd" is written twice (absolute path, then ".") -
    # presumably the second definition is the one O ends up using; confirm.
    cat << EOF-omac >! ${oDIR}best.omacro
! current working directory
symbol cwd ${pwd}
symbol cwd .

! read in a peak-pick of best_phased.omap (for grabbing onto)
sam_atom_in \${cwd}/best_pick.pdb pick
mol pick
obj pick
zone ;
end

sym_set ; ; $sg
sym_cell
! cen_xyz $center

! read in pdb version of the metal sites
sam_atom_in \${cwd}/best_sites.pdb sites
mol sites
obj sites
zone ;
end
sym_set ; ; $sg
sym_cell

! display them as big spheres
sketch_cpk sites
sym_sphere sites sym 30
sketch_cpk sym1
clear_flags
sketch_cpk sym2
clear_flags
sketch_cpk sym3
clear_flags
sketch_cpk sym4
clear_flags
sketch_cpk sym5
clear_flags
sketch_cpk sym6
clear_flags
sketch_cpk sym7
clear_flags
sketch_cpk sym8
clear_flags

! read in and display the bones trace
read \${cwd}/best_bones.o
bone_setup skel bones 30 1 2 3 4 5
bone_draw

! render what Phaser Elves consider the best map
@\${cwd}/map
menu @map on
EOF-omac
endif



# make an o macro for loading and viewing the latest map (phased.omap);
# same layout as the "map" macro, but pointing at phased.omap.
# An existing file is not overwritten.
if(! -e ${oDIR}latest) then
    # \${cwd} is escaped: it must reach the file as a literal O symbol reference
    cat << EOF-omac >! ${oDIR}latest
! render the latest Phaser Elves map
map_cache
map_active_center
map_file \${cwd}/phased.omap
map_object map
!         dx dy dz sig color linestyle
map_param 25 25 25 1   white 0.5 0 1
map_draw

EOF-omac
endif
# make an o macro for loading and viewing latest stuff
# (pick.pdb, sites.pdb, bones.o and the "latest" map macro)
# An existing file is not overwritten.
if(! -e ${oDIR}latest.omacro) then
    # half of each of the first three values in $CELL; only referenced on the
    # cen_xyz line, which is written with a leading "!" (commented out)
    set center = `echo $CELL | nawk '{print $1/2, $2/2, $3/2}'`
    # $pwd, $sg and $center are substituted while the file is written;
    # \${cwd} is escaped and stays in the macro as a literal O symbol reference.
    cat << EOF-omac >! ${oDIR}latest.omacro
! current working directory
symbol cwd ${pwd}
symbol cwd .

! read in a peak-pick of phased.omap (for grabbing onto)
sam_atom_in \${cwd}/pick.pdb pick
mol pick
obj pick
zone ;
end

sym_set ; ; $sg
sym_cell
! cen_xyz $center

! read in pdb version of the metal sites
sam_atom_in \${cwd}/sites.pdb sites
mol sites
obj sites
zone ;
end
sym_set ; ; $sg
sym_cell

! display them as big spheres
sketch_cpk sites
sym_sphere sites sym 30
sketch_cpk sym1
clear_flags
sketch_cpk sym2
clear_flags
sketch_cpk sym3
clear_flags
sketch_cpk sym4
clear_flags
sketch_cpk sym5
clear_flags
sketch_cpk sym6
clear_flags
sketch_cpk sym7
clear_flags
sketch_cpk sym8
clear_flags

! read in and display the bones trace
read \${cwd}/bones.o
bone_setup skel bones 30 1 2 3 4 5
bone_draw

! render the latest Phaser Elves map
@latest
menu @latest on
EOF-omac
endif












write_pick:
#################################################################
# don't overwrite user-modified script
if(-e ${scriptDIR}pick.com) goto write_oasis

# create a general-purpose peak-pick script
#echo "writing ${scriptDIR}pick.com"

# Everything up to EOF-pickscript is the text of pick.com.  The here-document
# is expanded while it is written: an unescaped variable such as $nawk is
# substituted now, while anything written with a backslash (\$status, \`...\`)
# stays literal and is evaluated when pick.com itself runs.
cat << EOF-pickscript >! ${scriptDIR}pick.com
#! /bin/csh -f
#
#	Pick unique peaks in a map, 
#	avoiding "map-edge" false peaks
#	optionally avoiding a list of "boring" positions
#	in a "symmetry-aware" fashion
#
# defaults
set mapfile = "maps/FH_Four.map"
set pdbfile = ""
set logfile = "pick.log"
set outfile = "pick.pdb"

# minimum peak height, in units of the map sigma
set sigma   = 6

# prefix of every scratch file made by this script
set tempfile = pick_temp

# set this to wherever your awk program is
# (falls back to plain awk if the program named here will not run)
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

# no arguments at all: just print the usage message
if("\$1" == "") goto Help
# start with an empty log file
echo -n "" >! \$logfile
################################################################################
goto Setup
# set/reset \$mapfile \$pdbfile \$sigma from command-line
Help:
cat << EOF

usage: \$0 \$mapfile [\$sigma] [boring.pdb]

where: \$mapfile is the map you want to pick
       \$sigma (optional) is the minimum peak height (sigma units)
       boring.pdb (optional) sites to avoid in peak-picking

EOF
exit 9
Return_from_Setup:
################################################################################

if(! -e "\$mapfile") goto Help

# tell the user what is about to be searched for
# (a negative sigma is announced as "+/-" followed by its magnitude)
set sign = \`echo "\$sigma" | nawk '\$1+0<0{print "+/-" 0-\$1} \$1+0>0{print \$1}'\`
echo -n "looking for \${sign}*sigma peaks in \$mapfile "
if(-e "\$sitefile") then
    echo "not already within \${CLOSE_peaks}A"
    echo -n "of the \$boring_sites atoms listed in \$sitefile"
endif
echo ""

# extract a single ASU from the input map
mapmask mapin \$mapfile mapout \${tempfile}asu.map << EOF-xtend | tee \${tempfile}xtend >> \$logfile
scale sigma
xyzlim ASU
# re-axis to X,Y,Z
AXIS X Y Z
# fill blank spaces with zero
pad 0
EOF-xtend

# get size of the ASU
# (grid start/stop points from the mapmask log, divided by the grid sampling,
#  i.e. the limits expressed as fractions of the cell edges)
cat \${tempfile}xtend |\\
nawk '/Grid sampling on x, y, z/{print \$(NF-2), \$(NF-1), \$NF;} \\
      /Start and stop points on x, y, z/{print \$(NF-5), \$(NF-4), \$(NF-3), \$(NF-2), \$(NF-1), \$NF}' |\\
nawk 'NF==3{gx=\$1;gy=\$2;gz=\$3}\\
      NF==6{print "ASU", \$1/gx, \$2/gx, \$3/gy, \$4/gy, \$5/gz, \$6/gz}' |\\
cat >! \${tempfile}asu
rm -f \${tempfile}xtend >& /dev/null

# calculate a 10% "edge pad"
set xyzlim = \`nawk '/^ASU/{print \$2-0.1, \$3+0.1, \$4-0.1, \$5+0.1, \$6-0.1, \$7+0.1}' \${tempfile}asu\`
set asu = \`nawk '/^ASU/{print \$2, \$3, \$4, \$5, \$6, \$7}' \${tempfile}asu\`

# without the ASU limits nothing below can work (mapmask missing or failed),
# so stop here with a message instead of carrying on with empty limits
if(\$#xyzlim < 6) then
    echo "ERROR: unable to get the ASU limits of \$mapfile from mapmask (see \$logfile)"
    rm -f \${tempfile}asu.map \${tempfile}asu >& /dev/null
    set BAD
    goto cleanup
endif

# re-extend the map by a 10% pad in every direction
echo "xyzlim \$xyzlim" |\\
mapmask mapin \${tempfile}asu.map mapout \${tempfile}pick.map >> \$logfile
rm -f \${tempfile}asu.map >& /dev/null


repeat:
# reformat to peakmax vernacular
# (a negative sigma becomes its magnitude followed by the NEGATIVES keyword)
set sigma = \`echo \$sigma | nawk '\$1+0>0{print \$1} \$1+0<0{print -\$1,"NEGATIVES"}'\`

# do the actual peak-pick
peakmax MAPIN \${tempfile}pick.map PEAKS \${tempfile}.xyz TO \${tempfile}.xyz << eof-pick >> \$logfile
THRESHOLD \$sigma
OUTPUT PEAKS
END
eof-pick
if(\$status) then
    # peakmax failed: if the threshold was too high, retry at 2/3 of it
    grep "Threshold too high" \$logfile >& /dev/null
    if(! \$status) then
	# hand a signed value back to the top of the loop so that a request
	# for negative peaks survives the retry; nothing is printed (and the
	# search is abandoned) once the threshold is 0.5 or less
	set sigma = \`echo "\$sigma" | nawk '\$1+0>0.5{s=\$1*2/3; if(\$2=="NEGATIVES") s=-s; print s}'\`
	if("\$sigma" == "") then
	    echo "no peaks."
	    set BAD
	    goto cleanup
	endif
	echo "reducing sigma to \$sigma"
	goto repeat
    endif
endif
rm -f \${tempfile}pick.map >& /dev/null

# re-format the peaks list (with no stuck-together numbers)
# fixed-column substr calls are used so that adjacent numbers cannot merge
cat \${tempfile}.xyz |\\
nawk 'NF>6 && /[^1-9.-]/{ \\
print substr(\$0,23,8), substr(\$0,31,8), substr(\$0,39,8),\\
      substr(\$0,49,8), substr(\$0,57,8), substr(\$0,65,8), substr(\$0,6,8)}' |\\
cat >! \${tempfile}peaks.pick
rm -f \${tempfile}.xyz >& /dev/null

# now filter out out-of-bounds (map-edge) peaks
################################################################
# trim off peaks outside the CCP4 ASU limits
# (they should either be redundant, or map-edge peaks)
# the ASU line is read first, then each peak is tested against its limits;
# survivors are sorted by the 7th column, largest first
cat \${tempfile}asu \${tempfile}peaks.pick |\\
nawk '/^ASU/{xmin=\$2-0;ymin=\$4-0;zmin=\$6-0;\\
	     xmax=\$3+0;ymax=\$5+0;zmax=\$7+0;next;} \\
      {x=\$1+0;y=\$2+0;z=\$3+0}\\
      x>=xmin && x<=xmax && y>=ymin && y<=ymax && z>=zmin && z<=zmax {print}' |\\
sort -nr +6 >! \${tempfile}peaks.trimmed

# add another 5% pad (just in case we lost a few)
# peaks inside the ASU proper were already written above and are skipped here;
# only peaks in the 0.05-wide shell around it are appended (after the others)
cat \${tempfile}asu \${tempfile}peaks.pick |\\
nawk -v pad=0.05 '/^ASU/{xmino=\$2-pad;ymino=\$4-pad;zmino=\$6-pad;\\
			 xmaxo=\$3+pad;ymaxo=\$5+pad;zmaxo=\$7+pad;\\
                         xmini=\$2-0;ymini=\$4-0;zmini=\$6-0;\\
			 xmaxi=\$3+0;ymaxi=\$5+0;zmaxi=\$7+0;\\
                         next;} \\
      {x=\$1+0;y=\$2+0;z=\$3+0}\\
      x>=xmini && x<=xmaxi && y>=ymini && y<=ymaxi && z>=zmini && z<=zmaxi {next}\\
      x>xmino && x<xmaxo && y>ymino && y<ymaxo && z>zmino && z<zmaxo {print}' |\\
sort -nr +6 >> \${tempfile}peaks.trimmed
rm -f \${tempfile}peaks.pick \${tempfile}asu >& /dev/null

# these peaks are sorted by "priority" of their ASU convention
# \${tempfile}peaks.trimmed had
# format: xf yf zf X Y Z height

################################################################
# near-edge peaks have probably been counted twice, so we need
# to filter them out

# generate ALL symmetry-equivalent positions for the trimmed peaks
# (each peak goes in as its own numbered RESIDUE, so the number can be
#  picked back out of the gensym listing)
cat << EOF >! \${tempfile}gensym.in
SYMM \$SG
CELL \$CELL
XYZLIM \$xyzlim
EOF
cat \${tempfile}peaks.trimmed |\\
nawk '{++n; print "RESIDUE",n; print "ATOM X", \$1, \$2, \$3}' |\\
cat >> \${tempfile}gensym.in
cat \${tempfile}gensym.in | gensym |&\\
nawk '/List of sites/,/Normal termination/' |\\
nawk '\$2 ~ /[01].[0-9][0-9][0-9]/{print \$2, \$3, \$4, \$5, \$6, \$7, \$(NF-1), "sym"}' |\\
cat >! \${tempfile}peaks.symm
rm -f \${tempfile}gensym.in >& /dev/null

#  \${tempfile}peaks.symm is now an indexed list of all 
# symmetry-related peak positions within \$xyzlim
#format: xf yf zf X Y Z peak# "sym" 

# to preserve the ASU coordinates:
# sort the trimmed coordinates into the list so 
# they will be the "first" symmetry mate considered 
# for each "site"
# (the original peaks carry height*height in column 8 where the symmetry
#  mates carry the word "sym", and column 8 is sorted numerically, largest
#  first, within each peak number)
cat \${tempfile}peaks.trimmed |\\
nawk '{++n; print \$1, \$2, \$3, \$4, \$5, \$6, n, \$NF*\$NF, \$NF}' |\\
cat - \${tempfile}peaks.symm |\\
sort +6n -7 +7nr -8 >! \${tempfile}peaks.xpanded
rm -f \${tempfile}peaks.trimmed >& /dev/null
rm -f \${tempfile}peaks.symm >& /dev/null


# now filter the symmetry-expanded list for the
# unique list of non-symmetry related peaks
# first pass: drop repeated fractional positions
# second pass: print a peak only if none of the positions seen so far lies
# within the closeness cutoff, and only once per peak number
cat \${tempfile}peaks.xpanded |\\
nawk '! seen[\$1 " " \$2 " " \$3] {print} {seen[\$1 " " \$2 " " \$3]=1}' |\\
nawk -v cut=\$CLOSE_peaks '\$NF!="sym"{height[\$7]=\$NF}\\
        NF>3{++n; X[n]=\$4; Y[n]=\$5; Z[n]=\$6; site[n]=\$7;\\
        # compare this peak to all sites seen so far \\
	for(i=1;i<n;++i){\\
	    dist=sqrt((\$4-X[i])^2 +(\$5-Y[i])^2 +(\$6-Z[i])^2);\\
	    # see if an equivalent peak has already been printed \\
	    if(dist < cut){ ++taken[site[n]]; break}}; \\
	if(! taken[site[n]]) print \$1, \$2, \$3, \$4, \$5, \$6, height[site[n]];\\
	# register all the symm mates as taken too \\
	++taken[site[n]]}' |\\
cat >! \${tempfile}peaks.reduced
rm -f \${tempfile}peaks.xpanded >& /dev/null

# \${tempfile}peaks.reduced should now contain only unique peaks from \$mapfile
# format: xf yf zf X Y Z height


################################################################
# now look for special positions
# every unique peak is expanded over one whole unit cell; a position that
# turns up more than once for the same peak means the peak sits on a
# symmetry element
cat \${tempfile}peaks.reduced |\\
nawk 'NF>0{++n; print "RESIDUE",n; print "ATOM X", \$1, \$2, \$3}' |\\
cat >! \${tempfile}gensym.in
gensym << EOF >! \${tempfile}.log
SYMM \$SG
CELL \$CELL
XYZLIM 0 0.999999 0 0.999999 0 0.999999
@\${tempfile}gensym.in
EOF
# count the number of times each site is "seen"
# more than once implies a special position
cat \${tempfile}.log |\\
nawk '/List of sites/,/Normal termination/' |\\
nawk '\$2 ~ /[01].[0-9][0-9][0-9]/{print \$2, \$3, \$4, \$(NF-1), \$NF}' |\\
nawk '{++seen[\$1 " " \$2 " " \$3 " " \$4]}\\
   END{for(site in seen) print site, seen[site]}' |\\
sort -un +3 |\\
nawk '\$5+0>0{print \$4, 1/\$5}' >! \${tempfile}occs
rm -f  \${tempfile}.log >& /dev/null
# \${tempfile}occs format: peak# 1/mult


# now add these "occupancies" to the master list
# a peak with no entry in the occupancy list (nothing usable came back from
# gensym for it) is given 1, so that the column is never empty and the
# fields after it cannot shift
cat \${tempfile}occs \${tempfile}peaks.reduced |\\
nawk 'NF==2{occ[\$1]=\$2} \\
       NF>2{++n; o=occ[n]; if(o=="") o=1; print \$1,\$2,\$3,\$4,\$5,\$6,o,\$7}' |\\
cat >! \${tempfile}peaks.final
rm -f \${tempfile}peaks.reduced >& /dev/null
# \${tempfile}peaks.final now contains the "final" list of output peaks
# and should faithfully represent the unique peaks in the map
# format: xf yf zf X Y Z 1/mult height

set peaks = \`cat \${tempfile}peaks.final | wc -l\`
echo -n "\$peaks found "
rm -f  \${tempfile}occs >& /dev/null

################################################################
# filter out "boring" sites
# (the list is made by the Setup section from the pdb file or script named
#  on the command line; an empty list is created here if there is none)
if(! -e \${tempfile}boring_sites) touch \${tempfile}boring_sites
# format: xf yf zf

# count number of "boring" sites
set boring_sites = \`cat \${tempfile}boring_sites | wc -l\`

# symmetry-expand the "boring" sites
cat << EOF >! \${tempfile}gensym.in
SYMM \$SG
CELL \$CELL
XYZLIM \$xyzlim
EOF
cat \${tempfile}boring_sites |\\
nawk 'NF>2{print "ATOM X", \$1, \$2, \$3}' |\\
cat >> \${tempfile}gensym.in

cat \${tempfile}gensym.in | gensym |&\\
nawk '/List of sites/,/Normal termination/' |\\
nawk '\$2 ~ /[01].[0-9][0-9][0-9]/{print \$2, \$3, \$4, \$5, \$6, \$7, "boring"}' |\\
cat >! \${tempfile}boring_sites.symm
rm -f \${tempfile}gensym.in >& /dev/null

# \${tempfile}boring_sites.symm now contains ALL symmetry-equivalent
# positions to the sites the user entered on the command-line
# format: xf yf zf X Y Z "boring"

# now remove peaks that were too close to "boring" sites
# (with no boring sites at all the nearest distance stays at 999999,
#  so every peak is kept)
cat \${tempfile}boring_sites.symm \${tempfile}peaks.final |\\
nawk -v cut=\$CLOSE_peaks ' \\
      \$NF=="boring"{++n; X[n]=\$4; Y[n]=\$5; Z[n]=\$6} \\
      \$NF!="boring"{minD=999999; \\
         # find nearest "boring" site \\
         for(i=1;i<=n;++i){\\
	     dist=sqrt((\$4-X[i])^2 +(\$5-Y[i])^2 +(\$6-Z[i])^2);\\
	     if(dist < minD){\\
	     minD=dist;}}\\
	 # now see if it is too close \\
	 if(minD > cut) {print}}' |\\
cat >! \${tempfile}peaks.interesting
rm -f  \${tempfile}boring_sites.symm >& /dev/null
rm -f  \${tempfile}peaks.final >& /dev/null

# sort the picked peaks by height
# (column 8, largest first; sorted into a scratch file, then moved back)
sort -nr +7 \${tempfile}peaks.interesting >! \${tempfile}
mv \${tempfile} \${tempfile}peaks.interesting

# \${tempfile}peaks.interesting now contains only "interesting" peaks
# format: xf yf zf X Y Z 1/mult height

# see how many are left
set interesting = \`cat \${tempfile}peaks.interesting | wc -l\`
if(\$boring_sites) echo -n "(\$interesting) new"
echo ""

################################################################
# make a pdb output file
# header: two REMARK lines and a CRYST1 line made from the six cell values;
# then one water-like ATOM line per peak, numbered in list order, with the
# orthogonal X Y Z (columns 4-6), 1/multiplicity (column 7) in the occupancy
# field and the peak height (column 8) in the B-factor field

echo "REMARK B-factors are peak heights in \$mapfile" >! \$outfile
echo "REMARK Occs are 1/multiplicity (for special positions)" >> \$outfile
echo "\$CELL" | nawk '{printf "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f\\n",\\
      \$1, \$2, \$3, \$4, \$5, \$6}' >> \$outfile
cat \${tempfile}peaks.interesting |\\
nawk '{++i; printf "ATOM   %4d  OW  WAT X%4d    %8.3f%8.3f%8.3f%6.2f%6.2f\\n",\\
         i, i, \$4, \$5, \$6, \$7, \$8}' |\\
cat >> \$outfile

################################################################
# calculate distance to nearest "old" site

# by default, use the "boring" list as "old" sites
cat \${tempfile}boring_sites >! \${tempfile}old_sites
if(! -e "\$sitefile") then
    # no sites input, so just get inter-peak distances
    cat \${tempfile}peaks.interesting >! \${tempfile}old_sites
endif
if(\$?scriptfile) then
    # input file was an mlphare script, so only interested in real atoms
    cat "\$scriptfile" |\\
    nawk '\$1~/^ATOM/{print \$3, \$4, \$5}' |\\
    cat >! \${tempfile}old_sites
endif
# count number of "old" sites
set old_sites = \`cat \${tempfile}old_sites | wc -l\`

# calculate the largest expected inter-atom distance (cell center to origin)
# (the fractional point 0.5 0.5 0.5 is converted to orthogonal coordinates
#  with coordconv; its distance from the origin, plus 3, is the search radius)
echo "0.5 0.5 0.5" |\\
nawk 'NF>2{++n; printf "%5d%10.5f%10.5f%10.5f%10.5f%5.2f%5d%10d%2s%3s%3s %1s\\n", \\
       n, \$1, \$2, \$3, 80, 1, "38", n, "H", "", "IUM", " "}' |\\
cat >! \${tempfile}.frac
coordconv XYZIN \${tempfile}.frac \\
         XYZOUT \${tempfile}.pdb << EOF-conv >& /dev/null
CELL \$CELL
INPUT FRAC
OUTPUT PDB ORTH 1
END
EOF-conv
cat \${tempfile}.pdb |\\
nawk '/^ATOM/{print substr(\$0, 31, 8), substr(\$0, 39, 8), substr(\$0, 47, 8)}' |\\
cat >! \${tempfile}center
set max_dist = \`nawk '{print sqrt(\$1*\$1 + \$2*\$2 + \$3*\$3)+3}' \${tempfile}center\`
rm -f \${tempfile}.frac >& /dev/null
rm -f \${tempfile}.pdb >& /dev/null
rm -f \${tempfile}center >& /dev/null


# convert "old" sites to a pdb
cat \${tempfile}old_sites |\\
nawk 'NF>2{++n; printf "%5d%10.5f%10.5f%10.5f%10.5f%5.2f%5d%10d%2s%3s%3s %1s\\n", \\
       n, \$1, \$2, \$3, 80, 1, "38", n, "C", "", "IUM", "A"}' |\\
cat >! \${tempfile}old.frac
coordconv XYZIN \${tempfile}old.frac \\
         XYZOUT \${tempfile}old.pdb << EOF-conv >& /dev/null
CELL \$CELL
INPUT FRAC
OUTPUT PDB ORTH 1
END
EOF-conv
rm -f \${tempfile}old.frac >& /dev/null

# strip off unneeded cards
nawk '/^ATOM/ || /^CRYS/ || /^SCALE/' \${tempfile}old.pdb |\\
cat >! \${tempfile}both.pdb
rm -f \${tempfile}old.pdb >& /dev/null

# append peak list to the combined PDB file
# (the old sites come first, so the peaks start at atom number old_sites + 1)
nawk '/^ATOM/{print}' \$outfile >> \${tempfile}both.pdb
@ start_peaks = ( \$old_sites + 1 )

# renumber the atoms so that distang won't get confused
cat \${tempfile}both.pdb |\\
nawk '/^ATOM/{++n; \$0 = sprintf("ATOM  %5d%s",n,substr(\$0,12))} {print \$0}' |\\
cat >! \${tempfile}.pdb
mv \${tempfile}.pdb \${tempfile}both.pdb >& /dev/null

# use distang to calculate all inter-atom distances (and then sort them)
# sorted by distance first, so the first line kept for each peak is its
# shortest one; the result is then put back into peak order
distang xyzin \${tempfile}both.pdb << EOF |\\
    nawk '\$1=="Z"{print "dist", \$6, \$2, \$9}' | sort -n +3 |\\
    nawk '! seen[\$2]{seen[\$2]=1;print}' | sort -n +1 >! \${tempfile}dists
SYMM \$SG
DIST ALL
RADII C 1
RADII OW \$max_dist
DMIN \$CLOSE_peaks
FROM ATOM 1 to \$old_sites
TO   ATOM \$start_peaks to 99999
END
EOF
rm -f \${tempfile}both.pdb >& /dev/null
#\${tempfile}dists now contains the minimum distance found for each peak
# format: "dist" peak# old# min_dist


# guarantee a label file for the "old" sites
# (a plain "atom N" label; replaced below when something better is available)
cat \${tempfile}old_sites |\\
nawk 'NF>2{++n; printf "label %5d atom %d\\n", n, n}' |\\
cat >! \${tempfile}labels

# get a descriptive label from the "old site" source file
if(-e "\$pdbfile") then
    # input file was a PDB file
    # (the label is the 15 characters of the atom record starting at column 12)
    cat \$sitefile |\\
    nawk '/^ATOM/ || /^HETATM/{++n; printf "label %5d %s\\n", n, substr(\$0,12,15)}' |\\
    cat >! \${tempfile}labels
endif
if(\$?scriptfile) then
    # input file was an mlphare script
    # (the label is the atom card name plus the DERIV line it was found under)
    cat \$sitefile |\\
    nawk '\$1~/^DERIV/{deriv=\$0}\\
          \$1~/^ATOM/{++n; printf "label %5d %6s in %s\\n", n, \$1, deriv}' |\\
    cat >! \${tempfile}labels
endif
if(! -e "\$sitefile") then
    # peaks list was used for distance self-calculation
    cat \${tempfile}peaks.interesting |\\
    nawk 'NF>2{++n;printf "label %5d peak %d\\n", n, n}' |\\
    cat >! \${tempfile}labels
endif

# add these descriptive labels to the peaks list
# (distance and label lines are read first and remembered by number; each
#  peak line then gets its distance and the label of its nearest neighbor)
cat \${tempfile}dists \${tempfile}labels \${tempfile}peaks.interesting |\\
nawk '/^dist/{dist[\$2]=\$4; neighbor[\$2]=\$3; next}\\
      /^label/{label[\$2]=substr(\$0,13); next}\\
      {++n; print \$0, dist[n], "label:", label[neighbor[n]]}' |\\
cat >! \${tempfile}peaks.distlabel
#
# format: xf yf zf X Y Z 1/mult height   dist neighbor name ...


################################################################

# print surviving peaks out to screen
# (the column heading reads u v w instead of x y z when the PATT flag is set)
echo ""
echo "unique peaks:"
set xyz = "x        y        z"
if(\$?PATT) set xyz = "u        v        w"
echo "  \$xyz        mult  height/sigma   dist  from nearest neighbor"
cat \${tempfile}peaks.distlabel |\\
nawk '{printf "%8.5f %8.5f %8.5f   %4d %8.2f %10.1fA  %s\\n", \$1, \$2, \$3, 1/\$7, \$8,\\
       \$9, substr(\$0, index(\$0,"label:")+7)}'


echo "written to \$outfile"

cleanup:
# clean up 
rm -f  \${tempfile}labels >& /dev/null
rm -f  \${tempfile}dists >& /dev/null
rm -f  \${tempfile}peaks.distlabel >& /dev/null
rm -f  \${tempfile}peaks.interesting >& /dev/null
rm -f  \${tempfile}boring_sites >& /dev/null
rm -f  \${tempfile}old_sites >& /dev/null
# these are normally removed as soon as they have been used, but an early
# jump to this label (the "no peaks." case) would otherwise leave them behind
rm -f  \${tempfile}pick.map >& /dev/null
rm -f  \${tempfile}asu >& /dev/null
rm -f  \${tempfile}.xyz >& /dev/null

# exit status 9 tells the caller that the pick did not work
if(\$?BAD) exit 9
exit


################################################################
################################################################
################################################################
Setup:
# Reads the command line and the map header, prepares the list of "boring"
# sites and the closeness cutoff, then jumps back to Return_from_Setup.
set siteCELL
set sitefile

# scan command line
foreach arg ( \$* )
    # recognize map files
    if(("\$arg" =~ *.map)||("\$arg" =~ *.ext)) then
	if(! -e "\$arg") then
	    echo "WARNING: \$arg does not exist! "
	    continue
	endif
	set mapfile = "\$arg"

	continue
    endif
    
    # recognize pdb files
    if(("\$arg" =~ *.pdb)||("\$arg" =~ *.brk)) then
	if(! -e "\$arg") then
	    echo "WARNING: \$arg does not exist! "
	    continue
	endif
	set pdbfile = "\$arg"
	# unit cell from the last CRYST card of the pdb file (if it has one)
	set siteCELL = \`nawk '/^CRYST/{print \$2, \$3, \$4, \$5, \$6, \$7}' \$pdbfile | tail -1\`
	
	continue
    endif
    
    # recognize mlphare scripts? 
    # (any other existing file is treated as one; there is no continue here,
    #  so the argument is also tried as a sigma value below)
    if(-e "\$arg") then
	cat "\$arg" |\\
	nawk '\$1~/^ATOM/ || \$1~/^BADATOM/ || \$1~/^OLDATOM/{print \$3, \$4, \$5}' |\\
	cat >! \${tempfile}boring_sites
	# format: xf yf zf
	set sitefile = "\$arg"
	set scriptfile = "\$arg"
    endif
    
    # recognize sigma-cutoff
    # (anything that evaluates to a non-zero number)
    set temp = \`echo "\$arg" | awk '\$1+0 != 0{print \$1+0}'\`
    if("\$temp" != "") then
	set sigma = "\$temp"
	continue
    endif
end

# get map parameters
echo "go" | mapdump mapin \$mapfile >! \${tempfile}.mapdump
if(\$status) goto Help

set CELL = \`nawk '/Cell dimensions/{print \$4, \$5, \$6, \$7, \$8, \$9; exit}' \${tempfile}.mapdump\`
set SG   = \`nawk '/Space-group/{print \$3; exit}' \${tempfile}.mapdump\`
set GRID = \`nawk '/Grid sampling on x, y, z/{print \$(NF-2), \$(NF-1), \$NF; exit}' \${tempfile}.mapdump\`
rm -f \${tempfile}.mapdump >& /dev/null

# see if this is a Patterson map
# (flagged when the 4th column of the symop.lib line for this space group
#  contains any of the characters a b c d m n or -)
set temp = \`nawk -v SG="\$SG" '\$1==SG{print \$4}' \$CLIBD/symop.lib | nawk '/[abcdmn-]/{print "PATT"}'\`
if("\$temp" != "") set PATT


# convert input coordinate file formats to fractional
# (the cell of the map is used when the pdb file did not supply six values)
if(\$#siteCELL != 6) set siteCELL = ( \$CELL )

if(-e "\$pdbfile") then
    # convert orthogonal PDB coordinates to fractional
    coordconv xyzin \$pdbfile xyzout \${tempfile}.xyz << EOF >> \$logfile
CELL \$siteCELL 
INPUT PDB
OUTPUT FRAC
END
EOF
    # all we need are fractional coordinates
    cat \${tempfile}.xyz |\\
    nawk '{print \$2, \$3, \$4}' |\\
    cat >! \${tempfile}boring_sites
    rm -f \${tempfile}.xyz >& /dev/null
    set sitefile = "\$pdbfile"
endif
if(! -e "\${tempfile}boring_sites") touch \${tempfile}boring_sites
set boring_sites = \`cat \${tempfile}boring_sites | wc -l\`
# format: xf yf zf 

# decide on a "closeness" cutoff for two peaks being the same
# (a CLOSE_peaks value that is already set and not empty is left alone)
if(! \$?CLOSE_peaks) set CLOSE_peaks
if("\$CLOSE_peaks" == "") then
    # set the "close" criteria to be one grid unit
    # (the smallest of cell edge / grid sampling along the three axes)
    echo "\$GRID \$CELL" |\\
    nawk '\$1+0>0{print \$4/\$1}\\
          \$2+0>0{print \$5/\$2}\\
          \$3+0>0{print \$6/\$3}' |\\
sort -n >! \${tempfile}close
    set CLOSE_peaks = \`nawk 'NR==1{printf "%.2f", \$1}' \${tempfile}close\`
    rm -f \${tempfile}close >& /dev/null
endif
# guess? 
if("\$CLOSE_peaks" == "") set CLOSE_peaks = 0.5


goto Return_from_Setup

exit
#################################
# the future? 
- support other coordinate file formats
EOF-pickscript
# make the freshly written script executable for everyone
chmod a+x ${scriptDIR}pick.com



















write_oasis:
#################################################################
# don't overwrite user-modified script
if(-e ${scriptDIR}oasis.com) goto write_mad

# create a quick-and-dirty oasis script
#echo "writing ${scriptDIR}oasis.com"

# Everything up to EOF-oasisscript is the text of oasis.com.  The
# here-document is expanded while it is written: $nawk is substituted now,
# anything written with a backslash stays literal for oasis.com to evaluate.
cat << EOF-oasisscript >! ${scriptDIR}oasis.com
#! /bin/csh -f
#
#	Experimental OASIS script
#
#
# set this to wherever your awk program is
# (falls back to plain awk if the program named here will not run)
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk


set script  = ./scripts/mlphare.com
set mtzfile = ./mtz/all.mtz
set F1      = ""		# these can be assigned on the command line
set SIGF1   = ""		# but are normally just read in from the mtz
set F2      = ""
set SIGF2   = ""
set PHI     = ""

# these might be important
set Ee      = Se
set sites   = ""		# use the default
set fpp     = ""
set lambda  = "0.9794"		# only needed if fpp is unknown
set outfile = ./oasised.mtz

# prefix of every scratch file made by this script
set tempfile = oasis_temp

# no arguments at all: just print the usage message
if(\$#argv == 0) goto Help
goto Setup
# scan command line for:
# mlphare script (and read in atom coordinates)
# new element name
Help:
cat << EOF

usage: \$0 mlphare.com FP DANO sad.mtz \$Ee 6.32e

where:
mlphare.com	- is an mlphare script, containing the refined sites
sad.mtz		- is an MTZ containing FP and DANO for your SAD data
FP		- the full protein F data set
DANO		- the anomalous difference data set
\$Ee		- is the anomalous scatterer
6.32e		- is the expected f" value (in electron equivalents)

defaults:
FP      will default to the first F found in sad.mtz
DANO    will default to the first D found in sad.mtz
f"      will default to the highest anomalous occupancy in mlphare.com
sad.mtz will default to the mtz file used in mlphare.com

note: all mlphare.com really need be is a list of:
ATOM ANO x y z 0 occ
where x,y,z are fractional coordinates, and occ is a "relative" occupancy
EOF

exit 2
ReturnFromSetup:

# Main part of oasis.com: build the oasis keyword file from the values the
# Setup section worked out, show it, run oasis and remove the scratch files.
if(! -e "\$mtzfile") then
    echo "ERROR: \$mtzfile does not exist! "
    goto Help
endif

#####################################################################################
#####################################################################################
#####################################################################################
cat << EOF >! \${tempfile}oasis.in
CEL \$CELL
HCO \$EE \$cell_sites
ANO \$EE \$fpp
LCE 7
FIT
OAS
\$usePHI
LABIN  F1=\$F1 SIGF1=\$SIGF1 F2=\$F2 SIGF2=\$SIGF2 \$TPHI
LABOUT F1=\$F1 SIGF1=\$SIGF1 PHI=PHIOAS W=FOMOAS
EOF
# append the site list
cat \${tempfile}sites >> \${tempfile}oasis.in
echo "END"           >> \${tempfile}oasis.in

# display this, so user can actually see it! 
cat << EOF
Running "oasis" with \$sites \$Ee sites from \$script on
\$F1 and \$F2 from \$mtzfile
Assuming f" of \$Ee is \$fpp at the wavelength used for \$F1
EOF
if("\$PHI" != "") echo "\$PHI will be used for comparison only"
echo "-------------------------------------------------------"
echo "OASIS input:"
cat \${tempfile}oasis.in
echo "-------------------------------------------------------"

# now, actually run oasis:
cat \${tempfile}oasis.in |\\
oasis hklin \$mtzfile hklout \$outfile 
# a failed run exits with status 9; the scratch files are left in place then
if(\$status) then
    echo "Woops! See above for what went wrong."
    exit 9
endif

echo "\$outfile is ready. "

# clean up
rm -f \${tempfile}sites >& /dev/null
rm -f \${tempfile}oasis.in >& /dev/null

exit


Setup:
#####################################################################################
#####################################################################################
#####################################################################################
# First pass over the command line: pick out the mtz file, the mlphare
# script and the element name, then make sure an mtz file can be found.
set TPHI   = ""
set usePHI = ""
set EE

# scan command line for args
foreach arg ( \$* )
    # warn about probable mispellings
    if("\$arg" =~ *.mtz) then
	if(! -e "\$arg") then
	    echo "WARNING: \$arg does not exist! "
	    continue
	endif
	set mtzfile = "\$arg"
	continue
    endif
    
    # anything with "phare" in its name is taken as the mlphare script,
    # as long as it exists and contains the word ATOM
    if("\$arg" =~ *phare*) then
	if(! -e "\$arg") then
	    echo "WARNING: \$arg does not exist! "
	    continue
	endif
	grep "ATOM" "\$arg" >& /dev/null
	if(\$status) then
	    echo "WARNING: no atoms in \$arg"
	    continue
	endif 
	set script = "\$arg"
	continue
    endif
    
    if(("\$arg" =~ [A-Z][a-y])||("\$arg" =~ [HBCNOFPSKVYIWU])) then
	# might as well consider this an element
	set temp = "\$arg"
	if(\$?CLIBD) then
	    # check the CCP4 atom database
	    set temp = \`nawk -v arg=\$arg 'NF==1 && toupper(\$1)==toupper(arg){print; exit}' \$CLIBD/atomsf.lib\`
	endif
	if("\$temp" != "") then
	    set Ee = "\$arg"
	    continue
	endif
    endif
end

grep "ATOM" \$script >& /dev/null
if(\$status) then
    echo "ERROR! no atoms in \$script"
    goto Help
endif

if(! -e "\$mtzfile") then
    # see if we can retrieve the script's mtz
    # (every word of the script ending in .mtz, with quote characters removed)
    set mtzfiles = \`nawk 'BEGIN{RS=" "} {gsub("[\\042\\047]"," ",\$0); print}' \$script | nawk '/\\.mtz\$/{print}'\`
    
    # use the first candidate that mtzdump can read
    foreach mtz ( \$mtzfile \$mtzfiles )
	echo "head" | mtzdump hklin \$mtz >& /dev/null
	if(! \$status) then
	    echo "reading \$mtz"
	    set mtzfile = \$mtz
	    break
	endif
    end
endif

if(! -e "\$mtzfile") then
    echo "ERROR: \$mtzfile does not exist! "
    goto Help
endif

# read important constants from the mtz header
# (SG is assigned three times in a row; only the last value, the name looked
#  up in symop.lib by space group number, is still there afterwards)
echo "go" | mtzdump HKLIN \$mtzfile >! \${tempfile}mtzdump
set CELL  = \`nawk '/Cell Dimensions/{getline;getline;print}' \${tempfile}mtzdump\`
set SG    = \`nawk '/Space group/{print \$5}' \${tempfile}mtzdump\`
set SGnum = \` nawk '/Space group/{print \$NF+0}' \${tempfile}mtzdump \`
set SG = \` nawk -F "[\\047]" '/Space group/{print \$2}' \${tempfile}mtzdump \`
set SG = \` nawk -v num=\$SGnum '\$1==num && NF>5{print \$4}' \${CLIBD}/symop.lib \`
set hiRES = \`nawk '/Resolution Range/{getline;getline;print \$6}' \${tempfile}mtzdump\`

if(\$#CELL != 6) then
    echo "ERROR: unable to read \$mtzfile"
    goto Help
endif

# get data column label names from the mtz file
# stage 1: tag each F, D, P and Q (sigma) column with a one-letter type
# stage 2: number the datasets (a new one starts at every F) and turn each
#          sigma tag into SF or SD according to the column just before it
# stage 3: gather one line per dataset
# \${tempfile}datasets format: dataset# F SIGF D SIGD P
# (P is all phase labels run together)
nawk 'NF>3' \${tempfile}mtzdump |\\
nawk '\$(NF-1)=="F"{print "F", \$NF}\\
      \$(NF-1)=="D"{print "D", \$NF}\\
      \$(NF-1)=="P"{print "P", \$NF}\\
      \$(NF-1)=="Q"{print "S", \$NF}' |\\
nawk '/^F/{++n} {printf "%s", \$1; \\
       if(\$1=="S") printf "%s", last;\\
printf " %d %s\\n",n, \$2; last=\$1}' |\\
nawk '\$1=="F"{F[\$2]=\$NF} \$1=="SF"{SF[\$2]=\$NF} \\
      \$1=="D"{D[\$2]=\$NF} \$1=="SD"{SD[\$2]=\$NF} \\
      \$1=="P"{P=P \$NF} \\
      END{for(i in F){\\
	print i, F[i], SF[i], D[i],SD[i], P, " ";}}' |\\
sort -n >! \${tempfile}datasets


# one last pass through command line
# allow user overrides of all internal variables
# (column labels, a resolution range such as 20-3.5A, an f" value such as
#  6.32e, and the words no/not/don't/ignore/except, which set the NO flag
#  that is tested when a label comes up)
set i = 0
echo -n "" >! \${tempfile}userlabels
while( \$i < \$#argv )
    @ i = ( \$i + 1 )
    @ nexti = ( \$i + 1 )
    @ lasti = ( \$i - 1 )
    # keep the neighbor indices inside the argument list
    if(\$nexti > \$#argv) set nexti = \$#argv
    if(\$lasti < 1) set lasti = 1
    set arg = "\$argv[\$i]"
    
    # see if a dataset label was given
    grep " \$arg " \${tempfile}datasets >& /dev/null
    if(! \$status) then
	if(\$?NO) then
	    # user doesn't want this label
	    # filter it out of the input files
	    egrep -v " \$arg " \${tempfile}datasets >! \${tempfile}
	    mv \${tempfile} \${tempfile}datasets
	    unset NO
	else
	    # must want only this label?
	    cat \${tempfile}mtzdump |\\
	    nawk -v label=\$arg 'NF>2 && \$NF==label{print \$NF}' |\\
	    cat >> \${tempfile}userlabels
	endif
	continue
    endif
    
    # only look at non-file words now
    if(! -e "\$arg") then
	if("\$arg" =~ [0-9]*) then
	    # we have a number
	    if(("\$arg" =~ *A)||("\$argv[\$nexti]" == "A")) then
		# user-preferred resolution limits
		set temp = \`echo "\$arg" | nawk 'BEGIN{FS="-"} \$1+0 > 0.1{print \$1+0} \$2+0 > 0.1{print \$2+0}'\`
		if(\$#temp != 1) then
		    set temp = \`echo \$temp | nawk '\$1>\$2{print \$1, \$2} \$2>\$1{print \$2, \$1}'\`
		    if(\$#temp == 2) then
			set loRES = "\$temp[1]"
			set hiRES = "\$temp[2]"
		    endif
		else
		    # this isn't used, but...
		    if("\$temp" != "") set hiRES = "\$temp"
		endif
		unset NO
		continue
	    endif
	    
	    # maybe setting f" value?
	    if(("\$arg" =~ *e)||("\$argv[\$nexti]" == "e")) then
		set temp = \`echo "\$arg" | nawk '\$1+0>0.1 && \$1+0<100{print \$1+0}'\`
		if(\$#temp == 1) then
		    set fpp = "\$temp"
		    unset NO
		    continue
		endif
	    endif
	endif
	
	# allow "NO" logic to carry through
	unset NO
	if(("\$arg" == "no")||("\$arg" == "not")) set NO
	if(("\$arg" == "don't")||("\$arg" == "ignore")) set NO
	if("\$arg" == "except") set NO
    endif
end

rm -f \${tempfile}mtzdump >& /dev/null

#######################################################
# see if user specified particular labels
set temp = \`cat \${tempfile}userlabels | wc -l\`
if(\$temp != 0) then
    # turn the "user" labels into real label files
    # NOTE(review): in the nawk program below the "break" is not inside the
    # "if", so the j-loop stops after j=2 and only the F column of each
    # dataset line is compared against the labels - confirm this is intended.
    cat \${tempfile}userlabels \${tempfile}datasets |\\
    nawk 'NF==1{++n; label[n]=\$NF}\\
          NF>1 {for(i in label) for(j=2;j<=NF;++j){\\
		if(label[i]==\$j) print i, \$2,\$3,\$4,\$5;break}}' |\\
    sort -n >! \${tempfile}
    mv \${tempfile} \${tempfile}datasets >& /dev/null
endif
if((\$temp == 0)&&("\$F1" != "")) then
    # apply the labels from the top of this script?
    # (same selection as above; the datasets file is only replaced when
    #  at least one line came out of it)
    echo "\$F1 \$F2 \$SIGF1 \$SIGF2 \$PHI" | nawk '{for(i=1;i<=NF;++i) print \$i}' >! \${tempfile}userlabels
    cat \${tempfile}userlabels \${tempfile}datasets |\\
    nawk 'NF==1{++n; label[n]=\$NF}\\
          NF>1 {for(i in label) for(j=2;j<=NF;++j){\\
		if(label[i]==\$j) print i, \$2,\$3,\$4,\$5;break}}' |\\
    sort -n >! \${tempfile}
    set temp = \`cat \${tempfile} | wc -l\`
    if(\$temp != 0) mv \${tempfile} \${tempfile}datasets >& /dev/null    
endif
rm -f \${tempfile}userlabels >& /dev/null

###############################################################
# assign actual "LABIN" cards for oasis for each dataset
# (anything still empty is taken from the first line of the datasets file)
if("\$F1" == "")    set F1 = \`head -1 \${tempfile}datasets | nawk '{print \$2}'\`
if("\$F2" == "")    set F2 = \`head -1 \${tempfile}datasets | nawk '{print \$4}'\`
if("\$SIGF1" == "") set SIGF1 = \`head -1 \${tempfile}datasets | nawk '{print \$3}'\`
if("\$SIGF2" == "") set SIGF2 = \`head -1 \${tempfile}datasets | nawk '{print \$5}'\`
# a phase label that is not in the datasets file is dropped
grep " \$PHI " \${tempfile}datasets >& /dev/null
if(\$status) set PHI = ""
if("\$PHI" == "") set PHI = \`head -1 \${tempfile}datasets | nawk '{print \$6}'\`
# with a phase column available, hand it to oasis as TPHI and add the PHI card
if("\$PHI" != "") then
    set TPHI = "TPHI=\$PHI"
    set usePHI = "PHI"
endif

rm -f \${tempfile}datasets >& /dev/null

# oasis needs this to be uppercase
set EE = \`echo "\$Ee" | nawk '{print toupper(\$1)}'\`

###############################################################
# convert MLphare sites to OASIS sites
# (one line per ATOM card: fractional x y z, a running number, and the 7th
#  field of the card divided by the largest such value in the script; only
#  the first line carries the POS keyword)
set maxocc = \`nawk '\$1 ~ /^ATOM/{print \$7}' \$script | sort -n | tail -1\`
cat \$script |\\
nawk -v Ee=\$EE -v norm=\$maxocc 'BEGIN{pos = "POS"}\\
 \$1 ~ /^ATOM/{++i; printf "%-4s %2s %7s %7s %7s %3d %7.5f\\n", pos, toupper(Ee), \$3, \$4, \$5, i, \$7/norm; pos=""}' |\\
cat >! \${tempfile}sites
set sites = \`cat \${tempfile}sites | wc -l\`

# need number in cell
# NOTE(review): this multiplies by the 3rd column of the symop.lib line for
# the space group - confirm that column is the count wanted on the HCO card.
set ASU_per_CELL = \`nawk -v SG=\$SG '\$4==SG{print \$3}' \$CLIBD/symop.lib | head -1\`
set cell_sites = \`echo "\$sites \$ASU_per_CELL" | nawk '{print \$1 * \$2}'\`

# this is probably the best indicator of f" anyway
if("\$fpp" == "") set fpp = "\$maxocc"

if("\$fpp" == "") then
    # use crossec to calculate this from the wavelength?
    set fpp = 1
endif


goto ReturnFromSetup


exit
#############################################
# the future?

read solve.status (where do we get \$Ee?)
EOF-oasisscript
# make the freshly written script executable for everyone
chmod a+x ${scriptDIR}oasis.com













write_mad:
#################################################################
# Write ${scriptDIR}mad.awk, unless a copy of it is already there.
#
# don't overwrite user-modified script
# (an existing mad.awk is left untouched and control skips to write_next)
if(-e ${scriptDIR}mad.awk) goto write_next

# write the MAD-constraint averaging awk program
#echo "writing ${scriptDIR}mad.awk"

cat << EOF-madscript >! ${scriptDIR}mad.awk
#! $nawk -f
#
#   Constrain an mlphare script to have sites with
#   the same B-factor and overall occupancy,
#   regardless of wavelength
#
#   That is, impose the constraint that atoms with the same "ATOM#"
#   card should correspond to the same "site", and have the same xyz 
#   coordinate, B-factor, and "true" occupancy, regardless of wavelength.
#   The mlphare occupancies, however are modified by an fp and fpp 
#   value that is the same for every atom in a particular DERIV block (wavelength).
#
#   occ  = fp*tocc(site)
#   aocc = fpp*tocc(site)
#   Bfac = Bfac(site)
#
#   This script assumes a single value for fp and fpp for each wavelength, 
#   and, therefore, should not be used for double-edge MAD experiments! 
#
#   Input:   an mlphare script.  Only LABIN, DERIV and ATOM cards are
#            interpreted.  On an ATOM card words 3-5 are taken as x, y, z,
#            words 6 and 7 as occ and aocc, and the last word as B-factor.
#   Output:  every input line, with each ATOM card rewritten from the
#            averaged values, followed by a "Summary:" of the mean occ
#            and aocc found for each wavelength.
#            Nothing is printed if there are no DERIV or no ATOM cards.
#
#   Variables (set with -v):
#     same_occ=1   force all sites to have same (effective) occupancy
#     same_B=1     force all sites to have same B-factor
#
BEGIN{
    if(same_occ=="") same_occ = 0
    if(same_B=="")   same_B   = 0
}

# pick the column labels out of the LABIN card
# (a card ending in "-" is continued on the next line)
\$1 ~ /^LABIN/ || labinline{
    labinline= ( \$NF == "-" )
    for(i=1;i<=NF;++i){
        if(\$i ~ /^FP=/)
        {
            FP = substr(\$i,index(\$i,"=")+1)
        }
        if(\$i ~ /^FPH/)
        {
            # the number after "FPH" is the wavelength (DERIV) number
            wave = substr(\$i,4)+0
            FPH[wave] = substr(\$i,index(\$i,"=")+1)
        }
        if(\$i ~ /^DPH/)
        {
            wave = substr(\$i,4)+0
            DPH[wave] = substr(\$i,index(\$i,"=")+1)
        }
    }
}

# every DERIV card starts a new wavelength
\$1 ~ /^DERIV/{
    ++waves
}

\$1 ~ /^ATOM/{
    # use atom number as "site" identifier
    atomnum = substr(\$1, 5)+0

    # running sums for the per-site means
    Bfac[atomnum] += \$NF;
    X[atomnum] += \$3; Y[atomnum] += \$4; Z[atomnum] += \$5;
    ++count[atomnum];

    # running sums for the per-wavelength mean occupancies
     fp[waves] += \$6;
    fpp[waves] += \$7;
    ++atoms[waves];

    # remember each atom's occupancy
     Occ[waves " " atomnum] = \$6;
    aOcc[waves " " atomnum] = \$7;

    ++tatoms;
    B += \$NF
}

# keep every line, so the whole script can be re-printed at the end
{line[NR] = \$0}

END{
    # without DERIV or ATOM cards there is nothing to constrain
    if(! waves) exit
    if(! tatoms) exit
    # overall, average B-factor
    B /= tatoms

    # compute mean occ (fp) and aocc (fpp) for each wavelength
    for(wave in fp)
    {
        # normalize anomalous and dispersive differences
        if(atoms[wave])
        {
             fp[wave] =  fp[wave]/atoms[wave];
            fpp[wave] = fpp[wave]/atoms[wave];
        }
    }

    # add up combined occupancy for each atom (averaged over all wavelengths)
    for(atomnum in Bfac)
    {
        if(count[atomnum])
        {
            # compute mean B-factor for each site
            Bfac[atomnum] = Bfac[atomnum]/count[atomnum];

            # compute mean XYZ position for each site
            X[atomnum] = X[atomnum] / count[atomnum];
            Y[atomnum] = Y[atomnum] / count[atomnum];
            Z[atomnum] = Z[atomnum] / count[atomnum];
        }

        # average the occupancy of this site, relative to the mean
        # occupancy of each wavelength it was listed under
        occ_count = 0
        for(wave in fp)
        {
            # an atom that is not listed under this wavelength has no
            # occupancy to contribute, and must not dilute the average
            key = wave " " atomnum
            if(! (key in Occ)) continue

            if( fp[wave])
            {
                tOcc[atomnum] += Occ[key]/fp[wave]
                ++occ_count
            }
            if( fpp[wave])
            {
                tOcc[atomnum] += aOcc[key]/fpp[wave]
                ++occ_count
            }
        }
        # divide by total number of occupancies contributing to this "total"
        if(occ_count) tOcc[atomnum] = tOcc[atomnum]/occ_count

        # optionally set all occs to same value
        if(same_occ) tOcc[atomnum] = 1;
        if(same_B)   Bfac[atomnum] = B;
    }


    # now re-write the lines in the script
    wave = 0
    for(i=1;i<=NR;++i)
    {
        split(line[i],w);
        if(w[1] ~ /^DERIV/) ++wave

        if(w[1] ~ /^ATOM/)
        {
            atomnum = substr(w[1], 5)+0

            line[i] = sprintf(" ATOM%-3d %3s  %6.3f %6.3f %6.3f %6.3f %6.3f BFAC %8.3f",\\
            atomnum, w[2], X[atomnum], Y[atomnum], Z[atomnum], \\
            fp[wave]*tOcc[atomnum], fpp[wave]*tOcc[atomnum], Bfac[atomnum])
        }

        print line[i];
    }

    # report the mean occupancies, labelled with the LABIN column names
    print "Summary:"
    for(wave=1;wave<=waves;++wave)
    {
        printf "%s - %s = %6.3f \\n", FPH[wave], FP, fp[wave];
    }
    for(wave=1;wave<=waves;++wave)
    {
        printf "%s = %6.3f \\n", DPH[wave], fpp[wave];
    }
    # (disabled) per-wavelength debugging output:
#	printf "DERIV %d %6.3f %6.3f\\n", wave, fp[wave], fpp[wave];
}

EOF-madscript
chmod a+x ${scriptDIR}mad.awk








# landing point for the "script already exists" guard in write_mad
write_next:

# hand control back to the Return_Setup_scripts label (defined outside this section)
goto Return_Setup_scripts


















Unwrap_Awk_Scripts:
################################################################################

 #    #  #    #  #    #  #####     ##    #####
 #    #  ##   #  #    #  #    #   #  #   #    #
 #    #  # #  #  #    #  #    #  #    #  #    #
 #    #  #  # #  # ## #  #####   ######  #####
 #    #  #   ##  ##  ##  #   #   #    #  #
  ####   #    #  #    #  #    #  #    #  #

################################################################################
#   unwrap some utility scripts
#
#   Writes ${tempfile}mtzstuff.awk and ${tempfile}sitereader.awk, makes them
#   executable, then jumps to the Return_Unwrap_Awk_Scripts label.
################################################################################
cat << EOF-mtzstuff >! ${tempfile}mtzstuff.awk
#! $nawk -f
#
#
#	Organize info from an mtzdump in more accessible format
#
#	Output lines:
#	    CELL  the cell line, as found in the log
#	    SYMM  space group number
#	    RESO  low and high resolution limit
#	    COL   label, type, mean and completeness of one column
#	    F:    F label, its sigma label, and mean F over mean sigma
#	    D:    D label, its sigma label, and mean D over mean sigma
#
#
# resolution limits
/Resolution Range/ {
    getline; getline;
    hires = \$6;
    lores = \$4;
}

# cell
/Cell Dimensions/ {
    getline; getline;
    cell = \$0
}

# space group
/Space group/{
    SG = \$NF+0
}

# column labels: every word, up to the next blank line
/Column Labels/{
    getline; getline;
    while(NF>0)
    {
        for(i=1;i<=NF;++i)
        {
            ++labels;
            label[labels] = \$i
        }
        # at end-of-file getline leaves the record (and NF) untouched,
        # so a truncated log would keep this loop running forever
        if((getline) <= 0) break
    }
}

# column types: same layout as the labels
/Column Types/{
    getline; getline;
    while(NF>0)
    {
        for(i=1;i<=NF;++i)
        {
            ++t;
            type[t] = \$i
        }
        # stop at end-of-file (see above)
        if((getline) <= 0) break
    }
}

# statistics table: lines that end with a known column label
/OVERALL FILE STATISTICS/,/LIST OF REFLECTIONS/{
    # the mean is the fifth word from the end, so shorter lines
    # can not be statistics lines (and would give a bad field number)
    if(NF > 4)
    {
        for(l in label)
        {
            if(\$NF == label[l])
            {
                # retrieve interesting numbers from the summary list
                mean[l] = \$(NF-4)+0
                completeness[l] = substr(\$0, 32)+0
            }
        }
    }
}



END {
    # now print everything out
    print "CELL", cell
    print "SYMM", SG
    print "RESO", lores, hires
    for(l=1;l<=labels;++l)
    {
        printf "COL %-15s %s %6.2f %6.2f%%\\n", label[l], type[l], mean[l], completeness[l]
    }

    # now try to equate Fs and DANOs with their sigmas
    for(l=1;l<=labels;++l)
    {
        # structure factors
        if(type[l]=="F")
        {
            ++fs;
            F[fs]=label[l];
            meanF[F[fs]]=mean[l];

            last = F[fs];
        }

        # anomalous differences (in F units)
        if(type[l]=="D")
        {
            ++ds;
            D[ds]=label[l];
            meanD[D[ds]]=mean[l];

            last = D[ds];
        }

        # sigmas (of anything)
        if(type[l]=="Q")
        {
            ++ss;
            Q[ss]=label[l];
            meanQ[Q[ss]]=mean[l];

            # putatively assign sigmas to most recent F or D (almost always right)
            sigma[last]=Q[ss];
        }
    }

    # now go see if any sigmas have nearly identical names with an F or DANO
    # (a name match overrides the positional guess made above)
    for(s=1;s<=ss;++s)
    {
        # run down all Fs
        for(f=1;f<=fs;++f)
        {
            # look for SIG(name) to match name
            if(Q[s] == "SIG" F[f])
            {
                sigma[F[f]]=Q[s];
            }
        }

        # same for DANOs
        for(d=1;d<=ds;++d)
        {
            if(Q[s] == "SIG" D[d])
            {
                sigma[D[d]]=Q[s];
            }
        }
    }

    # now print out putative pairs, with the ratio of the two means
    # (left blank when the sigma is unknown or its mean is zero)
    for(f=1;f<=fs;++f)
    {
        if(meanQ[sigma[F[f]]]!=0)
        {
            meanF[F[f]] = meanF[F[f]] / meanQ[sigma[F[f]]];
        }
        else
        {
            meanF[F[f]]="";
        }

        printf "F: %-10s %-10s %-10s\\n", F[f], sigma[F[f]], meanF[F[f]];
    }
    for(d=1;d<=ds;++d)
    {
        if(meanQ[sigma[D[d]]]!=0)
        {
            meanD[D[d]] = meanD[D[d]] / meanQ[sigma[D[d]]];
        }
        else
        {
            meanD[D[d]]="";
        }

        printf "D: %-10s %-10s %-10s\\n", D[d], sigma[D[d]], meanD[D[d]];
    }
}
EOF-mtzstuff
chmod a+x ${tempfile}mtzstuff.awk













cat << EOF-sitereader >! ${tempfile}sitereader.awk
#! $nawk -f
#
#
#	Retrieve arbitrarily formatted heavy-metal sites
#
#	looks for contiguous blocks of lines 
#	all of them containing three consecutive numbers written
#	to 3 or more decimal places and between -1.1 and +1.1
#
#	Lines with fewer than three words are skipped, and do not end a block.
#	The last block found is printed as mlphare-style ATOM cards.
#	The numbers that follow the coordinates are searched for columns that
#	look like occupancy, anomalous occupancy and B-factor; a value that
#	can not be assigned is printed as ?.???
#
#	Variables (set with -v):
#	    B, wilsonB, expectB    expected B-factor  (default 50)
#	    OCC, expectOCC         expected occupancy (default 1)
#
BEGIN{
    # estimate of B, to aid in finding real B-factors
    if(! B) B = 50
    if(! wilsonB) wilsonB = B
    if(! expectB) expectB = wilsonB

    if(! OCC) OCC = 1
    if(! expectOCC) expectOCC = OCC
}

# line must have at least 3 words

NF>=3{
    coords=0;
    # recognize mlphare format
    # (column numbers count from the x coordinate, words are dropped
    #  from the list unless they are purely numeric)
    if(\$1 ~ /^ATOM/ && \$2 == "ANO" && \$8 == "BFAC")
    {
        OCC_col=4; AOCC_col=5; BFAC_col=6;
    }

    for(i=1;i<=NF;++i)
    {
        # pattern to recognize fractional coordinates:
        # optional sign, 0 or 1, a literal decimal point, 3 or more digits
        if((\$i ~ /^-?[01][.][0-9][0-9][0-9]/)&&(\$i+0>=-1.1)&&(\$i+0<=1.1))
        {
            ++coords
        }
        else
        {
            coords=0
        }

        # check for 3 consecutive hits
        if(coords==3)
        {
            # this line is a site
            ++site;
            ++sites;

            # remember the coordinates, and every purely numeric
            # word that follows them on this line
            XYZ[site] = ""
            for(j=i-2;j<=NF;++j)
            {
                if(\$j !~ /[^0-9.-]/)
                {
                    XYZ[site] = XYZ[site] " " \$j;
                }
            }

            # reset site list with each new block
            if(sites>site) sites = site;

            # the rest of the line has been dealt with
            break
        }
    }

    if(coords != 3)
    {
        # this was not a site, so reset the site counter for the
        # next block of sites
        site = 0;
    }
}

END {
    # bail if we found no sites
    if(! sites) exit

    # see if we can assign the rest of the numbers
    # (despite its name, maxnums ends up as the length of the shortest list)
    maxnums = 10
    for(site=1;site<=sites;++site)
    {
        nums=split(XYZ[site], num)

        # remember length of shortest list
        if(nums<maxnums) maxnums = nums;

        # compute the mean of each position
        for(i=4;i<=nums;++i)
        {
            mean[i] += num[i];
        }
    }
    for(i=4;i<=maxnums;++i)
    {
        # finish computing the mean
        if(sites) mean[i]=mean[i]/sites;

        # keep track of the column with biggest numbers
        if(mean[i]>mean[max_value]) max_value = i;
    }

    # biggest B-factor dominates
    # (the column with the biggest mean wins if it is within 50% of the
    #  expected B factor, even over a column that was already assigned)
    if(sqrt((mean[max_value]-expectB)^2)/expectB < 0.5)
    {
        BFAC_col=max_value;
    }

    # start on right side searching for B factor
    for(i=maxnums;i>3;--i)
    {
        # check each column against the expected B value
        if(sqrt((mean[i]-expectB)^2)/expectB < 0.5)
        {
            # this column is within 50% of expected B factor
            if(! BFAC_col) BFAC_col=i;
        }
    }

    # start at 4th position for occupancy
    for(i=4;i<=maxnums;++i)
    {
        # real occupancy could be positive or negative
        if((sqrt((sqrt(mean[i]^2)-expectOCC)^2)/expectOCC < 0.8)&&(i != BFAC_col))
        {
            # this column is within 80% of expected occupancy value
            # (ignoring sign)
            if(! OCC_col) OCC_col=i;
        }

        # anomalous occupancy is always positive, and usually after OCC
        if((sqrt((mean[i]-sqrt(expectOCC^2))^2)/expectOCC < 0.8) && (i != OCC_col) && (i != BFAC_col))
        {
            # this column is within 80% of expected occupancy value
            if(! AOCC_col) AOCC_col=i;
        }
    }

    # now print out the last, contiguous block of sites
    for(site=1;site<=sites;++site)
    {
        # preliminary assignment of variables
        nums=split(XYZ[site], num)
        X   = num[1];
        Y   = num[2];
        Z   = num[3];
        OCC ="?.???";
        AOCC="?.???";
        BFAC="?.???";

        # assign values (if they are known)
        if( OCC_col)  OCC = sprintf("%6.3f", num[OCC_col]);
        if(AOCC_col) AOCC = sprintf("%6.3f", num[AOCC_col]);
        if(BFAC_col) BFAC = sprintf("%8.3f", num[BFAC_col]);

        if((AOCC=="?.???")&&(OCC!="?.???"))
        {
            # might as well...
            # (use the absolute value of the real occupancy)
            AOCC = sprintf("%6.3f", sqrt(OCC^2));
        }

        # now print out the site
        printf " ATOM%-3d ANO  %6.3f %6.3f %6.3f %6s %6s BFAC %8s\\n", site, X, Y, Z, OCC, AOCC, BFAC;
    }
}
EOF-sitereader
chmod a+x ${tempfile}sitereader.awk


# the utility scripts are written: resume at the Return_Unwrap_Awk_Scripts label
goto Return_Unwrap_Awk_Scripts




# not reached through the goto above; presumably a safety stop that keeps
# the free-form notes below from ever being run as commands
exit
############################################

todo:
change atom-find cutoff for HURRY mode
update README file
test site permutation logic
test CUI
don't reject atoms with OCC==AOCC==0 in MAD mode


# needed?
"NO_B" refinement?
use oasis.com
More aggressive atom eliminator?
automatic heavy-atom cutoff in DM



# wish list
implement minus-one feature
skeletonize in DM?
anisotropic Bs
Run SOLVE?
read in scalepack files?
optionally impose MAD restraints on sites

